/******************************************************************-*-c-*-
 * Myricom GM networking software and documentation                      *
 * Copyright (c) 1996, 1997 by Myricom, Inc.                             *
 * All rights reserved.  See the file `COPYING' for copyright notice.    *
 *************************************************************************/

/* author: glenn@myri.com */

/****************************************************************
 * RDMA state machine
 ****************************************************************/

/* GM_NACK_IT (port, p, size): NACK the packet being handled after a
   receive token could not be allocated for it.

   port  pointer to the target port structure; its
         unacceptable_recv_sizes[] and open fields are read.
   p     pointer to the packet header; only target_subport_id is read.
   size  receive size, used as a bit index into the
         unacceptable_recv_sizes[] mask selected by the packet's
         priority.

   Exactly one NACK routine is invoked, chosen in this order:
     1. the size bit is set in the mask for the packet's priority
                                   -> rdma_nack_reject (LZERO, LONE)
     2. the port is not open       -> rdma_nack_down (LZERO, LONE)
     3. otherwise                  -> rdma_nack (LZERO, LONE)
   When GM_DEBUG_RECV_TOKENS is nonzero the reason is printed first.

   The mask bit is built from an unsigned 1 because sizes can reach
   31 and left-shifting a signed 1 into the sign bit is undefined.

   Every argument may be evaluated more than once, so arguments must
   be free of side effects.  LZERO and LONE must be defined wherever
   the macro is expanded.

   NOTE(review): only the first two debug branches call fflush; the
   "no receive token" branch does not.  Confirm whether that is
   intentional before making them uniform. */

#define GM_NACK_IT(port, p, size) do {					\
  if ((port)->unacceptable_recv_sizes[GM_SUBPORT_PRIORITY		\
				     ((p)->target_subport_id)]		\
      & (1U << (size)))							\
    {									\
      if (GM_DEBUG_RECV_TOKENS)						\
	{								\
	  gm_printf (GM_STR ("rejected receive (priority: 0x%x,"	\
			     " size: 0x%x)\n"),				\
		     GM_SUBPORT_PRIORITY ((p)->target_subport_id),	\
		     (size));						\
	  fflush (stdout);						\
	}								\
      rdma_nack_reject (LZERO, LONE);					\
    }									\
  else if (!((port)->open))						\
    {									\
      if (GM_DEBUG_RECV_TOKENS)						\
	{								\
	  gm_printf (GM_STR ("port closed\n"));				\
	  fflush (stdout);						\
	}								\
      rdma_nack_down (LZERO, LONE);					\
    }									\
  else									\
    {									\
      if (GM_DEBUG_RECV_TOKENS)						\
	{								\
	  gm_printf (GM_STR ("no receive token (priority: 0x%x,"	\
			     " size: 0x%x)\n"),				\
		     GM_SUBPORT_PRIORITY ((p)->target_subport_id),	\
		     (size));						\
	}								\
      rdma_nack (LZERO, LONE);						\
    }									\
} while (0)

/* This macro causes the p and c variables to be recomputed for error
   handling.  This HACK prevents the compiler from pushing the
   earlier computed values during the critical path code. */

MARK_LABEL (L_rdma__rdma_chunk_, LZERO)
{
  unsigned int subtype;

  ASSERT_HANDLER (FINISH_RDMA_EVENT, L_rdma__token_done,);
  gm_assert (gm.free_recv_chunk_cnt != 2);	/* this chunk is not free */
  gm_assert (NOTICED_NOT (RDMAING));
  gm_assert (NOTICED_NOT (SDMAING));
  gm_assert (NOTICED (RDMA_PENDING));

  gm_puts ("gm_rdma: received a chunk.\n");

  /* Immediately switch on the message type, since this may not really
     be a legitimate GM message, but actually a hacked raw receive
     message with only a valid GM type field. */

  /* pre */ subtype = gm.recv_chunk[LZERO].packet.as_gm.header.subtype;
#ifdef POLL_PENDING
  NOTICE (POLL_PENDING);
  GM_STBAR ();
#else
  ;
  ;
#endif
  switch (subtype)
    {
      /* Handle control messages for probing a connection */

#if GM_ENABLE_DIRECTED_SEND
      /************
       * Directed sends
       ************/

      /* multiple tags allow future optimization */
    case GM_DIRECTED_DATA_SUBTYPE:
    case GM_DIRECTED_HEAD_DATA_SUBTYPE:
    case GM_DIRECTED_BODY_DATA_SUBTYPE:
    case GM_DIRECTED_TAIL_DATA_SUBTYPE:
      {
	gm_directed_packet_t *dp;
	gm_packet_header_t *p;
	gm_connection_t *c;
	gm_up_t target_addr;
	gm_port_protected_lanai_side_t *port;
	gm_u32_t c__ack_packet_sexno_whole;
	gm_u32_t p__sexno_whole;
	unsigned int p__length;
	unsigned int p__sender_node_id;
	unsigned int p__target_subport_id;
	unsigned int port_id;
	unsigned int wasted_bytes;

	p = &gm.recv_chunk[LZERO].packet.as_gm.header;
	/* pre */ p__sender_node_id = p->sender_node_id;
	/* pre */ p__sexno_whole = p->sexno.whole;
	/* pre */ p__target_subport_id = p->target_subport_id;
	c = &gm_connection[p__sender_node_id];
	/* pre */ p__length = p->length;
	dp = (gm_directed_packet_t *) p;
	target_addr = dp->target_addr;
	/* pre */ c__ack_packet_sexno_whole = c->ack_packet.sexno.whole;
	wasted_bytes = target_addr & (GM_MAX_DMA_GRANULARITY - 1);
	port_id = GM_SUBPORT_PORT (p__target_subport_id);

	/* Check for out-of-sequence messages. */

	if (p__sexno_whole == c__ack_packet_sexno_whole)
	  {
	    gm_lp_t lar, end_lar;

	    lar = (gm_lp_t) (dp->payload + wasted_bytes);
	    end_lar = GM_HEADER_PAYLOAD (p) + p__length;
	    gm_assert (lar < end_lar);

	    if (GM_DEBUG_DIRECTED_SEND)
	      {
		gm_printf (GM_STR ("received directed data:"));
		gm_hex_dump (lar, (char *) end_lar - (char *) lar);
	      }

#if GM_ENABLE_SECURITY
#error missing directed send checks
#endif

	    if (GM_DMA_GRANULARITY == 1)
	      {
		USER_RDMA (lar, target_addr, (char *) end_lar - (char *) lar,
			   port_id,
			   GM_REFERENCE_LABEL (L_rdma__continue_chunk),
			   GM_REFERENCE_LABEL (L_rdma__chunk_done), 0);
	      }
	    else if (0 == ((GM_DMA_GRANULARITY - 1)
			   & ((gm_u32_t) lar | (gm_u32_t) end_lar)))
	      {
		/* Got lucky: start & end of packet are DMA aligned. */
		USER_RDMA (lar, target_addr, (char *) end_lar - (char *) lar,
			   port_id,
			   GM_REFERENCE_LABEL (L_rdma__continue_chunk),
			   GM_REFERENCE_LABEL (L_rdma__chunk_done),
			   0);
	      }
	    else
	      {
		gm_recv_token_t *port__free_recv_tokens;
		gm_recv_token_t *rt;

		gm_assert (!recv_token_hash_verify (p->sender_node_id,
						    p->sender_subport_id,
						    p->target_subport_id));
		/* DMA the DMAable portion of the packet into host memory */

		/* pre */ p__length = p->length;

		gm_assert (GM_POWER_OF_TWO (GM_DMA_GRANULARITY));
		gm_assert (GM_POWER_OF_TWO (GM_MAX_DMA_GRANULARITY));

		port = &gm_port[port_id];
		rt = alloc_lanai_recv_token ((GM_SUBPORT_PRIORITY
					      (p->target_subport_id)),
					     port, GM_DIRECTED_TAG_SIZE);
		if (rt)
		  {

		    /* Allocated a recv token. */

		    /* Packet boundaries are not DMA aligned. */
		    gm_lp_t dma_start, dma_end;

		    /* DMA the interior of the packet. */
		    dma_start = GM_ROUNDUP (lp, lar, GM_MAX_DMA_GRANULARITY);
		    dma_end = GM_ALIGN (lp, end_lar, GM_MAX_DMA_GRANULARITY);

		    if (dma_start < dma_end)
		      {
			if (GM_DEBUG_DIRECTED_SEND)
			  {
			    gm_printf (GM_STR ("dmaing for directed send\n"));
			  }
			USER_RDMA
			  (dma_start,
			   target_addr + ((char *) dma_start - (char *) lar),
			   (char *) dma_end - (char *) dma_start,
			   port_id,
			   GM_REFERENCE_LABEL (GM_SUB_N_CAT
					       (L_rdma__continue_last_chunk_,
						LZERO)),
			   GM_REFERENCE_LABEL (GM_SUB_N_CAT
					       (L_rdma__last_chunk_done_,
						LZERO)),
			   0);
		      }
		    else
		      {
			if (GM_DEBUG_DIRECTED_SEND)
			  {
			    gm_printf
			      (GM_STR ("not dmaing for directed send\n"));
			  }
			SET_HANDLER (FINISH_RDMA_EVENT,
				     L_rdma__last_chunk_done_, LZERO);
		      }

		    /* Construct a recv event structure describing the
		       receive.  It will be DMAd to the host as soon
		       as the current DMA is completed. */

		    {
		      gm_u32_t a, b;
		      gm_directed_send_notification_t *e;
		      unsigned int p__sender_subport_id;

		      e = (void *) &gm.recv_token_dma_stage;
		      p__sender_subport_id = p->sender_subport_id;

		      /* Copy the bytes the host may have to copy.
		         HACK: this block of code takes advantage of
		         the fact that the LANai automatically aligns
		         word accesses.  HACK: This block clobbers
		         other fields in the event. */
		      gm_assert (GM_MAX_DMA_GRANULARITY == 8);

		      a = ((gm_u32_t *) dma_end)[0];
		      b = ((gm_u32_t *) dma_end)[1];

		      if (GM_DEBUG_DIRECTED_SEND)
			{
			  gm_printf (GM_STR ("bounds: %08x %08x"), a, b);
			}
		      
		      /* pre */ p__sender_node_id = p->sender_node_id;
		      ((gm_u32_t *) e->end_copy_bytes)[0] = a;
		      ((gm_u32_t *) e->end_copy_bytes)[1] = b;
		      a = ((gm_u32_t *) dma_start)[-2];
		      b = ((gm_u32_t *) dma_start)[-1];

		      if (GM_DEBUG_DIRECTED_SEND)
			{
			  gm_printf (GM_STR ("...%08x %08x\n"), a, b);
			}

		      e->sender_node_id = p__sender_node_id;
		      ((gm_u32_t *) e->start_copy_bytes)[0] = a;
		      ((gm_u32_t *) e->start_copy_bytes)[1] = b;
		      e->type = _GM_NEW_DIRECTED_SEND_NOTIFICATION_EVENT;
		      e->buffer = target_addr;
		      e->length = (char *) end_lar - (char *) lar;;

		      /* HACK: write this last, since it is clobbered
		         during the write of e->start_copy_bytes
		         above. */
		      GM_STBAR ();
		      e->sender_port_id
			= GM_SUBPORT_PORT (p__sender_subport_id);
		    }

		    /* Free the recv token & Record the port for which we
		       are RDMAing */

		    /* pre */ port__free_recv_tokens = port->free_recv_tokens;
		    port->free_recv_tokens = rt;
		    gm.current_rdma_port = port;
		    rt->next = port__free_recv_tokens;
		  }
		else
		  {
		    rdma_nack_directed (LZERO, LONE);
		    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		    DISPATCH (3, "nacked directed recv (no recv token)");
		  }
	      }
	    gm_ack (c);
	    NOTICE (RDMAING);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    gm_printf_p ("DMAing only segment for directed send.\n");
	    DISPATCH (4, "started RDMA for directed pkt");
	  }
	else
	  {
	    rdma_handle_out_of_sequence (LZERO, LONE);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    DISPATCH (5, "out of sequence directed pkt");
	  }
      }
#endif /* GM_ENABLE_DIRECTED_SEND */

      /************
       * Reliable data packets
       ************/

    case GM_RELIABLE_DATA_SUBTYPE_8...GM_RELIABLE_DATA_SUBTYPE_31:

      if (gm.recv_chunk[LZERO].packet.as_gm.header.length
	  < GM_MAX_FAST_RECV_BYTES)
	{
	  gm_connection_t *c;
	  gm_packet_header_t *p;

    case GM_RELIABLE_DATA_SUBTYPE_0...GM_RELIABLE_DATA_SUBTYPE_7:

	  p = &gm.recv_chunk[LZERO].packet.as_gm.header;
	  c = &gm_connection[p->sender_node_id];

	  /* Check for out-of-sequence messages. */

	  if (p->sexno.whole == c->ack_packet.sexno.whole)
	    {
	      gm_recv_token_t *rt;
	      gm_port_protected_lanai_side_t *port;

	      /* The message is small enough to DMA directly into the
	         receive queue. */

	      gm_assert (!recv_token_hash_verify (p->sender_node_id,
						  p->sender_subport_id,
						  p->target_subport_id));

	      port = &gm_port[GM_SUBPORT_PORT (p->target_subport_id)];
	      rt = (alloc_lanai_recv_token
		    (GM_SUBPORT_PRIORITY (p->target_subport_id),
		     port, p->subtype - GM_RELIABLE_DATA_SUBTYPE_0));
	      if (rt)
		{
		  gm_recv_token_t *port__free_recv_tokens;
		  gm_up_t target_addr;
		  unsigned int dma_len;

		  /* Allocated a recv token. */

		  /* The message fits entirely within a recv token.  DMA the
		     message and event structure into the next recv queue slot
		     such that the end of the event structure is aligned with
		     the END of the buffer, and the last byte in the buffer is
		     the receive type.  This allows trivial fast completion
		     detection. */

		  /* Construct the event descriptor after the message. */
		  {
		    struct _gm_fast_recv_event *e;
		    unsigned int sport, rt__tag;

		    sport = GM_SUBPORT_PORT (p->sender_subport_id);

		    /* prefetch */ target_addr = rt->orig_ptr;

		    dma_len
		      = GM_RDMA_ROUNDUP (u32,
					 (p->length
					  + sizeof (struct
						    _gm_fast_recv_event)));
		    e = (((struct _gm_fast_recv_event *)
			  (GM_HEADER_PAYLOAD (p) + dma_len)) - 1);
		    e->size = p->subtype - GM_RELIABLE_DATA_SUBTYPE_0;
#if 0
#warning FIXME
		    asm volatile ("!begin bad math code");
		    e->message = (RECV_QUEUE_SLOT_HOST_ADDR (port)
				  + sizeof (gm_recv_queue_slot_t) - dma_len);
		    asm volatile ("!end bad math code");
#else
		    e->message = (RECV_QUEUE_SLOT_HOST_ADDR (port)
				  + sizeof (gm_recv_queue_slot_t) - dma_len);
#endif
		    e->buffer = target_addr;
		    e->sender_node_id = p->sender_node_id;
		    e->sender_port_id = sport;
		    /* prefetch */ port__free_recv_tokens
		      = port->free_recv_tokens;
		    /* pre */ rt__tag = rt->tag;
		    e->type = (GM_NEW_FAST_RECV_EVENT
			       + GM_SUBPORT_PRIORITY (p->target_subport_id)
			       + 2 * (sport == (GM_SUBPORT_PORT
						(p->target_subport_id))));
		    e->length = p->length;
		    e->tag = rt__tag;
#if GM_ENABLE_GALVANTECH_WORKAROUND
		    e->ip_checksum = p->ip_checksum;
#endif
		  }

		  /* DMA both the message and the receive descriptor
		     into host memory in one fell swoop. */

		  gm_assert (GM_DMA_ROUNDUP (u32, p->length)
			     <= GM_MAX_FAST_RECV_BYTES);
		  GM_STBAR ();
		  start_GRANULAR_RDMA (GM_HEADER_PAYLOAD (p),
				       (RECV_QUEUE_SLOT_DMA_ADDR (port)
					+ sizeof (gm_recv_queue_slot_t)
					- dma_len), dma_len);

		  /* Free the recv token. */

		  rt->next = port__free_recv_tokens;
		  port->free_recv_tokens = rt;

		  /* Advance the receive queue. */

		  RECV_QUEUE_ADVANCE (port);

		  gm_printf_p ("Received message %d for subport %d\n",
			       p->sexno.parts.seqno, p->target_subport_id);

		  /* Record the port so we can wake the host after the receive
		     completes, if needed. */

		  gm.current_rdma_port = port;

		  gm_ack (c);
		  NOTICE (RDMAING);
		  SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		  ASSERT_HANDLER (FINISH_RDMA_EVENT, L_rdma__token_done,);
		  gm_printf_p ("DMAing only chunk for message into "
			       "command queue.\n");
		  DISPATCH (6, "RDMAing fast recv event (including msg)");
		}
	      else
		{
		  /* Could not alloc a recv token. */
		  GM_INCR_ERROR_CNT (gm.drop_cnt);
		  GM_INCR_ERROR_CNT (gm.no_match_for_reliable_recv_cnt);
		  GM_NACK_IT (port, p,
			      p->subtype - GM_RELIABLE_DATA_SUBTYPE_0);
		  SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		  DISPATCH (7, "nacked monolithic pkt (no recv token)");
		}
	    }
	  else
	    {
	      rdma_handle_out_of_sequence (LZERO, LONE);
	      SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	      DISPATCH (8, "out of sequence monolithic pkt");
	    }
	}

      {
	gm_connection_t *c;
	gm_packet_header_t *p;

	p = &gm.recv_chunk[LZERO].packet.as_gm.header;
	c = &gm_connection[p->sender_node_id];

	/* Check for out-of-sequence messages. */

	if (p->sexno.whole == c->ack_packet.sexno.whole)
	  {
	    gm_recv_token_t *rt;
	    gm_port_protected_lanai_side_t *port;
	    unsigned int size, priority;

	    gm_assert (!recv_token_hash_verify (p->sender_node_id,
						p->sender_subport_id,
						p->target_subport_id));

	    priority = GM_SUBPORT_PRIORITY (p->target_subport_id);
	    size = p->subtype - GM_RELIABLE_DATA_SUBTYPE_0;
	    port = &gm_port[GM_SUBPORT_PORT (p->target_subport_id)];
	    rt = alloc_lanai_recv_token (priority, port, size);

	    if (rt)
	      {
		/* Allocated a recv token. */

		/* The message is too big to fit in the receive queue, so a
		   receive queue entry must be DMAd separately. */

		gm_assert (GM_DMA_ALIGNED (rt->recv_ptr));
		gm_assert (GM_DMA_ALIGNED (GM_HEADER_PAYLOAD (p)));
		gm_assert (p->length >= GM_DMA_GRANULARITY);
		USER_RDMA
		  (GM_HEADER_PAYLOAD (p),
		   rt->recv_ptr,
		   p->length,
		   GM_SUBPORT_PORT (p->target_subport_id),
		   GM_REFERENCE_LABEL (GM_SUB_N_CAT
				       (L_rdma__continue_last_chunk_, LZERO)),
		   GM_REFERENCE_LABEL (GM_SUB_N_CAT
				       (L_rdma__last_chunk_done_, LZERO)),
		   GM_DMA_GRANULARITY_ROUNDUP);

		/* Construct a recv event structure describing the
		   receive.  It will be DMAd to the host as soon as
		   the current DMA is completed. */

		{
		  gm_recv_token_t *port__free_recv_tokens;
		  struct _gm_recv_event *e = &gm.recv_token_dma_stage;
		  unsigned sport, spriority, rt__tag;
		  gm_up_t user_buffer;

		  sport = GM_SUBPORT_PORT (p->sender_subport_id);
		  spriority = GM_SUBPORT_PRIORITY (p->target_subport_id);

		  /* prefetch */ user_buffer = rt->orig_ptr;
		  e->size = p->subtype - GM_RELIABLE_DATA_SUBTYPE_0;
		  e->sender_node_id = p->sender_node_id;
		  e->sender_port_id = sport;
		  e->type = (GM_NEW_RECV_EVENT
			     + spriority
			     + 2 * (sport == (GM_SUBPORT_PORT
					      (p->target_subport_id))));
		  /* prefetch */ port__free_recv_tokens
		    = port->free_recv_tokens;
		  /* prefetch */ rt__tag = rt->tag;
		  e->buffer = user_buffer;
		  e->length = p->length;
		  e->tag = rt__tag;
#if GM_ENABLE_GALVANTECH_WORKAROUND
		  e->ip_checksum = p->ip_checksum;
#endif

		  /* Free the recv token. */

		  rt->next = port__free_recv_tokens;
		  port->free_recv_tokens = rt;
		}

		/* Record the port for which we are RDMAing */
		gm.current_rdma_port = port;

		gm_ack (c);
		NOTICE (RDMAING);
		SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		gm_printf_p ("DMAing only segment for message.\n");
		DISPATCH (9, "RDMAing only segment for message");
	      }
	    else
	      {
		/* Could not alloc a recv token. */
		GM_INCR_ERROR_CNT (gm.drop_cnt);
		GM_INCR_ERROR_CNT (gm.no_match_for_reliable_recv_cnt);

		GM_NACK_IT (port, p, p->subtype - GM_RELIABLE_DATA_SUBTYPE_0);
		SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		DISPATCH (10, "NACKed monolithic packet (no recv token)");
	      }
	  }
	else
	  {
	    rdma_handle_out_of_sequence (LZERO, LONE);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    DISPATCH (11, "out of sequence monolithic packet");
	  }
      }

    case GM_RELIABLE_HEAD_DATA_SUBTYPE_13...GM_RELIABLE_HEAD_DATA_SUBTYPE_31:
      {
	gm_connection_t *c;
	gm_packet_header_t *p;
	unsigned int p__length;
	unsigned int p__sender_node_id;
	unsigned int size;
	gm_u32_t p__sexno_whole;
	gm_u32_t c__ack_packet_sexno_whole;
	unsigned int p__target_subport_id;
	gm_port_protected_lanai_side_t *port;

	p = &gm.recv_chunk[LZERO].packet.as_gm.header;
	/* pre */ p__sender_node_id = p->sender_node_id;
	/* pre */ p__sexno_whole = p->sexno.whole;
	/* pre */ p__length = p->length;
	c = &gm_connection[p__sender_node_id];
	/* pre */ size = subtype - GM_RELIABLE_HEAD_DATA_SUBTYPE_13 + 13;
	/* pre */ p__target_subport_id = p->target_subport_id;
	c__ack_packet_sexno_whole = c->ack_packet.sexno.whole;
	;
	port = &gm_port[GM_SUBPORT_PORT (p__target_subport_id)];

	/* Check for out-of-sequence messages. */

	if (p__sexno_whole == c__ack_packet_sexno_whole)
	  {
	    gm_recv_token_t *rt;

	    gm_assert (!recv_token_hash_verify (p->sender_node_id,
						p->sender_subport_id,
						p->target_subport_id));

	    rt = alloc_lanai_recv_token ((GM_SUBPORT_PRIORITY
					  (p__target_subport_id)),
					 port, size);
	    if (rt)
	      {
		/* start the DMA */

		gm_up_t dmaable_target_addr;

		dmaable_target_addr = rt->recv_ptr;
		USER_RDMA (GM_HEADER_PAYLOAD (p),
			   dmaable_target_addr,
			   p__length,
			   GM_SUBPORT_PORT (p->target_subport_id),
			   GM_REFERENCE_LABEL (L_rdma__continue_chunk),
			   GM_REFERENCE_LABEL (L_rdma__chunk_done),
			   0);

		/* Advance the receive token info */

		rt->recv_ptr = dmaable_target_addr + p__length;

		/* Record the receive size so we will know the size to report
		   to the host. */

		rt->size = size;

		/* Insert token in hash table */

		recv_token_hash_insert (rt, p->sender_node_id,
					p->sender_subport_id,
					p->target_subport_id);

		/* ??? May wish to ack after DMA to allow DMA to fail. */

		gm_ack (c);
		NOTICE (RDMAING);
		SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		gm_printf_p ("Started to rdma first segment.\n");
		DISPATCH (12, "RDMAing head of message");
	      }
	    else
	      {
		/* Could not alloc a recv token. */
		GM_INCR_ERROR_CNT (gm.drop_cnt);
		GM_INCR_ERROR_CNT (gm.no_match_for_reliable_recv_cnt);

		GM_NACK_IT (port, p, size);
		SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		DISPATCH (13, "NACKing head of message (no recv token)");
	      }
	  }
	else
	  {
	    rdma_handle_out_of_sequence (LZERO, LONE);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    DISPATCH (14, "out of sequence head of message");
	  }
      }

    case GM_RELIABLE_BODY_DATA_SUBTYPE:
      {
	gm_connection_t *c;
	gm_packet_header_t *p;
	unsigned int p__length;
	unsigned int p__sender_node_id;
	unsigned int size;
	gm_u32_t p__sexno_whole;
	gm_u32_t c__ack_packet_sexno_whole;
	unsigned int p__sender_subport_id;
	unsigned int p__target_subport_id;
	gm_port_protected_lanai_side_t *port;

	p = &gm.recv_chunk[LZERO].packet.as_gm.header;
	/* pre */ p__sender_node_id = p->sender_node_id;
	/* pre */ p__sexno_whole = p->sexno.whole;
	/* pre */ p__length = p->length;
	c = &gm_connection[p__sender_node_id];
	/* pre */ size = subtype - GM_RELIABLE_HEAD_DATA_SUBTYPE_13 + 13;
	/* pre */ p__target_subport_id = p->target_subport_id;
	c__ack_packet_sexno_whole = c->ack_packet.sexno.whole;
	/* pre */ p__sender_subport_id = p->sender_subport_id;
	port = &gm_port[GM_SUBPORT_PORT (p__target_subport_id)];

	/* Check for out-of-sequence messages. */

	if (p__sexno_whole == c__ack_packet_sexno_whole)
	  {
	    gm_recv_token_t *rt;

	    rt = recv_token_hash_get (p__sender_node_id, p__sender_subport_id,
				      p__target_subport_id);

	    if (rt)
	      {
		/* start the DMA */

		gm_up_t dmaable_target_addr;

		dmaable_target_addr = rt->recv_ptr;
		USER_RDMA (GM_HEADER_PAYLOAD (p),
			   dmaable_target_addr,
			   p__length,
			   GM_SUBPORT_PORT (p->target_subport_id),
			   GM_REFERENCE_LABEL (L_rdma__continue_chunk),
			   GM_REFERENCE_LABEL (L_rdma__chunk_done),
			   0);

		/* Advance the receive token info */

		rt->recv_ptr = dmaable_target_addr + p__length;

		gm_ack (c);
		NOTICE (RDMAING);
		SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		gm_printf_p ("Started to rdma another segment.\n");
		DISPATCH (15, "RDMAing body of message");
	      }
	    else
	      {
		/* The port has been closed in the middle of a receive.
		   Send a special NACK to inform the sender of the fact. */

		rdma_nack_down (LZERO, LONE);
		SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		DISPATCH (16, "nacking body of message (port closed)");
	      }
	  }
	else
	  {
	    rdma_handle_out_of_sequence (LZERO, LONE);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    DISPATCH (17, "out of sequence body packet");
	  }
      }

    case GM_RELIABLE_TAIL_DATA_SUBTYPE:
      {
	gm_connection_t *c;
	gm_packet_header_t *p;

	p = &gm.recv_chunk[LZERO].packet.as_gm.header;
	c = &gm_connection[p->sender_node_id];

	/* Check for out-of-sequence messages. */

	if (p->sexno.whole == c->ack_packet.sexno.whole)
	  {
	    gm_recv_token_t *rt;

	    rt =
	      recv_token_hash_remove (p->sender_node_id, p->sender_subport_id,
				      p->target_subport_id);
	    if (rt)
	      {
		gm_up_t dmaable_target_addr;

		/* start the DMA */
		dmaable_target_addr = rt->recv_ptr;
		USER_RDMA
		  (GM_HEADER_PAYLOAD (p),
		   dmaable_target_addr,
		   p->length,
		   GM_SUBPORT_PORT (p->target_subport_id),
		   GM_REFERENCE_LABEL (GM_SUB_N_CAT
				       (L_rdma__continue_last_chunk_, LZERO)),
		   GM_REFERENCE_LABEL (GM_SUB_N_CAT
				       (L_rdma__last_chunk_done_, LZERO)),
		   GM_DMA_GRANULARITY_ROUNDUP);

		/* Construct a recv event structure describing the
		   receive.  It will be DMAd to the host as soon as
		   the current DMA is completed. */

		{
		  struct _gm_recv_event *e = &gm.recv_token_dma_stage;
		  unsigned sport, spriority, rt__tag;
		  gm_up_t user_buffer;
		  gm_u32_t rt__size;
		  gm_port_protected_lanai_side_t *port;
		  gm_recv_token_t *port__free_recv_tokens;

		  /* prefetch */ rt__size = rt->size;
		  /* prefetch */ user_buffer = rt->orig_ptr;
		  sport = GM_SUBPORT_PORT (p->sender_subport_id);
		  spriority = GM_SUBPORT_PRIORITY (p->target_subport_id);

		  e->size = rt__size;
		  e->sender_node_id = p->sender_node_id;
		  e->sender_port_id = sport;
		  e->type = GM_NEW_RECV_EVENT
		    + spriority
		    + 2 * (sport == GM_SUBPORT_PORT (p->target_subport_id));
		  e->buffer = user_buffer;
		  /* prefetch */ port = &gm_port[GM_SUBPORT_PORT
						 (p->target_subport_id)];
		  /* prefetch */ port__free_recv_tokens
		    = port->free_recv_tokens;
		  /* prefetch */ rt__tag = rt->tag;
		  e->length = dmaable_target_addr + p->length - user_buffer;
		  e->tag = rt__tag;
#if GM_ENABLE_GALVANTECH_WORKAROUND
		  e->ip_checksum = p->ip_checksum;
#endif

		  /* Free the recv token. */

		  rt->next = port__free_recv_tokens;
		  port->free_recv_tokens = rt;

		  /* Record the port for which we are RDMAing */

		  gm.current_rdma_port = port;
		}

		gm_ack (c);
		NOTICE (RDMAING);
		SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		gm_printf_p ("Started to rdma last segment.\n");
		DISPATCH (18, "DMAing tail of message");
	      }
	    else
	      {
		/* The port has been closed in the middle of a receive.
		   Send a special NACK to inform the sender of the fact. */
		rdma_nack_down (LZERO, LONE);
		SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
		DISPATCH (19, "NACKing tail of message (port closed)");
	      }
	  }
	else
	  {
	    rdma_handle_out_of_sequence (LZERO, LONE);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    DISPATCH (20, "out of sequence tail of message");
	  }
      }

#if GM_ENABLE_ETHERNET
    case GM_ETHERNET_HACK_SUBTYPE:
      {
	gm_ethernet_recv_token_t *ert;
	gm_s32_t dma_len;

	gm_puts ("gm_rdma: Received an ethernet packet.\n");

	/* Reconstruct the hacked raw message. */
	gm.recv_chunk[LZERO].packet.as_gm.header.subtype
	  = gm.recv_chunk[LZERO].raw_type_HACK;

	/* find the ethernet recv token to use */
	ert = gm.ethernet.recv.token_slot;
	if (!ert->segment[0].len)
	  {
	    /* No posted ethernet receive.  Drop the chunk */
	    gm_assert (gm.free_recv_chunk_cnt == 0
		       || gm.free_recv_chunk_cnt == 1);
	    TOGGLE_STATE (gm.free_recv_chunk_cnt * RDMA_PENDING);
	    gm.free_recv_chunk_cnt++;
	    GM_INCR_ERROR_CNT (gm.drop_cnt);
	    GM_INCR_ERROR_CNT (gm.no_match_for_ether_recv_cnt);

	    NOTICE (FREE_RECV_CHUNK);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    gm_printf_p ("Dropped ethernet message\n");
	    DISPATCH (21, "dropping ethernet packet (no ether recv token)");
	  }

	/* Compute the amount to DMA.  Either DMA the rest of the
	   packet or the length of the segment, whichever is
	   smaller. */
	gm_assert (GM_DMA_ALIGNED (ert->segment[0].len));
	dma_len
	  = gm_s32_min (ert->segment[0].len,
			GM_DMA_ROUNDUP (u32,
					gm.recv_chunk[LZERO].raw_length));

	gm_puts ("gm_rdma: Starting DMA.\n");

	/* Start DMA */
	start_RDMA (gm.recv_chunk[LZERO].packet.as_bytes,
		    ert->segment[0].ptr, dma_len);

	/* Record details for setting up next DMA in scatter. */
	gm.ethernet.recv.remaining_len
	  = GM_DMA_ROUNDUP (u32, gm.recv_chunk[LZERO].raw_length) - dma_len;
	gm.ethernet.recv.next_lar = (gm.recv_chunk[LZERO].packet.as_bytes
				     + dma_len);

	/* record the packet length */
	gm.ethernet.recv.total_len = gm.recv_chunk[LZERO].raw_length;


	gm_puts ("gm_rdma: updating state");

	/* update state */
	gm.current_rdma_port = &gm_port[GM_ETHERNET_PORT_ID];
	gm_assert (gm.current_rdma_port->id == GM_ETHERNET_PORT_ID);
	gm.ethernet.recv.scatter_pos = 0;
	gm.ethernet.recv.checksum = 0;
	NOTICE (RDMAING);
	SET_HANDLER (FINISH_RDMA_EVENT, L_rdma__maybe_finish_ethernet_rdma,);
	SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	gm_printf_p ("Started to rdma first ethernet segment.\n");
	DISPATCH (22, "RDMAing first ethernet segment");
      }
#endif /* GM_ENABLE_ETHERNET */

    case GM_RAW_HACK_SUBTYPE:
      {
	gm_port_protected_lanai_side_t *port;
	unsigned int port_id;
	gm_recv_token_t *rt;

	/* The message is a "raw" unrecognized message. If a port has
	   been registered as accepting raw receives, verify that the
	   message is no more than GM_MTU bytes long, and DMA it into a
	   buffer specified in a raw receive token. */

	/* Reconstruct the hacked raw message. */

	gm.recv_chunk[LZERO].packet.as_gm.header.subtype
	  = gm.recv_chunk[LZERO].raw_type_HACK;

	port = gm.registered_raw_recv_port;
	if (!port)
	  {
	    /* No port accepts raw receives.  drop the chunk */

	    TOGGLE_STATE (gm.free_recv_chunk_cnt * RDMA_PENDING);
	    gm.free_recv_chunk_cnt++;
	    NOTICE (FREE_RECV_CHUNK);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    gm_printf_p ("Dropped raw message since there is no "
			 "registered receiver.\n");
	    if (gm.recv_chunk[LZERO].raw_type_HACK != GM_MAPPING_PACKET_TYPE)
	      {
		GM_INCR_ERROR_CNT (gm.drop_cnt);
		GM_INCR_ERROR_CNT (gm.no_match_for_raw_recv_cnt);
	      }
	    DISPATCH (23, "dropping raw pkt (no raw recv port)");
	  }

	/* prefetch */ port_id = port->id;

	rt = alloc_lanai_recv_token (GM_HIGH_PRIORITY, port, GM_RAW_TAG_SIZE);
	if (!rt)
	  {
	    /* No registered raw receive token.  drop the chunk */
	    TOGGLE_STATE (gm.free_recv_chunk_cnt * RDMA_PENDING);
	    GM_INCR_ERROR_CNT (gm.drop_cnt);
	    GM_INCR_ERROR_CNT (gm.no_match_for_raw_recv_cnt);

	    gm.free_recv_chunk_cnt++;
	    NOTICE (FREE_RECV_CHUNK);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    gm_printf_p ("Dropped raw message since no raw receive token.\n");
	    DISPATCH (24, "dropped raw pkt (no recv token)");
	  }
	/* Allocated a recv token. */

	/* DMA the raw data into the receive buffer */

	USER_RDMA (&gm.recv_chunk[LZERO].packet.as_bytes[0],
		   rt->recv_ptr,
		   GM_DMA_ROUNDUP (u32, gm.recv_chunk[LZERO].raw_length),
		   port_id,
		   GM_REFERENCE_LABEL (GM_SUB_N_CAT
				       (L_rdma__continue_last_chunk_, LZERO)),
		   GM_REFERENCE_LABEL (GM_SUB_N_CAT
				       (L_rdma__last_chunk_done_, LZERO)),
		   GM_DMA_GRANULARITY_ROUNDUP);

	/* Construct a recv event structure describing the receive.
	   It will be DMAd to the host as soon as the current DMA is
	   completed. */

	{
	  gm_up_t user_buffer;
	  gm_u32_t gm_recv_chunk_LZERO__raw_length;
	  gm_recv_token_t *port__free_recv_tokens;

	  user_buffer = rt->orig_ptr;
	  gm_recv_chunk_LZERO__raw_length = gm.recv_chunk[LZERO].raw_length;
	  port__free_recv_tokens = port->free_recv_tokens;

	  gm.recv_token_dma_stage.type = GM_NEW_RAW_RECV_EVENT;
	  gm.recv_token_dma_stage.buffer = user_buffer;
	  gm.recv_token_dma_stage.length = gm_recv_chunk_LZERO__raw_length;

	  /* Free the recv token. */

	  rt->next = port__free_recv_tokens;
	  port->free_recv_tokens = rt;
	}

	/* Record the port for which we are RDMAing */

	gm.current_rdma_port = port;

	NOTICE (RDMAING);
	SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	gm_printf_p ("DMAing only segment for message.\n");
	DISPATCH (25, "RDMAing raw pkt");
      }

#if GM_ENABLE_DATAGRAMS
      /************
       * Datagram packets
       ************/

    case GM_DATAGRAM_SUBTYPE_0...GM_DATAGRAM_SUBTYPE_31:

#if 0
#warning fast datagram receive disabled
#else
      if (gm.recv_chunk[LZERO].packet.as_gm.header.length
	  < GM_MAX_FAST_RECV_BYTES)
	{
	  gm_connection_t *c;
	  gm_packet_header_t *p;

	  gm_recv_token_t *rt;
	  gm_port_protected_lanai_side_t *port;

	  p = &gm.recv_chunk[LZERO].packet.as_gm.header;
	  c = &gm_connection[p->sender_node_id];

	  /* The message is small enough to DMA directly into the
	     receive queue. */

	  gm_assert (!recv_token_hash_verify (p->sender_node_id,
					      p->sender_subport_id,
					      p->target_subport_id));

	  port = &gm_port[GM_SUBPORT_PORT (p->target_subport_id)];
	  rt = (alloc_lanai_recv_token
		(GM_SUBPORT_PRIORITY (p->target_subport_id),
		 port, p->subtype - GM_DATAGRAM_SUBTYPE_0));
	  if (rt)
	    {
	      gm_recv_token_t *port__free_recv_tokens;
	      gm_up_t target_addr;
	      unsigned int dma_len;

	      /* Allocated a recv token. */

	      /* The message fits entirely within a recv token.  DMA the
	         message and event structure into the next recv queue slot
	         such that the end of the event structure is aligned with
	         the END of the buffer, and the last byte in the buffer is
	         the receive type.  This allows trivial fast completion
	         detection. */

	      /* Construct the event descriptor after the message. */
	      {
		struct _gm_fast_recv_event *e;
		unsigned int sport, rt__tag;

		sport = GM_SUBPORT_PORT (p->sender_subport_id);

		/* prefetch */ target_addr = rt->orig_ptr;

		dma_len
		  = GM_RDMA_ROUNDUP (u32,
				     (p->length
				      + sizeof (struct _gm_fast_recv_event)));
		e = (((struct _gm_fast_recv_event *)
		      (GM_HEADER_PAYLOAD (p) + dma_len)) - 1);
		e->size = p->subtype - GM_DATAGRAM_SUBTYPE_0;
		e->message = (RECV_QUEUE_SLOT_HOST_ADDR (port)
			      + sizeof (gm_recv_queue_slot_t) - dma_len);
		e->buffer = target_addr;
		e->sender_node_id = p->sender_node_id;
		e->sender_port_id = sport;
		/* prefetch */ port__free_recv_tokens
		  = port->free_recv_tokens;
		/* pre */ rt__tag = rt->tag;
		e->type = (GM_NEW_FAST_RECV_EVENT
			   + GM_SUBPORT_PRIORITY (p->target_subport_id)
			   + 2 * (sport == (GM_SUBPORT_PORT
					    (p->target_subport_id))));
		e->length = p->length;
		e->tag = rt__tag;
	      }

	      /* DMA both the message and the receive descriptor
	         into host memory in one fell swoop. */

	      gm_assert (GM_DMA_ROUNDUP (u32, p->length)
			 <= GM_MAX_FAST_RECV_BYTES);
	      GM_STBAR ();
	      start_GRANULAR_RDMA (GM_HEADER_PAYLOAD (p),
				   (RECV_QUEUE_SLOT_DMA_ADDR (port)
				    + sizeof (gm_recv_queue_slot_t)
				    - dma_len), dma_len);

	      /* Free the recv token. */

	      rt->next = port__free_recv_tokens;
	      port->free_recv_tokens = rt;

	      /* Advance the receive queue. */

	      RECV_QUEUE_ADVANCE (port);

	      gm_printf_p ("Received message %d for subport %d\n",
			   p->sexno.parts.seqno, p->target_subport_id);

	      /* Record the port so we can wake the host after the receive
	         completes, if needed. */

	      gm.current_rdma_port = port;

	      NOTICE (RDMAING);
	      SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	      ASSERT_HANDLER (FINISH_RDMA_EVENT, L_rdma__token_done,);
	      gm_printf_p ("DMAing only chunk for message into "
			   "command queue.\n");
	      DISPATCH (26, "DMAing fast datagram into recv queue");
	    }
	  else
	    {
	      /* Could not alloc a recv token, so drop packet. */

	      GM_INCR_ERROR_CNT (gm.drop_cnt);
	      GM_INCR_ERROR_CNT (gm.no_match_for_datagram_recv_cnt);
	      TOGGLE_STATE (gm.free_recv_chunk_cnt * RDMA_PENDING);
	      gm.free_recv_chunk_cnt++;
	      NOTICE (FREE_RECV_CHUNK);
	      SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	      DISPATCH (27, "dropping small datagram (no recv token)");
	    }
	}
#endif

      {
	gm_connection_t *c;
	gm_packet_header_t *p;
	gm_recv_token_t *rt;
	gm_port_protected_lanai_side_t *port;
	unsigned int size, priority;

	p = &gm.recv_chunk[LZERO].packet.as_gm.header;
	c = &gm_connection[p->sender_node_id];

	gm_assert (!recv_token_hash_verify (p->sender_node_id,
					    p->sender_subport_id,
					    p->target_subport_id));

	priority = GM_SUBPORT_PRIORITY (p->target_subport_id);
	size = p->subtype - GM_DATAGRAM_SUBTYPE_0;
	port = &gm_port[GM_SUBPORT_PORT (p->target_subport_id)];
	rt = alloc_lanai_recv_token (priority, port, size);

	if (rt)
	  {
	    /* Allocated a recv token. */

	    /* The message is too big to fit in the receive queue, so a
	       receive queue entry must be DMAd separately. */

	    gm_assert (GM_DMA_ALIGNED (rt->recv_ptr));
	    gm_assert (GM_DMA_ALIGNED (GM_HEADER_PAYLOAD (p)));
	    gm_assert (p->length >= GM_DMA_GRANULARITY);
	    USER_RDMA (GM_HEADER_PAYLOAD (p),
		       rt->recv_ptr,
		       p->length,
		       GM_SUBPORT_PORT (p->target_subport_id),
		       (GM_REFERENCE_LABEL
			(GM_SUB_N_CAT (L_rdma__continue_last_chunk_, LZERO))),
		       (GM_REFERENCE_LABEL
			(GM_SUB_N_CAT (L_rdma__last_chunk_done_, LZERO))),
		       GM_DMA_GRANULARITY_ROUNDUP);

	    /* Construct a recv event structure describing the
	       receive.  It will be DMAd to the host as soon as
	       the current DMA is completed. */

	    {
	      gm_recv_token_t *port__free_recv_tokens;
	      struct _gm_recv_event *e = &gm.recv_token_dma_stage;
	      unsigned sport, spriority, rt__tag;
	      gm_up_t user_buffer;

	      sport = GM_SUBPORT_PORT (p->sender_subport_id);
	      spriority = GM_SUBPORT_PRIORITY (p->target_subport_id);

	      /* prefetch */ user_buffer = rt->orig_ptr;
	      e->size = p->subtype - GM_DATAGRAM_SUBTYPE_0;
	      e->sender_node_id = p->sender_node_id;
	      e->sender_port_id = sport;
	      e->type = (GM_NEW_RECV_EVENT
			 + spriority
			 + 2 * (sport == (GM_SUBPORT_PORT
					  (p->target_subport_id))));
	      /* prefetch */ port__free_recv_tokens
		= port->free_recv_tokens;
	      /* prefetch */ rt__tag = rt->tag;
	      e->buffer = user_buffer;
	      e->length = p->length;
	      e->tag = rt__tag;

	      /* Free the recv token. */

	      rt->next = port__free_recv_tokens;
	      port->free_recv_tokens = rt;
	    }

	    /* Record the port for which we are RDMAing */

	    gm.current_rdma_port = port;

	    NOTICE (RDMAING);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    gm_printf_p ("DMAing only segment for message.\n");
	    DISPATCH (28, "RDMAing datagram into buffer");
	  }
	else
	  {
	    /* Could not alloc a recv token, so drop packet. */

	    if (GM_DEBUG_DATAGRAMS)
	      {
		gm_printf (GM_STR ("No recv token: dropping datagram.\n"));
		fflush (stdout);
	      }

	    GM_INCR_ERROR_CNT (gm.drop_cnt);
	    GM_INCR_ERROR_CNT (gm.no_match_for_datagram_recv_cnt);
	    TOGGLE_STATE (gm.free_recv_chunk_cnt * RDMA_PENDING);
	    gm.free_recv_chunk_cnt++;
	    NOTICE (FREE_RECV_CHUNK);
	    SET_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
	    DISPATCH (29, "dropping datagram (no recv token)");
	  }
      }
#endif /* GM_ENABLE_DATAGRAMS */

      /* Unrecognized types should never reach this state machine. */

    default:
      gm_printf ("%s:%d bad subtype found subtype=%d \n",
		 __FILE__, __LINE__, subtype);
      fflush (stdout);
      gm_hex_dump (gm.recv_chunk[LZERO].packet.as_bytes, 16);
      gm_always_assert (0);
    }

  /* Unrecognized types should never reach this state machine. */

  gm_always_assert (0);
  DISPATCH (0, "never get here");
}
GM_END_HANDLER;

/* could eliminate this handler if GM_ENABLE_VM == 0 */

/* Continue a multi-segment RDMA of the last chunk of a message.

   Reloads the continuation state from gm.remaining_rdma_{port_id,hp,
   ctr,lar}, converts the host pointer HP to a DMA address with
   rdma_addr (), and starts the next transfer.  If at most GM_PAGE_LEN
   bytes remain (or GM_MTU can never exceed GM_PAGE_LEN), this is the
   final segment and FINISH_RDMA_EVENT is pointed at
   L_rdma__last_chunk_done_.  Otherwise exactly GM_PAGE_LEN bytes are
   transferred and the continuation state is advanced by GM_PAGE_LEN
   so that this handler can run again for the next segment.

   NOTE(review): the not-last-segment path does not install a handler;
   it presumably relies on this handler already being installed for
   FINISH_RDMA_EVENT (by USER_RDMA?) -- confirm. */
MARK_LABEL (L_rdma__continue_last_chunk_, LZERO)
{
  gm_u32_t ctr, port_id;
  gm_up_t hp;
  gm_dp_t ear;
  gm_lp_t lar;

  /* Reload the continuation state of the transfer in progress. */
  port_id = gm.remaining_rdma_port_id;
  hp = gm.remaining_rdma_hp;
  ctr = gm.remaining_rdma_ctr;
  lar = gm.remaining_rdma_lar;

  ear = rdma_addr (hp, port_id);
  GM_PRINT (0, ("critical dp=0x%qx\n", (gm_u64_t) ear));
  if (GM_MTU <= GM_PAGE_LEN	/* short circuit if always true */
      || ctr <= GM_PAGE_LEN)
    {
      /* Everything that remains fits in this segment. */
      start_RDMA (lar, ear, ctr);
      SET_HANDLER (FINISH_RDMA_EVENT, L_rdma__last_chunk_done_, LZERO);
      DISPATCH (30, "continue last RDMA chunk (last segment)");
    }
  else
    {
      /* Transfer only one GM_PAGE_LEN segment: EAR was computed for
	 HP only, and the bookkeeping below advances by exactly
	 GM_PAGE_LEN.  Passing CTR here would run past that segment
	 and re-send the remainder on the next pass. */
      start_RDMA (lar, ear, GM_PAGE_LEN);
      gm.remaining_rdma_ctr = ctr - GM_PAGE_LEN;
      gm.remaining_rdma_hp = hp + GM_PAGE_LEN;
      gm.remaining_rdma_lar = (char *) lar + GM_PAGE_LEN;
      DISPATCH (31, "continue last RDMA chunk (not last segment)");
    }
}
GM_END_HANDLER;

/* Handler run when the data of a message's last chunk has been DMAd.

   Starts a second DMA that copies the staged receive event
   (gm.recv_token_dma_stage) into the current receive queue slot of
   gm.current_rdma_port, positioned so that the event ends exactly at
   the end of the slot.  It then advances that port's receive queue
   and installs L_rdma__token_done as the FINISH_RDMA_EVENT handler. */
MARK_LABEL (L_rdma__last_chunk_done_, LZERO)
{
  gm_port_protected_lanai_side_t *port;

  /* Entry invariants: a receive DMA is in progress, no send DMA is,
     and not both receive chunks are free. */
  gm_assert (NOTICED (RDMAING));
  gm_assert (NOTICED_NOT (SDMAING));
  gm_assert (gm.free_recv_chunk_cnt != 2);

  /* Start DMAing the recv token. */

  port = gm.current_rdma_port;
  gm_assert (port);

  gm_assert (sizeof (gm.recv_token_dma_stage) < GM_PAGE_LEN);
  /* Destination = end of the slot minus the event size, so the last
     byte of the event is the last byte of the slot. */
  start_GRANULAR_RDMA (&gm.recv_token_dma_stage,
		       (RECV_QUEUE_SLOT_DMA_ADDR (port)
			+ sizeof (gm_recv_queue_slot_t)
			- sizeof (struct _gm_recv_event)),
		       (unsigned int) sizeof (struct _gm_recv_event));
  gm_assert (NOTICED (RDMAING));

  /* Advance the receive queue */

  RECV_QUEUE_ADVANCE (port);

  gm_assert (NOTICED (RDMAING));
  /* The event DMA just started completes in L_rdma__token_done. */
  SET_HANDLER (FINISH_RDMA_EVENT, L_rdma__token_done,);
  ASSERT_HANDLER (START_RDMA_EVENT, L_rdma__rdma_chunk_, LONE);
  gm_printf_p ("DMAed last segment for message. DMAing token for message.\n");
  DISPATCH (32, "last RDMA chunk done");
}
GM_END_HANDLER;

#if LONE==1

/* could eliminate this handler if GM_ENABLE_VM == 0 */

/* Continue a multi-segment RDMA of a chunk that is not the last chunk
   of its message.

   Reloads the continuation state from gm.remaining_rdma_{port_id,hp,
   ctr,lar}, converts the host pointer HP to a DMA address with
   rdma_addr (), and starts the next transfer.  If at most GM_PAGE_LEN
   bytes remain (or GM_MTU can never exceed GM_PAGE_LEN), this is the
   final segment and FINISH_RDMA_EVENT is pointed at
   L_rdma__chunk_done.  Otherwise exactly GM_PAGE_LEN bytes are
   transferred and the continuation state is advanced by GM_PAGE_LEN
   so that this handler can run again for the next segment.

   NOTE(review): the not-last-segment path does not install a handler;
   it presumably relies on this handler already being installed for
   FINISH_RDMA_EVENT -- confirm. */
MARK_LABEL (L_rdma__continue_chunk,)
{
  gm_u32_t ctr, port_id;
  gm_up_t hp;
  gm_dp_t ear;
  gm_lp_t lar;

  /* Reload the continuation state of the transfer in progress. */
  port_id = gm.remaining_rdma_port_id;
  hp = gm.remaining_rdma_hp;
  ctr = gm.remaining_rdma_ctr;
  lar = gm.remaining_rdma_lar;

  ear = rdma_addr (hp, port_id);
  GM_PRINT (0, ("critical dp=0x%qx\n", (gm_u64_t) ear));
  if (GM_MTU <= GM_PAGE_LEN	/* short circuit if always true */
      || ctr <= GM_PAGE_LEN)
    {
      /* Everything that remains fits in this segment. */
      start_RDMA (lar, ear, ctr);
      SET_HANDLER (FINISH_RDMA_EVENT, L_rdma__chunk_done,);
      DISPATCH (33, "RDMA continue chunk (last segment)");
    }
  else
    {
      /* Transfer only one GM_PAGE_LEN segment: EAR was computed for
	 HP only, and the bookkeeping below advances by exactly
	 GM_PAGE_LEN.  Passing CTR here would run past that segment
	 and re-send the remainder on the next pass. */
      start_RDMA (lar, ear, GM_PAGE_LEN);
      gm.remaining_rdma_ctr = ctr - GM_PAGE_LEN;
      gm.remaining_rdma_hp = hp + GM_PAGE_LEN;
      gm.remaining_rdma_lar = (char *) lar + GM_PAGE_LEN;
      DISPATCH (34, "RDMA continue last chunk (not last segment)");
    }
}
GM_END_HANDLER;

/* Handler run when a chunk has been completely DMAd and no receive
   event needs to follow it.  Returns the chunk to the free pool,
   clears RDMAING, and points FINISH_RDMA_EVENT back at
   L_rdma__token_done. */
MARK_LABEL (L_rdma__chunk_done,)
{
  gm_assert (NOTICED (RDMAING));
  gm_assert (NOTICED_NOT (SDMAING));
  gm_assert (gm.free_recv_chunk_cnt != 2);

  /* Free the chunk that we just DMAd */

  /* The TOGGLE_STATE argument is 0 when no chunk was free and
     RDMA_PENDING when one was; it must be evaluated before the
     increment below. */
  TOGGLE_STATE (gm.free_recv_chunk_cnt * RDMA_PENDING);
  gm.free_recv_chunk_cnt++;
  NOTICE (FREE_RECV_CHUNK);

  NOTICE_NOT (RDMAING);
  SET_HANDLER (FINISH_RDMA_EVENT, L_rdma__token_done,);
  gm_printf_p ("Finished DMAing a segment for message.\n");
  DISPATCH (35, "RDMA chunk done");
}
GM_END_HANDLER;

#if 1
/* Handler run when the receive event ("token") for a message has been
   DMAd to the host.

   Frees the receive chunk that was just delivered, clears RDMAING,
   wakes the host if gm.current_rdma_port->wake_host is set, and then
   jumps directly to the handler selected by the dispatch offset
   computed from the ISR value read on entry.  The statements are
   interleaved by hand (see the "prefetch"/"dispatch" markers), so
   their order should not be changed casually. */
MARK_LABEL (L_rdma__token_done,)
{
  gm_port_protected_lanai_side_t *port;
  gm_u32_t gm_free_recv_chunk_cnt;
  unsigned int offset;
  /* NOTE(review): `handler' is not referenced directly in this block;
     it may be used inside GOTO_HANDLER_AT_OFFSET -- confirm before
     removing. */
  gm_handler_t handler;
  gm_u32_t isr;
  int wake;

  gm_assert (NOTICED (RDMAING));
  gm_assert (NOTICED_NOT (SDMAING));

  /*          dispatch */ isr = get_ISR ();
  /* prefetch */ gm_free_recv_chunk_cnt = gm.free_recv_chunk_cnt;
  NOTICE (FREE_RECV_CHUNK);
  NOTICE_NOT (RDMAING);
  /* Argument is 0 when no chunk was free before this one, and
     RDMA_PENDING when one was (see the assertion below). */
  TOGGLE_STATE (gm_free_recv_chunk_cnt * RDMA_PENDING);
  ++gm_free_recv_chunk_cnt;
  /*          dispatch */ offset = DISPATCH_OFFSET (isr);

  /* prefetch */ port = gm.current_rdma_port;
  /* prefetch */ wake = port->wake_host;
  gm.free_recv_chunk_cnt = gm_free_recv_chunk_cnt;
  /* After the increment, one or both chunks must be free. */
  gm_assert (gm_free_recv_chunk_cnt == 1 || gm_free_recv_chunk_cnt == 2);
  if (!wake)
    {
      LOG_DISPATCH (36, "RDMAd token (no wake)");
      GOTO_HANDLER_AT_OFFSET (offset);
    }
  else
    {
      /* Barrier before waking the host, then wake it. */
      GM_STBAR ();
      wake_port (port);

      ASSERT_HANDLER (FINISH_RDMA_EVENT, L_rdma__token_done,);
      gm_printf_p ("Finished DMAing token for message.\n");
      LOG_DISPATCH (37, "RDMAd token (with wake)");
      GOTO_HANDLER_AT_OFFSET (offset);
    }
}
GM_END_HANDLER;

#else
/* Alternate implementation of the token-done handler; it sits in the
   `#else' arm of an `#if 1' and is therefore not currently compiled.

   Frees the receive chunk whose event was just DMAd, clears RDMAING,
   wakes the host if gm.current_rdma_port->wake_host is set, and
   dispatches to the next handler.

   NOTE(review): this variant dispatches with GOTO_HANDLER (offset)
   whereas the compiled variant uses GOTO_HANDLER_AT_OFFSET (offset);
   confirm which macro is correct before enabling this code. */
MARK_LABEL (L_rdma__token_done,)
{
  gm_u32_t gm_free_recv_chunk_cnt;
  gm_port_protected_lanai_side_t *port;
  unsigned int offset;
  int wake_host;
  gm_u32_t isr;

  /* Check the entry state before RDMAING is cleared below. */
  gm_assert (NOTICED (RDMAING));
  gm_assert (NOTICED_NOT (SDMAING));

  /* dispatch */ isr = get_ISR ();
  port = gm.current_rdma_port;
  gm_free_recv_chunk_cnt = gm.free_recv_chunk_cnt;
  NOTICE (FREE_RECV_CHUNK);
  NOTICE_NOT (RDMAING);
  /* Argument is 0 when no chunk was free before this one, and
     RDMA_PENDING when one was. */
  TOGGLE_STATE (gm_free_recv_chunk_cnt * RDMA_PENDING);
  /*dispatch */ offset = DISPATCH_OFFSET (isr);

  /* Free last recv chunk DMAd */
  gm_assert (gm_free_recv_chunk_cnt == 0 || gm_free_recv_chunk_cnt == 1);
  wake_host = port->wake_host;
  ++gm_free_recv_chunk_cnt;
  gm.free_recv_chunk_cnt = gm_free_recv_chunk_cnt;
  if (!wake_host)
    {
      ASSERT_HANDLER (FINISH_RDMA_EVENT, L_rdma__token_done,);
      gm_printf_p ("Finished DMAing token for message.\n");
      LOG_DISPATCH (38, "RDMA token done (no wake)");
      GOTO_HANDLER (offset);
    }
  else
    {
      /* Barrier before waking the host, then wake it. */
      GM_STBAR ();
      wake_port (gm.current_rdma_port);

      ASSERT_HANDLER (FINISH_RDMA_EVENT, L_rdma__token_done,);
      gm_printf_p ("Finished DMAing token for message.\n");
      LOG_DISPATCH (39, "RDMA token done (with wake)");
      GOTO_HANDLER (offset);
    }
}
GM_END_HANDLER;
#endif

#if GM_ENABLE_ETHERNET
/* Handler run each time a piece of an ethernet packet has been DMAd.

   Folds the checksum of the piece just transferred into the running
   checksum.  If bytes remain and the next scatter segment of the
   current ethernet receive token has a nonzero length, it starts the
   DMA of the next piece and dispatches.  Otherwise (nothing remains,
   or the next segment is empty and the packet is truncated to what
   was delivered) it interrupts the host with the checksum and length,
   retires the receive token, frees the receive chunk, clears RDMAING
   and restores L_rdma__token_done as the FINISH_RDMA_EVENT handler. */
MARK_LABEL (L_rdma__maybe_finish_ethernet_rdma,)
{
  gm_ethernet_recv_token_lp_t ert;
  gm_u32_t checksum;

  gm_puts ("gm_rdma: in L_rdma__maybe_finish_ethernet_rdma");
  ert = gm.ethernet.recv.token_slot;

  /* compute the 16-bit 1's complement checksum over the packet so far
     and put the checksum in the 16 lsb's of checksum. */
  /* NOTE(review): the carry is folded in once and the upper 16 bits
     are not masked off here; presumably the consumer of the value
     masks them -- confirm. */
  checksum = gm.ethernet.recv.checksum;
  checksum += rdma_checksum ();
  checksum += checksum >> 16;

  if (gm.ethernet.recv.remaining_len > 0)
    {
      gm_u32_t pos;
      gm_u32_t dma_len;

      gm_puts ("gm_rdma: have more to DMA");

      /* Need to DMA more for the ethernet packet */

      /* NOTE(review): the bound on POS is enforced only by gm_assert;
	 this relies on the token ending with a zero-length segment
	 before GM_MAX_ETHERNET_SCATTER_CNT is reached -- confirm. */
      pos = ++gm.ethernet.recv.scatter_pos;
      gm_assert (pos < GM_MAX_ETHERNET_SCATTER_CNT);
      if (ert->segment[pos].len)
	{
	  gm_assert (ert->segment[pos].ptr);
	  gm_assert (GM_DMA_ALIGNED (ert->segment[pos].ptr));
	  gm_assert (gm.ethernet.recv.remaining_len > 0);
	  /* Transfer no more than the segment holds and no more than
	     what remains of the packet. */
	  dma_len = gm_s32_min (ert->segment[pos].len,
				gm.ethernet.recv.remaining_len);

	  /* Start DMAing next piece of ethernet packet */
	  gm_assert (GM_DMA_ALIGNED (dma_len));
	  gm_assert (dma_len);
	  start_RDMA (gm.ethernet.recv.next_lar,
		      ert->segment[pos].ptr, dma_len);

	  /* Record info for starting the next piece */
	  gm.ethernet.recv.next_lar
	    = (char *) gm.ethernet.recv.next_lar + dma_len;
	  gm.ethernet.recv.remaining_len -= dma_len;
	  gm.ethernet.recv.checksum = checksum;

	  /* Wait for DMA engine to be free again. */
	  /* NOTE(review): assumes DISPATCH transfers control out of
	     this handler; otherwise the completion code below would
	     also run -- confirm. */
	  gm_assert (NOTICED (RDMAING));
	  gm_printf_p ("Finished RDMAing another piece of an ethernet "
		       "packet.\n");
	  DISPATCH (40, "RDMAing an ethernet segment");
	}
      else
	{
#if 0
#warning BAD debugging HACK: This report should be removed.
	  /* gm_report_error (port, GM_NEW_BAD_RECV_TOKEN_EVENT); */
	  gm_putstring ("recv ethernet packet bigger than buffer\n");
#endif
	  /* No segment left for the rest of the packet: report only
	     the length that was delivered. */
	  gm.ethernet.recv.total_len -= gm.ethernet.recv.remaining_len;
	  gm.ethernet.recv.checksum = checksum;
	  gm_assert (gm.ethernet.recv.total_len > 0);
	}
    }
  gm_puts ("gm_rdma: done DMAing packet.  Now DMAing event.\n");

  /* report the receive in an interrupt. */

  prepare_to_interrupt ("ethernet recv");
  gm.interrupt.ethernet_recv.checksum = checksum;
  gm.interrupt.ethernet_recv.len = gm.ethernet.recv.total_len;
  gm_interrupt (GM_ETHERNET_RECV_INTERRUPT);

  /* Advance the ethernet receive token queue */
  gm_puts ("Advancing the ethernet recv token queue. g23754\n");
  /* Zero the first segment length of the consumed token before
     moving on; the token queue wraps at GM_NUM_ETHERNET_RECV_TOKENS. */
  ert->segment[0].len = 0;
  GM_STBAR ();
  gm.ethernet.recv.token_slot = ++ert;
  if (ert >= &gm.ethernet.recv.token[GM_NUM_ETHERNET_RECV_TOKENS])
    {
      gm_puts ("wrapping recv token queue g23754\n");
      gm.ethernet.recv.token_slot = &gm.ethernet.recv.token[0];
    }

  /* Free last recv chunk DMAd */
  gm_assert (gm.free_recv_chunk_cnt == 0 || gm.free_recv_chunk_cnt == 1);
  TOGGLE_STATE (gm.free_recv_chunk_cnt * RDMA_PENDING);
  gm.free_recv_chunk_cnt++;
  NOTICE (FREE_RECV_CHUNK);

  NOTICE_NOT (RDMAING);
  SET_HANDLER (FINISH_RDMA_EVENT, L_rdma__token_done,);	/* restore default */
  gm_printf_p ("Interrupted host with ethernet receive.\n");
  DISPATCH (41, "generated ethernet recv interrupt");
}
GM_END_HANDLER;

#endif /* GM_ENABLE_ETHERNET */
#endif /* LONE == 1 */

/*
  This file uses GM standard indentation.

  Local Variables:
  c-file-style:"gnu"
  c-backslash-column:72
  tab-width:8
  End:
*/
