/*****************************************************************-*-c-*-
 * Myricom GM networking software and documentation                      *
 * Copyright (c) 1996, 1997 by Myricom, Inc.                             *
 * All rights reserved.  See the file `COPYING' for copyright notice.    *
 *************************************************************************/

/* author: glenn@myri.com */

/*
   HACKS:

   The send polling loop changes the type of illegitimate sends to 0
   without advancing to the next element in the send queue for that
   port, hanging the buggy sender.


   "L_sdma__detected_send" acts as a subroutine of the tight pollers
   (L_sdma__poll_for_sdma...) and they all share variables.

 */

/****************
 * GM_SEGMENT_LEN()
 ****************/

/* Macro to compute the length of the next packet segment, assuming
   the rest of the packet does not fit in a single segment.  The idea
   is to send subsequent packets of similar lengths to maximize DMA
   pipelining, rather than sending a tiny tail packet that cannot be
   pipelined. */

#if 1
/* Length of the next segment when REMAINING bytes are still to be sent
   and do not fit in a single segment: GM_MTU less half of
   ((-REMAINING) masked with (GM_MTU - 16)). */
#define GM_SEGMENT_LEN(remaining)					\
  (GM_MTU - ((-(remaining) & (GM_MTU - 16)) >> 1))
#else
#warning debug hack
/* Debug variant: every segment is a full GM_MTU, regardless of REMAINING. */
#define GM_SEGMENT_LEN(remaining) (GM_MTU)
#endif

/* Largest send_len for which the polling code attempts
   SHORTCUT_IF_POSSIBLE(); longer sends are always queued normally.
   Presumably a byte count -- TODO confirm units. */
#define GM_MAX_SHORTCUT_MESSAGE_LEN 256

/***********************
** SDMA state machine **
***********************/

/* Call finish_finish_sdma_start() on the `sr', `c' and `sp' variables of
   the expansion site, install the START_SDMA_EVENT handler named by
   L_sdma__start_sdma_ and LONE, then execute DISPATCH.  The caller must
   have `sr', `c' and `sp' in scope. */
#define FINISH_FINISH_SDMA_START(dispatch) do {				\
  finish_finish_sdma_start (sr, c, sp);					\
  SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, LONE);		\
  dispatch;								\
} while (0)

/* Call finish_sdma_start() with the expansion site's `sr', `c', `sp',
   `p__length' and `st' (plus LZERO), install the START_SDMA_EVENT
   handler named by L_sdma__start_sdma_ and LONE, then execute DISPATCH.
   All of those variables must be in scope where the macro is used. */
#define FINISH_SDMA_START(dispatch) do {				\
  finish_sdma_start (sr, c, sp, LZERO, p__length, st);			\
  SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, LONE);		\
  dispatch;								\
} while (0)

#ifndef FINISH_KNOWN_GOOD_SEND
#define FINISH_KNOWN_GOOD_SEND(dispatch) do {				\
  /* Now that we know the send is legitimate, commit to using		\
     the send token by removing the token from free list. */		\
									\
  gm_send_token_commit (port, st);					\
  gm_assert (st->common.next == 0);					\
  append_send_token_to_send_queue (st, sp_id, target_node_id);		\
									\
  /* Advance host send token queue */					\
									\
  NOTICE (SDMA_PENDING);						\
  gm_printf_p ("Queued a new send.\n");					\
  dispatch;								\
} while (0)
#endif /* FINISH_KNOWN_GOOD_SEND */

#ifndef BAD_SEND
/* BAD_SEND (port, event): report an illegitimate send.  Logs the
   dispatch, then hands PORT, the send event type and EVENT (together
   with sizeof (*EVENT)) to handle_bad_send().  `type' is taken from the
   expansion site.  Only defined here if no definition was supplied
   earlier.

   The body is wrapped in do/while (0) so that both statements stay
   together when the macro is the body of an unbraced `if' or `else';
   use it as a statement followed by a semicolon. */
#define BAD_SEND(port, event) do {					\
  LOG_DISPATCH (0, "BAD_SEND called");					\
  handle_bad_send ((port), type, (event), sizeof (*(event)));		\
} while (0)
#endif /* BAD_SEND */

/************
 * Polling for SDMAs
 ************/
/* SAVE_TYPE (type): when both GM_DEBUG and GM_DEBUG_LANAI_STRUCT are
   enabled, store TYPE in gm.debug.info[3]; otherwise expand to nothing
   (TYPE is then not evaluated). */
#if GM_DEBUG && GM_DEBUG_LANAI_STRUCT
#define SAVE_TYPE(type) gm.debug.info[3] = type
#else
#define SAVE_TYPE(type)
#endif

/* Subport ID formed from GM_LOW_PRIORITY and GM_ETHERNET_PORT_ID; the
   ethernet send path passes it to append_send_token_to_send_queue(). */
#define ETHERNET_SUBPORT GM_SUBPORT (GM_LOW_PRIORITY, GM_ETHERNET_PORT_ID)

/****************
 * SHORTCUT_IF_POSSIBLE()
 ****************/

/* If all the queues are idle, this macro uses a shortcut to DMA and
   send the packet.  Otherwise, it uses the normal GM state machine
   implementation to queue the send. */

#if GM_ENABLE_SHORTCUT

#define SHORTCUT_IF_POSSIBLE(dispatch) do {				\
  gm_connection_t *c;							\
  gm_send_record_t *sr, *sr__next;					\
  gm_packet_header_t *p;						\
  unsigned int c__route_len, this_node_id;				\
  gm_s32_t rtc, sexno;							\
  gm_s16_t seqno;							\
  gm_subport_t *sp, *sp__next;						\
  void *start_sdma_handler;						\
  gm_u32_t isr;								\
									\
  if (NOTICED (SDMAING							\
	       | RDMAING						\
	       | SDMA_PENDING						\
	       | SENDING						\
	       | SEND_PENDING))						\
    {									\
      break;								\
    }									\
									\
  /********								\
   * Shortcut for the idle case: (HACK)					\
   ********/								\
									\
  log_it (254, __FILE__, __LINE__, 0);                           	\
  start_sdma_handler = GET_HANDLER (START_SDMA_EVENT);			\
  c = &gm.connection[target_node_id];					\
  if (start_sdma_handler						\
      == GM_REFERENCE_LABEL (L_sdma__start_sdma_0))			\
    {									\
      p = &gm.send_chunk[0].packet.as_gm.header;			\
      SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, 1);		\
    }									\
  else									\
    {									\
      p = &gm.send_chunk[1].packet.as_gm.header;			\
      SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, 0);		\
    }									\
									\
  /****									\
   * Build the packet							\
   ****/								\
									\
  /* start DMA */							\
									\
  gm_assert (send_len);							\
  USER_SDMA_NO_CONTINUATION (message, p+1,				\
			     send_len + GM_DMA_GRANULARITY-1,		\
			     GM_SUBPORT_PORT (sp_id));			\
  sexno = c->send_sexno.whole;						\
  /* pre */ this_node_id = gm.this_node_id;				\
									\
  /* build packet header */						\
									\
  copy_route (c->route, ((char *) p - GM_MAX_NETWORK_DIAMETER));	\
  p->type = GM_PACKET_TYPE;						\
  p->subtype = (GM_RELIABLE_DATA_SUBTYPE_0 + size);			\
  p->target_node_id = target_node_id;					\
  p->sender_node_id = this_node_id;					\
  p->sexno.whole = sexno;						\
  p->length = send_len;							\
  /* pre */ c__route_len = c->route_len;				\
  p->target_subport_id = target_subport_id;				\
  p->sender_subport_id = sp_id;						\
  gm_galvantech_set_header_checksum (p);				\
  GM_GALVANTECH_SET_IP_CHECKSUM (p, st->reliable.ip_checksum);		\
									\
  /****									\
   * enqueue the send token and log the send				\
   ****/								\
									\
  /* commit to using the send token. */					\
									\
  /* pre */ sp = gm.free_subports;					\
  gm_send_token_commit (port, st);					\
  gm_assert (sp);							\
  st->reliable.subport = sp;						\
  st->reliable.send_len = 0;						\
									\
  /* build send record */						\
									\
  sr = gm.free_send_records;						\
  gm_assert (sr); /* since idle */					\
  /* pre */ rtc = RTC;							\
  gm_assert (port->active_subport_cnt == 0);				\
  port->active_subport_cnt = 1;						\
  sr__next = sr->next;							\
  sr->next = 0;								\
  sr->send_token = st;							\
  sr->before_ptr = message;						\
  sr->sexno.whole = sexno;						\
  sr->before_len = send_len;						\
  sr->resend_time = rtc;						\
  gm.free_send_records = sr__next;					\
									\
  /* append the send token to the send queue */				\
									\
  GM_INCR_DEBUG_CNT (gm.sends_in_send_queue_cnt);			\
  gm_assert (!gm.first_active_connection);				\
  gm.first_active_connection = c;					\
									\
  seqno = c->send_sexno.parts.seqno;					\
  gm_assert (c->first_send_record == 0);				\
  c->next_active = c;							\
  c->prev_active = c;							\
  c->send_sexno.parts.seqno = seqno + 1;				\
  c->active_subport_bitmask = 1 << sp_id;				\
  c->first_send_record = sr;						\
  c->last_send_record = sr;						\
  c->first_active_send_port = sp;					\
  c->known_alive_time = rtc;						\
									\
  /* init the subport */						\
									\
  sp__next = sp->next;							\
  sp->next = sp;							\
  sp->prev = sp;							\
  /* post */ gm.free_subports = sp__next;				\
  sp->connection = c;							\
  sp->first_send_token = st;						\
  sp->last_send_token = st;						\
  gm_assert (st->common.next == 0);					\
  sp->delay_until = rtc;						\
  sp->id = sp_id;							\
  sp->disabled = 0;							\
  sp->progress_time = rtc;						\
  incr_subport_cnt ();							\
									\
  /* wait for the small packet DMA to complete */			\
									\
  await_free_DMA_engine ();						\
  isr = get_ISR ();							\
  NOTICE (SEND_PENDING + SDMA_PENDING);					\
  gm_assert (gm.free_send_chunk_cnt == 2);				\
  gm.free_send_chunk_cnt = 1;						\
									\
  /****									\
   * send the packet, or arrange for send				\
   ****/								\
									\
  NOTICE (SEND_PENDING);						\
  if (isr & SEND_INT_BIT)						\
    {									\
      void *start_send_handler, *smp, *smlt;				\
									\
      smp = (char *) p - c__route_len;					\
      smlt = (char *) (p+1) + send_len;					\
      SA = (gm_u32_t) smp;						\
      SMP = smp;							\
      if (c__route_len)							\
	set_SMH (((char *) p) - 1);					\
      set_SMLT (smlt);							\
      start_send_handler = GET_HANDLER (START_SEND_EVENT);		\
      NOTICE (SENDING);							\
      if (start_send_handler						\
	  == GM_REFERENCE_LABEL (L_send__start_sending_chunk_0))	\
	{								\
	  SET_HANDLER (START_SEND_EVENT,				\
		       L_send__start_sending_chunk_, 1);		\
	}								\
      else								\
	{								\
	  gm_assert							\
            (start_send_handler						\
	     == GM_REFERENCE_LABEL (L_send__start_sending_chunk_1));	\
	  SET_HANDLER (START_SEND_EVENT,				\
		       L_send__start_sending_chunk_, 0);		\
	}								\
    }									\
  dispatch;								\
} while (0)

#else /* not GM_ENABLE_SHORTCUT */

#define SHORTCUT_IF_POSSIBLE(dispatch)

#endif /* not GM_ENABLE_SHORTCUT */

#if GM_ENABLE_SHORTCUT && GM_ENABLE_DATAGRAMS

/* DATAGRAM_SHORTCUT_IF_POSSIBLE (dispatch): idle-case fast path for a
   datagram send.

   Variables taken from the expansion site: port, st, message, send_len,
   size, sp_id, target_node_id and target_subport_id.

   If any of SDMAING, RDMAING, SDMA_PENDING, SENDING or SEND_PENDING is
   noticed, the macro does nothing: its `break' leaves only the macro's
   own do/while (0), so control continues with the statement that
   follows the macro use.  Otherwise it selects a send chunk, starts the
   DMA of the message to the area after the packet header, fills in a
   GM_DATAGRAM_SUBTYPE_0 + size header, waits for the DMA engine, starts
   the send if SEND_INT_BIT is set in the ISR, recycles the send token
   and executes DISPATCH.

   Unlike SHORTCUT_IF_POSSIBLE, no send record or subport is set up
   here, so the macro declares no `rtc', `sp' or `sp__next' locals. */
#define DATAGRAM_SHORTCUT_IF_POSSIBLE(dispatch) do {			\
  gm_connection_t *c;							\
  gm_packet_header_t *p;						\
  unsigned int c__route_len, this_node_id;				\
  void *start_sdma_handler;						\
  gm_u32_t isr;								\
									\
  /* Not idle: leave the macro and let the caller handle the send. */	\
  if (NOTICED (SDMAING							\
	       | RDMAING						\
	       | SDMA_PENDING						\
	       | SENDING						\
	       | SEND_PENDING))						\
    {									\
      break;								\
    }									\
									\
  /********								\
   * Shortcut for the idle case: (HACK)					\
   ********/								\
									\
  start_sdma_handler = GET_HANDLER (START_SDMA_EVENT);			\
  c = &gm.connection[target_node_id];					\
  /* Use the chunk matching the current handler and switch the		\
     handler to the other index. */					\
  if (start_sdma_handler						\
      == GM_REFERENCE_LABEL (L_sdma__start_sdma_0))			\
    {									\
      p = &gm.send_chunk[0].packet.as_gm.header;			\
      SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, 1);		\
    }									\
  else									\
    {									\
      p = &gm.send_chunk[1].packet.as_gm.header;			\
      SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, 0);		\
    }									\
									\
  /****									\
   * Build the packet							\
   ****/								\
									\
  /* start DMA */							\
									\
  gm_assert (send_len);							\
  USER_SDMA_NO_CONTINUATION (message, p+1,				\
			     send_len + GM_DMA_GRANULARITY-1,		\
			     GM_SUBPORT_PORT (sp_id));			\
  /* pre */ this_node_id = gm.this_node_id;				\
									\
  /* build packet header */						\
									\
  copy_route (c->route, ((char *) p - GM_MAX_NETWORK_DIAMETER));	\
  p->type = GM_PACKET_TYPE;						\
  p->subtype = (GM_DATAGRAM_SUBTYPE_0 + size);				\
  p->target_node_id = target_node_id;					\
  p->sender_node_id = this_node_id;					\
  p->length = send_len;							\
  /* pre */ c__route_len = c->route_len;				\
  p->target_subport_id = target_subport_id;				\
  p->sender_subport_id = sp_id;						\
									\
  /* wait for the small packet DMA to complete */			\
									\
  await_free_DMA_engine ();						\
									\
  /****									\
   * send the packet, or arrange for send				\
   ****/								\
									\
  /* Both chunks were free (idle); one is now in use. */		\
  gm_assert (gm.free_send_chunk_cnt == 2);				\
  gm.free_send_chunk_cnt = 1;						\
  isr = get_ISR ();							\
  NOTICE (SEND_PENDING);						\
  if (isr & SEND_INT_BIT)						\
    {									\
      void *start_send_handler, *smp, *smlt;				\
									\
      /* The send starts c__route_len bytes before the header and	\
	 ends send_len bytes after it. */				\
      smp = (char *) p - c__route_len;					\
      smlt = (char *) (p+1) + send_len;					\
      SA = (gm_u32_t) smp;						\
      SMP = smp;							\
      if (c__route_len)							\
        set_SMH (((char *) p) - 1);                                     \
      set_SMLT (smlt);							\
      start_send_handler = GET_HANDLER (START_SEND_EVENT);		\
      NOTICE (SENDING);							\
      /* Switch the START_SEND_EVENT handler to the other index. */	\
      if (start_send_handler						\
	  == GM_REFERENCE_LABEL (L_send__start_sending_chunk_0))	\
	{								\
	  SET_HANDLER (START_SEND_EVENT,				\
		       L_send__start_sending_chunk_, 1);		\
	}								\
      else								\
	{								\
	  gm_assert (start_send_handler					\
		     == (GM_REFERENCE_LABEL				\
			 (L_send__start_sending_chunk_1)));		\
	  SET_HANDLER (START_SEND_EVENT,				\
		       L_send__start_sending_chunk_, 0);		\
	}								\
    }									\
									\
  /* report successful send completion */				\
									\
  gm_recycle_first_send_token (port, st);				\
									\
  dispatch;								\
} while (0)

#if GM_ENABLE_PIO_DATAGRAMS

/* PIO_DATAGRAM_SHORTCUT_IF_POSSIBLE (dispatch): idle-case fast path for
   a datagram whose payload word is stored directly into the send chunk
   instead of being DMAed.

   Variables taken from the expansion site: port, st, data, send_len,
   size, sp_id, target_node_id and target_subport_id.

   If any of SDMAING, RDMAING, SDMA_PENDING, SENDING or SEND_PENDING is
   noticed, the macro does nothing: its `break' leaves only the macro's
   own do/while (0), so control continues with the statement that
   follows the macro use.  Otherwise it selects a send chunk, fills in a
   GM_DATAGRAM_SUBTYPE_0 + size header, stores `data' as a gm_u32_t
   right after the header, starts the send if SEND_INT_BIT is set in the
   ISR, recycles the send token and executes DISPATCH.

   No send record or subport is set up here, so the macro declares no
   `rtc', `sp' or `sp__next' locals. */
#define PIO_DATAGRAM_SHORTCUT_IF_POSSIBLE(dispatch) do {		\
  gm_connection_t *c;							\
  gm_packet_header_t *p;						\
  unsigned int c__route_len, this_node_id;				\
  void *start_sdma_handler;						\
  gm_u32_t isr;								\
									\
  /* Not idle: leave the macro and let the caller handle the send. */	\
  if (NOTICED (SDMAING							\
	       | RDMAING						\
	       | SDMA_PENDING						\
	       | SENDING						\
	       | SEND_PENDING))						\
    {									\
      break;								\
    }									\
									\
  /********								\
   * Shortcut for the idle case: (HACK)					\
   ********/								\
									\
  start_sdma_handler = GET_HANDLER (START_SDMA_EVENT);			\
  c = &gm.connection[target_node_id];					\
  /* Use the chunk matching the current handler and switch the		\
     handler to the other index. */					\
  if (start_sdma_handler						\
      == GM_REFERENCE_LABEL (L_sdma__start_sdma_0))			\
    {									\
      p = &gm.send_chunk[0].packet.as_gm.header;			\
      SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, 1);		\
    }									\
  else									\
    {									\
      p = &gm.send_chunk[1].packet.as_gm.header;			\
      SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, 0);		\
    }									\
									\
  /****									\
   * Build the packet							\
   ****/								\
									\
  /* pre */ this_node_id = gm.this_node_id;				\
									\
  /* build packet header */						\
									\
  copy_route (c->route, ((char *) p - GM_MAX_NETWORK_DIAMETER));	\
  p->type = GM_PACKET_TYPE;						\
  p->subtype = (GM_DATAGRAM_SUBTYPE_0 + size);				\
  p->target_node_id = target_node_id;					\
  p->sender_node_id = this_node_id;					\
  p->length = send_len;							\
  /* pre */ c__route_len = c->route_len;				\
  p->target_subport_id = target_subport_id;				\
  p->sender_subport_id = sp_id;						\
  /* Payload is written by the processor, so no DMA is started. */	\
  *(gm_u32_t *)(p+1) = data;						\
									\
  /****									\
   * send the packet, or arrange for send				\
   ****/								\
									\
  /* Both chunks were free (idle); one is now in use. */		\
  gm_assert (gm.free_send_chunk_cnt == 2);				\
  gm.free_send_chunk_cnt = 1;						\
  isr = get_ISR ();							\
  NOTICE (SEND_PENDING);						\
  if (isr & SEND_INT_BIT)						\
    {									\
      void *start_send_handler, *smp, *smlt;				\
									\
      /* The send starts c__route_len bytes before the header and	\
	 ends send_len bytes after it. */				\
      smp = (char *) p - c__route_len;					\
      smlt = (char *) (p+1) + send_len;					\
      SA = (gm_u32_t) smp;						\
      SMP = smp;							\
      /* Same set_SMH step that SHORTCUT_IF_POSSIBLE and		\
	 DATAGRAM_SHORTCUT_IF_POSSIBLE perform for a non-empty route;	\
	 the packet layout here is identical. */			\
      if (c__route_len)							\
	set_SMH (((char *) p) - 1);					\
      set_SMLT (smlt);							\
      start_send_handler = GET_HANDLER (START_SEND_EVENT);		\
      NOTICE (SENDING);							\
      /* Switch the START_SEND_EVENT handler to the other index. */	\
      if (start_send_handler						\
	  == GM_REFERENCE_LABEL (L_send__start_sending_chunk_0))	\
	{								\
	  SET_HANDLER (START_SEND_EVENT,				\
		       L_send__start_sending_chunk_, 1);		\
	}								\
      else								\
	{								\
	  gm_assert (start_send_handler					\
		       == (GM_REFERENCE_LABEL				\
			   (L_send__start_sending_chunk_1)));		\
	  SET_HANDLER (START_SEND_EVENT,				\
		       L_send__start_sending_chunk_, 0);		\
	}								\
    }									\
									\
  /* report successful send completion */				\
									\
  gm_recycle_first_send_token (port, st);				\
									\
  dispatch;								\
} while (0)

#else /* not GM_ENABLE_PIO_DATAGRAMS */

/* PIO datagrams are disabled: the shortcut expands to nothing, so the
   macro is defined in every configuration. */
#define PIO_DATAGRAM_SHORTCUT_IF_POSSIBLE(dispatch)

#endif /* GM_ENABLE_PIO_DATAGRAMS */
#else /* not (GM_ENABLE_SHORTCUT && GM_ENABLE_DATAGRAMS) */

#define DATAGRAM_SHORTCUT_IF_POSSIBLE(dispatch)
#define PIO_DATAGRAM_SHORTCUT_IF_POSSIBLE(dispatch)

#endif /* not (GM_ENABLE_SHORTCUT && GM_ENABLE_DATAGRAMS) */

/* ******************************* */
/* Poll for a new token from host. */
/* ******************************* */

#if LZERO == 0
_MARK_LABEL (L_sdma__poll_for_sdma_0,)
{
  /* Vars for polling. */

  gm_port_protected_lanai_side_t *port;	/* 1 */
  struct gm_send_queue_slot *sqs;	/* 2 */
  gm_send_token_t *st;
  enum gm_send_event_type type;	/* 3 */

  /****************
   * Poll for a new send
   ****************/

  /* Pipelining HACKery is used here to make this as fast as possible,
     as it is effectively the idle dispatch for GM, and any extra
     overhead here makes all handlers less responsive. */


  {

#if L7 && defined POLL_PENDING && !GM_FUNCTION_BASED_DISPATCH
    gm_port_protected_lanai_side_t *port2;	/* 4 */
    struct gm_send_queue_slot *sqs2;	/* 0 */

    /* poll.port = 5
       poll_handler_offset = 6
       next offset = 8
       type offset = 9
     */


    asm volatile ("ld %5 [%?r11], %1\n\t"	/* port */
		  "and %?r12, 0xffff7fff, %?r12\n\t"
		  "sub %?r11, %?r12, %?r21\n\t"
		  "mov 0x104e, %?r25\n\t"
		  "ld %6 [%1], %2\n\t"	/* hst */
		  "bt 3f\n\t"
		  "ld [0xfffffe50], %?r27\n\t"	/* ISR */
		  "uld.b %9 [%2], %3\n"	/* type */
		  "4:\tmov %?r24, %?pc\n\t"
		  "nop\n\t"
		  "nop\n"
		  "3:\tld %8 [%1], %4\n\t"	/* port */
		  "and %?r27, %?r25, %?r27\n\t"	/* and ISR */
		  "uld.b	[%?r21 sub %?r27], %?r26\n\t"	/* index */
		  "sub.f %3, %?r0, %?r0\n\t"	/* type? */
		  "bne 2f\n\t"	/* poll cont */
		  "st %4, %5 [%?r11]\n\t"	/* next */
		  "ld %?r26 [%?r11], %?r24\n\t"	/* handler */
		  "sub.f %?r26, %7, %?r0\n\t"	/* poll? */
		  "bne 4b\n\t"	/* dispatch */
		  "ld %6 [%4], %2\n\t"	/* hst */
		  "ld [0xfffffe50], %?r27\n\t"	/* ISR */
		  "ld %8 [%4], %1\n\t"	/* port */
		  "uld.b %9 [%2], %3\n\t"	/* type */
		  "and %?r27, %?r25, %?r27\n\t"	/* and ISR */
		  "uld.b	[%?r21 sub %?r27], %?r26\n\t"	/* index */
		  "sub.f %3, %?r0, %?r0\n\t"	/* type? */
		  "bne 1f\n\t"	/* poll cont */
		  "st %1, %5 [%?r11]\n\t"	/* next */
		  "ld %?r26 [%?r11], %?r24\n"	/* handler */
		  "0:\tsub.f %?r26, %7, %?r0\n\t"	/* poll? */
		  "bne 4b\n\t"	/* dispatch */
		  "ld %6 [%1], %2\n\t"	/* hst */
		  "ld [0xfffffe50], %?r27\n\t"	/* ISR */
		  "ld %8 [%1], %4\n\t"	/* port */
		  "uld.b %9 [%2], %3\n\t"	/* type */
		  "and %?r27, %?r25, %?r27\n\t"	/* and ISR */
		  "uld.b	[%?r21 sub %?r27], %?r26\n\t"	/* index */
		  "sub.f %3, %?r0, %?r0\n\t"	/* type? */
		  "bne 2f\n\t"	/* poll cont */
		  "st %4, %5 [%?r11]\n\t"	/* next */
		  "ld %?r26 [%?r11], %?r24\n\t"	/* handler */
		  "sub.f %?r26, %7, %?r0\n\t"	/* poll? */
		  "bne 4b\n\t"	/* dispatch */
		  "ld %6 [%4], %2\n\t"	/* hst */
		  "ld [0xfffffe50], %?r27\n\t"	/* ISR */
		  "ld %8 [%4], %1\n\t"	/* port */
		  "uld.b %9 [%2], %3\n\t"	/* type */
		  "and %?r27, %?r25, %?r27\n\t"	/* and ISR */
		  "uld.b	[%?r21 sub %?r27], %?r26\n\t"	/* index */
		  "sub.f %3, %?r0, %?r0\n\t"	/* type? */
		  "beq 0b\n\t"	/* poll cont */
		  "st %1, %5 [%?r11]\n\t"	/* next */
		  "ld %?r26 [%?r11], %?r24\n"	/* handler */
		  "1:\tmov %4, %1\n"
		  "2:":"=r" (sqs2),
		  "=r" (port),
		  "=r" (sqs),
		  "=r" (type),
		  "=r" (port2):"n" (GM_OFFSETOF
				    (gm_lanai_globals_t, poll.port)),
		  "n" (GM_OFFSETOF
		       (gm_port_protected_lanai_side_t,
			send_token_queue_slot)),
		  "n" (GM_OFFSETOF (gm_lanai_globals_t, handler[POLL_EVENT])),
		  "n" (GM_OFFSETOF
		       (gm_port_protected_lanai_side_t, next_to_poll)),
		  "n" (GM_OFFSETOF (struct gm_send_queue_slot, type)):"r27",
		  "r26", "r25", "r24", "r23", "r22", "r21", "isr", "imr",
		  "memory");

#else /* not L7 */
    gm_port_protected_lanai_side_t *port__next_to_poll;
    gm_u32_t isr;
    int offset;
    gm_handler_t handler;


    /* verify that the host and lanai send token queues are in sync. */
    gm_assert (((gm.poll.port->first_free_send_token
		 - gm.poll.port->_send_tokens)
		== (gm.poll.port->send_token_queue_slot
		    - gm.poll.port->PORT->send_token_queue))
	       || (gm.poll.port->first_free_send_token == 0
		   && (gm.poll.port->send_token_queue_slot
		       == FAKE_SEND_QUEUE_SLOT ())));

    port = gm.poll.port;
    /*dispatch */ isr = get_ISR ();
#ifdef POLL_PENDING
    NOTICE_NOT (POLL_PENDING);
    GM_STBAR ();
#else
    ;
#endif
    sqs = port->send_token_queue_slot;
    /* pre */ port__next_to_poll = port->next_to_poll;
    /*dispatch */ offset = DISPATCH_OFFSET (isr);
    type = sqs->type;
    if (!type)
      {
	gm.poll.port = port__next_to_poll;
	GM_LOG_EVT (offset);
	LOG_DISPATCH (69, 0);	/* don't log polling */
	/* dispatch */ GOTO_HANDLER_AT_OFFSET (offset);
      }
#endif /* not L7 */
  }


  /****************
   * Found a new send
   ****************/

  st = port->first_free_send_token;

  GM_INCR_DEBUG_CNT (gm.queued_send_token_cnt);
  GM_PRINT (GM_DEBUG_SEND_TOKENS, ("poll found send in slot 0x%x.\n",
				   (int) (st - port->_send_tokens)));

  gm_assert (sqs != FAKE_SEND_QUEUE_SLOT ());
  gm_assert (st - &port->_send_tokens[0]
	     == sqs - &port->PORT->send_token_queue[0]);
  sqs->type = GM_NO_SEND_EVENT;
  switch (type)
    {
    case GM_RESUME_SENDING_EVENT:
    case GM_DROP_SENDS_EVENT:
      gm_recycle_first_send_token (port, st);

      /* Reenable the disabled subport */
      {
	gm_connection_t *c;
	gm_s32_t rtc;
	gm_subport_t *sp, *first;
	struct gm_resend_send_event *e;
	unsigned int port__id;
	unsigned int priority;
	unsigned int sp__disabled;
	unsigned int target_node_id;
	unsigned int target_subport_id;

	/* read the event fields. */

	e = GM_SEND_QUEUE_SLOT_EVENT (sqs, resend);
	target_subport_id = e->target_subport_id;
	target_node_id = e->target_node_id;

	/* find the subport to reenable */

	if (target_node_id > gm.max_node_id)
	  {
	    gm_report_error (port, GM_NEW_BAD_RESEND_DETECTED_EVENT);
	    DISPATCH (70, "invalid drop or resume (target_node_id too big)");
	  }
	c = &gm_connection[target_node_id];
	first = sp = c->first_active_send_port;
	if (!first)
	  {
	    DISPATCH (71, "nothing to drop or resend");
	  }
	priority = GM_SUBPORT_PRIORITY (target_subport_id);
	port__id = port->id;
	while (sp->id != GM_SUBPORT (priority, port__id))
	  {
	    sp = sp->next;
	    if (sp == first)
	      {
		DISPATCH (72, "no matching subport for send or drop");
	      }
	  }

	/* Verify that the subport needs to be reenabled and reenable it. */

	sp__disabled = sp->disabled;
	sp->disabled = 0;
	rtc = RTC;
	if (!sp__disabled)
	  {
	    gm_report_error (port, GM_NEW_BAD_RESEND_DETECTED_EVENT);
	    DISPATCH (73, "invalid drop or resume (port is enabled)");
	  }
	GM_PRINT (GM_DEBUG_CONNECTIONS, ("enabled subport %d\n", sp->id));
	sp->progress_time = rtc;

	/* If we used a hack to keep the disabled subport around, use
	   another hack to perform the deferred
	   remove_first_send_token_from_send_queue() */

	if (sp->first_send_token == 0)
	  {
	    gm_send_token_t hack;

	    gm_bzero (&hack, sizeof (hack));
	    hack.common.subport = sp;
	    sp->first_send_token = &hack;
	    remove_first_send_token_from_send_queue (&hack, sp->id);
	  }

	/* drop the sends if needed */

	if (type == GM_DROP_SENDS_EVENT)
	  {
	    while ((st = sp->first_send_token) != 0)
	      {
		remove_first_send_token_from_send_queue (st, sp->id);
		pass_sent_token_to_port_and_free
		  (st, port, 0, GM_SEND_DROPPED);
	      }
	  }

	/* done. */
	DISPATCH (74, "drop or resend queued");
      }

      /************
       * Raw sends
       ************/

    case GM_RAW_SEND_EVENT:
      {
	unsigned int target_node_id;
	unsigned int sp_id;
	struct gm_raw_send_send_event *e;

	e = GM_SEND_QUEUE_SLOT_EVENT (sqs, raw_send);
	st = port->first_free_send_token;
	if (!st)
	  {
	    gm_report_error (port, GM_NEW_SEND_TOKEN_VIOLATION_EVENT);
	    gm_printf_p ("out of send tokens.\n");
	    DISPATCH (75, "raw send token violation");
	  }

	st->raw.type = GM_ST_RAW;
	st->raw.orig_ptr = e->message;
	st->raw.send_ptr = 0;	/* HACK: Mark this as a raw send. */
	st->raw.total_length = GM_PACKET_ROUNDUP (u32, e->total_length);
	st->raw.route_length = e->route_length;
	st->raw.size = 16;	/* HACK: Make sure finish_send code
				   doesn't report error. */
	sp_id = GM_SUBPORT (GM_HIGH_PRIORITY, port->id);
	st->raw.target_subport_id = sp_id;	/* HACK for queuing */
	target_node_id = gm.max_node_id;	/* any legit node will do */

	if (!port->privileged
	    || !st->raw.total_length
	    || (st->raw.route_length > st->raw.total_length)
	    || st->raw.total_length > GM_MTU + GM_MAX_NETWORK_DIAMETER
	    || !GM_DMA_ALIGNED (st->raw.orig_ptr + st->raw.total_length))
	  {
	    BAD_SEND (port, e);
	    DISPATCH (76, "bad raw send");
	  }
	/* Verify that this is a legitimate send */
	if (target_node_id > gm.max_node_id
	    || !st->raw.orig_ptr
	    || (GM_ENABLE_MCP_SEND_LENGTH_CHECK
		&& st->raw.total_length >
		GM_MAX_LENGTH_FOR_SIZE (st->raw.size))
	    || st->raw.target_subport_id > GM_MAX_SUBPORT_ID)
	  {
	    BAD_SEND (port, e);
	    DISPATCH (77, "bad raw send (2)");
	  }

	FINISH_KNOWN_GOOD_SEND (DISPATCH (78, "raw send queued"));
      }

#if GM_ENABLE_DIRECTED_SEND
      /************
       * Directed sends
       ************/

      /* Handled here just like normal sends, but storing a
         different type in the receive token, omitting the size, and
         storing the pointer to the remote buffer in the send
         token. */

    case GM_DIRECTED_SEND_EVENT:
      {
	unsigned int target_node_id;
	unsigned int sp_id;
	struct gm_directed_send_send_event *e;

	e = GM_SEND_QUEUE_SLOT_EVENT (sqs, directed_send);
	st = port->first_free_send_token;
	if (!st)
	  {
	    gm_report_error (port, GM_NEW_SEND_TOKEN_VIOLATION_EVENT);
	    gm_printf_p ("no send token for directed send.\n");
	    DISPATCH (79, "send token violation for dirsend");
	  }

	st->directed.type = GM_ST_DIRECTED;
	st->directed.orig_ptr = st->directed.send_ptr = e->source_buffer;
	st->directed.send_len = e->length;
	st->directed.target_subport_id = e->target_subport_id;
	sp_id
	  = GM_SUBPORT (GM_SUBPORT_PRIORITY (st->directed.target_subport_id),
			port->id);
	st->directed.remote_ptr = e->target_buffer;
	target_node_id = e->target_node_id;

	/* Verify that this is a legitimate send */
	if (target_node_id > gm.max_node_id
	    || !st->directed.orig_ptr
	    || !st->directed.send_len
	    || st->directed.target_subport_id > GM_MAX_SUBPORT_ID)
	  {
	    BAD_SEND (port, e);
	    DISPATCH (80, "bad dirsend");
	  }
	FINISH_KNOWN_GOOD_SEND (DISPATCH (81, "illegitimate dirsend"));
      }
#endif

      /************
       * Normal sends
       ************/

    case GM_SEND_EVENT_0...GM_SEND_EVENT_31:
      {
	gm_up_t message;
	gm_u32_t send_len;
	unsigned int size;
	unsigned int target_node_id;
	unsigned int target_subport_id;
	unsigned int sp_id;
	struct gm_send_send_event *e;

	/* load the send info into registers */

	e = GM_SEND_QUEUE_SLOT_EVENT (sqs, send);
	st = port->first_free_send_token;
	message = e->message;
	send_len = e->length;
	size = type - GM_SEND_EVENT_0;
	target_node_id = e->target_node_id;
	target_subport_id = e->target_subport_id;
	sp_id =
	  GM_SUBPORT (GM_SUBPORT_PRIORITY (target_subport_id), port->id);

	if (!st)
	  {
	    gm_report_error (port, GM_NEW_SEND_TOKEN_VIOLATION_EVENT);
	    gm_printf_p ("out of send tokens.\n");
	    DISPATCH (82, "send token violation for reliable send");
	  }

	/* build the send token */

	st->reliable.type = GM_ST_RELIABLE;
	st->reliable.orig_ptr = st->reliable.send_ptr = message;
	st->reliable.send_len = send_len;
	st->reliable.size = size;
	st->reliable.target_subport_id = target_subport_id;
#if GM_ENABLE_GALVANTECH_WORKAROUND
	st->reliable.ip_checksum = e->ip_checksum;
#endif

#if GM_FAST_SMALL_SEND
	if (send_len <= GM_FAST_SEND_LEN)
	  st->reliable.data = ((void *) &e->message - st->reliable.send_len);
	else
	  st->reliable.data = 0;
#endif

	gm_assert (port->id == (unsigned) (port - gm_port));
	/* Verify that this is a legitimate send */
	if (message
	    && send_len
	    && !(GM_ENABLE_MCP_SEND_LENGTH_CHECK
		 && send_len > GM_MAX_LENGTH_FOR_SIZE (size))
	    && target_subport_id <= GM_MAX_SUBPORT_ID
	    && target_node_id <= gm.max_node_id)
	  {
	    GM_INCR_DEBUG_CNT (gm.queued_reliable_send_cnt);
	    if (GM_ENABLE_SHORTCUT && send_len <= GM_MAX_SHORTCUT_MESSAGE_LEN)
	      {
		SHORTCUT_IF_POSSIBLE
		  (DISPATCH (83, "send SDMA started via shortcut"));
	      }
	    FINISH_KNOWN_GOOD_SEND (DISPATCH (84, "send queued normally"));
	  }
	BAD_SEND (port, e);
	DISPATCH (85, "bad send");
      }

      /****************************************
       * Streamlined sends.  (Target port is same as sender port)
       ****************************************/

    case GM_FAST_SEND_EVENT_0...GM_FAST_SEND_HIGH_EVENT_31:
      {
	gm_up_t message;
	gm_u32_t send_len;
	unsigned int size;
	unsigned int target_node_id;
	unsigned int target_subport_id;
	unsigned int sp_id;
	struct gm_fast_send_send_event *e;

	/* load the send info into registers */

	e = GM_SEND_QUEUE_SLOT_EVENT (sqs, fast_send);
	st = port->first_free_send_token;
	message = e->message;
	send_len = e->length;
	size = GM_PRIORITY_SIZE__SIZE (type - GM_FAST_SEND_EVENT_0);

	if (st)
	  {
#if GM_ENABLE_GALVANTECH_WORKAROUND
	    st->reliable.ip_checksum = e->ip_checksum;
#endif
	    target_node_id = e->target_node_id;
	    if (message)
	      {
		target_subport_id
		  = sp_id
		  = GM_SUBPORT ((GM_PRIORITY_SIZE__PRIORITY
				 (type - GM_FAST_SEND_EVENT_0)), port->id);
		if (send_len)
		  {
		    st->reliable.type = GM_ST_RELIABLE;
		    if (!GM_ENABLE_MCP_SEND_LENGTH_CHECK
			|| send_len <= GM_MAX_LENGTH_FOR_SIZE (size))
		      {
			st->reliable.orig_ptr = message;
			st->reliable.send_ptr = message;
			if (target_subport_id <= GM_MAX_SUBPORT_ID)
			  {
			    st->reliable.send_len = send_len;
			    st->reliable.size = size;
			    if (target_node_id <= gm.max_node_id)
			      {
				st->reliable.target_subport_id
				  = target_subport_id;
#if GM_FAST_SMALL_SEND
				st->reliable = 0;
#endif
				gm_assert (send_len
					   <= GM_MAX_SHORTCUT_MESSAGE_LEN);
				GM_INCR_DEBUG_CNT
				  (gm.queued_reliable_send_cnt);
				if (GM_ENABLE_SHORTCUT)
				  {
				    gm_assert
				      (send_len
				       <= GM_MAX_SHORTCUT_MESSAGE_LEN);
				    SHORTCUT_IF_POSSIBLE
				      (DISPATCH
				       (108,
					"fast SDMA started via shortcut"));
				  }
				FINISH_KNOWN_GOOD_SEND
				  (DISPATCH (109, "fast send queued"));
			      }
			  }
		      }
		  }
	      }
	    BAD_SEND (port, e);
	    DISPATCH (86, "bad fast send");
	  }
	else
	  {
	    gm_report_error (port, GM_NEW_SEND_TOKEN_VIOLATION_EVENT);
	    gm_printf_p ("out of send tokens.\n");
	    DISPATCH (87, "send token violation for normal send");
	  }

	/* never get here */
	gm_always_assert (0);

      }

#if GM_ENABLE_ETHERNET
      /****************************************
       * Ethernet sends and broadcasts
       ****************************************/

    case GM_ETHERNET_SEND_EVENT:
    case GM_ETHERNET_MARK_AND_SEND_EVENT:
    case GM_ETHERNET_BROADCAST_EVENT:
    case GM_ETHERNET_MARK_AND_BROADCAST_EVENT:

      {
	struct gm_ethernet_send_event *e;

	e = GM_SEND_QUEUE_SLOT_EVENT (sqs, ethernet);

	/* Delay the send if the earlier send is not complete */

	if (gm.ethernet.send.busy)
	  {
	    /* restore the type that was set to NO_EVENT before switch */
	    sqs->type = type;
	    DISPATCH (88, "delay ether send (busy)");
	  }

	if (port->id != GM_ETHERNET_PORT_ID)
	  {
	    BAD_SEND (port, e);
	    DISPATCH (89, "bad ethernet send (not on ethernet port)");
	  }

	if (type == GM_ETHERNET_SEND_EVENT
	    || type == GM_ETHERNET_MARK_AND_SEND_EVENT)
	  {
	    if (type == GM_ETHERNET_SEND_EVENT)
	      {
		gm_puts ("GM_ETHERNET_SEND_EVENT\n");
		LOG_DISPATCH(0,"GM_ETHERNET_SEND_EVENT");
		gm.ethernet.send.mark = 0;
	      }
	    else
	      {
		gm_assert (type == GM_ETHERNET_MARK_AND_SEND_EVENT);
		gm_puts ("GM_ETHERNET_MARK_AND_SEND_EVENT\n");
		LOG_DISPATCH(0,"GM_ETHERNET_MARK_AND_SEND_EVENT");
		gm.ethernet.send.mark = 1;
	      }
	    gm.ethernet.send.token.common.type = GM_ST_ETHERNET_SEND;

	    /* compute the target to send to */
	    gm.ethernet.send.target = e->target_node_id;
	    if (gm.ethernet.send.target > gm.max_node_id)
	      gm.ethernet.send.target = 0;
	  }
	else
	  {
	    /*** Ethernet broadcasts ***/

	    /* Ethernet broadcasts are handled by unicasting to each
	       remote node.  Note that we do not advance the send token
	       queue until the send is done to prevent queuing multiple
	       ethernet sends.  In this way, order is preserved for
	       ethernet sends, regardless of the destination. */

	    if (type == GM_ETHERNET_BROADCAST_EVENT)
	      {
		gm_puts ("GM_ETHERNET_BROADCAST_EVENT\n");
		LOG_DISPATCH(0,"GM_ETHERNET_BROADCAST_EVENT");
		gm.ethernet.send.mark = 0;
	      }
	    else
	      {
		gm_assert (type == GM_ETHERNET_MARK_AND_BROADCAST_EVENT);
		gm_puts ("GM_ETHERNET_MARK_AND_BROADCAST_EVENT\n");
		LOG_DISPATCH(0,"GM_ETHERNET_MARK_AND_BROADCAST_EVENT");
		gm.ethernet.send.mark = 1;
	      }
	    gm.ethernet.send.token.common.type = GM_ST_ETHERNET_BROADCAST;

	    /* Compute first target node ID. */
	    gm.ethernet.send.target = 0;
	    gm_ethernet_compute_next_broadcast_target ();
	  }

	/* Verify legitimacy of the target node. */
	if (!gm.ethernet.send.target)
	  {
	    gm_puts ("gm_sdma: Not sure where to send ethernet packet.\n");

	    /* Report completion of ethernet send or broadcast. */
	    gm_puts ("generating GM_ETHERNET_SENT_INTERRUPT\n");
	    LOG_DISPATCH(0,"generating GM_ETHERNET_SENT_INTERRUPT");
	    prepare_to_interrupt ("ethernet sent shortcut");
	    gm_interrupt (GM_ETHERNET_SENT_INTERRUPT);
	    gm_ethernet_recycle_first_send_token (port, st);

	    /* Advance host send token queue and dispatch. */
	    gm_puts ("gm_sdma: advancing queue.\n");
	    DISPATCH (90, "ethernet queue advancing");
	  }

	/* Copy the gather list while computing the total length of
	   the send. */
	{
	  gm_u32_t i, total_len = 0;

	  gm.ethernet.send.gather_cnt = e->gather_cnt;

	  for (i = 0; i < gm.ethernet.send.gather_cnt; i++)
	    {
	      gm_dp_t ptr;
	      gm_u32_t len;

	      ptr = e->gather_segment[i].ptr;
	      len = e->gather_segment[i].len;

	      gm_assert (len <= GM_ETHERNET_MTU);
#if !GM_EMULATE_BYTE_DMAS
	      gm_assert (GM_DMA_ALIGNED (ptr));
	      gm_assert (GM_DMA_ALIGNED (len));
#endif
	      gm_assert (len);
	      /* OK for host to cross pages if it knows it is safe */
	      gm.ethernet.send.gather_segment[i].ptr = ptr;
	      gm.ethernet.send.gather_segment[i].len = len;
	      total_len += len;
	    }
	  gm.ethernet.send.total_len =
	    total_len + (gm.ethernet.send.mark ? 2 : 0);
	}

	/* Initialize the remaining ethernet send state. */
	gm_assert (gm.ethernet.send.gather_cnt);
	gm_assert (gm.ethernet.send.token.common.sendable);
	gm_assert (gm.ethernet.send.gather_segment[0].ptr);
	gm_assert (gm.ethernet.send.chunk[0].filled == 0);
	gm_assert (gm.ethernet.send.chunk[1].filled == 0);
	gm.ethernet.send.gather_pos = 0;

	/* Queue the send */
	gm_assert (!gm.ethernet.send.busy);
	gm.ethernet.send.busy = 1;
	gm_assert (gm.ethernet.send.token.common.next == 0);
	append_send_token_to_send_queue (((gm_send_token_t *)
					  & gm.ethernet.send.token),
					 ETHERNET_SUBPORT,
					 gm.ethernet.send.target);
	gm_ethernet_recycle_first_send_token (port, st);

	NOTICE (SDMA_PENDING);
	gm_puts ("Queued a new ethernet send or broadcast.\n");
	DISPATCH (91, "queued new ethernet broadcast");
      }
#endif /* GM_ENABLE_ETHERNET */

      /****************************************
       * Alarms
       ****************************************/
    case GM_SET_ALARM_EVENT:
      {
	struct gm_set_alarm_send_event *e;

	e = GM_SEND_QUEUE_SLOT_EVENT (sqs, set_alarm);
	gm_recycle_first_send_token (port, st);
	if (port->alarm_set)
	  {
	    /* User violated token-regulation protocol. */

	    gm_report_error (port, GM_NEW_ALARM_VIOLATION_EVENT);
	    DISPATCH (92, "alarm violation");
	  }
	gm_lanai_set_alarm (port, e->usecs);
	DISPATCH (93, "alarm set");
      }

    case GM_FLUSH_ALARM_EVENT:
      gm_recycle_first_send_token (port, st);
      if (port->alarm_set)
	{
	  gm_unset_alarm (port);
	  port->alarm_set = 0;
	  gm_report (port, _GM_NEW_FLUSHED_ALARM_EVENT);
	}
      DISPATCH (94, "alarm flushed");

    case GM_SLEEP_RQST_EVENT:
      /* Cause the host to receive a _GM_SLEEP_EVENT. */
      gm_recycle_first_send_token (port, st);
      gm_report (port, _GM_NEW_SLEEP_EVENT);
      /* Record that we should wake the host on the next recv event. */
      port->wake_host = -1;
      DISPATCH (95, "processed sleep request");

#if GM_ENABLE_TRACE
    case GM_TRACE_START_EVENT:
      G_TRACEPTR = (unsigned) gtracebuf;
      gtracebuf[GM_LANAI_NUMTRACE - 1].evt = -1;
      gm_recycle_first_send_token (port, st);
      DISPATCH (96, "trace started");

    case GM_TRACE_STOP_EVENT:
      if (gtracebuf[GM_LANAI_NUMTRACE - 1].evt != -1)
	{
	  /* overrun occurred */
	  int index = (gm_l_trace_t *) G_TRACEPTR - gtracebuf;
	  gm.trace_index = GM_LANAI_NUMTRACE;
	  gm_bcopy (gtracebuf + index, gm.trace_log,
		    sizeof (gtracebuf[0]) * (GM_LANAI_NUMTRACE - index));
	  gm_bcopy (gtracebuf, gm.trace_log + (GM_LANAI_NUMTRACE - index),
		    sizeof (gtracebuf[0]) * index);
	}
      else
	{
	  gm.trace_index = (gm_l_trace_t *) G_TRACEPTR - gtracebuf;
	  gm_bcopy (gtracebuf, gm.trace_log,
		    sizeof (gtracebuf[0]) * gm.trace_index);
	}
      gm_recycle_first_send_token (port, st);
      DISPATCH (97, "trace stopped");
#endif

#if GM_ENABLE_DATAGRAMS
      /****************
       * datagram sends
       ****************/

    case GM_DATAGRAM_SEND_EVENT_0...GM_DATAGRAM_SEND_EVENT_31:
      {
	gm_send_token_t *st;
	gm_up_t message;
	gm_u32_t send_len;
	unsigned int size;
	unsigned int target_node_id;
	unsigned int target_subport_id;
	unsigned int sp_id;
	struct gm_send_send_event *e;

	/* load the send info into registers */

	e = GM_SEND_QUEUE_SLOT_EVENT (sqs, send);
	st = port->first_free_send_token;
	message = e->message;
	send_len = e->length;
	size = type - GM_DATAGRAM_SEND_EVENT_0;
	target_node_id = e->target_node_id;
	target_subport_id = e->target_subport_id;
	sp_id =
	  GM_SUBPORT (GM_SUBPORT_PRIORITY (target_subport_id), port->id);

	if (!st)
	  {
	    gm_report (port, GM_NEW_SEND_TOKEN_VIOLATION_EVENT);
	    gm_printf_p ("out of send tokens.\n");
	    DISPATCH (98, "send token violation for datagram send");
	  }

	/* build the send token */

	st->datagram.type = GM_ST_DATAGRAM;
	st->datagram.send_ptr = message;
	st->datagram.send_len = send_len;
	st->datagram.size = size;
	st->datagram.target_subport_id = target_subport_id;

	gm_assert (port->id == port - gm_port);
	/* Verify that this is a legitimate send */
	if (message
	    && send_len
	    && !(GM_ENABLE_MCP_SEND_LENGTH_CHECK
		 && send_len > GM_MAX_LENGTH_FOR_SIZE (size))
	    && send_len <= GM_MTU
	    && target_subport_id <= GM_MAX_SUBPORT_ID
	    && target_node_id <= gm.max_node_id)
	  {
	    GM_INCR_DEBUG_CNT (gm.queued_datagram_send_cnt);
	    if (GM_ENABLE_SHORTCUT && send_len <= GM_MAX_SHORTCUT_MESSAGE_LEN)
	      {
		DATAGRAM_SHORTCUT_IF_POSSIBLE
		  (DISPATCH (99, "datagram SDMA started via shortcut"));
	      }
	    FINISH_KNOWN_GOOD_SEND (DISPATCH (100, "datagram queued"));
	  }
	BAD_SEND (port, e);
	DISPATCH (101, "bad datagram send");
      }

#if GM_ENABLE_PIO_DATAGRAMS && GM_ENABLE_PIO_SENDS
      /********************
       * PIO datagram sends
       ********************/

    case GM_PIO_DATAGRAM_4_SEND_EVENT:
      {
	gm_send_token_t *st;
	gm_u32_t data;
	const gm_u32_t send_len = 4;
#warning size hack
	const unsigned int size = 12;
	unsigned int target_node_id;
	unsigned int target_subport_id;
	unsigned int sp_id;
	struct gm_pio_datagram_send_event *e;

	/* load the send info into registers */

	e = GM_SEND_QUEUE_SLOT_EVENT (sqs, pio_datagram);
	st = port->first_free_send_token;
	target_node_id = e->target_node_id;
	target_subport_id = e->target_subport_id;
	data = e->data;
	sp_id =
	  GM_SUBPORT (GM_SUBPORT_PRIORITY (target_subport_id), port->id);

	if (!st)
	  {
	    gm_report (port, GM_NEW_SEND_TOKEN_VIOLATION_EVENT);
	    gm_printf_p ("out of send tokens.\n");
	    DISPATCH (102, "send token violation for PIO datagram");
	  }

	/* build the send token */

	st->pio_datagram.type = GM_ST_DATAGRAM;
	st->pio_datagram.send_len = send_len;
	st->pio_datagram.size = size;
	st->pio_datagram.target_subport_id = target_subport_id;
	st->pio_datagram.data[0] = data;

	gm_assert (port->id == port - gm_port);
	/* Verify that this is a legitimate send */
	if (!(GM_ENABLE_MCP_SEND_LENGTH_CHECK
	      && send_len > GM_MAX_LENGTH_FOR_SIZE (size))
	    && target_subport_id <= GM_MAX_SUBPORT_ID
	    && target_node_id <= gm.max_node_id)
	  {
	    GM_INCR_DEBUG_CNT (gm.queued_datagram_sends);
	    if (GM_ENABLE_SHORTCUT && send_len <= GM_MAX_SHORTCUT_MESSAGE_LEN)
	      {
		PIO_DATAGRAM_SHORTCUT_IF_POSSIBLE
		  (DISPATCH (103, "datagram SDMA started via shortcut"));
	      }
	    FINISH_KNOWN_GOOD_SEND (DISPATCH (104, "datagram send enqueued"));
	  }
	BAD_SEND (port, e);
	DISPATCH (105, "bad PIO datagram send");
      }
#endif /* GM_ENABLE_PIO_SENDS */
#endif /* GM_ENABLE_DATAGRAMS */

    default:
      BAD_SEND (port, sqs);
      DISPATCH (106, "unrecognized send event");
    }

  gm_always_assert (0);
}
GM_END_HANDLER;
#endif /* LZERO == 0 */

/************
 * SDMAing
 ************/

MARK_LABEL (L_sdma__start_sdma_, LZERO)
{
  gm_packet_header_t *p;

  gm_send_record_t *sr;
  gm_send_token_t *st;
  gm_subport_t *sp;
  gm_connection_t *c;
  unsigned int port_id;
  gm_s32_t rtc;
  gm_s32_t sr__resend_time;
  gm_u32_t seqno;

  gm_assert (gm.free_send_chunk_cnt > 0);
  gm_assert (NOTICED (SDMA_PENDING));

  c = gm.first_active_connection;
  /* prefetch */ rtc = RTC;
  gm_assert (c);
  gm_assert (c >= gm.connection);
  gm_assert (c <= gm.connection + gm.max_node_id);
#ifdef POLL_PENDING
  NOTICE (POLL_PENDING);
#else
  ;
#endif

  sp = c->first_active_send_port;
  gm_assert (sp);
  gm_assert (sp->connection == c);
  gm_assert (sp->next);
  gm_assert (sp->prev);
  /* prefetch */ sr = c->first_send_record;
  gm_assert (sr == 0
	     || (sr >= gm._send_record
		 && sr < gm._send_record + GM_NUM_SEND_RECORDS));

  {
    gm_s32_t sp__delay_until;
    gm_u32_t sp__disabled;

    sp__disabled = sp->disabled;
    sp__delay_until = sp->delay_until;

    /* prefetch */ st = sp->first_send_token;
    gm_assert (sp__disabled || st);
    gm_assert (sp__disabled || (st->common.subport == sp));

    /* prefetch (OK if sr is 0) */ sr__resend_time = sr->resend_time;

    /* If subport is disabled, don't send anything for it. */

    if (sp__disabled || (SEQ_CMP32 (rtc, sp__delay_until) < 0))
      {
	/* Rotate the send list to allow the other subports to send. */
	c->first_active_send_port = sp->next;
	gm.first_active_connection = c->next_active;

	gm_printf_p ("Subport temporarily disabled  disabled=%d "
		     "(0x%x,0x%x) %d\n",
		     sp__disabled, rtc, sp__delay_until,
		     (SEQ_CMP32 (rtc, sp__delay_until) < 0));
	DISPATCH (107, "send subport is disabled");
      }
  }

  /************
   * Resending (if needed)
   ************/

  /* If no ack has been received in too long, resend a message for the
     connection. */


  if (sr && (SEQ_CMP32 (rtc, sr__resend_time + GM_SEND_TIMEOUT) > 0))
    {
      if (SEQ_CMP32 (rtc, c->known_alive_time + GM_CONNECTION_TIMEOUT) > 0)
	{
	  /* There has been a catastrophic connection timeout.  We
	     clean up and close the subports and report errors for any
	     open subport, as well as resetting the connection. */

	  unsigned int cnt = 0;
	  gm_subport_t *first_subport;

	  rewind_connection (c);

	  /* count the number of subports that are affected by
	     the connection outage. */

	  first_subport = sp;
	  do
	    cnt++;
	  while ((sp = sp->next) != first_subport);

	  /* Report the connection error on all subports, except
	     those that are disabled. */

	  while (cnt--)
	    {
	      gm_subport_t *next;

	      next = sp->next;
	      if (!sp->disabled)
		{
		  if (1)
		    {
		      printf ("rtc = 0x%x  known_alive = 0x%x"
				 " sr_resend = 0x%x\n", rtc,
				 c->known_alive_time, sr__resend_time);
		      fflush (stdout);
		    }
		  handle_send_error (sp, GM_SEND_TARGET_NODE_UNREACHABLE,
					(int) (c - gm.connection));
		}
	      sp = next;
	    }
	  gm_connection_reset (c);
	  /* Don't repeat this error for a while */
	  c->known_alive_time = rtc;
	  DISPATCH (108, "connection timeout");
	}
      else
	{
	  /* Reconstruct and resend the message */

	  gm_lp_t lar;

	  st = sr->send_token;
	  gm_assert (st);
	  sp = st->ackable.subport;
	  gm_assert (sp);
	  c = sp->connection;
	  gm_assert (c);

	  p = &gm.send_chunk[LZERO].packet.as_gm.header;
	  p->type = GM_PACKET_TYPE;
	  /* p->subtype = see below; */
	  p->sexno.whole = sr->sexno.whole;
	  /* p->length = see below; */
	  p->target_subport_id = st->ackable.target_subport_id;
	  p->target_node_id = c->ack_packet.target_node_id;
	  p->sender_node_id = gm.this_node_id;
	  p->sender_subport_id = sp->id;
	  port_id = GM_SUBPORT_PORT (sp->id);
	  switch (st->ackable.type)
	    {
	    default:
	      gm_printf ("%s:%d bad type st->ackable.type = %d (0x%x)\n",
			 __FILE__, __LINE__, st->ackable.type,
			 st->ackable.type);
	      fflush (stdout);
	      gm_always_assert (0);
#if GM_ENABLE_DIRECTED_SEND
	    case GM_ST_DIRECTED:
	      {
		/* For directed sends, we align the data within the
		   payload of the packet such that it is properly
		   aligned for a DMA of granularity
		   GM_MAX_DMA_GRANULARITY on the receiver. */

		gm_directed_packet_t *dp;
		gm_up_t source, orig_source;
		gm_remote_ptr_t remote_ptr;
		gm_u32_t remaining_len;
		struct gm_st_directed *std;
		unsigned int p__length;
		unsigned int target_subport_id;
		unsigned int data_len;
		unsigned int wasted_len;

		std = &st->directed;
		source = sr->before_ptr;
		orig_source = std->orig_ptr;
		remaining_len = sr->before_len;
		target_subport_id = std->target_subport_id;
		remote_ptr = std->remote_ptr + (source - orig_source);

		p->target_subport_id = target_subport_id;
		dp = (gm_directed_packet_t *) p;
		dp->target_addr = remote_ptr;
		wasted_len = (remote_ptr & (GM_MAX_DMA_GRANULARITY - 1));
		lar = dp->payload + wasted_len;

		if (source == orig_source)
		  {
		    /* This is the first segment for the message. */
		    if (remaining_len <= GM_MTU)
		      {
			/* only segment for message */
			data_len = remaining_len;
			p->subtype = GM_DIRECTED_DATA_SUBTYPE;
		      }
		    else
		      {
			/* first segment of multi-segment message */
			data_len =
			  (GM_SEGMENT_LEN (wasted_len + remaining_len) -
			   wasted_len);
			p->subtype = GM_DIRECTED_HEAD_DATA_SUBTYPE;
		      }
		  }
		else
		  {
		    /* not the first segment */
		    gm_assert (GM_DMA_ALIGNED (wasted_len /* remote_ptr */ ));
		    if (remaining_len <= GM_MTU)
		      {
			/* last segment of message */
			data_len = remaining_len;
			p->subtype = GM_DIRECTED_TAIL_DATA_SUBTYPE;
		      }
		    else
		      {
			/* body segment of message */
			data_len =
			  (GM_SEGMENT_LEN (wasted_len + remaining_len) -
			   wasted_len);
			p->subtype = GM_DIRECTED_BODY_DATA_SUBTYPE;
		      }
		  }
		gm_galvantech_set_header_checksum (p);

		p__length = (((char *) lar + data_len)
			     - (char *) GM_HEADER_PAYLOAD (p));
		p->length = p__length;
		gm_galvantech_set_header_checksum (p);

		if (GM_DMA_GRANULARITY == 1)
		  {
		    /* This node supports byte DMA.  No need to shift
		       header. */

		    USER_SDMA (source, lar, data_len, port_id);
		    copy_route (c->route, gm.send_chunk[LZERO].route);
		    gm.send_chunk[LZERO].smp
		      = (gm.send_chunk[LZERO].packet.as_bytes - c->route_len);
#if GM_ENABLE_CRC32
		    gm.send_chunk[LZERO].smh
		      =
		      c->route_len ? (gm.send_chunk[LZERO].packet.as_bytes - 1) : 0;
#endif
		    gm.send_chunk[LZERO].smlt
		      = gm.send_chunk[LZERO].packet.as_gm.payload + p__length;
		  }
		else if (0 == (wasted_len	/* (remote_ptr - source) */
			       & (GM_DMA_GRANULARITY - 1)))
		  {
		    /* Got lucky: Don't need to shift header despite
		       chunky DMA. */

		    USER_SDMA (source, lar,
			       data_len + 2 * (GM_DMA_GRANULARITY - 1),
			       port_id);
		    copy_route (c->route, gm.send_chunk[LZERO].route);
		    gm.send_chunk[LZERO].smp
		      = (gm.send_chunk[LZERO].packet.as_bytes - c->route_len);
#if GM_ENABLE_CRC32
		    gm.send_chunk[LZERO].smh
		      =
		      c->route_len ? (gm.send_chunk[LZERO].packet.as_bytes -
				      1) : 0;
#endif
		    gm.send_chunk[LZERO].smlt
		      = gm.send_chunk[LZERO].packet.as_gm.payload + p__length;
		  }
		else
		  {
		    int shift;

		    /* need to shift header */

		    /* Start the DMA and make sure the first
		       GM_DMA_GRANULARITY bytes have been transferred
		       before continuing. */
		    {
		      gm_up_t aligned_source;
		      gm_lp_t aligned_lar;
		      aligned_source = GM_DMA_ALIGN (up, source);
		      aligned_lar = GM_DMA_ALIGN (lp, lar);
		      if (data_len <= 8 * GM_MAX_DMA_GRANULARITY)
			{
			  /* small, so block during the whole DMA */
			  USER_SDMA (aligned_source,
				     (char *) aligned_lar +
				     GM_DMA_GRANULARITY,
				     data_len + 2 * (GM_DMA_GRANULARITY - 1),
				     port_id);
			  await_free_DMA_engine ();
			}
		      else
			{
			  /* large, so DMA the first tiny part, then
			     block, then start the rest of the DMA. */
			  USER_SDMA (aligned_source,
				     (char *) aligned_lar +
				     GM_DMA_GRANULARITY, GM_DMA_GRANULARITY,
				     port_id);
			  await_free_DMA_engine ();
			  USER_SDMA (aligned_source + GM_DMA_GRANULARITY,
				     ((char *) aligned_lar
				      + 2 * GM_DMA_GRANULARITY),
				     data_len + GM_DMA_GRANULARITY - 2,
				     port_id);
			}
		    }
		    copy_route (c->route, gm.send_chunk[LZERO].route);
		    shift = (GM_DMA_GRANULARITY
			     + ((long) source & (GM_DMA_GRANULARITY - 1))
			     - (wasted_len & (GM_DMA_GRANULARITY - 1)));
		    shift_directed_header
		      ((gm_u32_t *) gm.send_chunk[LZERO].route, shift);
		    gm.send_chunk[LZERO].smp
		      = (gm.send_chunk[LZERO].packet.as_bytes
			 - c->route_len + shift);
#if GM_ENABLE_CRC32
		    gm.send_chunk[LZERO].smh
		      =
		      c->route_len ? (gm.send_chunk[LZERO].packet.as_bytes -
				      1) : 0;
#endif
		    gm.send_chunk[LZERO].smlt
		      = (gm.send_chunk[LZERO].packet.as_gm.payload
			 + p__length + shift);
		  }
		sr->before_ptr = source;
		sr->before_len = remaining_len;
	      }
	      /* WARNING: Don't dereference "p" after the header shift
	         above. */
	      {
		gm_connection_t *c__next_active;
		gm_subport_t *sp__next;

		/* pre */ rtc = RTC;
		/* pre */ sp__next = sp->next;
		/* pre */ c__next_active = c->next_active;
		sr->resend_time = rtc;

		/* Rotate the send list to be fair */

		gm_assert (sp__next);
		c->first_active_send_port = sp__next;
		gm_assert (c__next_active);
		gm.first_active_connection = c__next_active;

		GM_INCR_ERROR_CNT (gm.resend_cnt);

		NOTICE (SDMAING);
		SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, LONE);
		gm_printf_p ("Resent a packet.\n");
		DISPATCH (109, "re SDMA'ed a dirsend packet");
	      }
#endif /* GM_ENABLE_DIRECTED_SEND */

	    case GM_ST_RELIABLE:
#if GM_FAST_SMALL_SEND
	      if (st->reliable.data)
		{
		  int i;
		  int *dest =
		    (int *) &gm.send_chunk[LZERO].packet.as_gm.payload;
		  for (i = 0; i < sr->before_len / sizeof (int); i++)
		    dest[i] = ((int *) st->reliable.data)[i];
		  p->length = sr->before_len;
		  p->subtype = (GM_RELIABLE_DATA_SUBTYPE_0
				+ st->reliable.size);
		  gm_galvantech_set_header_checksum (p);
		  gm.send_chunk[LZERO].smlt
		    = gm.send_chunk[LZERO].packet.as_gm.payload + p->length;
		  /* As we do not really start an SDMA, the state machine
		     will behave as if it finished instantaneously (the
		     DMA_INT_BIT stays high). */
		  /* Jump to set the route; do not start a real SDMA. */
		  break;
		}
#endif
	      lar = gm.send_chunk[LZERO].packet.as_gm.payload;
	      if (sr->before_ptr == st->reliable.orig_ptr)
		{
		  /* This is the first segment for the message. */
		  if (sr->before_len <= GM_MTU)
		    {
		      /* This is a monolithic message */
		      p->length = sr->before_len;
		      p->subtype = (GM_RELIABLE_DATA_SUBTYPE_0
				    + st->reliable.size);
		      USER_SDMA (sr->before_ptr, lar, sr->before_len,
				 port_id);
		      GM_GALVANTECH_SET_IP_CHECKSUM (p,
						     st->reliable.
						     ip_checksum);
		    }
		  else
		    {
		      unsigned long p__length;

		      /* This is a multi-segment message. */
		      gm_assert (st->reliable.size >= 13);
		      p__length = GM_SEGMENT_LEN (sr->before_len);
		      p->length = p__length;
		      p->subtype
			=
			GM_RELIABLE_HEAD_DATA_SUBTYPE_13 +
			(st->reliable.size - 13);
		      USER_SDMA (sr->before_ptr, lar, p__length, port_id);
		    }
		}
	      else
		{
		  /* This is not the first segment for the message. */
		  if (sr->before_len <= GM_MTU)
		    {
		      /* This is the last segment for a multi-segment
		         message. */
		      p->length = sr->before_len;
		      p->subtype = GM_RELIABLE_TAIL_DATA_SUBTYPE;
		      USER_SDMA (sr->before_ptr, lar, sr->before_len,
				 port_id);
		      GM_GALVANTECH_SET_IP_CHECKSUM (p,
						     st->reliable.
						     ip_checksum);
		    }
		  else
		    {
		      unsigned long p__length;

		      /* This is not the last segment for the message. */
		      p__length = GM_SEGMENT_LEN (sr->before_len);
		      p->length = p__length;
		      p->subtype = GM_RELIABLE_BODY_DATA_SUBTYPE;
		      USER_SDMA (sr->before_ptr, lar, p__length, port_id);
		    }
		}
	      gm_galvantech_set_header_checksum (p);
	      gm.send_chunk[LZERO].smlt
		= gm.send_chunk[LZERO].packet.as_gm.payload + p->length;
	    }

	  /* Tell the send state machine what to send. */
	  gm.send_chunk[LZERO].smp = (gm.send_chunk[LZERO].packet.as_bytes
				      - c->route_len);
#if GM_ENABLE_CRC32
	  gm.send_chunk[LZERO].smh =
	    c->route_len ? (gm.send_chunk[LZERO].packet.as_bytes - 1) : 0;
#endif

	  /* Set the route. */

	  copy_route (c->route, gm.send_chunk[LZERO].route);

	  /* Record the time of the resend */

	  sr->resend_time = RTC;

	  /* Rotate the send list to be fair */

	  gm_assert (sp->next);
	  c->first_active_send_port = sp->next;
	  gm_assert (c->next_active);
	  gm.first_active_connection = c->next_active;

	  GM_INCR_ERROR_CNT (gm.resend_cnt);

	  NOTICE (SDMAING);
	  SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, LONE);
	  gm_printf_p ("Resent a packet.\n");
	  DISPATCH (110, "re SDMA'ed a reliable packet");
	}
    }

  /************
   * Sending
   ************/

  /* Find first send token with data to send. */

  while (!st->common.sendable)
    {
      gm_assert (st->common.subport == sp);
      st = st->common.next;
      if (!st)
	{
	  /* Nothing to send for this subport. */

	  /* Rotate the send list to allow the other subports to send. */
	  c->first_active_send_port = sp->next;
	  gm.first_active_connection = c->next_active;

	  gm_printf_p ("Subport (0x%x) Awaiting ack.\n", sp->id);
	  DISPATCH (111, "awaiting ack for subport");
	}
    }

  /* ST is now the token for which we */
  /* should send some data. */

  {
    /* temporary variables to allow pipelining */

    gm_u32_t sexno;
    gm_u32_t target_node_id;
    gm_u32_t this_node_id;
    gm_u32_t subport_id;

    /* preallocate a send record. */
    sr = gm.free_send_records;
    /* Build segment header */
    p = &gm.send_chunk[LZERO].packet.as_gm.header;
    p->type = GM_PACKET_TYPE;

    /* batch reads */

    sexno = c->send_sexno.whole;
    seqno = c->send_sexno.parts.seqno;
    target_node_id = c->ack_packet.target_node_id;
    this_node_id = gm.this_node_id;
    subport_id = sp->id;

    if (!sr)
      {
	/* ??? Should we resend something instead? */
	gm_printf_p ("Aborted new send for want of a send record.\n");
	DISPATCH (112, "out of send records");
      }
    /* batch writes */

    sr->sexno.whole = sexno;
    p->sexno.whole = sexno;
    /* p->length = don't care; */
    p->target_node_id = target_node_id;
    p->sender_node_id = this_node_id;
    p->sender_subport_id = subport_id;
    port_id = GM_SUBPORT_PORT (subport_id);

    gm_printf_p ("starting send (%d, %d).\n",
		 c->send_sexno.parts.sesno, c->send_sexno.parts.seqno);
  }

  switch (st->common.type)
    {
#if GM_ENABLE_ETHERNET
      /************
       * Ethernet
       ************/

    case GM_ST_ETHERNET_SEND:
    case GM_ST_ETHERNET_BROADCAST:
      /* If the ethernet packet needs to be DMAd, start the DMA and
         arrange for the FINISH_SDMA handler to complete the DMA and
         mark the send as pending.  Otherwise, simply mark the send as
         pending. */

      if (gm.ethernet.send.chunk[LZERO].filled)
	{
	  /* No need to DMA the packet.  Just record that it is ready to send
	     and reserve the send chunk. */

	  NOTICE (SEND_PENDING);
	  if (!--gm.free_send_chunk_cnt)
	    NOTICE_NOT (FREE_SEND_CHUNK);
	}
      else
	{
	  gm_puts ("Gathering chunks for an ethernet send.\n");
	  LOG_DISPATCH(0,"Gathering chunks for an ethernet send");
	  gm_assert (gm.ethernet.send.gather_segment[0].ptr);
	  gm_assert (gm.ethernet.send.gather_segment[0].len);
#if !GM_EMULATE_BYTE_DMAS
	  gm_assert (GM_DMA_ALIGNED (gm.ethernet.send.gather_segment[0].ptr));
	  gm_assert (GM_DMA_ALIGNED (gm.ethernet.send.gather_segment[0].len));
#endif

	  gm_puts ("gm_sdma: setting up a DMA.\n");

	  /* Setup DMA first */
#if GM_EMULATE_BYTE_DMAS && (GM_DMA_GRANULARITY > 1)
	  gm_puts ("GM_EMULATE_BYTE_DMAS: 1 int\n");

	  {
	    gm_dp_t src;
	    gm_lp_t dest;
	    gm_u32_t len;

	    if (!gm.ethernet.send.mark)
	      {
		dest = &gm.ethernet.send.chunk[LZERO].packet.as_ethernet;
		src = gm.ethernet.send.gather_segment[0].ptr;
		len = gm.ethernet.send.gather_segment[0].len;

		gm_assert (len);

		start_SDMA (GM_ALIGN (dp, src, 4),
			    GM_ALIGN (lp, dest, 4),
			    GM_ROUNDUP (u32, len + ((long) src & 3), 4),
			    __LINE__);

		/* update byte DMA emulation state */

		/* gm.ethernet.send.byte_dma_emul.saved_word = don't care */
		gm.ethernet.send.next_lar = dest;
		gm.ethernet.send.byte_dma_emul.shift = -(long) src & 3;
		gm.ethernet.send.byte_dma_emul.restore = 0;
	      }
	    else
	      {
		dest = &gm.ethernet.send.chunk[LZERO].packet.as_bytes[2];
		src = gm.ethernet.send.gather_segment[0].ptr;
		len = gm.ethernet.send.gather_segment[0].len;

		gm_assert (len);

		start_SDMA (GM_ALIGN (dp, src, 4),
			    GM_ALIGN (lp, dest, 4),
			    GM_ROUNDUP (u32, len + ((long) src & 3), 4),
			    __LINE__);

		/* update byte DMA emulation state */

		gm.ethernet.send.byte_dma_emul.saved_word
		  = GM_ETHERNET_PACKET_TYPE << 16;
		gm.ethernet.send.next_lar = dest;
		gm.ethernet.send.byte_dma_emul.shift = 2 - ((long) src & 3);
		gm.ethernet.send.byte_dma_emul.restore = 2;
	      }
	  }
	  gm_puts ("GM_EMULATE_BYTE_DMAS: 1 out\n");

#else /* !EMULATE !! GRANULARITY==1 */

	  gm_assert (gm.ethernet.send.gather_segment[0].len);
#if GM_ENABLE_CRC32
	  {
	    gm_dp_t src;
	    gm_lp_t dest;
	    gm_u32_t len;

	    src = gm.ethernet.send.gather_segment[0].ptr;
	    dest = &gm.ethernet.send.chunk[LZERO].packet.as_ethernet;
	    len = gm.ethernet.send.gather_segment[0].len;
	    if (gm.ethernet.send.mark)
	      {
		LOG_DISPATCH(0,"ethernet crc32 with MARK");
		dest = dest + 2;
	      }
	    else 
	      {
		LOG_DISPATCH(0,"ethernet crc32 with no mark");
	      }

	    LOG_DISPATCH(0,"ethernet crc32 - start_SDMA");
	    start_SDMA (src, dest, len, __LINE__);

	    /* Record where to start DMAing the next packet. */
	    gm.ethernet.send.next_lar = (void *) (dest + len);

	    *(gm_u16_t *) gm.ethernet.send.chunk[LZERO].packet.as_bytes =
	      GM_ETHERNET_PACKET_TYPE;
	  }
#else
	  start_SDMA (gm.ethernet.send.gather_segment[0].ptr,
		      &gm.ethernet.send.chunk[LZERO].packet.as_ethernet,
		      gm.ethernet.send.gather_segment[0].len,
		      __LINE__);

	  /* Record where to start DMAing the next packet. */
	  gm.ethernet.send.next_lar
	    = (gm.ethernet.send.chunk[LZERO].packet.as_bytes
	       + gm.ethernet.send.gather_segment[0].len);
#endif /* GM_ENABLE_CRC32 */

#endif

	  NOTICE (SDMAING);
	  SET_HANDLER (FINISH_SDMA_EVENT, L_sdma__finish_ethernet_sdma,);
	}

#if GM_EMULATE_BYTE_DMAS && (GM_DMA_GRANULARITY > 1)
      gm_puts ("EMULATE: copy the route for the send into the send chunk.\n");
      gm_ethernet_copy_route_from_connection
	(c, gm.ethernet.send.chunk[LZERO].route, 0);

      /* Tell the send state machine what to send. */
      gm.send_chunk[LZERO].smp
	= &gm.ethernet.send.chunk[LZERO].route[GM_MAX_NETWORK_DIAMETER
					       - c->route_len];
      gm.send_chunk[LZERO].smlt
	= GM_PACKET_ROUNDUP (lp,
			     gm.ethernet.send.chunk[LZERO].packet.as_bytes
			     + gm.ethernet.send.total_len
			     + (gm.ethernet.send.mark ? 2 : 0));
#else
      gm_puts ("copy the route for the send into the send chunk.\n");
      gm_ethernet_copy_route_from_connection
	(c, gm.ethernet.send.chunk[LZERO].route, GM_ENABLE_CRC32?0:gm.ethernet.send.mark);

#if GM_ENABLE_CRC32
      /* Tell the send state machine what to send. */
      gm.send_chunk[LZERO].smp
	= (&gm.ethernet.send.chunk[LZERO].route[GM_MAX_NETWORK_DIAMETER
						- c->route_len]);

      gm.send_chunk[LZERO].smh
	=
	c->route_len ? gm.ethernet.send.chunk[LZERO].packet.as_bytes - 1 : 0;

      gm.send_chunk[LZERO].smlt
	= gm.ethernet.send.chunk[LZERO].packet.as_bytes
	+ gm.ethernet.send.total_len;
#else

      /* Tell the send state machine what to send. */
      gm.send_chunk[LZERO].smp
	= (&gm.ethernet.send.chunk[LZERO].route[GM_MAX_NETWORK_DIAMETER
						- c->route_len
						-
						(gm.ethernet.send.
						 mark ? 2 : 0)]);

      gm.send_chunk[LZERO].smlt
	= (gm.ethernet.send.chunk[LZERO].packet.as_bytes
	   + gm.ethernet.send.total_len);
#endif /* GM_ENABLE_CRC32 */

#endif /* EMULATE !! GRANULARITY>1 */

      gm_puts ("gm_sdma: removing send token from send queue.\n");
      remove_first_send_token_from_send_queue (st, ETHERNET_SUBPORT);

      /* Determine if there is another unicast required to simulate an
         ethernet broadcast.  If so, appropriately requeue the
         ethernet packet, and mark the DMAd copy of the packet as
         valid so we won't unnecessarily DMA the packet again.
         Otherwise, mark the cached packets as invalid since we know
         we will not be starting another DMA for the same packet. */

      if (st->common.type == GM_ST_ETHERNET_BROADCAST)
	{
	  gm_puts ("st->common.type == BROADCAST\n");
	  gm_ethernet_compute_next_broadcast_target ();
	  if (gm.ethernet.send.target)
	    {
	      /* There are more unicasts for the broadcast, so queue
	         the next one. */
	      gm_assert (st->common.next == 0);
	      append_send_token_to_send_queue (st, ETHERNET_SUBPORT,
					       gm.ethernet.send.target);
	      NOTICE (SDMA_PENDING);
	      gm.ethernet.send.chunk[LZERO].filled = 1;
	    }
	  else
	    {
	      /* If we are not about to DMA into the current ethernet
	         chunk, then we may be done. */
#if 1
	      if (gm.ethernet.send.chunk[LZERO].filled)
		gm_ethernet_maybe_done ();
#endif

	      /* All ethernet sends have been queued. */
	      gm.ethernet.send.chunk[LZERO].filled
		= gm.ethernet.send.chunk[LONE].filled = 0;
	    }
	}

      /* Update MCP state information */
      SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, LONE);
      DISPATCH (113, "started ethernet SDMA");
#endif /* GM_ENABLE_ETHERNET */

      /*finucane: should collapse common code in next 2 cases */
      /************
       * send a reply to a mapper "scout"
       ************/
    case GM_ST_MAPPER_SCOUT_REPLY:
      {
	int i;
	gm_mapper_scout_reply_packet_t *r;
	gm_mapper_scout_reply_stage_t *mrs;
	gm_assert (st->common.type == GM_ST_MAPPER_SCOUT_REPLY);

	mrs = &gm.mapper_state.scout_reply;

	/* copy the route to send chunk */
	for (i = 0; i < GM_MAX_NETWORK_DIAMETER; i++)
	  gm.send_chunk[LZERO].route[i] = mrs->route[i];

	/* copy the packet to send chunk */
	r = &gm.send_chunk[LZERO].packet.as_mapper_packet.scout_reply;
	gm_bcopy (&mrs->packet, r, sizeof (*r));
	// *r = mrs->packet;

	/* Tell send state machine what to DMA */
	gm.send_chunk[LZERO].smp = (gm_u8_t *) r - mrs->route_len;
#if GM_ENABLE_CRC32
	gm.send_chunk[LZERO].smh = mrs->route_len ? (gm_u8_t *) r - 1 : 0;
#endif
	gm.send_chunk[LZERO].smlt = r + 1;

	/* Remove send token from send queue. */
	remove_first_send_token_from_send_queue (st, 0);	/* HACK */
	mrs->in_send_queue = 0;

	/* Update MCP state information */
	SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, LONE);
	/* Reserve the send chunk we are using.  This must be done
	   only once the chunk is sendable. */
	if (!--gm.free_send_chunk_cnt)
	  {
	    NOTICE (SEND_PENDING);
	    NOTICE_NOT (FREE_SEND_CHUNK);
	    DISPATCH (114, "started mapper reply SDMA (send chunks full)");
	  }
	NOTICE (SEND_PENDING);
	gm_assert (NOTICED (FREE_SEND_CHUNK));
	DISPATCH (115, "started mapper reply SDMA (send chunks available)");
      }
      /************
       * send a reply to a mapper "config" message.
       ************/
    case GM_ST_MAPPER_CONFIG_REPLY:
      {
	int i;
	gm_mapper_config_reply_packet_t *r;
	gm_mapper_config_reply_stage_t *mrs;

	gm_assert (st->common.type == GM_ST_MAPPER_CONFIG_REPLY);

	mrs = &gm.mapper_state.config_reply;

	/* copy the route to send chunk */
	for (i = 0; i < GM_MAX_NETWORK_DIAMETER; i++)
	  gm.send_chunk[LZERO].route[i] = mrs->route[i];

	/* copy the packet to send chunk */
	r = &gm.send_chunk[LZERO].packet.as_mapper_packet.config_reply;
	gm_bcopy (&mrs->packet, r, sizeof (*r));
	// *r = mrs->packet;

	/* Tell send state machine what to DMA */
	gm.send_chunk[LZERO].smp = (gm_u8_t *) r - mrs->route_len;
#if GM_ENABLE_CRC32
	gm.send_chunk[LZERO].smh = mrs->route_len ? (gm_u8_t *) r - 1 : 0;
#endif
	gm.send_chunk[LZERO].smlt = r + 1;

	/* Remove send token from send queue. */
	remove_first_send_token_from_send_queue (st, 0);	/* HACK */
	mrs->in_send_queue = 0;

	/* Update MCP state information */
	SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, LONE);
	/* Reserve the send chunk we are using.  This must be done
	   only once the chunk is sendable. */
	if (!--gm.free_send_chunk_cnt)
	  {
	    NOTICE (SEND_PENDING);
	    NOTICE_NOT (FREE_SEND_CHUNK);
	    DISPATCH (116, "started mapper config SDMA (out of send chunks)");
	  }
	NOTICE (SEND_PENDING);
	gm_assert (NOTICED (FREE_SEND_CHUNK));
	DISPATCH (117, "started mapper config SDMA (send chunks available)");
      }
    default:
      gm_printf ("%s:%d  st->common.type = %d (0x%x) is bad\n",
		 __FILE__, __LINE__, st->common.type, st->common.type);
      fflush (stdout);
      gm_always_assert (0);
      break;

#if GM_ENABLE_DIRECTED_SEND
    case GM_ST_DIRECTED:
      {
	/* For directed sends, we align the data within the payload
	   of the packet such that it is properly aligned for a
	   DMA of granularity GM_MAX_DMA_GRANULARITY on the
	   receiver. */

	gm_directed_packet_t *dp;
	gm_up_t source, orig_source;
	gm_remote_ptr_t remote_ptr;
	gm_lp_t lar;
	gm_u32_t remaining_len;
	struct gm_st_directed *std;
	unsigned int p__length;
	unsigned int target_subport_id;
	unsigned int data_len;
	unsigned int wasted_len;

	c->send_sexno.parts.seqno = seqno + 1;

	std = &st->directed;
	source = std->send_ptr;
	orig_source = std->orig_ptr;
	remaining_len = std->send_len;
	target_subport_id = std->target_subport_id;
	remote_ptr = std->remote_ptr + (source - orig_source);

	p->target_subport_id = target_subport_id;
	dp = (gm_directed_packet_t *) p;
	dp->target_addr = remote_ptr;
	wasted_len = remote_ptr & (GM_MAX_DMA_GRANULARITY - 1);
	lar = dp->payload + wasted_len;

	if (source == orig_source)
	  {
	    /* This is the first segment for the message. */
	    if (remaining_len <= GM_MTU)
	      {
		/* only segment for message */
		data_len = remaining_len;
		p->subtype = GM_DIRECTED_DATA_SUBTYPE;
		/* std->send_ptr not needed */
		std->send_len = 0;
	      }
	    else
	      {
		/* first segment of multi-segment message */
		data_len = (GM_SEGMENT_LEN (wasted_len + remaining_len)
			    - wasted_len);
		p->subtype = GM_DIRECTED_HEAD_DATA_SUBTYPE;
		std->send_ptr = source + data_len;
		std->send_len = remaining_len - data_len;
	      }
	  }
	else
	  {
	    /* not the first segment */
	    gm_assert (GM_DMA_ALIGNED (remote_ptr));
	    if (remaining_len <= GM_MTU)
	      {
		/* last segment of message */
		data_len = remaining_len;
		p->subtype = GM_DIRECTED_TAIL_DATA_SUBTYPE;
		/* std->send_ptr is not needed */
		std->send_len = 0;
	      }
	    else
	      {
		/* body segment of message */
		data_len = (GM_SEGMENT_LEN (wasted_len + remaining_len)
			    - wasted_len);
		p->subtype = GM_DIRECTED_BODY_DATA_SUBTYPE;
		std->send_ptr = source + data_len;
		std->send_len = remaining_len - data_len;
	      }
	  }

	p__length =
	  ((char *) lar + data_len) - (char *) GM_HEADER_PAYLOAD (p);
	p->length = p__length;
	gm_galvantech_set_header_checksum (p);

	if (GM_DMA_GRANULARITY == 1)
	  {
	    /* This node supports byte DMA.  No need to shift header. */

	    if (GM_DEBUG_DIRECTED_SEND)
	      {
		gm_printf (GM_STR ("starting byte DMA\n"));
	      }
	    USER_SDMA (source, lar, data_len, port_id);
	    copy_route (c->route, gm.send_chunk[LZERO].route);
	    gm.send_chunk[LZERO].smp = (gm.send_chunk[LZERO].packet.as_bytes
					- c->route_len);
#if GM_ENABLE_CRC32
	    gm.send_chunk[LZERO].smh =
	      c->route_len ? (gm.send_chunk[LZERO].packet.as_bytes - 1) : 0;
#endif
	    gm.send_chunk[LZERO].smlt
	      = gm.send_chunk[LZERO].packet.as_gm.payload + p__length;

	    gm_assert (gm.send_chunk[LZERO].smlt > gm.send_chunk[LZERO].smp);
	  }
	else if (0 == ((remote_ptr - source) & (GM_DMA_GRANULARITY - 1)))
	  {
	    /* Got lucky: Don't need to shift header despite chunky DMA. */

	    if (GM_DEBUG_DIRECTED_SEND)
	      {
		gm_printf (GM_STR ("starting lucky DMA of length 0x%x\n"),
			   data_len);
		gm_bzero (GM_DMA_ALIGN (lp, lar),
			  GM_DMA_ALIGN (size,
					data_len + 2 * (GM_DMA_GRANULARITY -
							1)));
	      }
	    USER_SDMA (source, lar, data_len + 2 * (GM_DMA_GRANULARITY - 1),
		       port_id);
	    copy_route (c->route, gm.send_chunk[LZERO].route);
	    gm.send_chunk[LZERO].smp = (gm.send_chunk[LZERO].packet.as_bytes
					- c->route_len);
#if GM_ENABLE_CRC32
	    gm.send_chunk[LZERO].smh =
	      c->route_len ? (gm.send_chunk[LZERO].packet.as_bytes - 1) : 0;
#endif
	    gm.send_chunk[LZERO].smlt
	      = gm.send_chunk[LZERO].packet.as_gm.payload + p__length;
	  }
	else
	  {
	    int shift;

	    /* need to shift header */

	    if (GM_DEBUG_DIRECTED_SEND)
	      {
		gm_printf (GM_STR ("starting shift DMA\n"));
	      }
	    
	    /* Start the DMA and make sure the first GM_DMA_GRANULARITY
	       bytes have been transferred before continuing. */
	    {
	      gm_up_t aligned_source;
	      gm_lp_t aligned_lar;
	      aligned_source = GM_DMA_ALIGN (up, source);
	      aligned_lar = GM_DMA_ALIGN (lp, lar);
	      if (data_len <= 8 * GM_MAX_DMA_GRANULARITY)
		{
		  /* small, so block during the whole DMA */
		  USER_SDMA (aligned_source,
			     (char *) aligned_lar + GM_DMA_GRANULARITY,
			     data_len + 2 * (GM_DMA_GRANULARITY - 1),
			     port_id);
		  await_free_DMA_engine ();
		}
	      else
		{
		  /* large, so DMA the first tiny part, then block, then
		     start the rest of the DMA. */
		  USER_SDMA (aligned_source,
			     (char *) aligned_lar + GM_DMA_GRANULARITY,
			     GM_DMA_GRANULARITY, port_id);
		  await_free_DMA_engine ();
		  USER_SDMA (aligned_source + GM_DMA_GRANULARITY,
			     (char *) aligned_lar + 2 * GM_DMA_GRANULARITY,
			     data_len + GM_DMA_GRANULARITY - 2, port_id);
		}
	    }
	    copy_route (c->route, gm.send_chunk[LZERO].route);
	    shift = (GM_DMA_GRANULARITY
		     + ((long) source & (GM_DMA_GRANULARITY - 1))
		     - (wasted_len & (GM_DMA_GRANULARITY - 1)));
	    if (GM_DEBUG_DIRECTED_SEND)
	      {
		gm_printf (GM_STR ("before shift: "));
		gm_hex_dump (gm.send_chunk[LZERO].packet.as_bytes - c->route_len,
			     (c->route_len + sizeof (gm_packet_header_t)
			      + p__length + shift));
	      }
	    shift_directed_header ((gm_u32_t *) gm.send_chunk[LZERO].route,
				   shift);
	    gm.send_chunk[LZERO].smp = (gm.send_chunk[LZERO].packet.as_bytes
					- c->route_len + shift);
#if GM_ENABLE_CRC32
	    gm.send_chunk[LZERO].smh =
	      c->route_len ? (gm.send_chunk[LZERO].packet.as_bytes - 1) : 0;
#endif
	    gm.send_chunk[LZERO].smlt
	      = gm.send_chunk[LZERO].packet.as_gm.payload + p__length + shift;

	    if (GM_DEBUG_DIRECTED_SEND)
	      {
		gm_printf (GM_STR ("@@@@@@@@@@@@@: "));
		gm_hex_dump ((gm.send_chunk[LZERO].packet.as_bytes
			      - c->route_len + shift),
			     (c->route_len + sizeof (gm_packet_header_t)
			      + p__length));
	      }
	  }
	sr->before_ptr = source;
	sr->before_len = remaining_len;

	if (GM_DEBUG_DIRECTED_SEND)
	  {
	    gm_remote_ptr_t check_remote;
	    gm_u16_t check_len, u16;
	    gm_u8_t expected_value, *start, *limit, u8;
	    int i;

	    /* wait for the DMA to complete before checking the packet */
	    await_free_DMA_engine ();

	    /* compute the location of the shifted packet. */
	    dp = (gm_directed_packet_t *) ((char *) gm.send_chunk[LZERO].smp
					   + c->route_len);

	    /* confirm header fields */
	    gm_bcopy (&dp->header.type, &u16, 2);
	    gm_always_assert (u16 == GM_PACKET_TYPE);
	    gm_bcopy (&dp->header.subtype, &u16, 2);
	    gm_always_assert (u16 == GM_DIRECTED_DATA_SUBTYPE
			      || u16 == GM_DIRECTED_HEAD_DATA_SUBTYPE
			      || u16 == GM_DIRECTED_BODY_DATA_SUBTYPE
			      || u16 == GM_DIRECTED_TAIL_DATA_SUBTYPE);
	    gm_bcopy (&dp->header.target_node_id, &u16, 2);
	    gm_always_assert (u16 == c->ack_packet.target_node_id);
	    gm_bcopy (&dp->header.sender_node_id, &u16, 2);
	    gm_always_assert (u16 == gm.this_node_id);
	    gm_bcopy (&dp->header.length, &check_len, sizeof (check_len));
	    gm_always_assert (check_len == p__length);
	    gm_bcopy (&dp->header.target_subport_id, &u8, 1);
	    gm_always_assert (u8 == target_subport_id);
	    gm_bcopy (&dp->header.target_subport_id, &u8, 1);
	    gm_always_assert (u8 == sp->id);
	    gm_bcopy (&dp->target_addr, &check_remote, sizeof (check_remote));
	    gm_always_assert (remote_ptr == check_remote);

	    /* confirm the expected payload */

	    start = dp->payload + (remote_ptr & (GM_MAX_DMA_GRANULARITY - 1));
	    limit = dp->payload + check_len - sizeof (gm_remote_ptr_t);
	    expected_value = 254;
	    gm_printf (GM_STR ("sending from %p: "), start);
	    gm_hex_dump (start, limit - start);
	    for (i = 0; i < limit - start; i++)
	      {
		if (start[i] != expected_value)
		  {
		    gm_printf ("\nstart[0x%x] == 0x%x != 0x%x\n",
			    i, start[i], expected_value);
		    gm_printf ("src_align = %d, dest_align = %d\n",
			    (int) source & (GM_MAX_DMA_GRANULARITY - 1),
			    (int) start & (GM_MAX_DMA_GRANULARITY - 1));
		    fflush (stdout);
		  }
		--expected_value;
		if (expected_value == 0)
		  expected_value = 254;
	      }
	    gm_printf ("\n");
	    fflush (stdout);
	  }
      }
      /* WARNING: Don't dereference "p" after the header shift above. */
      {
	gm_subport_t *sp__next;
	gm_send_record_t *sr__next;
	gm_send_record_t *c__first_send_record, *c__last_send_record;
	gm_connection_t *c__next_active;

	/* Commit to using send record; */
	gm_assert (sr != sr->next);
	gm_assert (sr == gm.free_send_records);
	/* pre */ sr__next = sr->next;
	/* pre */ rtc = RTC;
	sr->send_token = st;
	gm.free_send_records = sr__next;
	sr->resend_time = rtc;
	/* pre */ c__first_send_record = c->first_send_record;
	/* pre */ c__last_send_record = c->last_send_record;

	/* Add send record to unacked send record list. */
	/* pre */ sp__next = sp->next;
	if (c__first_send_record)
	  c->last_send_record = c__last_send_record->next = sr;
	else
	  c->first_send_record = c->last_send_record = sr;
	/* pre */ c__next_active = c->next_active;

	if (GM_DEBUG_DIRECTED_SEND)
	  {
	    gm_printf (GM_STR ("before FINISH_FINISH_SDMA_START():"));
	    gm_hex_dump (gm.send_chunk[LZERO].smp,
			 (char *) gm.send_chunk[LZERO].smlt
			 - (char *) gm.send_chunk[LZERO].smp);
	  }

	FINISH_FINISH_SDMA_START (DISPATCH (118, "started directed SDMA"));
      }
#endif /* GM_ENABLE_DIRECTED_SEND */
    case GM_ST_RAW:
#if GM_LOG_DISPATCHES
#if LZERO == 0
      gm.current_handler = "L_sdma__start_sdma_0 (case GM_ST_RAW)";
#else
      gm.current_handler = "L_sdma__start_sdma_1 (case GM_ST_RAW)";
#endif
#endif
      p->target_subport_id = st->raw.target_subport_id;
      sr->before_ptr = st->raw.send_ptr;
      sr->before_len = st->raw.total_length;

      /* Note: no need to free the partially built send record, as it is still
         in the free send list. */
      gm_assert (gm.free_send_records == sr);
      {
	unsigned sp_id = sp->id;
	gm_assert (GM_SUBPORT_PRIORITY (sp_id) == GM_HIGH_PRIORITY);
	/* gm_assert (GM_SUBPORT_PORT (sp_id) == port->id); */
	/* Start the DMA of the message with route on the beginning.  The
	   message will have DMA granularity and alignment, but the route
	   on the front is byte aligned, so DMA extra on the beginning as
	   needed. */
#if GM_ENABLE_CRC32
	USER_SDMA (GM_DMA_ALIGN (up, st->raw.orig_ptr),
		   gm.send_chunk[LZERO].packet.as_bytes -
		   st->raw.route_length, GM_DMA_ROUNDUP (u32,
							 st->raw.
							 total_length),
		   port_id);
#else
	USER_SDMA (GM_DMA_ALIGN (up, st->raw.orig_ptr),
		   gm.send_chunk[LZERO].route,
		   GM_DMA_ROUNDUP (u32, st->raw.total_length), port_id);
#endif
	GM_GALVANTECH_SET_IP_CHECKSUM (p, st->raw.ip_checksum);
	/* Specify what data is to be sent after DMA completes. */
#if GM_ENABLE_CRC32
	gm.send_chunk[LZERO].smp =
	  (gm.send_chunk[LZERO].packet.as_bytes - st->raw.route_length);
	gm.send_chunk[LZERO].smlt =
	  (gm.send_chunk[LZERO].smp + st->raw.total_length);
	gm.send_chunk[LZERO].smh = 
	   st->raw.route_length?gm.send_chunk[LZERO].packet.as_bytes - 1 : 0;
#else
	gm.send_chunk[LZERO].smp = (gm.send_chunk[LZERO].route
				    + ((int) st->raw.orig_ptr
				       & (GM_PACKET_GRANULARITY - 1)));
	gm.send_chunk[LZERO].smlt = ((char *) gm.send_chunk[LZERO].smp
				     + st->raw.total_length);
#endif

	/* Remove raw send from send list and pass the send token back to the
	   host. */
	remove_first_send_token_from_send_queue (st, sp_id);
	/* HACK within a HACK: Because (a) the message is transferred
	   with a single DMA, (b) the message is delivered unreliably,
	   and (c) the following must wait for the DMA to complete
	   before informing the host that the send is done, it is safe
	   to call before we know the message has been DMAd. */
	pass_sent_token_to_port_and_free (st,
					  &gm_port[GM_SUBPORT_PORT (sp_id)],
					  0, GM_SUCCESS);
	NOTICE (SDMAING);
	SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, LONE);
	gm_printf_p ("Sent a new raw message.\n");
      }
      DISPATCH (119, "started raw SDMA");

    case GM_ST_RELIABLE:
      {
	struct gm_st_reliable *str = &st->reliable;
	gm_lp_t lar;
	gm_up_t str__send_ptr, str__orig_ptr;
	gm_u32_t str__send_len, str__size;
	gm_u32_t str__target_subport_id;
	gm_u32_t p__length;

	c->send_sexno.parts.seqno = seqno + 1;

	str__send_ptr = str->send_ptr;
	str__orig_ptr = str->orig_ptr;
	str__send_len = str->send_len;
	str__target_subport_id = str->target_subport_id;
	str__size = str->size;
	lar = gm.send_chunk[LZERO].packet.as_gm.payload;

	p->target_subport_id = str__target_subport_id;
	sr->before_ptr = str__send_ptr;
	sr->before_len = str__send_len;

#if GM_FAST_SMALL_SEND
	if (str->data)
	  {
	    int i;
	    int *dest = (int *) &gm.send_chunk[LZERO].packet.as_gm.payload;
	    for (i = 0; i < str->send_len / sizeof (int); i++)
	      dest[i] = ((int *) str->data)[i];
	    p->length = p__length = sr->before_len;
	    p->subtype = (GM_RELIABLE_DATA_SUBTYPE_0 + st->reliable.size);
	    gm_galvantech_set_header_checksum (p);
	    str->send_len = 0;	/* common.sendable = 0 */
	    /* As we do not really start an SDMA, the state machine
	       will behave as if it finished instantaneously (the
	       DMA_INT_BIT stays high). */
	    FINISH_SDMA_START (DISPATCH (120, "started fast small SDMA"));
	  }
#endif

	GM_LOG_EVT (GM_AFTER_PAGE_TRANSLATION);

	if (str__send_ptr == str__orig_ptr)
	  {
	    /* This is the first segment for the message. */
	    if (str__send_len <= GM_MTU)
	      {
		/* This is a monolithic message */

		p->length = p__length = str__send_len;
		p->subtype = GM_RELIABLE_DATA_SUBTYPE_0 + str__size;
		gm_galvantech_set_header_checksum (p);

		USER_SDMA (str__send_ptr, lar,
			   str__send_len + GM_DMA_GRANULARITY - 1, port_id);
		GM_GALVANTECH_SET_IP_CHECKSUM (p, st->reliable.ip_checksum);

		str->send_len = 0;

		FINISH_SDMA_START (DISPATCH (121, "started monolith SDMA"));
	      }
	    else
	      {
		/* This is a multi-segment message. */
		gm_assert (str__size >= 13);

		p->length = p__length = GM_SEGMENT_LEN (str__send_len);
		p->subtype = (GM_RELIABLE_HEAD_DATA_SUBTYPE_13
			      + (str__size - 13));
		gm_galvantech_set_header_checksum (p);

		USER_SDMA (str__send_ptr, lar, p__length, port_id);

		str->send_ptr = str__send_ptr + p__length;
		str->send_len = str__send_len - p__length;

		FINISH_SDMA_START (DISPATCH (122, "started head SDMA"));
	      }
	  }
	else
	  {
	    /* This is not the first segment for the message. */
	    if (str__send_len <= GM_MTU)
	      {
		/* This is the last segment for a multi-segment message. */
		p->length = p__length = str__send_len;
		p->subtype = GM_RELIABLE_TAIL_DATA_SUBTYPE;
		gm_galvantech_set_header_checksum (p);

		USER_SDMA (str__send_ptr, lar, str__send_len, port_id);
		GM_GALVANTECH_SET_IP_CHECKSUM (p, st->reliable.ip_checksum);

		str->send_len = 0;

		FINISH_SDMA_START (DISPATCH (123, "started tail SDMA"));
	      }
	    else
	      {
		/* This is not the last segment for the message. */
		p->length = p__length = GM_SEGMENT_LEN (str__send_len);
		p->subtype = GM_RELIABLE_BODY_DATA_SUBTYPE;
		gm_galvantech_set_header_checksum (p);

		USER_SDMA (str__send_ptr, lar, p__length, port_id);

		str->send_ptr = str__send_ptr + p__length;
		str->send_len = str__send_len - p__length;

		FINISH_SDMA_START (DISPATCH (124, "started body SDMA"));
	      }
	  }
      }

#if GM_ENABLE_DATAGRAMS
    case GM_ST_DATAGRAM:
      {
	struct gm_st_datagram *std = &st->datagram;
	gm_lp_t lar;
	gm_up_t std__send_ptr;
	gm_u32_t std__send_len, std__size;
	gm_u32_t std__target_subport_id;
	gm_u32_t p__length;
	unsigned int target_node_id;
	unsigned int sp__id;
	unsigned int gm_this_node_id;
	unsigned int c__route_len;

	/* start the SDMA */

	std__send_ptr = std->send_ptr;
	std__send_len = std->send_len;
	gm_assert (std__send_len <= GM_MTU);
	lar = gm.send_chunk[LZERO].packet.as_gm.payload;
	/* pre */ std__target_subport_id = std->target_subport_id;
	/* pre */ std__size = std->size;

	USER_SDMA (std__send_ptr, lar, std__send_len + GM_DMA_GRANULARITY - 1,
		   port_id);

	/* build the packet */

	copy_route (c->route, gm.send_chunk[LZERO].route);
	p->type = GM_PACKET_TYPE;
	/* pre */ target_node_id = c->ack_packet.target_node_id;
	/* pre */ gm_this_node_id = gm.this_node_id;
	p->subtype = GM_DATAGRAM_SUBTYPE_0 + std__size;
	p->target_node_id = target_node_id;
	p->sender_node_id = gm_this_node_id;
	/* p->sexno = don't care */
	/* pre */ sp__id = sp->id;
	p->length = p__length = std__send_len;
	/* pre */ c__route_len = c->route_len;
	p->target_subport_id = std__target_subport_id;
	p->sender_subport_id = sp__id;

	/* record what to send */

	gm.send_chunk[LZERO].smp = (gm_u8_t *) p - c__route_len;
	gm.send_chunk[LZERO].smlt = (gm_u8_t *) (p + 1) + std__send_len;
	if (GM_ENABLE_CRC32)
	  gm.send_chunk[LZERO].smh = c__route_len?(char *) p - 1 : 0;

	/* dequeue the send and report send completion */

	rotate_send_queue (sp, c);
	remove_send_token_from_send_queue (st, sp__id);
	pass_sent_token_to_port_and_free
	  (st, &gm.port[port_id], 0, GM_SUCCESS);

	/* update state */

	NOTICE (SDMAING);
	SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, LONE);
	gm_printf_p ("Sent a datagram\n");
	DISPATCH (125, "started datagram SDMA");
      }
#if GM_ENABLE_PIO_SENDS && GM_ENABLE_PIO_DATAGRAMS
    case GM_ST_PIO_DATAGRAM:
      {
	struct gm_st_pio_datagram *std = &st->pio_datagram;
	gm_lp_t lar;
	gm_up_t std__send_ptr;
	gm_u32_t std__send_len, std__size;
	gm_u32_t std__target_subport_id;
	gm_u32_t p__length;
	unsigned int target_node_id;
	unsigned int sp__id;
	unsigned int gm_this_node_id;
	unsigned int c__route_len;
	gm_u32_t data;

	/* build the packet */

	copy_route (c->route, gm.send_chunk[LZERO].route);
	p->type = GM_PACKET_TYPE;
	/* pre */ std__size = std->size;
	/* pre */ target_node_id = c->ack_packet.target_node_id;
	/* pre */ gm_this_node_id = gm.this_node_id;
	p->subtype = GM_DATAGRAM_SUBTYPE_0 + std__size;
	p->target_node_id = target_node_id;
	/* pre */ std__send_len = std->send_len;
	p->sender_node_id = gm_this_node_id;
	/* p->sexno = don't care */
	/* pre */ sp__id = sp->id;
	p->length = p__length = std__send_len;
	/* pre */ std__target_subport_id = std->target_subport_id;
	/* pre */ c__route_len = c->route_len;
	/* pre */ data = std->data[0];
	p->target_subport_id = std__target_subport_id;
	p->sender_subport_id = sp__id;
	*(gm_u32_t *) (p + 1) = data;

	/* record what to send */

	gm.send_chunk[LZERO].smp = (gm_u8_t *) p - c__route_len;
	gm.send_chunk[LZERO].smlt = (gm_u8_t *) (p + 1) + std__send_len;
	if (GM_ENABLE_CRC32)
	  gm.send_chunk[LZERO].smh = c__route_len ? (char *) p - 1 : 0;

	/* dequeue the send and report send completion */

	rotate_send_queue (sp, c);
	remove_send_token_from_send_queue (st, sp__id);
	pass_sent_token_to_port_and_free
	  (st, &gm.port[port_id], 0, GM_SUCCESS);

	/* update state */

	NOTICE (SDMAING);
	SET_HANDLER (START_SDMA_EVENT, L_sdma__start_sdma_, LONE);
	gm_printf_p ("Sent a PIO datagram\n");
	DISPATCH (126, "started PIO datagram DMA");
      }
#endif /* GM_ENABLE_PIO_SENDS */
#endif /* GM_ENABLE_DATAGRAMS */
    }
  gm_always_assert (0);
  DISPATCH (0, "never get here");
}
GM_END_HANDLER;

#if LZERO == 0
MARK_LABEL (L_sdma__continue_sdma,)
{
  gm_u32_t port_id;
  gm_u32_t ctr;
  gm_up_t hp;
  gm_lp_t lar;
  gm_dp_t dp;

  /* The DMA is partially completed and stopped on a page boundary. */

  /* prefetch */ port_id = gm.remaining_sdma_port_id;
  /* prefetch */ hp = gm.remaining_sdma_hp;
  /* prefetch */ ctr = gm.remaining_sdma_ctr;
  /* prefetch */ lar = gm.remaining_sdma_lar;

  dp = sdma_addr (hp, port_id);
  GM_PRINT (0, ("critical dp=0x%qx\n", (gm_u64_t) dp));
  if (GM_MTU <= GM_PAGE_LEN	/* short circuit if always true */
      || ctr <= GM_PAGE_LEN)
    {
      start_SDMA (dp, lar, ctr + GM_DMA_GRANULARITY_ROUNDUP, __LINE__);
      SET_HANDLER (FINISH_SDMA_EVENT, L_sdma__finish_sdma,);
      DISPATCH (127, "continuing SDMA (last DMA)");
    }
  else
    {
      start_SDMA (dp, lar, GM_PAGE_LEN, __LINE__);
      gm.remaining_sdma_ctr = ctr - GM_PAGE_LEN;
      gm.remaining_sdma_hp = hp + GM_PAGE_LEN;
      gm.remaining_sdma_lar = (char *) lar + GM_PAGE_LEN;
      DISPATCH (128, "continuing SDMA (more DMAs remain)");
    }
}
GM_END_HANDLER;

/* Normal FINISH_SDMA_EVENT handler: the send DMA has completed, so
   take one send chunk out of the free count, mark a send as pending,
   and update the SDMAING / FREE_SEND_CHUNK state bits. */
MARK_LABEL (L_sdma__finish_sdma,)
{
  gm_u32_t gm_free_send_chunk_cnt;

  /* OPT: Could save 2 insns on the L7 by integrating the dispatch. */

  /* Work on a local copy of the free-chunk count; it is written back
     once the state bits have been updated. */
  gm_free_send_chunk_cnt = gm.free_send_chunk_cnt;
  gm_assert (gm_free_send_chunk_cnt > 0);
  NOTICE (SEND_PENDING);
  --gm_free_send_chunk_cnt;
  /* Branch-free update: SDMAING is always passed to NOTICE_NOT, and
     FREE_SEND_CHUNK is added only when the decremented count is 0,
     because (0 ^ 1) * FREE_SEND_CHUNK == FREE_SEND_CHUNK while
     (1 ^ 1) * FREE_SEND_CHUNK == 0.
     NOTE(review): for a decremented count above 1 the product is
     neither 0 nor FREE_SEND_CHUNK, so this presumably relies on there
     being at most two send chunks -- confirm. */
  NOTICE_NOT (SDMAING | ((gm_free_send_chunk_cnt ^ 1) * FREE_SEND_CHUNK));
  gm.free_send_chunk_cnt = gm_free_send_chunk_cnt;
  DISPATCH (129, "finished SDMA");
}
GM_END_HANDLER;

#if GM_ENABLE_ETHERNET
#  if GM_CPU_lanai
#    error pseudo-Ethernet not supported for LANai-only embedded nodes
#  endif
/* SDMA-completion handler used while an ethernet send is gathered
   into a send chunk, one gather segment per DMA.  Each invocation
   finishes the segment whose DMA just completed (undoing the
   alignment fudging when byte DMAs are emulated) and then either

     - starts the DMA for the next gather segment and dispatches, or
     - after the last segment, calls gm_ethernet_maybe_done (),
       reinstalls L_sdma__finish_sdma for FINISH_SDMA_EVENT, and
       reserves the send chunk.

   The code is written on the assumption that DISPATCH does not fall
   through: the statements after each DISPATCH handle the other case. */
MARK_LABEL (L_sdma__finish_ethernet_sdma,)
{
  gm_u32_t pos = gm.ethernet.send.gather_pos;
#if GM_EMULATE_BYTE_DMAS && (GM_DMA_GRANULARITY > 1)
  gm_lp_t dest;
  gm_dp_t src;
  unsigned int shift;
  unsigned int restore;
  gm_u32_t len;
#endif

  gm_puts ("L_sdma__finish_ethernet_sdma handler.\n");

#if GM_EMULATE_BYTE_DMAS && (GM_DMA_GRANULARITY > 1)
  gm_puts ("GM_EMULATE_BYTE_DMAS: 2 in\n");

  /* Finish up the DMA by correcting any misalignment. */

  /* Recover the state saved when the DMA for segment POS was started.
     The segment's source pointer is not needed for the fix-up, so it
     is only loaded below, for the next segment. */

  len = gm.ethernet.send.gather_segment[pos].len;
  dest = gm.ethernet.send.next_lar;
  shift = gm.ethernet.send.byte_dma_emul.shift;
  restore = gm.ethernet.send.byte_dma_emul.restore;

  /* Shift the DMAd data to its intended destination, if needed.
     NOTE(review): SHIFT is unsigned, yet it is stored below as
     (dest & 3) - (src & 3), which is negative whenever the source is
     the more misaligned of the two.  This presumably relies on
     wraparound arithmetic here and inside
     gm_ether_byte_dma_emul_shift () -- confirm. */

  if (shift)
    {
      gm_ether_byte_dma_emul_shift ((char *) dest - shift, shift, len);
    }

  /* Restore the RESTORE bytes just before dest that were trashed by the
     DMA and/or shift().  They are taken from the copy of the aligned
     word that was saved before the DMA was started. */

  if (restore)
    {
      gm_u32_t *save = &gm.ethernet.send.byte_dma_emul.saved_word;
      gm_lp_t tw = GM_ALIGN (lp, dest, 4);

      if (restore == 1)
	*(gm_u8_t *) tw = *(gm_u8_t *) save;
      else
	{
	  /* Two or three bytes: one halfword, plus one more byte for
	     the three-byte case. */
	  *(gm_u16_t *) tw = *(gm_u16_t *) save;
	  if (restore == 3)
	    ((gm_u8_t *) tw)[2] = ((gm_u8_t *) save)[2];
	}
    }

  /* Update the state.  There is no need to store the updated state
     unless another DMA is started below. */

  dest = (char *) dest + len;

  gm_puts ("GM_EMULATE_BYTE_DMAS: 2 out\n");
#endif

  gm_assert (gm.free_send_chunk_cnt > 0);

  /* If needed, start the next segment of the ethernet gather. */
  pos = ++gm.ethernet.send.gather_pos;
  if (pos < gm.ethernet.send.gather_cnt)
    {
#if GM_EMULATE_BYTE_DMAS && (GM_DMA_GRANULARITY > 1)
      gm_puts ("GM_EMULATE_BYTE_DMAS: 3 in\n");

      /* dest set above. */
      src = gm.ethernet.send.gather_segment[pos].ptr;
      len = gm.ethernet.send.gather_segment[pos].len;
      gm_assert (len);

      /* Save the word containing data that might be trashed by the DMA */

      gm.ethernet.send.byte_dma_emul.saved_word
	= (*(gm_u32_t *) GM_ALIGN (lp, dest, 4));

      /* Start the DMA.  Both ends are aligned to 4 bytes and the
	 length is padded accordingly; the fix-up at the top of this
	 handler repairs the result once the DMA completes. */

      start_SDMA (GM_ALIGN (dp, src, 4),
		  GM_ALIGN (lp, dest, 4),
		  GM_ROUNDUP (u32, len + ((long) src & 3), 4),
		  __LINE__);

      /* update byte DMA emulation state */

      gm.ethernet.send.next_lar = dest;
      gm.ethernet.send.byte_dma_emul.shift = (((long) dest & 3)
					      - ((long) src & 3));
      gm.ethernet.send.byte_dma_emul.restore = (long) dest & 3;
      gm_puts ("GM_EMULATE_BYTE_DMAS: 3 out\n");
#else
      /* Start the DMA */

      gm_assert (gm.ethernet.send.gather_segment[pos].len);
      start_SDMA (gm.ethernet.send.gather_segment[pos].ptr,
		  gm.ethernet.send.next_lar,
		  gm.ethernet.send.gather_segment[pos].len,
		  __LINE__);

      /* Compute start of next DMA */
      gm.ethernet.send.next_lar += gm.ethernet.send.gather_segment[pos].len;
#endif

      gm_puts ("started DMAing the next segment\n");

      DISPATCH (130, "starting next ethernet segment SDMA");
    }

  /* Finished last DMA for ethernet gather. */

  /* Report completion if done. */
  gm_ethernet_maybe_done ();

  /* restore the normal SDMA completion handler */
  SET_HANDLER (FINISH_SDMA_EVENT, L_sdma__finish_sdma,);

  /* Reserve the send chunk we are using.  This must be done only once
     the chunk is sendable. */
  NOTICE (SEND_PENDING);
  if (!--gm.free_send_chunk_cnt)
    {
      /* That was the last free chunk. */
      NOTICE_NOT (SDMAING | FREE_SEND_CHUNK);
      gm_puts ("No more free send chunks.\n");
      DISPATCH (131, "ethernet SDMA done (send chunks full)");
    }
  NOTICE_NOT (SDMAING);
  gm_assert (NOTICED (FREE_SEND_CHUNK));
  gm_puts ("Are more free send chunks.\n");
  DISPATCH (132, "ethernet SDMA done (send chunks available)");
}
GM_END_HANDLER;
#endif /* GM_ENABLE_ETHERNET */
#endif /* LZERO == 0 */

/*
  This file uses GM standard indentation.

  Local Variables:
  c-file-style:"gnu"
  c-backslash-column:72
  tab-width:8
  End:
*/
