
#include <stdarg.h>

#define GM_STRONG_TYPES 0
#include "gmpi.h"

#include "mpid.h"
#include "mpiddev.h"
#include "mpid_bind.h"
#include "packets.h"
#include "queue.h"
#include "reqalloc.h"
#include "chpackflow.h"

#include "queue.h"

#if !GM_DISABLE_ALARM && __GNUC__
#warning GM_DISABLE_ALARM is disabled, enabling the ALARM
#endif

#if !GM_DISABLE_ALARM
#include <signal.h>
#include <sys/time.h>
#include <pthread.h>
#endif

/* FORK and SYSTEM */

#define GMPI_ENABLE_FORK_SYSTEM 1

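/* When GMPI_ENABLE_FORK_SYSTEM is 1, user code may call fork() and system();
   when it is 0, the stubs below replace them, print a warning and return -1. */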

#if (GMPI_ENABLE_FORK_SYSTEM==0)

/*
 * Stand-in for the C library system(), compiled only when
 * GMPI_ENABLE_FORK_SYSTEM is 0.  The command in `cmd` is never run:
 * a warning is emitted through GM_PRINT and -1 is returned.
 */
int system(const char *cmd)
{
  GM_PRINT(0,("***** WARNING - you can NOT call 'system()' in a GM-mpich program\n\n"));
  return -1;
}

/*
 * Stand-in for fork(), compiled only when GMPI_ENABLE_FORK_SYSTEM is 0.
 * No child process is created: a warning is emitted through GM_PRINT and
 * -1 is returned.
 */
int fork()
{
  GM_PRINT(0,("***** WARNING - you can NOT call 'fork()' in a GM-mpich program\n\n"));
  return -1;
}

#endif /* GMPI_ENABLE_FORK_SYSTEM */



#if GM_DEBUG
/* Debug-only queue of receive handles: a handle is enqueued when its
   receive is posted (gmpi_queue_recv / MPID_CH_Check_incoming) and
   dequeued in gmpi_datarecv_event. */
static MPID_QUEUE r_acksent;
#endif


/* Forward declaration; the definition appears later in this file. */
void gmpi_datasent_event(MPIR_SHANDLE *shandle,void *p);

/* Global device state used throughout this file: send/receive tokens,
   DMA byte accounting, the tag free list, the pending send/receive FIFOs
   and the alarm synchronisation objects. */
struct gmpi_var  gmpi;


/* Polled by the alarm helper thread (second_thread); defined elsewhere. */
int  MPID_DeviceCheck(MPID_BLOCKING_TYPE blocking);

#if !GM_DISABLE_ALARM

/*
 * Signal handler installed for SIGVTALRM by alarm_init().
 *
 * If the library is not currently inside a gmpi_lock()/gmpi_unlock()
 * section (gmpi.busy == 0), wake the helper thread through alarm_wake and
 * block on alarm_ack until it signals back.  `sig` is not used.
 *
 * NOTE(review): pthread_cond_signal()/pthread_cond_wait() are not on the
 * POSIX list of async-signal-safe functions -- confirm this is acceptable
 * on the supported platforms.
 * NOTE(review): the wait presumes the interrupted thread owns
 * gmpi.alarm_mutex (alarm_init() locks it) -- confirm.
 */
static void alarm_handler(int sig)
{
  if (gmpi.busy != 0)
    return;

  pthread_cond_signal(&gmpi.alarm_wake);
  pthread_cond_wait(&gmpi.alarm_ack,&gmpi.alarm_mutex);
}

/* Set to 1 by second_thread() once it has locked gmpi.alarm_mutex;
   alarm_init() spins on this flag before continuing. */
static volatile  int second_thread_started = 0;

/*
 * Body of the helper thread created by alarm_init().
 *
 * Each time alarm_wake is signalled it polls MPID_DeviceCheck(0) for as
 * long as the library is idle (gmpi.busy == 0) and the check returns 1,
 * then signals alarm_ack.  The loop never terminates; `p` is not used.
 */
static void *second_thread(void *p)
{
  pthread_mutex_lock(&gmpi.alarm_mutex);
  second_thread_started = 1;

  for (;;) {
    pthread_cond_wait(&gmpi.alarm_wake,&gmpi.alarm_mutex);

    /* empty body: all the work happens in the loop condition */
    while (gmpi.busy == 0 && MPID_DeviceCheck(0) == 1) {
    }

    pthread_cond_signal(&gmpi.alarm_ack);
  }
  return 0;
}

#endif

/*
 * Return 1 when a receive of `len` bytes can be posted right now, else 0.
 *
 * Three conditions must all hold: a receive token is available, a tag is
 * on the free list, and adding `len` keeps the receive-side DMA byte count
 * strictly below its configured maximum.
 */
static gm_inline int able_to_recv(int len)
{
  if (gmpi.rtoken <= 0)
    return 0;
  if (!gmpi.free_tag)
    return 0;
  return gmpi.dma_bytes[GMPI_RDMA] + len < gmpi.max_dma_bytes[GMPI_RDMA];
}


/*
 * Return 1 when a send to global rank `grank` can be posted right now,
 * else 0.
 *
 *  - A send token must be available for every kind of message.
 *  - A data message (`is_data` non-zero) must also fit in the send-side
 *    DMA budget: the current byte count plus `len` may not exceed the
 *    configured maximum.
 *  - With GMPI_CREDITS defined, a control message additionally needs at
 *    least one credit left for the destination rank.
 */
static gm_inline int able_to_send(int grank, int len, int is_data)
{
  if (gmpi.stoken <= 0)
    {
      return 0;
    }
  if(is_data && gmpi.dma_bytes[GMPI_SDMA] + len > gmpi.max_dma_bytes[GMPI_SDMA])
    {
      return 0;
    }
#ifdef GMPI_CREDITS
  /* control message: credits are tracked per destination rank.  The
     destination is the `grank` parameter (the former `dest` identifier
     was never declared in this function). */
  if (!is_data && gmpi.credits_left[grank] <= 0)
    return 0;
#endif

  return 1;
}


/*
 * Post a receive of `len` payload bytes for `rhandle` immediately.
 *
 * A tag is popped from gmpi.free_tag, bound to `rhandle`, and its number
 * is stored in *tagp.  The data lands either directly in the user buffer
 * (at offset rhandle->gm.frag) or in a freshly DMA_ALLOC'ed bounce buffer
 * recorded in rhandle->gm.netbuf; the last argument handed to
 * gmpi_netpostrecv() tells which (1 = bounce buffer).  One receive token
 * is consumed.  Aborts if the bounce buffer cannot be allocated.
 *
 * The caller must already have checked able_to_recv().
 */
static void post_recv_now(MPIR_RHANDLE *rhandle,int len,MPID_RNDV_T *tagp)
{
  const int gmlen = GMPI_ROUNDUP(len) + GMPI_NETDATA_HEADER_LEN;
  struct gmpi_tag *slot = gmpi.free_tag;
  void *target;
  int needs_copy;

  /* take the first free tag and attach it to this receive */
  gm_assert_p(slot);
  gmpi.free_tag = slot->next;
  slot->next = 0;
  gm_assert_p(!slot->rhandle);
  slot->rhandle = rhandle;
  *tagp = slot->num;

  gm_assert_p(len > 0);
  gm_assert_p(rhandle->gm.netbuf == 0);

  /* Receiving straight into the user buffer requires: no in-band header,
     gmpi_need_rcopy() reporting no copy needed, a non-null user buffer,
     and room for the whole rounded-up fragment. */
  needs_copy = !(!GMPI_NETDATA_HEADER_LEN &&
                 !gmpi_need_rcopy((char*)rhandle->buf+rhandle->gm.frag,len) &&
                 rhandle->buf && (rhandle->gm.frag+gmlen <= rhandle->s.count));

  if (needs_copy) {
    target = DMA_ALLOC(gmpi.port, gmlen, GMPI_RDMA);
    if (!target) {
       printf("%s:%d  DMA_ALLOC return NULL\n",__FILE__,__LINE__);
       gm_abort();
    }
    /* netbuf points past the header, at the payload */
    rhandle->gm.netbuf = (char*)target + GMPI_NETDATA_HEADER_LEN;
  } else {
    target = (char*)rhandle->buf+rhandle->gm.frag;
    gm_assert_p(target);
  }

  gm_assert_p(target);
  rhandle->gm.frag_length = gmlen;
  gmpi_netpostrecv(target,gmlen, slot->num,rhandle,needs_copy);
  gmpi.rtoken -= 1;
}

/*
 * Request a receive of `len` bytes for `rhandle`.
 *
 * If nothing is already waiting in the receive FIFO and resources allow
 * (able_to_recv), the receive is posted at once and `func` is called with
 * the handle and the tag that was assigned.  Otherwise the request is
 * appended to the tail of gmpi.recv_fifo_head; MPID_CH_Check_incoming()
 * posts it later and invokes `func` then.
 */
void gmpi_queue_recv(int len, recvbuf_callback func, MPIR_RHANDLE *rhandle)
{
  gmpi_lock();

  gm_assert_p(rhandle->gm.state == ST_POSTED_GOTFRAG ||
              rhandle->gm.state == ST_POSTED_GOTCTL ||
              rhandle->gm.state == ST_PROXY_GOTCTL);

  if (gmpi.recv_fifo_head || !able_to_recv(len)) {
    /* must wait: keep FIFO order by linking at the tail */
    struct recv_fifo_req **tail = &gmpi.recv_fifo_head;
    struct recv_fifo_req *req;

    GM_DBG(rhandle->gm.state = IS_PROXY(rhandle->gm.state)?
           ST_PROXY_ACKQUEUED : ST_POSTED_ACKQUEUED);
    req = gmpi_xmalloc(sizeof(*req),"MPI/ch_gm:gmpi_queue_recv");
    gm_assert(req);
    while (*tail)
      tail = &(*tail)->next;
    req->callback = func;
    req->rhandle = rhandle;
    req->len = len;
    req->next = 0;
    *tail = req;
  } else {
    MPID_RNDV_T tag;

    post_recv_now(rhandle,len,&tag);
    func(rhandle,tag);
#if GM_DEBUG
    gm_assert_p(MPID_Enqueue(&r_acksent,0,0,0,rhandle) == MPI_SUCCESS);
#endif
  }

  gmpi_unlock();
}

/*
 * Hand one message to the network layer immediately and consume a send
 * token.  The caller must already have checked able_to_send().
 *
 * Control message (is_data == 0): `data` is the packet, `shandle` must be
 * null.  Credit fields are filled in when GMPI_CREDITS is defined, the
 * checksum is written when gmpi.enable_csum is set, and the packet goes
 * out through gmpi_net_ctlsend().
 *
 * Data message (is_data != 0): `shandle` is required.  When
 * gmpi_need_scopy() asks for it, the payload is first copied into a
 * DMA_ALLOC'ed buffer remembered in shandle->gm.copy; otherwise `data` is
 * sent in place.  The fragment goes out through gmpi_net_datasend().
 */
static gm_inline void post_send_now(void *data, MPIR_SHANDLE *shandle, int len, int grank, int is_data)
{
  const int gmlen = GMPI_ROUNDUP(len);
  void *wire;

  gm_assert_p(!shandle || !shandle->is_complete);

  if (!is_data) {
    /* we are sending a control message */
    MPID_PKT_T *ctl = data;
#ifdef GMPI_CREDITS
    ctl->head.bip_credits = gmpi.credits_back[grank];
    gmpi.credits_back[grank] = 0;
    gm_assert_p(gmpi.credits_left[grank] > 0 && gmpi.credits_left[grank] <= GMPI_CREDITS);
    gmpi.credits_left[grank] -= 1;
#endif
    if (gmpi.enable_csum) {
      /* the sum field must be zero while the checksum is computed */
      ctl->head.ck.sum = 0;
      ctl->head.ck.len = len;
      ctl->head.ck.sum = 0xffff & ~gmpi_checksum(data,len);
    }
    gm_assert_p(!shandle);
    wire = data;
    if (GMPI_SEND_HEAD_LEN)
      *(void**)wire = 0;
    gmpi_net_ctlsend(grank,GMPI_SEND_NETDATA(wire),gmlen);
  } else {
    gm_assert_p(shandle);
    shandle->gm.copy = 0;
    gm_assert_p(len > 0);
    if (!gmpi_need_scopy(data,len)) {
      wire = data;
    } else {
      /* FIXME: if a system without zero copy can give an ack before the sent evt, we may
         have not yet freed the copy, so always leave this assertion on */
      gm_assert_p(shandle->gm.copy == 0);
      wire = shandle->gm.copy = DMA_ALLOC(gmpi.port,gmlen+GMPI_SEND_HEAD_LEN+GMPI_NETDATA_HEADER_LEN, GMPI_SDMA);
      if (!wire) {
         printf("%s:%d  DMA_ALLOC returns NULL\n",__FILE__,__LINE__);
         gm_abort();
      }
      gm_bcopy(data,(char*)GMPI_SEND_NETDATA(wire)+GMPI_NETDATA_HEADER_LEN,len);
      /* if we need to insert an header */
      if (GMPI_NETDATA_HEADER_LEN > 0)
        *(MPID_RNDV_T*)GMPI_SEND_NETDATA(wire) = shandle->recv_handle;
    }
    /* No length assertion here: shandle->frag may already have been
       incremented when this is a delayed send. */
    if (GMPI_SEND_HEAD_LEN)
      *(void**)wire = shandle;
    gmpi_net_datasend(grank,GMPI_SEND_NETDATA(wire),gmlen, shandle->recv_handle,shandle);
  }

  gmpi.stoken -= 1;
}

/*
 * Send `len` bytes at `data` to global rank `grank`, now or later.
 *
 * If the send FIFO is empty and able_to_send() agrees, the message is
 * posted immediately.  Otherwise a request is queued on
 * gmpi.send_fifo_head -- at the front when `prepend` is non-zero, at the
 * tail otherwise -- and gmpi.num_sends_queued is incremented;
 * MPID_CH_Check_incoming() drains the queue.
 *
 * `shandle` and `is_data` are passed through to post_send_now().
 */
void gmpi_queue_send(void *data, MPIR_SHANDLE *shandle, int len, int grank, int is_data, int prepend)
{
  gmpi_lock();

  if (gmpi.send_fifo_head || !able_to_send(grank,len,is_data)) {
    struct send_fifo_req **link = &gmpi.send_fifo_head;
    struct send_fifo_req *req;

    req = gmpi_xmalloc(sizeof(*req),"MPI/CH_GM:gmpi_queue_send");
    if (!prepend) {
      /* walk to the terminating null link */
      while (*link)
        link = &(*link)->next;
    }
    req->data = data;
    req->shandle = shandle;
    req->len = len;
    req->grank = grank;
    req->is_data = is_data;
    req->next = *link;
    *link = req;
    gmpi.num_sends_queued++;
  } else {
    post_send_now(data,shandle,len, grank,is_data);
  }

  gmpi_unlock();
}


/*
 * Completion of a control send: push the control block `p` back onto the
 * gmpi.send_control_root free list (where gmpi_ctl_alloc() reuses it) and
 * give back one send token.
 */
void gmpi_ctlsent_event(void *p)
{
  struct send_control_block *released = p;

  released->next = gmpi.send_control_root;
  gmpi.send_control_root = released;
  gmpi.stoken += 1;
}

/*
 * Release whatever backed the most recent data fragment of `shandle`.
 *
 * If the fragment was sent from a bounce copy (shandle->gm.copy set), the
 * copy is returned with DMA_FREE using the same size that post_send_now()
 * allocated.  Otherwise the fragment was sent in place and the interval
 * covering its last_frag_length bytes is released with
 * gmpi_unuse_interval().  gm.copy is cleared in both cases.
 */
void gmpi_datafrag_complete(MPIR_SHANDLE *shandle)
{
  if (!shandle->gm.copy) {
    gm_assert_p(shandle->gm.last_frag_length > 0);
    gm_assert_p(shandle->gm.posted == shandle->gm.sent_evt_expected);
    /* gm.frag has already advanced past the fragment, so step back */
    gmpi_unuse_interval((unsigned long)shandle->start + shandle->gm.frag - shandle->gm.last_frag_length,
                        shandle->gm.last_frag_length);
  } else {
    int bounce_len = GMPI_ROUNDUP(shandle->gm.last_frag_length)+GMPI_SEND_HEAD_LEN+GMPI_NETDATA_HEADER_LEN;

    DMA_FREE(gmpi.port, shandle->gm.copy, bounce_len, GMPI_SDMA);
  }
  shandle->gm.copy = 0;
}

/*
 * Completion of a data-fragment send for `shandle`.
 *
 * Decrements the count of posted fragments.  If an ack already arrived
 * for this fragment (gm.sent_evt_expected non-zero) the fragment was
 * cleaned up in the ack path, so only the counter is decremented;
 * otherwise gmpi_datafrag_complete() is called here.  When nothing is
 * posted any more and the whole buffer has been sent, the handle is
 * marked complete and its optional `finish` hook runs.  One send token is
 * returned at the end.  `p` is not used.
 */
void gmpi_datasent_event(MPIR_SHANDLE *shandle, void *p)
{
  gm_assert_p(shandle);
  gm_assert_p(shandle->gm.posted > 0);
  shandle->gm.posted -= 1;

  if (!shandle->gm.sent_evt_expected) {
    gmpi_datafrag_complete(shandle);
  } else {
    /* the ack arrived before this sent event; cleanup already happened
       in the ack procedure */
    shandle->gm.sent_evt_expected -= 1;
  }

  if (shandle->gm.posted == 0 && shandle->gm.frag >= shandle->bytes_as_contig) {
    /* nothing left in flight and every byte has been sent */
    gm_assert_p(shandle->gm.sent_evt_expected == 0);
    gm_assert_p(!shandle->is_complete);
    shandle->is_complete = 1;
    if (shandle->finish)
      (shandle->finish)( shandle );
  }

  gmpi.stoken += 1;
}

/*
 * Return a buffer for building a control packet of at most `len` bytes.
 *
 * A block is taken from the gmpi.send_control_root free list when one is
 * available; otherwise a new one is obtained from gm_dma_malloc(), sized
 * for the largest control message plus the send header.  The pointer
 * returned is GMPI_SEND_NETDATA() of the block.  Aborts if the allocation
 * fails.
 */
void *gmpi_ctl_alloc(unsigned len)
{
  struct send_control_block *blk = gmpi.send_control_root;

  gm_assert_p(len <= gm_max_length_for_size(GMPI_CONTROL_TAG));

  if (!blk) {
    blk = gm_dma_malloc(gmpi.port, GMPI_SEND_HEAD_LEN+gm_max_length_for_size(GMPI_CONTROL_TAG));
    if (!blk) {
       printf("%s:%d: gm_dma_malloc returned NULL\n",__FILE__,__LINE__);
       gm_abort();
    }
  } else {
    gmpi.send_control_root = blk->next;
    /* not strictly needed: post_send_now() clears the header field */
    blk->next = 0;
  }
  return GMPI_SEND_NETDATA(blk);
}

/*
 * Queue the control packet at `data` (`length` bytes) for rank `dest`.
 *
 * `data` is converted back to its block start with GMPI_SEND_HEAD() and
 * passed to gmpi_queue_send() as a non-data message with no send handle.
 * `prepend` non-zero places it at the front of the send FIFO if it has to
 * wait.
 */
void gmpi_send_control(void *data, long length, unsigned int dest, int prepend)
{
  int rounded_len = GMPI_ROUNDUP(length);
  void *block_start = GMPI_SEND_HEAD(data);

  gm_assert_p(dest < MPID_MyWorldSize);
  gm_assert_p(rounded_len <= gm_max_length_for_size(GMPI_CONTROL_TAG));

  gmpi_queue_send(block_start, 0, length, dest, 0, prepend);
}

/* this function is derived from MPID_CH_Check_incoming in chchkdev.c */

/*
 * Process one received control packet located at `buf`.
 *
 * Steps, in order:
 *  1. If gmpi.enable_csum is set, verify the packet checksum.  On a
 *     mismatch a message is printed; the process exits when enable_csum
 *     is 1, otherwise MPI_ERR_OTHER is returned.
 *  2. With GMPI_CREDITS defined, update the credit counters for the
 *     sender and send a credit packet back once enough have accumulated.
 *  3. Message packets are matched through MPID_Msg_arrived() and handed
 *     to the short / rendezvous / 3-way / eager handler of `dev`: the
 *     recv/irecv entry when a receive was posted, the unex entry when not.
 *  4. Other packets go to their protocol handlers.
 *
 * Returns the handler's result where one is called, MPI_SUCCESS
 * otherwise.  Unknown packet modes only print a diagnostic and leave the
 * result at MPI_SUCCESS.
 */
static int gmpi_ctlrecv_event(MPID_Device *dev,void *buf)
{
    MPID_PKT_T   * pkt_p = buf;
/* `pkt` is a function-local alias for the received packet; it is
   #undef'ed at the end of the function. */
#define pkt (*pkt_p)
    int          from_grank;
    MPIR_RHANDLE *rhandle;
    int          is_posted;
    int          err = MPI_SUCCESS;
    unsigned char *pktp;
     
    /* The sender stores the complement of the checksum in the packet
       (see post_send_now), so a good packet sums to 0xffff. */
    if (gmpi.enable_csum && gmpi_checksum(buf,pkt.head.ck.len) != 0xffff) {
      GM_PRINT(0,("mpich-gm checksum error at mpi_node = %d: 0x%x from node %d\n \n", 
		MPID_MyWorldRank, gmpi_checksum(buf,pkt.head.ck.len),pkt.head.src));
      if (gmpi.enable_csum == 1)
	exit(1);
      else{
	err = MPI_ERR_OTHER;
	return err;
      }
    }
    from_grank = pkt.head.src;
    MPID_PKT_UNPACK( &pkt, sizeof(MPID_PKT_HEAD_T), from_grank );

#ifdef GMPI_CREDITS
    /* Credits piggy-backed on this packet are added to our balance for
       the sender; we also owe the sender one more credit. */
    gmpi.credits_left[from_grank] += pkt.head.bip_credits;
    gmpi.credits_back[from_grank] += 1;
    if (gmpi.credits_back[from_grank] == GMPI_CREDITS - 1) {
      MPID_PKT_HEAD_T *credit = gmpi_ctl_alloc(sizeof(MPID_PKT_HEAD_T));
      credit->mode = MPID_PKT_CREDITS;
      credit->src = MPID_MyWorldRank;
      gmpi_send_control(credit,sizeof(*credit),from_grank,1);
    }
#endif

    DEBUG_PRINT_PKT("R received message",&pkt);
    
    /* Separate the incoming messages from control messages */
    if (MPID_PKT_IS_MSG(pkt.head.mode)) {
      DEBUG_PRINT_RECV_PKT("R rcvd msg",&pkt);
      
      /* Is the message expected or not? 
	 This routine RETURNS a rhandle, creating one if the message 
	 is unexpected (is_posted == 0) */
      MPID_Msg_arrived( pkt.head.lrank, pkt.head.tag, pkt.head.context_id, 
			&rhandle, &is_posted );

      /* Need the send handle address in order to cancel a send */
      if (!is_posted) {  /* begin if !is_posted */
	if (pkt.head.mode == MPID_PKT_REQUEST_SEND) 
	  rhandle->send_id = pkt.request_pkt.send_id;
	else if (pkt.head.mode == MPID_PKT_SHORT)
	  rhandle->send_id = pkt.short_pkt.send_id; 
	else if (pkt.head.mode == MPID_PKT_LONG)
	  rhandle->send_id = pkt.long_pkt.send_id;
      } 
      
      MPID_DO_HETERO(rhandle->msgrep = (MPID_Msgrep_t)pkt.head.msgrep );
#ifdef MPID_DEBUG_ALL   /* #DEBUG_START# */
      if (MPID_DebugFlag) {
	FPRINTF( MPID_DEBUG_FILE, "[%d]R msg was %s (%s:%d)\n", 
		 MPID_MyWorldRank, 
		 is_posted ? "posted" : "unexpected", __FILE__, __LINE__ );
      }
#endif                  /* #DEBUG_END# */
      if (is_posted) {
	/* We should check the size here for internal errors .... */
	gm_assert_p(rhandle->gm.state == ST_POSTED_CTLWAIT);
	GM_DBG(rhandle->gm.state = ST_POSTED_GOTCTL);
	switch (pkt.head.mode) {
	case MPID_PKT_SHORT:
	  DEBUG_TEST_FCN(dev->short_msg->recv,"dev->short->recv");
	  err = (*dev->short_msg->recv)( rhandle, from_grank, &pkt );
	  break;
	  
	case MPID_PKT_REQUEST_SEND:
	  DEBUG_TEST_FCN(dev->rndv->irecv,"dev->rndv->irecv");
	  err = (*dev->rndv->irecv)( rhandle, from_grank, &pkt );
	  break;

	case MPID_PKT_3WAY_SEND:
	  DEBUG_TEST_FCN(dev->long_msg->irecv,"dev->long_msg->irecv");
	  err = (*dev->long_msg->irecv)( rhandle, from_grank, &pkt );
	  DEBUG_PRINT_PKT("R received message: 3WAY SEND! ",&pkt);
	  break;
	  
	case MPID_PKT_LONG:
	  DEBUG_TEST_FCN(dev->eager->irecv,"dev->eager->irecv");
	  err = (*dev->eager->irecv)( rhandle, from_grank, &pkt );
	  break;
	  
	default:
	  fprintf( stderr, 
		   "[%d] Internal error: msg packet discarded (%s:%d)\n",
		   MPID_MyWorldRank, __FILE__, __LINE__ );
	  fflush( stderr );
	}
      }
      else {
	/* No matching receive was posted: hand the packet to the
	   "unexpected message" entry of the same protocol. */
	GM_DBG(rhandle->gm.state = ST_PROXY_GOTCTL);
	switch (pkt.head.mode) {
	case MPID_PKT_SHORT:
	  DEBUG_TEST_FCN(dev->short_msg->unex,"dev->short->unex");
	  err = (*dev->short_msg->unex)( rhandle, from_grank, &pkt );
	  break;
	case MPID_PKT_REQUEST_SEND:
	  DEBUG_TEST_FCN(dev->rndv->unex,"dev->rndv->unex");
	  err = (*dev->rndv->unex)( rhandle, from_grank, &pkt );
	  break;
	  
	case MPID_PKT_3WAY_SEND:
	  DEBUG_TEST_FCN(dev->long_msg->unex,"dev->long_msg->unex");
	  err = (*dev->long_msg->unex)( rhandle, from_grank, &pkt );
	  DEBUG_PRINT_PKT("R received message: 3WAY SEND",&pkt);
	  break;
	  
	case MPID_PKT_LONG:
	  DEBUG_TEST_FCN(dev->eager->unex,"dev->eager->unex");
	  err = (*dev->eager->unex)( rhandle, from_grank, &pkt );
	  break;
	  
	default:
	  fprintf( stderr, 
		   "[%d] Internal error: msg packet discarded (%s:%d)\n",
		   MPID_MyWorldRank, __FILE__, __LINE__ );
	}
      }
    }
    else {
      switch (pkt.head.mode) {
      case MPID_PKT_OK_TO_SEND:
	DEBUG_TEST_FCN(dev->rndv->do_ack,"dev->rndv->do_ack");
	err = (*dev->rndv->do_ack)( &pkt, from_grank );
	break;

      case MPID_PKT_ANTI_SEND:
	MPID_SendCancelOkPacket( &pkt, from_grank ); 
	break;
            
      case MPID_PKT_ANTI_SEND_OK:
	MPID_RecvCancelOkPacket( &pkt, from_grank ); 
	break;
            
#ifdef MPID_FLOW_CONTROL
      case MPID_PKT_FLOW:
	MPID_RecvFlowPacket( &pkt, from_grank );
	break;
#endif

#ifdef MPID_PACK_CONTROL
      case MPID_PKT_PROTO_ACK:
      case MPID_PKT_ACK_PROTO:
	MPID_RecvProtoAck( &pkt, from_grank );
	break;
#endif

#ifdef GMPI_CREDITS
      /* A pure credit packet: its credits were already absorbed above. */
      case MPID_PKT_CREDITS:
        break;
#endif
      default:
	fprintf( stdout, "gmpi:[%d] Packet type %d (0x%x) is unknown %s:%d!\n", 
		 MPID_MyWorldRank, pkt.head.mode, pkt.head.mode,
		 __FILE__, __LINE__ );
	pktp = (unsigned char *)&pkt;
	fprintf( stdout, "gmpi: Packet dump: 0x%x%x%x%x%x%x%x%x\n", 
		pktp[0],pktp[1],pktp[2],pktp[3],pktp[4],pktp[5],pktp[6],pktp[7]);
	/* Four leading 0xaa bytes are reported below as a GM
	   address-translation failure, which is fatal. */
	if ((pktp[0] == 0xaa) && (pktp[1] == 0xaa)  &&
	    (pktp[2] == 0xaa) && (pktp[3] == 0xaa)) {
	   fprintf(stdout,"gmpi: GM couldn't translate a virtual address\n");
	   gm_abort();
	}
      }
      /* Really should remember error in case subsequent events are 
	 successful */
    }
    DEBUG_PRINT_MSG("Exiting check_incoming");
    return err;
#undef pkt
}

/*
 * Free a receive bounce buffer.
 *
 * `buf` is the payload pointer (the value post_recv_now() stored in
 * rhandle->gm.netbuf), so the allocation actually starts
 * GMPI_NETDATA_HEADER_LEN bytes earlier and is that much longer than
 * `length`.  `length` must already be rounded up.
 */
void gmpi_free_databuf(void *buf,int length)
{
  char *alloc_start = (char*)buf-GMPI_NETDATA_HEADER_LEN;

  gm_assert_p(length == GMPI_ROUNDUP(length));
  DMA_FREE(gmpi.port, alloc_start, length+GMPI_NETDATA_HEADER_LEN, GMPI_RDMA);
}




/*
 * A data fragment posted under `tag` has arrived in `buf`.
 *
 * The tag is detached from its receive handle and pushed back onto
 * gmpi.free_tag.  If the data was received directly into the user buffer
 * (no netbuf), the interval covering the fragment is released.  The
 * handle's recvcallback is then invoked with the received length minus
 * the network-data header, and one receive token is returned.
 */
static void gmpi_datarecv_event(int tag, void *buf, int rlength)
{
  struct gmpi_tag *slot;
  MPIR_RHANDLE *rhandle;

  gm_assert_p(tag >= 0 && tag < GMPI_NTAGS);
  slot = &gmpi.tags[tag];

  /* unbind the handle and recycle the tag */
  rhandle = slot->rhandle;
  slot->rhandle = 0;
  gm_assert_p(slot->next == 0);
  slot->next = gmpi.free_tag;
  gmpi.free_tag = slot;
  gm_assert_p(rhandle);

  if (!rhandle->gm.netbuf) {
    gmpi_unuse_interval((unsigned long)rhandle->buf+rhandle->gm.frag,rhandle->gm.frag_length);
  }

  /* buf must be the buffer that post_recv_now() posted for this handle */
  gm_assert_p(rhandle->gm.netbuf
              ? ((unsigned long)rhandle->gm.netbuf == (unsigned long)(char*)buf + GMPI_NETDATA_HEADER_LEN)
              : ((unsigned long)buf == (unsigned long)rhandle->buf + rhandle->gm.frag));
  {
    char *bytes = buf;
    GM_PRINT(2,("receive packet:%2x:%2x:%2x:%2x\n",bytes[0],bytes[1],bytes[2],bytes[3]));
  }

  rlength -= GMPI_NETDATA_HEADER_LEN;

#if GM_DEBUG
  gm_assert_p(MPID_Dequeue(&r_acksent,rhandle) == MPI_SUCCESS);
#endif
  rhandle->gm.recvcallback(rhandle,rlength);
  gmpi.rtoken += 1;
}

/*
 * Make progress on the device.
 *
 * First drains as much of the pending send FIFO as able_to_send() allows,
 * then as much of the pending receive FIFO as able_to_recv() allows
 * (calling each request's callback with the tag assigned), and finally
 * polls the network through gmpi_net_lookup().  Both FIFOs are served
 * strictly from the head, so a blocked head request blocks the rest.
 *
 * Returns whatever gmpi_net_lookup(dev, blocking) returns.
 */
int  MPID_CH_Check_incoming(MPID_Device *dev,MPID_BLOCKING_TYPE blocking)
{
  int status;

  gmpi_lock();

  gm_assert_p(gmpi.stoken >= 0);

  for (;;) {
    struct send_fifo_req *pending = gmpi.send_fifo_head;

    if (!pending || !able_to_send(pending->grank,pending->len,pending->is_data))
      break;
    post_send_now(pending->data,pending->shandle,pending->len, pending->grank, pending->is_data);
    gmpi.send_fifo_head = pending->next;
    free(pending);
    gmpi.num_sends_queued--;
  }

  for (;;) {
    struct recv_fifo_req *pending = gmpi.recv_fifo_head;
    MPID_RNDV_T tag;

    if (!pending || !able_to_recv(pending->len))
      break;
    post_recv_now(pending->rhandle,pending->len,&tag);
    gm_assert_p(pending->callback);
    if (pending->callback) {
      MPIR_RHANDLE *rhandle = pending->rhandle;
      gm_assert_p(rhandle->gm.state == ST_POSTED_ACKQUEUED || rhandle->gm.state == ST_PROXY_ACKQUEUED);
      (pending->callback)(rhandle,tag);
#if GM_DEBUG
      gm_assert_p(MPID_Enqueue(&r_acksent,0,0,0,rhandle) == MPI_SUCCESS);
#endif
    }
    gmpi.recv_fifo_head = pending->next;
    free(pending);
  }

  status = gmpi_net_lookup(dev,blocking);
  gmpi_unlock();
  return status;
}

#if !GM_DISABLE_ALARM
/*
 * Set up the periodic-alarm progress mechanism.
 *
 *  1. Initialise the two condition variables and the mutex in `gmpi`.
 *  2. Start the helper thread (second_thread) and wait until it reports
 *     that it has locked the mutex.
 *  3. Lock gmpi.alarm_mutex in the calling thread; alarm_handler() later
 *     passes this mutex to pthread_cond_wait().
 *  4. Install alarm_handler() for SIGVTALRM.
 *  5. Arm ITIMER_VIRTUAL: first expiry after 40 ms, then every 1 ms.
 *
 * The handler is installed BEFORE the timer is armed.  If the timer were
 * started first, a SIGVTALRM delivered before sigaction() completed would
 * get the default disposition, which terminates the process.
 *
 * Any failure aborts through gm_always_assert() or exit(1).
 */
static void alarm_init(void)
{
  struct itimerval val;
  struct sigaction sa;
  int sig_ret;
  int timer_ret;
  pthread_t child;
  int cc;

  cc = pthread_cond_init(&gmpi.alarm_wake,NULL);
  gm_always_assert(cc == 0);
  cc = pthread_cond_init(&gmpi.alarm_ack,NULL);
  gm_always_assert(cc == 0);
  cc = pthread_mutex_init(&gmpi.alarm_mutex,NULL);
  gm_always_assert(cc == 0);
  if (pthread_create(&child,NULL,second_thread,(void*)0) != 0) {
    GM_PRINT(0,("error creating second thread\n"));
    exit(1);
  }
  /* wait for the helper to take the mutex before we try to */
  while (!second_thread_started) sched_yield();
  pthread_mutex_lock(&gmpi.alarm_mutex);

  /* handler first ... */
  sa.sa_handler = alarm_handler;
  sa.sa_flags = 0;
  sigemptyset(&sa.sa_mask);
  sig_ret = sigaction(SIGVTALRM,&sa,0);
  gm_always_assert(sig_ret == 0);

  /* ... then the timer that raises the signal */
  val.it_value.tv_sec = 0;
  val.it_value.tv_usec = 40000;
  val.it_interval.tv_sec = 0;
  val.it_interval.tv_usec = 1000;
  timer_ret = setitimer(ITIMER_VIRTUAL,&val,0);
  gm_always_assert(timer_ret == 0);
}
#endif


/*
 * Initialise the global `gmpi` state.
 *
 * Sets the DMA byte limits, builds the free lists of receive tags and
 * send tokens (each entry links to the previous array slot, so the list
 * head is the last element), calls gmpi_subinit(), then resets the send
 * token count and the queued-send counter.  With GMPI_CREDITS defined
 * every rank starts with a full set of credits.  Unless GM_DISABLE_ALARM
 * is set, the alarm machinery is started last.
 *
 * `argc` and `argv` are not used in this function.
 */
void gmpi_init(int *argc,char ***argv)
{
  int i;

#if GM_DEBUG
  r_acksent.first = 0;
  r_acksent.lastp = &r_acksent.first;
#endif
  gmpi.max_dma_bytes[GMPI_RDMA] = GMPI_MAX_DMA_BYTES;
  gmpi.max_dma_bytes[GMPI_SDMA] = GMPI_MAX_DMA_BYTES;

  /* receive tags: tag i carries number i and links to tag i-1 */
  for (i = 0; i < GMPI_NTAGS; i++) {
    gmpi.tags[i].rhandle = 0;
    gmpi.tags[i].num = i;
    gmpi.tags[i].next = (i > 0) ? &gmpi.tags[i-1] : 0;
  }
  gmpi.free_tag = &gmpi.tags[GMPI_NTAGS-1];

  /* send tokens: same backward-linked layout */
  for (i = 0; i < GMPI_NSTOKEN; i++) {
    gmpi.stokens[i].shandle = 0;
    gmpi.stokens[i].buf = 0;
    gmpi.stokens[i].next = (i > 0) ? &gmpi.stokens[i-1] : 0;
  }
  gmpi.free_stoken = &gmpi.stokens[GMPI_NSTOKEN-1];
  gmpi.used_stoken = 0;

  gmpi_subinit();
  /* max_stoken is read only after gmpi_subinit() has run */
  gmpi.stoken = gmpi.max_stoken;
  gmpi.num_sends_queued = 0;
#ifdef GMPI_CREDITS
  for (i = 0; i < MPID_MyWorldSize; i++)
    gmpi.credits_left[i] = GMPI_CREDITS;
#endif

#if !GM_DISABLE_ALARM
  alarm_init();
#endif

}


/*
 * Shut the device down.
 *
 * Keeps calling the device's blocking check until both pending FIFOs are
 * empty and every send token has been returned, then releases all
 * intervals and closes the GM port.
 */
void gmpi_finish(MPID_Device * dev)
{
  for (;;) {
    int drained = !gmpi.send_fifo_head && !gmpi.recv_fifo_head &&
                  gmpi.stoken >= gmpi.max_stoken;

    if (drained)
      break;
    (*dev->check_device)( dev, MPID_BLOCKING );
  }

  gmpi_clear_all_intervals();
  gm_close(gmpi.port);
}


/*
 * Allocate `l` bytes of DMA memory on port `p`, charging them to the
 * accounting bucket `res` (an index into gmpi.dma_bytes, e.g. GMPI_RDMA
 * or GMPI_SDMA as used elsewhere in this file).
 *
 * `l` must be positive and already rounded up.  The byte and allocation
 * counters are updated before gm_dma_malloc() is called, whose result is
 * returned unchanged (callers in this file check it for null).
 */
void *gmpi_alloc(gmpi_port p, int l, int res)
{
  void *mem;

  gm_assert_p(GMPI_ROUNDUP(l) == l);
  gm_assert_p(l > 0);

  gmpi.dma_bytes[res] += l;
  /* the budget may be overshot, but never beyond twice the maximum */
  gm_assert_p(gmpi.dma_bytes[res] <= gmpi.max_dma_bytes[res]*2);
  gmpi.dma_nballoc++;

  mem = gm_dma_malloc(p,l);
  return mem;
}

/*
 * Release DMA memory obtained from gmpi_alloc().
 *
 * `addr` is the block, `l` the same rounded-up size that was passed to
 * gmpi_alloc(), and `res` the accounting bucket it was charged to.  The
 * allocation count and byte total are decremented before the memory is
 * handed back to gm_dma_free().
 */
void gmpi_free(gmpi_port p, void *addr, int l, int res)
{
  gm_assert_p(GMPI_ROUNDUP(l) == l);
  gm_assert_p(l > 0);
  gm_assert_p(gmpi.dma_nballoc > 0);
  gm_assert_p(gmpi.dma_bytes[res] <= gmpi.max_dma_bytes[res]*2);
  gmpi.dma_nballoc -= 1;
  gmpi.dma_bytes[res] -= l;
  /* going negative would mean a size mismatch with gmpi_alloc() */
  gm_assert_p(gmpi.dma_bytes[res] >= 0);
  gm_dma_free(p,addr);
}

/*
 * printf-style output to stderr.  `format` and the variable arguments are
 * forwarded unchanged to vfprintf().
 */
void gmpi_printf (char *format, ...)
{
  va_list args;

  va_start (args, format);
  vfprintf (stderr, format, args);
  va_end (args);
}

/*
 * Store `enable` in gmpi.enable_csum.  Within this file a non-zero value
 * turns on checksumming of control packets, and the value 1 additionally
 * makes a checksum mismatch fatal (see gmpi_ctlrecv_event).
 */
void MPID_SetChecksumFlag(int enable)
{
  gmpi.enable_csum = enable;
}

/* Per-rank array defined elsewhere; it is freed (and cleared) by
   MPID_SetStarttimeFlag(). */
extern double* time_alive;

/*
 * On rank 0, print one line per rank 1..MPID_MyWorldSize-1 with the value
 * stored for it in time_alive[], then free the array on every rank.
 *
 * The pointer is reset after the free and checked before it is read, so
 * calling this function again (or calling it when the array was never
 * allocated) no longer reads freed memory or frees it twice.
 *
 * NOTE(review): `enable` is ignored -- confirm whether the report was
 * meant to be conditional on it.
 */
void MPID_SetStarttimeFlag (int enable)
{
  int i;

  (void)enable;

  if (MPID_MyWorldRank == 0 && time_alive) {
    printf("Time alive message received from each node:\n");
    printf("    Node        Time\n");
    for (i = 1; i < MPID_MyWorldSize ; i++)
      printf("    %d        %f\n",i, time_alive[i]);
  }
  free(time_alive);
  time_alive = 0;
}


/*
 * Compute a 16-bit folded checksum over `len` bytes starting at `buf`.
 *
 * The accumulator starts at all ones.  Leading bytes are consumed one and
 * then two at a time until `buf` is 4-byte aligned, the bulk is summed as
 * the two 16-bit halves of each 32-bit word, and any 2- and 1-byte tail
 * is added last.  The number of bytes left after the alignment step is
 * added as well, to help catch zero padding.  If the buffer started on an
 * odd address, the two 16-bit halves of the accumulator are moved up by
 * one byte before the final fold.  The result is folded twice so that it
 * fits in 16 bits.
 *
 * The sender stores the complement of this value inside the packet, so a
 * checksum over an intact packet is expected to be 0xffff.
 */
unsigned gmpi_checksum(unsigned char *buf, int len)
{
  union {
    unsigned char b[2];
    unsigned short w;
  } pair;
  unsigned int acc = ~0;
  unsigned *words;
  int nwords;
  int k;
  int started_odd = 0;

  /* odd start address: take one byte, placed in the second lane */
  if (len >= 1 && ((long)buf & 1)) {
    pair.b[0] = 0;
    pair.b[1] = buf[0];
    acc += pair.w;
    buf += 1;
    len -= 1;
    started_odd = 1;
  }
  /* 2-byte but not 4-byte aligned: take one halfword */
  if (len >= 2 && ((long)buf & 2)) {
    acc += *(unsigned short*)buf;
    buf += 2;
    len -= 2;
  }

  words = (unsigned *)buf;
  nwords = len / 4;
  for (k = 0; k < nwords; k++) {
    unsigned w = words[k];

    acc += (w >> 16) + (w & 0xffff);
    /* fold periodically so the 32-bit accumulator cannot overflow */
    if ((k % 8192) == 0)
      acc = (acc >> 16) + (acc & 0xffff);
  }

  if (len & 2)
    acc += *(unsigned short*)(buf + nwords*4);
  if (len & 1) {
    pair.b[0] = buf[len-1];
    pair.b[1] = 0;
    acc += pair.w;
  }

  /* put the len into the checksum to help catch zero padding */
  acc += len;

  if (started_odd)
    acc = ((acc & 0xffff)<<8) + ((acc & 0xffff0000) >> 8);

  acc = (acc >> 16) + (acc & 0xffff);
  /* carry */
  acc = (acc >> 16) + (acc & 0xffff);
  return acc;
}

/*
 * Mark the library as busy by incrementing gmpi.busy.  While the counter
 * is non-zero, alarm_handler() and second_thread() leave the device alone.
 * Calls nest; each must be paired with gmpi_unlock().
 *
 * NOTE(review): this is a plain counter, not a mutex -- confirm that no
 * stronger exclusion is expected by callers.
 */
void gmpi_lock(void)
{
  gmpi.busy++;
}

/*
 * Undo one gmpi_lock() by decrementing gmpi.busy.
 */
void gmpi_unlock(void)
{
  gmpi.busy--;
}



/*
 * malloc() wrapper that does not return on failure.
 *
 * Returns `len` bytes from malloc().  If the allocation fails, MPID_Abort()
 * is called with `msg` and the text "out of memory", followed by exit(1).
 * Memory obtained here is released with free().
 */
void *gmpi_xmalloc(int len, char *msg)
{
  void *mem = malloc(len);

  if (mem)
    return mem;

  MPID_Abort((struct MPIR_COMMUNICATOR *)0, 1, msg,"out of memory");
  exit(1);
  return mem;  /* not reached */
}

#ifndef BIP
#include "mpigm.c"
#else
#include "mpibip.c"
#endif
