/******************************************************************-*-c-*-
 * Myricom GM networking software and documentation                      *
 * Copyright (c) 1998 by Myricom, Inc.                                   *
 * All rights reserved.  See the file `COPYING' for copyright notice.    *
 *************************************************************************/

/* author: glenn@myri.com */

#include "gm.h"
#include <stdio.h>
#include <string.h>

/* Set to a nonzero value to make test() print its parameters, the
   source byte pattern, and a "good" line for every passing test. */
#define GM_DEBUG_DIRSEND 0

/* Size of the guard region kept on each side of the test buffers, and
   the bound on the start/end offsets swept by the test loops. */
#define MAX_ALIGN GM_DMA_GRANULARITY
/* Number of bytes addressed from src_buf and dest_buf, not counting
   the guard regions. */
#define BUF_LEN (6 * 8192)
/* Round N up to the next multiple of M.  The mask arithmetic is only
   correct when M is a power of two. */
#define ROUNDUP(n,m) (((unsigned long)(n)+(unsigned long)(m)-1)&~((unsigned long)(m)-1))
/* Port number opened by this program; also the target port of the
   directed sends it issues. */
#define GM_PORT_ID 2

/* Aligned bases of the source and destination test regions; both are
   set up in gm_dirsend(). */
unsigned char *src_buf, *dest_buf;
/* Running total of corrupt bytes reported by test(). */
int errors = 0;
/* The GM port shared by every function in this file; NULL whenever it
   is not open. */
struct gm_port *port = NULL;
/* Small buffers for the notification message that follows each
   directed send: sbuf is sent, rbuf is provided for receiving. */
void *sbuf, *rbuf;
/* Node ID obtained from gm_get_node_id(); used as the target of every
   send. */
unsigned int this_node_id;



/* Close MYPORT if it is open (non-NULL) and reset it to NULL, so that
   invoking the macro again is a no-op.  MYPORT must be a modifiable
   lvalue and is evaluated more than once.

   The body is wrapped in do { } while (0) so that the expansion plus
   the caller's trailing semicolon forms exactly one statement; a bare
   brace block would break an unbraced if/else around the call. */
#define CLOSE_IF_OPEN(myport)  \
  do                           \
    {                          \
      if (myport)              \
        {                      \
          gm_close (myport);   \
          (myport) = NULL;     \
        }                      \
    }                          \
  while (0)


/****************************************************************
 * Perform the equivalent of bcopy using gm_directed_send(), then
 * block until the copy is known to have completed.  The copy step
 * is isolated so that gm_bcopy() may be substituted for debugging
 * purposes.
 ****************************************************************/

/* Obtain one low-priority send token, servicing the receive queue
   until one is available.  No event that needs real processing is
   expected while waiting, so whatever arrives is handed to
   gm_unknown(). */

static void
wait_for_send_token (void)
{
  while (!gm_alloc_send_token (port, GM_LOW_PRIORITY))
    gm_unknown (port, gm_blocking_receive (port));
}

static void
my_bcopy (const void *src, void *dest, long len)
{
  const unsigned char *bytes = (const unsigned char *) src;
  gm_recv_event_t *event;
  int completions;

  /* The caller must have painted 0xff guard bytes immediately before
     and immediately after the source region. */

  gm_always_assert (bytes[-1] == 0xff);
  gm_always_assert (bytes[len] == 0xff);

#if 1
  /* Copy with a directed send addressed to this very node and port. */

  wait_for_send_token ();
  gm_directed_send (port,
                    (void *) src,
                    (gm_remote_ptr_t) (gm_up_t) dest,
                    len, GM_LOW_PRIORITY, this_node_id, GM_PORT_ID);
#else
  gm_assert (src);
  gm_assert (dest);
  gm_assert (len);
  gm_bcopy ((void *) src, (void *) dest, len);
#endif

  /* Follow the directed send with an ordinary message whose arrival
     serves as notification that the directed send has completed.  This
     is needed because there is no explicit notification of directed
     send completion on either the send or the receive end.  The message
     needs a send token of its own. */

  wait_for_send_token ();
  gm_send_to_peer (port, sbuf, 5, 16, GM_LOW_PRIORITY, this_node_id);

  /* Wait for both halves of the notification: a GM_SENT_EVENT for the
     send and a GM_FAST_PEER_RECV_EVENT for the receive.  GM programs
     do not normally perform synchronous sends like this, especially to
     the same node, but this is only a data integrity test, so it is
     kept simple. */

  for (completions = 0; completions < 2;)
    {
      event = gm_blocking_receive (port);
      switch (GM_RECV_EVENT_TYPE (event))
        {
        case GM_SENT_EVENT:
          /* Give back the send token used for the send. */
          gm_free_send_token (port, GM_LOW_PRIORITY);
          completions++;
          break;

        case GM_FAST_PEER_RECV_EVENT:
          /* Hand the receive buffer back for the next notification. */
          gm_provide_receive_buffer (port, rbuf, 5, GM_LOW_PRIORITY);
          completions++;
          break;

        default:
          gm_unknown (port, event);
        }
    }
}

/****************************************************************
 * Perform one copy test and verify the result.
 *
 * SRC_START_ALIGN and SRC_END_ALIGN are byte offsets from src_buf
 * delimiting the bytes to copy (end exclusive).  DEST_START_ALIGN
 * is the byte offset from dest_buf at which they should land.
 * COPYFN performs the copy.
 *
 * The source region is painted with 0xff guard bytes around a
 * repeating 254,253,...,1 pattern and the destination region is
 * zeroed.  After the copy, every byte of the destination region,
 * both guard areas included, is checked.  On any mismatch the
 * details are printed, the port is closed and gm_exit() is called
 * with GM_MEMORY_FAULT; otherwise 0 is returned.
 ****************************************************************/

static int
test (int src_start_align, int src_end_align, int dest_start_align,
      void (*copyfn) (const void *, void *, long))
{
  unsigned char *src, *dest;
  unsigned char *p;
  int i, len, corrupt = 0;

  /* compute source and destination. */

  src = src_buf + src_start_align;
  len = src_end_align - src_start_align;
  dest = dest_buf + dest_start_align;

  if (GM_DEBUG_DIRSEND)
    {
      gm_printf ("src= %lx  ", (long) src & (MAX_ALIGN - 1));
      gm_printf ("dest= %lx  ", (long) dest & (MAX_ALIGN - 1));
      gm_printf ("len= 0x%x\n", len);
      fflush (stdout);
    }

  /* Fill the unused src buffer with 0xff.  Fill the used buffer
     with 254,253,252,...3,2,1,254,253,...  Clear the destination buffer.
     Both regions extend MAX_ALIGN bytes beyond each end of the
     BUF_LEN-byte buffer. */

  p = src_buf - MAX_ALIGN;
  while (p < src)
    *p++ = 0xff;
  i = 254;
  while (p < src + len)
    {
      if (GM_DEBUG_DIRSEND)
        printf (" %02x", i & 0xff);
      *p++ = i--;
      if (i == 0)
        i = 254;
    }
  if (GM_DEBUG_DIRSEND)
    printf ("\n");
  while (p < src_buf + BUF_LEN + MAX_ALIGN)
    *p++ = 0xff;
  gm_bzero (dest_buf - MAX_ALIGN, BUF_LEN + 2 * MAX_ALIGN);

  /* Copy the packet from the source to the corresponding position in
     the destination buffer. */

  copyfn (src, dest, len);

  /* Verify that the bytes were copied to the destination, and that
     nothing outside the packet was disturbed. */

  /* Everything before the packet must still be zero. */
  p = dest_buf - MAX_ALIGN;
  while (p < dest)
    {
      if (*p != 0x0)
        {
          gm_printf ("dest[0x%lx] = 0x%02x != 0 (error before packet)\n",
                     (unsigned long) (p - dest), *p);
          corrupt++;
        }
      p++;
    }

  /* The packet itself must match the source byte for byte. */
  while (p < dest + len)
    {
      if (*p != src[p - dest])
        {
          gm_printf
            ("dest[0x%lx] = 0x%02x != src[0x%lx] = 0x%02x (error in packet)\n",
             (unsigned long) (p - dest), *p,
             (unsigned long) (p - dest), src[p - dest]);
          corrupt++;
        }
      p++;
    }

  /* Everything after the packet must still be zero, all the way to the
     end of the trailing guard area cleared by gm_bzero() above (the
     check used to stop at dest_buf + BUF_LEN, leaving most of that
     guard area unverified). */
  while (p < dest_buf + BUF_LEN + MAX_ALIGN)
    {
      if (*p != 0)
        {
          gm_printf ("dest[0x%lx] = 0x%02x != 0 (error after packet)\n",
                     (unsigned long) (p - dest), *p);
          corrupt++;
        }
      p++;
    }

  /* print stats */

  if (corrupt)
    {
      errors += corrupt;
      gm_printf ("src_start_align = %d (0x%x) src_end_align = %d (0x%x)\n",
                 src_start_align,  src_start_align, src_end_align,src_end_align);
      gm_printf ("dest_start_align = %d (0x%x)\n", dest_start_align,dest_start_align);
      gm_printf ("****************************************************\n");
    }
  else if (GM_DEBUG_DIRSEND)
    {
      gm_printf ("good (0x%x,0x%x)->(0x%x,??)\n", src_start_align,
                 src_end_align, dest_start_align);
    }
  fflush (stdout);

  /* Any corruption is fatal: release the port and stop. */
  if (corrupt)
    {
      CLOSE_IF_OPEN (port);
      gm_exit (GM_MEMORY_FAULT);
    }
  return 0;
}

/****************************************************************
 * Allocate LEN bytes of DMAable memory on the global port and
 * return it, or 0 on failure.  When WIN32 is defined the memory
 * comes from gm_malloc() and is then registered with
 * gm_register_memory(), so that memory registration gets tested;
 * otherwise gm_dma_calloc() is used.
 ****************************************************************/

static void *
my_malloc (long len)
{
#ifdef WIN32
  void *mem = gm_malloc (len);

  /* An allocation that cannot be registered is useless: release it
     and report failure. */
  if (mem && gm_register_memory (port, mem, len) != GM_SUCCESS)
    {
      gm_free (mem);
      mem = 0;
    }
  return mem;
#else
  return gm_dma_calloc (port, 1, len);
#endif
}

/****************************************************************
 * Print the command-line synopsis on stdout, close the port if
 * it is open, and call gm_exit() with GM_FAILURE.
 ****************************************************************/

static void
usage (void)
{
  static const char *const help[] = {
    "\nusage: gm_dirsend [-B board]\n",
    "-B #   specifies board number (default = 0)\n",
    "\n"
  };
  unsigned int line;

  for (line = 0; line < sizeof (help) / sizeof (help[0]); line++)
    fputs (help[line], stdout);

  CLOSE_IF_OPEN (port);
  gm_exit (GM_FAILURE);
}

/****************************************************************
 * Program body: parse the command line, open and configure the GM
 * port, allocate the buffers, and run test() over every
 * combination of source start, source end and destination start
 * offset, for copies spanning from under one page up to five
 * pages.
 *
 * The only option recognized is "-B <board>"; any other argument
 * is ignored.  Returns 0 once all tests have run.  Every failure
 * path closes the port and leaves through gm_exit().
 ****************************************************************/

static int
gm_dirsend (int argc, char *argv[])
{
  char *orig_src_buf, *orig_dest_buf;
  gm_status_t status;
  int board_id = 0;
  /* Size of each raw test buffer: 8192 bytes of lead-in (which holds
     the leading guard), BUF_LEN bytes of test space, MAX_ALIGN bytes of
     trailing guard, plus up to MAX_ALIGN bytes that ROUNDUP may skip
     when the allocation is not already aligned. */
  const long raw_len = BUF_LEN + 8192 + 2 * MAX_ALIGN;

  /****************
   * Initialize
   ****************/

  for (argc--, argv++; argc; argc--, argv++)
    {
      if (strcmp (*argv, "-B") == 0)
        {
          argc--, argv++;
          if (!argc)
            {
              printf ("Board number expected after '-B'.\n");
              usage ();
            }
          if (sscanf (*argv, "%i", &board_id) != 1
              || board_id < 0 || board_id > 1000)
            {
              /* Report the argument text itself: when the conversion
                 fails board_id still holds its default. */
              printf ("bad board number: %s\n", *argv);
              usage ();
            }
          printf ("Will use board number %d\n", board_id);
        }
    }


  port = NULL;
  status = gm_open (&port, board_id, GM_PORT_ID, "direct_send test",
                    GM_API_VERSION_1_0);
  if (status != GM_SUCCESS)
    {
      gm_perror ("could not open port", status);
      CLOSE_IF_OPEN (port);
      gm_exit (status);
    }

  status = gm_allow_remote_memory_access (port);
  if (status != GM_SUCCESS)
    {
      gm_perror ("could not enable directed sends", status);
      CLOSE_IF_OPEN (port);
      gm_exit (status);
    }

  status = gm_get_node_id (port, &this_node_id);
  if (status != GM_SUCCESS)
    {
      gm_perror ("could not get node id for this node", status);
      CLOSE_IF_OPEN (port);
      gm_exit (status);
    }

  status = gm_set_acceptable_sizes (port, GM_LOW_PRIORITY, 0xffff);
  if (status != GM_SUCCESS)
    {
      gm_perror ("could not set GM acceptable sizes", status);
      CLOSE_IF_OPEN (port);
      gm_exit (status);
    }

  /* let GM manage the send tokens */
  gm_free_send_tokens (port, GM_LOW_PRIORITY, gm_num_send_tokens (port));

  /* Buffers for the small notification message that my_bcopy() sends
     after each directed send. */

  sbuf = gm_dma_calloc (port, 1, 32);
  if (!sbuf)
    {
      fprintf (stderr, "Could not allocate send buffer.\n");
      CLOSE_IF_OPEN (port);
      /* status is GM_SUCCESS at this point, so it must not be used as
         the exit code. */
      gm_exit (GM_OUT_OF_MEMORY);
    }

  rbuf = gm_dma_calloc (port, 1, 32);
  if (!rbuf)
    {
      fprintf (stderr, "Could not allocate recv buffer.\n");
      CLOSE_IF_OPEN (port);
      gm_exit (GM_OUT_OF_MEMORY);
    }
  gm_provide_receive_buffer (port, rbuf, 5, GM_LOW_PRIORITY);

  /* Source and destination test buffers.  Each base pointer is aligned
     and then advanced 8192 bytes into its allocation, leaving room for
     the guard area that test() accesses in front of it. */

  orig_src_buf = (char *) my_malloc (raw_len);
  if (!orig_src_buf)
    {
      fprintf (stderr, "Could not allocate source buffer.\n");
      CLOSE_IF_OPEN (port);
      gm_exit (GM_OUT_OF_MEMORY);
    }
  src_buf = (unsigned char *) ROUNDUP (orig_src_buf, MAX_ALIGN);
  src_buf += 8192;
  gm_assert (GM_DMA_ALIGNED (src_buf));

  orig_dest_buf = (char *) my_malloc (raw_len);
  if (!orig_dest_buf)
    {
      fprintf (stderr, "Could not allocate destination buffer.\n");
      CLOSE_IF_OPEN (port);
      gm_exit (GM_OUT_OF_MEMORY);
    }
  dest_buf = (unsigned char *) ROUNDUP (orig_dest_buf, MAX_ALIGN);
  dest_buf += 8192;
  gm_assert (GM_DMA_ALIGNED (dest_buf));

  /****************
   * Run tests.
   ****************/

  {
    int src_start, src_end, dst_start, pages;
    /* Signed, so that adding a negative src_end stays in signed
       arithmetic. */
    const int page_len = 4096;

    for (dst_start = -MAX_ALIGN + 1; dst_start < MAX_ALIGN; dst_start++)
      {
        gm_printf ("testing destination alignment %d.\n", dst_start);
        for (src_start = -MAX_ALIGN + 1; src_start < MAX_ALIGN; src_start++)
          {
            gm_printf ("testing source alignment %d.\n", src_start);
            for (src_end = -MAX_ALIGN + 1; src_end < MAX_ALIGN; src_end++)
              {
                /* A copy that stays within the first few bytes only
                   makes sense when it has positive length. */
                if (src_start < src_end)
                  test (src_start, src_end, dst_start, my_bcopy);

                /* Copies ending near each of the next five page
                   boundaries. */
                for (pages = 1; pages <= 5; pages++)
                  test (src_start, pages * page_len + src_end, dst_start,
                        my_bcopy);
              }
          }
      }
  }

  /****************
   * Report errors.
   ****************/

  gm_printf ("%d errors\n", errors);

  gm_close (port);
  port = NULL;
  return 0;
}

/****************************************************************
 * entry point
 ****************************************************************/

#define GM_MAIN gm_dirsend
#include "gm_main.h"


/*
  This file uses GM standard indentation:

  Local Variables:
  c-file-style:"gnu"
  tab-width:8
  c-backslash-column:72
  End:
*/
