/* This program collects timing statistics for node-to-node message passing
 * 
 * To measure performance of native send/receive, compile as
 *		Paragon:	icc -nx -O -DPGON_NODE nntime.c
 * 		iPSC/860:	icc -O -DI860_NODE nntime.c -node
 *		CM5(CMMD):  cc -c -DCM5_NODE nntime.c; cmmd-ld -comp cc -node nntime.o
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if !defined(CNVX) && !defined(CNVXN)
#include <memory.h>
#endif

#ifdef CM5_NODE
#include <cm/cmmd.h>
#include <cm/timers.h>
#endif

#ifdef SUNMP
#include <sys/types.h>
#include <sys/processor.h>
#include <sys/procset.h>
#endif

#ifdef PVM
#include <unistd.h>
#include "pvm3.h"
#endif

/*
 * Benchmark parameters.
 *
 * NPROC   - not referenced by any code visible in this file.
 * SAMPLE  - number of round trips timed for each message size.
 * MAXSIZE - length (in doubles) of the message buffers and the largest
 *           message size exercised; the size loops step 0, 10, 100, ...
 *           (times 10 each pass) while size <= MAXSIZE.
 * ENCODE  - encoding passed to pvm_initsend() in the PACK code paths.
 * ENCODE2 - only appears in commented-out code in time_one().
 */
#define NPROC	4
#define SAMPLE	100		/* round trips per message size */
#define MAXSIZE	100000	/* must be a power of 10 */
#define ENCODE	PvmDataRaw
#define ENCODE2	PvmDataInPlace


/*
 * Timer abstraction.  Exactly one of the three sections below is selected
 * by the platform macros.  Each defines:
 *
 *   TIMER_CLEAR    reset the timer state
 *   TIMER_START    record/start at the beginning of a measurement
 *   TIMER_STOP     record/stop at the end of a measurement
 *   TIMER_ELAPSED  expression giving the elapsed time; the callers multiply
 *                  it by 1000000 and label the result microseconds, i.e.
 *                  they treat it as seconds
 */
#ifdef CM5_NODE

/* CM5: use CMMD node timer number 1. */
#include <cm/cmmd.h>
#define TIMER_CLEAR		CMMD_node_timer_clear(1) 
#define TIMER_START		CMMD_node_timer_start(1) 
#define TIMER_STOP		CMMD_node_timer_stop(1) 
#define TIMER_ELAPSED	CMMD_node_timer_elapsed(1) 

#endif /*CM5_NODE*/

#if defined(PGON_NODE) || defined(I860_NODE)

/* Paragon / iPSC-860: difference of two dclock() readings. */
double tstart, tstop, dclock();
#define TIMER_CLEAR		(tstart = tstop = 0)
#define TIMER_START		(tstart = dclock())
#define TIMER_STOP		(tstop = dclock())
#define TIMER_ELAPSED	(tstop - tstart)

#endif /*PGON_NODE/I860_NODE*/

#if !defined(PGON_NODE) && !defined(I860_NODE) && !defined(CM5_NODE)

/*
 * Everything else: difference of two gettimeofday() readings, combined
 * into seconds (tv_sec difference plus tv_usec difference scaled by 1e-6).
 */
#include <sys/time.h>
struct timeval tv1, tv2;
#define TIMER_CLEAR     (tv1.tv_sec = tv1.tv_usec = tv2.tv_sec = tv2.tv_usec =0)
#define TIMER_START     gettimeofday(&tv1, (struct timezone*)0)
#define TIMER_STOP      gettimeofday(&tv2, (struct timezone*)0)
#define TIMER_ELAPSED	(tv2.tv_sec-tv1.tv_sec+(tv2.tv_usec-tv1.tv_usec)*1.E-6)

#endif 

/*
 * mytid  - this task's identifier; main() sets it from pvm_mytid(),
 *          mynode() or CMMD_self_address() depending on the build.
 * momtid - result of pvm_parent(); set only in the PVM builds.
 */
int mytid, momtid;

/* Defined later in this file; declared here because main() calls it first. */
int pingpong(int hertid);

/*
 * Program entry point: find (or create) the partner task and run the
 * ping-pong exchange with it.
 *
 * PVM build (without SP2MPI_NODE):
 *   The first instance joins a group named after its own PID (or after
 *   argv[1] if given), becomes group member 0, spawns one more "nntime"
 *   passing the group name as its argument, and runs pingpong() against
 *   the child.  The spawned instance joins the same group as member 1 and
 *   runs pingpong() against its parent.
 * SP2MPI_NODE build:
 *   Runs pingpong() against every other task whose executable name is
 *   "nntime".
 * PGON_NODE / I860_NODE / CM5_NODE builds:
 *   Node 0 and node 1 run pingpong() against each other.
 *
 * argc, argv: argv[1], when present, is used as the PVM group name.
 * Exits with status 0 on the normal path, -1 on PVM setup failures.
 */
int
main(int argc, char *argv[])
{
#if defined(PVM) && !defined(SP2MPI_NODE)
	int kidtid;
	int ntask = 0;
	int nspawned;
	char *grpname;
	char grpbuf[32];		/* holds "NNTIME_<pid>" */
	char *spawn_argv[2];	/* argument vector handed to the child */
#endif
#if defined(SP2MPI_NODE)
	int i;
	int info, ntasks;
	struct pvmtaskinfo *taskp;
#endif /*SP2MPI_NODE*/

#if defined(PVM) && !defined(SP2MPI_NODE)

	if ((mytid = pvm_mytid()) < 0)
		exit(-1);
	momtid = pvm_parent();
	pvm_setopt(PvmRoute, PvmRouteDirect);

/*
 *  --- Create a unique group name from the PID of the parent.
 *  --- The parent will pass the group name to the child through cmd line
 */

	if (argc > 1)
		grpname = argv[1];
	else {
		sprintf(grpbuf, "NNTIME_%d", getpid());
		grpname = grpbuf;
	}

	if ((ntask = pvm_joingroup(grpname)) < 0) {
		pvm_perror("Error joining nntime group");
		pvm_exit();
		exit(-1);
	}

	switch (ntask) {
	case 0:
		spawn_argv[0] = grpname;
		spawn_argv[1] = 0;
		nspawned = pvm_spawn("nntime", spawn_argv, 0, "", 1, &kidtid);
		if (nspawned != 1) {
			/* without a partner pingpong() would block forever */
			fprintf(stderr,
				"nntime: could not spawn partner task (pvm_spawn returned %d)\n",
				nspawned);
			pvm_lvgroup(grpname);
			pvm_exit();
			exit(-1);
		}
		pingpong(kidtid);
		break;
	case 1:
		pingpong(momtid);                                       /* she's mom */
		break;
	default:
		printf("Too many 'nntime' processes running: %d\n", ntask);
	}

	pvm_lvgroup(grpname);
	pvm_exit();

#else /*PVM*/

#if defined(SP2MPI_NODE)
#define PROGRAM "nntime"

	if ((mytid = pvm_mytid()) < 0)
		exit(-1);
	momtid = pvm_parent();
	pvm_setopt(PvmRoute, PvmRouteDirect);
	info = pvm_tasks(0, &ntasks, &taskp);
	if (!info) {
		for (i = 0; i < ntasks; i++) {
			if ((strcmp(taskp[i].ti_a_out, PROGRAM) == 0) &&
			    (mytid != taskp[i].ti_tid)) {
				pingpong(taskp[i].ti_tid);
			}
		}
	}
	pvm_exit();

#endif /*SP2MPI_NODE*/

#if defined(PGON_NODE) || defined(I860_NODE)

	/* any non-zero node talks to node 0; node 0 talks to node 1 */
	if ((mytid = mynode()) != 0)
		pingpong(0);
	else
		pingpong(1);

#endif /*I860_NODE*/

#ifdef CM5_NODE

	CMMD_fset_io_mode(stdout, CMMD_independent);
	if ((mytid = CMMD_self_address()) == 1)
		pingpong(0);
	if ((mytid = CMMD_self_address()) == 0)
		pingpong(1);
	CMMD_all_msgs_wait();

#endif /*CM5_NODE*/

#endif /*PVM*/

	exit(0);
}


#ifdef PVM

/*
 * validate - check that a received buffer holds the expected test pattern.
 *
 * data: buffer of at least `size` doubles
 * size: number of elements to check
 *
 * Element i is expected to equal i*i.  Prints the first mismatching
 * element and stops, or prints a success line (tagged with mytid) when
 * every element matches.
 *
 * The deviation is checked in both directions; a value that is too large,
 * or NaN, is reported as well as one that is too small.
 *
 * NOTE(review): the expected value is deliberately formed with the same
 * int product that pingpong() uses to fill the buffer, so both sides stay
 * in agreement.  For i > 46340 that product exceeds a 32-bit int; if the
 * fill is ever changed to double arithmetic this must change with it.
 */
void
validate(double data[], int size)
{
	int i;
	double diff;

	for (i = 0; i < size; i++) {
		diff = i*i - data[i];
		/* written as a negated range test so that NaN also fails */
		if (!(diff <= 0.01 && diff >= -0.01)) {
			printf("error: data[%d] = %g\n", i, data[i]);
			break;
		}
	}

	if (i == size)
		printf("t%x: %d doubles received correctly\n\n\n", mytid, i);
}

#endif

/*
 * pingpong - exchange messages with a partner and measure the transit time.
 *
 * hertid: identifier of the partner task/node.
 *
 * The two partners take different roles depending on how their ids compare:
 *
 *   mytid >  hertid  (echo side):  receives each message and sends one of
 *                                  the current size back.
 *   mytid <= hertid  (timing side): sends, waits for the reply, and prints
 *                                  the average round-trip time and bandwidth
 *                                  for each message size.
 *
 * In PVM builds a full MAXSIZE-element buffer is first exchanged once in
 * each direction and checked with validate().  Then, for each size in
 * 0, 10, 100, ... MAXSIZE (doubles), SAMPLE round trips are performed.
 *
 * Always returns 0; the return value carries no information.
 */
int
pingpong(int hertid)
{
	int n, size;
	static double data[MAXSIZE];
	char str[32];
	int t;

	/*
	 * Test pattern, identical on both sides.
	 * NOTE(review): n*n is an int product and exceeds a 32-bit int for
	 * n > 46340.  validate() computes its expected values the same way,
	 * so the two must only ever be changed together.
	 */
	for (n = 0; n < MAXSIZE; n++)
		data[n] = n*n;

	/* test node-to-node send */

	if (mytid > hertid) {

		/* ---- echo side ---- */
#ifdef PVM
#ifdef PACK
		pvm_recv(-1, 0);
		pvm_upkstr(str);
		printf("t%x: %s\n", mytid, str);
		pvm_upkdouble(data, MAXSIZE, 1);
		validate(data, MAXSIZE);
		pvm_initsend(ENCODE);
		sprintf(str, "%d doubles from t%x", MAXSIZE, mytid);
		pvm_pkstr(str);
		pvm_pkdouble(data, MAXSIZE, 1);
		pvm_send(hertid, 0);
#else
		pvm_precv(-1, 0, data, MAXSIZE, PVM_DOUBLE, (int*)0, (int*)0, (int*)0);
		validate(data, MAXSIZE);
		pvm_psend(hertid, 0, data, MAXSIZE, PVM_DOUBLE);
#endif /*PACK*/

/*
		pvm_freebuf(pvm_setsbuf(0));
		pvm_freebuf(pvm_setrbuf(0));
*/
#endif /*PVM*/

/*
#ifdef SUNMP
if (processor_bind(P_PID, P_MYID, 2, NULL) == -1)
	perror("processor_bind");
#endif
*/
		for (size = 0; size <= MAXSIZE; size = 10*(size ? size : 1)) {
			for (n = 0; n < SAMPLE; n++) {
#ifdef PVM
#ifdef PACK
				pvm_recv(-1, 0);
				pvm_initsend(ENCODE);
				pvm_pkdouble(data, size, 1);
				pvm_send(hertid, 0);
#else
				pvm_precv(-1, -1, data, MAXSIZE, PVM_DOUBLE, (int*)0,
					(int*)0, (int*)0);
				pvm_psend(hertid, size, data, size, PVM_DOUBLE);
#endif /*PACK*/
#else /*PVM*/
#if defined(PGON_NODE) || defined(I860_NODE)
				crecv(-1, (char *)data, MAXSIZE*sizeof(double));
				csend(size, (char *)data, size*sizeof(double), hertid, 0);
#endif
#ifdef CM5_NODE
				CMMD_receive_block(CMMD_ANY_NODE, CMMD_ANY_TAG, (char *)data,
					MAXSIZE*sizeof(double));
				CMMD_send_block(hertid, size, data, size*sizeof(double));
#endif
#endif /*PVM*/
			}
		}

	} else {

		/* ---- timing side ---- */
#ifdef PVM
#ifdef PACK
		pvm_initsend(ENCODE);
		sprintf(str, "%d doubles from t%x", MAXSIZE, mytid);
		pvm_pkstr(str);
		pvm_pkdouble(data, MAXSIZE, 1);
		pvm_send(hertid, 0);
		pvm_recv(-1, 0);
		pvm_upkstr(str);
		printf("t%x: %s\n", mytid, str);
		pvm_upkdouble(data, MAXSIZE, 1);
#else
		pvm_psend(hertid, 0, data, MAXSIZE, PVM_DOUBLE);
		pvm_precv(-1, 0, data, MAXSIZE, PVM_DOUBLE, (int*)0, (int*)0, (int*)0);
#endif /*PACK*/
		validate(data, MAXSIZE);
/*
		pvm_freebuf(pvm_setsbuf(0));
		pvm_freebuf(pvm_setrbuf(0));
*/
#endif

		/* do timing measurements */
		puts("Node-to-node Send/Ack\n");
		for (size = 0; size <= MAXSIZE; size = 10*(size ? size : 1)) {

#ifdef TIMER_CLEAR

/*
#ifdef SUNMP
if (processor_bind(P_PID, P_MYID, 0, NULL) == -1)
	perror("processor_bind");
#endif
*/
			TIMER_CLEAR;
			TIMER_START;
			for (n = 0; n < SAMPLE; n++) {
#ifdef PVM
#ifdef PACK
				pvm_initsend(ENCODE);
				pvm_pkdouble(data, size, 1);
				pvm_send(hertid, 0);
				pvm_recv(-1, 0);
#else
				pvm_psend(hertid, size, data, size, PVM_DOUBLE);
				pvm_precv(-1, -1, data, MAXSIZE, PVM_DOUBLE, (int*)0,
					(int*)0, (int*)0);
#endif /*PACK*/
#else /*PVM*/
#if defined(PGON_NODE) || defined(I860_NODE)
				csend(size, (char *)data, size*sizeof(double), hertid, 0);
				crecv(-1, (char *)data, MAXSIZE*sizeof(double));
#endif
#ifdef CM5_NODE
				CMMD_send_block(hertid, size, data, size*sizeof(double));
				CMMD_receive_block(CMMD_ANY_NODE, CMMD_ANY_TAG, data,
					MAXSIZE*sizeof(double));
#endif
#endif /*PVM*/
			}
			TIMER_STOP;

			/* average round-trip time in microseconds */
			t = 1000000*TIMER_ELAPSED/SAMPLE;

			/*
			 * Bandwidth: 2 messages of size doubles (8 bytes each) per
			 * round trip; bytes per microsecond equals MB/s.  Report 0
			 * rather than dividing by a zero time.  The byte count is
			 * cast because sizeof yields size_t, which does not match %d.
			 */
			printf("Roundtrip T = %d (us)  (%.4f MB/s)      Data size: %d\n",
				t, t > 0 ? 2.0*8.0*(float)size/(float)t : 0.0,
				(int)(sizeof(double)*size));

#endif /*TIMER_CLEAR*/
		}

/* sleep(1); */

	}

	return 0;
}


#ifdef PVM

/*
 * time_one - measure packet transit time for one message size.
 *
 * size: number of doubles to send per message; must be in 0..MAXSIZE
 * dtid: PVM task id of the destination
 *
 * Sends `size` doubles to dtid with pvm_psend() and waits for a one-int
 * reply, SAMPLE times, then prints the average time per send+ack in
 * microseconds and the resulting one-way bandwidth.
 *
 * Returns 0 after a measurement (or when no timer is configured), -1 if
 * size is outside the buffer and nothing was sent.
 *
 * Not called from any code visible in this file.
 */
int
time_one(int size, int dtid)
{
	int i;
	static double data[MAXSIZE];
	int t;

#ifdef TIMER_CLEAR

	/* data[] has MAXSIZE elements; refuse sizes that would overrun it */
	if (size < 0 || size > MAXSIZE) {
		fprintf(stderr, "time_one: size %d out of range 0..%d\n",
			size, MAXSIZE);
		return -1;
	}

	/* multiply in double so large indices do not overflow int */
	for (i = 0; i < size; i++)
		data[i] = (double)i*i;

/*
	pvm_initsend(ENCODE2);
	pvm_pkdouble(data, size, 1);
*/

/*
#ifdef SUNMP
if (processor_bind(P_PID, P_MYID, 0, NULL) == -1)
	perror("processor_bind");
#endif
*/

	TIMER_CLEAR;
	TIMER_START;
	for (i = 0; i < SAMPLE; i++) {
		int dummy;

		pvm_psend(dtid, 1, data, size, PVM_DOUBLE);
		pvm_precv(-1, -1, &dummy, 1, PVM_INT, (int*)0, (int*)0, (int*)0);
/*
		pvm_send(dtid, 0);
		pvm_recv(-1, 0);
*/
	}
	TIMER_STOP;

	/* average time per send+ack in microseconds */
	t = 1000000*TIMER_ELAPSED/SAMPLE;

	/* bytes per microsecond equals MB/s; report 0 instead of dividing by 0 */
	printf("Send+ACK T = %d (us)  (%.4f MB/s)      Data size: %d\n",
		t, t > 0 ? 8.0*(float)size/(float)t : 0.0, 8*size);

/* sleep(1); */

#endif /*TIMER_CLEAR*/

	return 0;
}

#endif /*PVM*/
