
/*
 *         PVM version 3.3:  Parallel Virtual Machine System
 *               University of Tennessee, Knoxville TN.
 *           Oak Ridge National Laboratory, Oak Ridge TN.
 *                   Emory University, Atlanta GA.
 *      Authors:  A. L. Beguelin, J. J. Dongarra, G. A. Geist,
 *    W. C. Jiang, R. J. Manchek, B. K. Moore, and V. S. Sunderam
 *                   (C) 1992 All Rights Reserved
 *
 *                              NOTICE
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby granted
 * provided that the above copyright notice appear in all copies and
 * that both the copyright notice and this permission notice appear in
 * supporting documentation.
 *
 * Neither the Institutions (Emory University, Oak Ridge National
 * Laboratory, and University of Tennessee) nor the Authors make any
 * representations about the suitability of this software for any
 * purpose.  This software is provided ``as is'' without express or
 * implied warranty.
 *
 * PVM version 3 was funded in part by the U.S. Department of Energy,
 * the National Science Foundation and the State of Tennessee.
 */

/*
 *	lpvmshmem.c
 *
 *	Libpvm core for MPP environment.
 *
$Log: lpvmshmem.c,v $
 * Revision 1.40  1996/05/13  21:34:28  manchek
 * added flag so we only install SIGTERM handler once.
 * cleaned up code in beatask so we don't segfault if we can't
 * allocate resources
 *
 * Revision 1.39  1995/11/10  21:37:46  manchek
 * check for EINTR from semop in peer_wait
 *
 * Revision 1.38  1995/11/02  16:17:20  manchek
 * free replies to control messages in mxfer
 *
 * Revision 1.37  1995/09/08  17:26:02  manchek
 * aargh forgot semicolon
 *
 * Revision 1.36  1995/09/08  16:56:17  manchek
 * experimental changes to pvm_psend to improve performance (postpone freebuf)
 *
 * Revision 1.35  1995/09/06  17:37:24  manchek
 * aargh, forgot pvm_precv
 *
 * Revision 1.34  1995/09/06  17:31:41  manchek
 * pvm_psend returns not implemented instead of bad param for string type
 *
 * Revision 1.33  1995/09/05  19:20:30  manchek
 * changes from bigapple to make busywait work better
 *
 * Revision 1.32  1995/07/28  16:40:59  manchek
 * wrap HASERRORVARS around errno declarations
 *
 * Revision 1.31  1995/07/28  15:35:19  manchek
 * address of copy databuf wasn't set right in peer_send
 *
 * Revision 1.30  1995/07/28  15:10:37  manchek
 * only send message header on first fragment
 *
 * Revision 1.29  1995/07/28  15:07:30  manchek
 * peer_send wasn't generating message checksum
 *
 * Revision 1.28  1995/07/24  18:45:51  manchek
 * message headers passed in inbox shmpkhdr instead of databuf
 * requires changes to peer_recv, peer_send, msendrecv.
 * use pvmnametag function to print message tags symbolically
 *
 * Revision 1.27  1995/07/19  16:37:51  manchek
 * peer_send returns NotImpl if DataInPlace used
 *
 * Revision 1.26  1995/07/18  17:00:13  manchek
 * added code to generate and check crc on each message (MCHECKSUM).
 * get and put wait-id in message header
 *
 * Revision 1.25  1995/07/12  01:20:40  manchek
 * initialize lots of globals, reset in pvmendtask.
 * pvmendtask checks that globals are set before using them.
 * catch_kill reinstalls old sighldr, doesn't call if it's SIG_IGN,
 * sets pidtid state to ST_FINISH.
 * peer_detach now frees the peer struct
 *
 * Revision 1.24  1995/07/11  18:58:30  manchek
 * peer_wait returns int, -1 if error reading semaphore.
 * mroute and dynbuf check peer_wait return for error
 *
 * Revision 1.23  1995/07/05  16:18:10  manchek
 * exiting task sets pidtid entry state to ST_FINISH instead of ST_EXIT
 *
 * Revision 1.22  1995/07/05  15:36:40  manchek
 * free messages before sending TASKEXIT in pvmendtask
 *
 * Revision 1.21  1995/07/03  20:51:15  manchek
 * made convex poll-type code in peer_wait() the default case
 *
 * Revision 1.20  1995/07/03  20:18:26  manchek
 * hellish cleanup of comments and formatting.
 * removed POWER4 ifdefs.
 * added deadlock detection where task owns all pages in shared
 * segment and needs still another
 *
 * Revision 1.19  1995/06/30  16:24:45  manchek
 * aargh
 *
 * Revision 1.18  1995/06/28  18:17:56  manchek
 * moved check_for_exit from pvmumbuf.c
 *
 * Revision 1.17  1995/06/28  17:59:11  manchek
 * typo
 *
 * Revision 1.16  1995/06/28  15:47:18  manchek
 * added TC_SHMAT connect handshake
 *
 * Revision 1.15  1995/06/19  17:47:58  manchek
 * was packing random string in TC_CONACK message in pvmmctl
 *
 * Revision 1.14  1995/06/02  17:21:34  manchek
 * pvm_start_pvmd ignores INT, QUIT, TSTP signals
 *
 * Revision 1.13  1995/05/30  14:48:37  manchek
 * in pvmendtask() must call peer_cleanup() before erasing our tid
 *
 * Revision 1.12  1995/05/24  19:07:37  manchek
 * small fix in getdsock().
 * changed HPPA shared memory name to HPPAMP
 *
 * Revision 1.11  1995/05/22  19:45:45  manchek
 * added ifdefs for RS6KMP
 *
 * Revision 1.10  1995/05/22  19:09:02  manchek
 * took out ifdefs around read_int().
 * with new startup, can read pvmd sockaddr file only once
 *
 * Revision 1.9  1995/05/18  17:22:08  manchek
 * need to export pvminbox and myshmbufid
 *
 * Revision 1.8  1995/05/17  16:21:17  manchek
 * added support for CSPP shared memory.
 * lots of bug fixes from SGI and Convex.
 * added PVMTASKDEBUG envar.
 * pvm_start_pvmd reads sockaddr from pvmd instead of sleeping on addr file.
 * CSPP port (only) uses new TM_GETOPT message to get trace, output dest.
 *
 * Revision 1.7  1995/02/06  21:43:59  manchek
 * pvmmctl now replies to TC_CONREQ message.
 * better cleanup of message buffers in pvmendtask
 *
 * Revision 1.6  1995/02/01  21:24:47  manchek
 * error 4 is now PvmOverflow
 *
 * Revision 1.5  1994/12/20  16:38:44  manchek
 * added pvmshowtaskid variable
 *
 * Revision 1.4  1994/11/07  22:42:39  manchek
 * general damage control and cleanup:
 * initialize variables
 * send null packets to wake up pvmd instead of reconnecting
 * clean up on catching SIGTERM
 *
 * Revision 1.3  1994/06/30  21:35:40  manchek
 * typo in peer_recv()
 *
 * Revision 1.2  1994/06/04  21:44:31  manchek
 * updated header.
 * changed TM_SET to TM_SETOPT
 *
 * Revision 1.1  1994/06/03  20:38:18  manchek
 * Initial revision
 *
 */

#include <sys/param.h>
#include <stdio.h>
#include <rpc/types.h>
#include <rpc/xdr.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/stat.h>
#include <fcntl.h>
#ifdef	SYSVSTR
#include <string.h>
#else
#include <strings.h>
#endif
#include <signal.h>
#include <errno.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/sem.h>
#include <sys/mman.h>
#include <unistd.h>
#ifdef IMA_SYMM
#include <parallel/parallel.h>
#endif
#ifdef IMA_KSR1
#include <pthread.h>
#endif

#include <pvm3.h>
#include "global.h"
#include "tdpro.h"
#include "ddpro.h"
#include "pvmalloc.h"
#include "pvmdabuf.h"
#include "pvmfrag.h"
#include "pvmumbuf.h"
#include "listmac.h"
#include "pvmshmem.h"
#include "bfunc.h"
#include <pvmtev.h>
#include "tevmac.h"

/*
 * task debug mask: bits tested against debugmask (e.g.
 * "debugmask & TDMPACKET") to enable trace output via pvmlogerror()
 */

#define	TDMPACKET	1		/* packet tracing */
#define	TDMMESSAGE	2		/* message tracing */

/*
 * Fallback max/min, defined only if the platform headers did not.
 * These are plain macros: the selected argument is evaluated twice,
 * so do not pass expressions with side effects.
 */
#ifndef max
#define max(a,b)	((a)>(b)?(a):(b))
#endif

#ifndef min
#define min(a,b)	((a)<(b)?(a):(b))
#endif

/*
 * Forward declarations, K&R style (no parameter lists), for routines
 * used below that return something other than int.
 */
char *getenv();

extern struct encvec *enctovec();
char *pvmgetpvmd();
char *pvmgethome();
char *pvmdsockfile();
char *pvmnametag();

#ifdef IMA_CSPP
int current_node();
#endif


/***************
 **  Globals  **
 **           **
 ***************/

/* platforms whose headers do not declare these set no HASERRORVARS */
#ifndef HASERRORVARS
extern int errno;					/* from libc */
extern char *sys_errlist[];
extern int sys_nerr;
#endif

extern int pvmrbufmid;				/* from pack.c */
extern int pvmsbufmid;				/* from pack.c */
extern int bufpageused;				/* from pvmshmem.c */
extern struct mhp *pvmmidh;			/* from pvmumbuf.c */
extern int pvmmidhsiz;				/* from pvmumbuf.c */

/*
 * Text for each libpvm error, indexed by the negated error code
 * (-pvm_errno).  Slots shown as "Error N" are placeholders so that the
 * index always equals the code.  pvm_nerr is computed from the size of
 * this array, so appending an entry here is enough to export it.
 */
char *pvm_errlist[] = {					/* error messages for -pvm_errno */
		"Error 0",
		"Error 1",
	"Bad parameter",
	"Count mismatch",
	"Value too large",
	"End of buffer",
	"No such host",
	"No such file",
		"Error 8",				/* not used */
		"Error 9",				/* not used */
	"Malloc failed",
		"Error 11",				/* not used */
	"Can't decode message",
		"Error 13",				/* not used */
	"Can't contact local daemon",
	"No current buffer",
	"No such buffer",
	"Null group name",
	"Already in group",
	"No such group",
	"Not in group",
	"No such instance",
	"Host failed",
	"No parent task",
	"Not implemented",
	"Pvmd system error",
	"Version mismatch",
	"Out of resources",
	"Duplicate host",
	"Can't start pvmd",
	"Already in progress",
	"No such task",
	"No such entry",
	"Duplicate entry",
};

/*
 * Exported libpvm state.  A value of -1 in the tid / pid / id variables
 * below means "not assigned yet"; pvmbeatask() returns immediately once
 * pvmmytid is no longer -1.
 */
int pvmautoerr = 1;						/* whether to auto print err msgs */
int pvmcouttid = 0;						/* child stdout dst and code */
int pvmcoutcod = 0;
int pvmctrctid = 0;						/* child trace dst and code */
int pvmctrccod = 0;
int pvmmyndf = 0;						/* host native data enc, init XDR */
int pvmmyptid = -1;						/* parent task id */
int pvmmytid = -1;						/* this task id */
int pvmmyupid = -1;						/* process unix pid */
int pvmudpmtu = MAXFRAGSIZE;			/* max fragment size */
int pvmfrgsiz = MAXFRAGSIZE;			/* message frag length (to pack) */
int pvm_useruid = -1;					/* user's uid */
int shmbufsiz = 0;						/* shared-memory buffer size */

int pvm_errno = 0;						/* last libpvm error code */
int pvm_nerr = sizeof(pvm_errlist)
			/sizeof(pvm_errlist[0]);	/* exported num of errors */
int pvmschedtid = 0;					/* scheduler task */
int pvmrescode = 0;						/* allow use of reserved tids, codes */
struct umbuf *pvmrxlist = 0;			/* not-recvd msg list */
int pvmtidhmask = TIDHOST;				/* mask for host field of tids */
int pvmtidlmask = TIDLOCAL;				/* mask for local field of tids */
int pvmouttid = 0;						/* stdout dst and code */
int pvmoutcod = 0;
int pvmshowtaskid = 1;					/* catchout shows task ids */
int pvmtoplvl = 1;						/* function called from outside lib */
int pvmtrctid = 0;						/* trace dst and code */
int pvmtrccod = 0;
Pvmtmask pvmtrcmask;					/* trace bitmask */
Pvmtmask pvmctrcmask;					/* child trace bitmask */

/* shared-memory transport state; sizes are filled in by pvmbeatask() */
int pgsz = 0;							/* system page size */
int pvmpgsz = 0;						/* PVM virtual page size */
char *outmsgbuf = 0;					/* my outgoing message buffer */
int outbufsz = 0;						/* how many frags in outgoing msg buf */
int nbufsowned = 0;						/* num shared frags held by us */
struct pidtid *pidtids = 0;				/* pid -> tid table */
char *infopage = 0;						/* proto, NDF, pid-tid table */
int maxpidtid = 0;						/* size of pid-tid table */
int debugmask = 0;						/* which debugging info */
int myshmbufid = -1;					/* ID of shared-memory buffer */
char *pvminbox = 0;						/* incoming message buffer */


/***************
 **  Private  **
 **           **
 ***************/

static char rcsid[] = "$Id: lpvmshmem.c,v 1.40 1996/05/13 21:34:28 manchek Exp $";
static char pvmtxt[512];				/* scratch for error log */
static struct umbuf *rxfrag = 0;		/* not-assembled incm msgs */
static struct frag *rxbuf;				/* buffer for incoming packets */
static int pvmtrcmid = 0;				/* message buffer for trace */
static int pvmtrcsbf = 0;				/* existing other buffer during trace */

#ifdef IMA_SYMM
static int cpuonline;					/* the number of CPUs available */
static struct shpage *pvmfraginfo;		/* frag locks and ref counts */
#endif
#ifdef USERECVSEMAPHORE
static int mysemid = -1;				/* ID of semaphore to sleep on */
#endif
static char *pvmdinbox = 0;				/* pvmd's incoming message buffer */
static char *pvmdoutbuf = 0;			/* pvmd's outgoing message buffer */
static int pvminboxsz = 0;				/* size of incoming message buffer */
static int mypidtid = -1;				/* my position in pid-tid table */
static int pvmdpid = 0;					/* pvmd's Unix proc ID */
static struct sockaddr_in pvmdsad;		/* address of pvmd socket */
static int pvmdsock = -1;				/* pvmd socket descriptor */
static int pvmpolltype = PvmPollSleep;	/* memory polling style */
static int pvmpolltime = BUSYWAIT;		/* time value for poll wait */

#ifdef LOG
/* stays 0 until pvmbeatask() opens it, and remains 0 if that fopen fails */
static FILE *logfp = 0;					/* my own log file */
#endif

static int pvmrouteopt = PvmDontRoute;	/* task-task routing style */
/* SIGTERM disposition found by pvmbeatask(); catch_kill() chains to it */
static void (*pvmoldtermhdlr)() = 0;
static int pvmsettermhdlr = 1;			/* need to install TERM handler */
/* while nonzero, pvmmctl() ignores TC_TASKEXIT notifications */
static int outta_here = 0;


/**************************
 **  Internal Functions  **
 **                      **
 **************************/

/*	pvmbailout()
*
*	Hook that lower-level code (e.g. frag.c) calls when it wants to
*	give up.  Inside libpvm we do not want to bail, so this does
*	nothing at all; the argument is ignored.
*/

void
pvmbailout(n)
	int n;
{
	(void)n;	/* deliberately unused */
}


/*	pvmlogerror()
*
*	Log a libpvm error message.  Prepends a string identifying the task:
*	the Unix pid while no tid has been assigned (pvmmytid == -1), the
*	tid afterwards.  The text is written as given, so the caller
*	supplies any trailing newline.
*
*	Before a tid is assigned the message always goes to stderr.
*	Afterwards it goes to the private log file when built with LOG and
*	that file is open, otherwise to stderr.
*
*	Always returns 0.
*/

int
pvmlogerror(s)
	char *s;
{
	if (pvmmytid == -1) {
		fprintf(stderr, "libpvm [pid%d]: %s", pvmmyupid, s);
		return 0;
	}

#ifdef LOG
	/* logfp stays null if the log file could not be opened */
	if (logfp) {
		fprintf(logfp, "libpvm [t%x]: %s", pvmmytid, s);
		fflush(logfp);
		return 0;
	}
#endif

	fprintf(stderr, "libpvm [t%x]: %s", pvmmytid, s);
	return 0;
}


/*	pvmlogperror()
*
*	Log a libpvm error message to stderr.  Prepends a string identifying
*	the task (pid before a tid is assigned, tid afterwards) and appends
*	the system error string for the current errno followed by a newline.
*	An errno outside 0..sys_nerr-1 is reported as "Unknown Error".
*
*	Always returns 0.
*/

int
pvmlogperror(s)
	char *s;
{
	int en = errno;		/* take one snapshot; stdio may change errno */
	char *em;

	if (en >= 0 && en < sys_nerr)
		em = sys_errlist[en];
	else
		em = "Unknown Error";

	if (pvmmytid == -1)
		fprintf(stderr, "libpvm [pid%d]: %s: %s\n", pvmmyupid, s, em);
	else
		fprintf(stderr, "libpvm [t%x]: %s: %s\n", pvmmytid, s, em);
	return 0;
}


/*	getdsock()
*
*	Get address of pvmd socket, set up pvmdsad.
*
*	The address text comes from the PVMSOCK environment variable if set,
*	otherwise from the file named by pvmdsockfile().  It is handed to
*	hex_inadport() to fill in pvmdsad, whose family is then forced to
*	AF_INET.
*
*	On any failure an error is logged and pvmdsad is left untouched;
*	nothing is returned to the caller.
*/

void
getdsock()
{
	char buf[128];
	int d;
	int n;
	char *p;

	if (!(p = getenv("PVMSOCK"))) {
		if (!(p = pvmdsockfile())) {
			pvmlogerror("getdsock() pvmdsockfile() failed\n");
			return;
		}
		if ((d = open(p, O_RDONLY, 0)) == -1) {
			pvmlogperror(p);
			return;
		}
		/* leave room for the terminator written below */
		n = read(d, buf, sizeof(buf) - 1);
		(void)close(d);
		if (n == -1) {
			pvmlogperror("getdsock() read addr file");
			return;
		}
		if (n == 0) {
			pvmlogerror("getdsock() read addr file: wrong length read\n");
			return;
		}
		buf[n] = 0;
		p = buf;
	}

	hex_inadport(p, &pvmdsad);
	pvmdsad.sin_family = AF_INET;
}


/*	prodpvmd()
*
*	Wake up the pvmd, which is sleeping on sockets and not on shared
*	memory.
*
*	First call (no socket yet): open a TCP connection to pvmdsad and
*	clear the dummy header; no data is written on that call (presumably
*	the connect itself is enough to rouse the pvmd).
*	Later calls: write one all-zero TDFRAGHDR-byte header down the
*	existing connection.
*
*	Failures to create or connect the socket are logged and leave
*	pvmdsock at -1 so the next call retries.
*/

void
prodpvmd()
{
	static char dummy[TDFRAGHDR];

	/* already connected: a null packet is all it takes */
	if (pvmdsock != -1) {
		write(pvmdsock, dummy, TDFRAGHDR);
		return;
	}

	pvmdsock = socket(AF_INET, SOCK_STREAM, 0);
	if (pvmdsock == -1) {
		pvmlogperror("prodpvmd() socket");
		return;
	}

	if (connect(pvmdsock, (struct sockaddr*)&pvmdsad, sizeof(pvmdsad)) == -1) {
		pvmlogperror("prodpvmd() connect");
		close(pvmdsock);
		pvmdsock = -1;
		return;
	}

	BZERO(dummy, sizeof(dummy));
}


/*	peer_wait()
*
*	Check inbox for messages, block on a semaphore if no message.
*
*	"No message" means mb_read == mb_last in our inbox header.
*	Two builds:
*	  - without USERECVSEMAPHORE: wait under the inbox lock, using the
*	    platform condition variable on IMA_SUNMP / IMA_RS6KMP;
*	  - with USERECVSEMAPHORE: decrement semaphore mysemid, retrying
*	    when semop() is interrupted (EINTR).
*
*	Returns 0 when the inbox is no longer empty, -1 if semop() fails
*	for a reason other than EINTR (semaphore build only).
*
*	NOTE(review): in the non-semaphore build, when neither IMA_SUNMP nor
*	IMA_RS6KMP is defined the while has no statement of its own, so the
*	PAGEUNLOCK line after it becomes the loop body - confirm intended.
*/

int
peer_wait()
{
	struct msgboxhdr *inbp = (struct msgboxhdr *)pvminbox;

#ifndef USERECVSEMAPHORE

	PAGELOCK(&inbp->mb_lock);
	/* loop body is whichever single statement survives preprocessing */
	while (inbp->mb_read == inbp->mb_last)
#ifdef	IMA_SUNMP
		cond_wait(&inbp->mb_cond, &inbp->mb_lock);
#endif
#ifdef	IMA_RS6KMP
		pthread_cond_wait(&inbp->mb_cond, &inbp->mb_lock);
#endif
	PAGEUNLOCK(&inbp->mb_lock);

#else	/*USERECVSEMAPHORE*/

	struct sembuf sops;

	/* one blocking decrement of semaphore 0 in the set */
	sops.sem_num = 0;
	sops.sem_op = -1;
	sops.sem_flg = 0;
	while (inbp->mb_read == inbp->mb_last) {
		if (debugmask & PDMSEM) {
			sprintf(pvmtxt, "peer_wait(): Waiting on semop id = %d\n", mysemid);
			pvmlogerror(pvmtxt);
		}
		if (semop(mysemid, &sops, 1) == -1) {
			/* a signal merely interrupts the wait; go round again */
			if (errno != EINTR) {
				sprintf(pvmtxt, "peer_wait(): Error waiting for semop id = %d",
						mysemid);
				pvmlogperror(pvmtxt);
				return -1;
			}
		}
		else if (debugmask & PDMSEM) {
			sprintf(pvmtxt, "peer_wait(): Processing Event on semop id = %d\n",
					mysemid);
			pvmlogerror(pvmtxt);
		}
	}

#endif	/*USERECVSEMAPHORE*/
	return 0;
}


/*	pvmmctl()
*
*	Entry points for libpvm control messages.
*
*	Called when a control message (code between TC_FIRST and TC_LAST)
*	has been fully received.  The message is made the current receive
*	buffer while it is handled; afterwards the previous receive buffer
*	is restored and the control message is freed.
*
*	Returns the handle of a reply message the caller must send
*	(destination and code are stored in its ub_dst / ub_cod), or 0 when
*	there is nothing to send.
*/

static int
pvmmctl(up)
	struct umbuf *up;
{
	int rbf;				/* temp rx message storage */
	int sbf = 0;			/* return message to send */
	struct umbuf *up2;
	int ttpro;				/* protocol revision */
	int ackd;				/* allow connection if 0 */
	struct peer *pp;
	struct msgboxhdr *dboxp;
	char buf[256];			/* to convert sockaddr, misc */

	rbf = pvm_setrbuf(up->ub_mid);

	switch (up->ub_cod) {

	case TC_SHMAT:
		pp = peer_conn(up->ub_src, (int *)0);
		if (!pp || pp == (struct peer *)-1L) {
			sprintf(pvmtxt, "pvmmctl() can't connect to src t%x\n",
				up->ub_src);
			pvmlogerror(pvmtxt);
			/*
			* The answer is delivered by writing into the sender's
			* segment; without a mapping there is nowhere to put a
			* refusal, so just report it.
			*/
		} else {
			/* acknowledge by storing our tid in the sender's inbox header */
			dboxp = (struct msgboxhdr *)pp->p_buf;
			dboxp->mb_attached = pvmmytid;
		}
		break;

	case TC_CONREQ:
		/* build the TC_CONACK reply in a scratch send buffer */
		sbf = pvm_setsbuf(pvm_mkbuf(PvmDataFoo));
		ttpro = TDPROTOCOL;
		ackd = 1;
		pvm_pkint(&ttpro, 1, 1);
		pvm_pkint(&ackd, 1, 1);
		pvm_pkstr("");
		/* put the old send buffer back; sbf now names the reply */
		sbf = pvm_setsbuf(sbf);
		up2 = midtobuf(sbf);
		up2->ub_dst = up->ub_src;
		up2->ub_cod = TC_CONACK;
		break;

	case TC_TASKEXIT:
		if (!outta_here) {
			int tid;
			extern struct peer *peers;

			pvm_upkint(&tid, 1, 1);

			for (pp = peers->p_link; pp != peers; pp = pp->p_link)
				if (pp->p_tid == tid) {
					pp->p_exited = 1;		/* mark as deleted */
					break;
				}
		}
		break;

	case TC_CONACK:
		sprintf(pvmtxt, "pvmmctl() unexpected TC msg from t%x!\n", up->ub_src);
		pvmlogerror(pvmtxt);
		/* FALLTHROUGH */

	case TC_NOOP:
		break;

	case TC_OUTPUT:
		pvmclaimo();
		break;

	case TC_SETTMASK:
		/* NOTE(review): unpacks into a fixed 256-byte buffer - confirm sender bound */
		pvm_upkstr(buf);
		if (strlen(buf) + 1 == TEV_MASK_LENGTH)
			BCOPY(buf, pvmtrcmask, TEV_MASK_LENGTH);
		else
			pvmlogerror("pvmmctl() bogus trace mask\n");
		break;

	default:
		sprintf(pvmtxt, "pvmmctl() from t%x code=%d ?\n",
				up->ub_src, up->ub_cod);
		pvmlogerror(pvmtxt);
		break;
	}

	pvm_setrbuf(rbf);
	umbuf_free(up->ub_mid);
	return sbf;
}


/*	mroute()
*
*	Route a message to a destination.
*
*	mid/dtid/code name the message to send (mid 0: nothing to send).
*	tmout selects how long to wait for incoming traffic:
*		null pointer	block until at least one message arrives
*		{0, 0}			do not block
*		otherwise		block for at most that long
*
*	Returns when
*		outgoing message (if any) fully sent and
*		(timed out (tmout) or
*			at least one message fully received)
*	Returns >=0 the number of complete messages downloaded, or
*	negative on error.
*
*	Replies to control messages that arrive while something is still
*	being sent are parked on a small fixed stack and sent afterwards;
*	if that stack is full the reply is logged and dropped.
*/

int
mroute(mid, dtid, code, tmout)
	int mid;				/* message */
	int dtid;				/* dest */
	int code;				/* type code */
	struct timeval *tmout;	/* get at least one message */
{
	struct umbuf *txup;			/* tx message or null */
	struct frag *txfp = 0;		/* cur tx frag or null */
	int gotem = 0;				/* count complete msgs downloaded */
	int block;					/* get at least one message */
	int loopcount = 0;
	struct msgboxhdr *inbp;		/* incoming box */
	struct timeval tnow, tstop;
	int sbf;					/* reply to control message */
	int cc;
	int freethis = 0;			/* (control) message came from stack */
	int tstkp = 0;
	int tstk[100];				/* XXX shouldn't be a stack */
	int tstkmax = (int)(sizeof(tstk) / sizeof(tstk[0]));

	/* XXX do we really have to do this? */
	if ((dtid == TIDPVMD && code == TM_MCA) || dtid == TIDGID)
		return node_mcast(mid, dtid, code);

	/* tstop.tv_sec == -1 is the marker for "no deadline" */
	if (tmout) {
		if (tmout->tv_sec || tmout->tv_usec) {
			gettimeofday(&tnow, (struct timezone *)0);
			tstop.tv_sec = tnow.tv_sec + tmout->tv_sec;
			tstop.tv_usec = tnow.tv_usec + tmout->tv_usec;
			/* carry, so the microsecond comparison below is meaningful */
			if (tstop.tv_usec >= 1000000) {
				tstop.tv_sec += tstop.tv_usec / 1000000;
				tstop.tv_usec %= 1000000;
			}
			block = 1;
		} else {
			block = 0;
			/* defined, and not the "no deadline" marker: never sleep */
			tstop.tv_sec = 0;
			tstop.tv_usec = 0;
		}
	} else {
		block = 1;
		tstop.tv_sec = -1;
		tstop.tv_usec = -1;
	}

	if (txup = midtobuf(mid)) {
		txfp = txup->ub_frag->fr_link;
		txfp = txfp->fr_buf ? txfp : 0;
	}

	inbp = (struct msgboxhdr *)pvminbox;

	do {
		if (block && tstop.tv_sec != -1) {
			gettimeofday(&tnow, (struct timezone *)0);
			if (tnow.tv_sec > tstop.tv_sec
			|| (tnow.tv_sec == tstop.tv_sec && tnow.tv_usec >= tstop.tv_usec))
				break;
		}

		/*
		* after spinning long enough with nothing to send and no
		* deadline, flag ourselves asleep and block in peer_wait()
		*/
		if (pvmpolltype == PvmPollSleep
		&& loopcount++ > pvmpolltime && !txfp && tstop.tv_sec == -1) {
			PAGELOCK(&inbp->mb_lock);
			if (inbp->mb_read == inbp->mb_last) {
				inbp->mb_sleep = 1;
				PAGEUNLOCK(&inbp->mb_lock);
				if (peer_wait() == -1)
					return PvmSysErr;
				loopcount = 0;
			} else
				PAGEUNLOCK(&inbp->mb_lock);
		}

		if (inbp->mb_read != inbp->mb_last) {
			/* peer_recv() reports failure with a negative code */
			if ((sbf = peer_recv(&gotem)) < 0)
				return PvmSysErr;
			if (sbf > 0) {
				if (txfp) {
					/* busy sending: park the reply for later */
					if (tstkp < tstkmax)
						tstk[tstkp++] = sbf;
					else {
						pvmlogerror(
						"mroute() too many pending control replies, dropping one\n");
						umbuf_free(sbf);
					}
				} else {
					txup = midtobuf(sbf);
					dtid = txup->ub_dst;
					code = txup->ub_cod;
					txfp = txup->ub_frag->fr_link;
					txfp = txfp->fr_buf ? txfp : 0;
					freethis = 1;
				}
			}
		}

		if (txfp) {
			if ((cc = peer_send(txup, txfp, dtid, code)) < 0)
				return cc;
			if (cc) {
				/* frag accepted; a frag without fr_buf ends the chain */
				txfp = txfp->fr_link;
				if (!txfp->fr_buf) {
					if (freethis)
						umbuf_free(txup->ub_mid);
					if (tstkp > 0) {
						txup = midtobuf(tstk[--tstkp]);
						dtid = txup->ub_dst;
						code = txup->ub_cod;
						txfp = txup->ub_frag->fr_link;
						txfp = txfp->fr_buf ? txfp : 0;
						freethis = 1;
					} else
						txfp = 0;
				}
			}
		}

	} while (txfp || (block && !gotem));

	return gotem;
}



/*	peer_recv()
*
*	Receive a message fragment from another task.
*	Reassemble frags into messages and place on rxlist.
*	On completing a control message, call pvmmctl(), which consumes it.
*	Returns:
*		0 normally,
*		negative if error,
*		or message handle of a reply control message to be sent.
*/

int
peer_recv(gotem)
	int *gotem;
{
	struct umbuf *rxup;			/* rx message */
	struct umbuf *up;
	struct frag *fp;
	int sdr;
	int src;
	int ff;
	struct peer *pp = 0;
	int next;					/* frag being received */
	struct shmpkhdr *inmsgs;	/* incoming messages */
	struct msgboxhdr *inbp;		/* incoming box */
	int sbf = 0;				/* reply control message */

	inbp = (struct msgboxhdr *)pvminbox;
	inmsgs = (struct shmpkhdr *)(inbp + 1);

	next = (inbp->mb_read + 1) % pvminboxsz;
	sdr = inmsgs[next].ph_sdr;
	fp = fr_new(0);
	if ((sdr & ~pvmtidhmask) != TIDPVMD) {
		if (!(pp = peer_conn(sdr, (int *)0)) || pp == (struct peer *)-1L) {
			sprintf(pvmtxt, "peer_recv() can't connect to sender t%x\n", sdr);
			pvmlogerror(pvmtxt);
			return PvmSysErr;
		}
		fp->fr_dat = pp->p_buf + INBOXPAGE*pgsz + inmsgs[next].ph_dat;
	} else
		fp->fr_dat = pvmdoutbuf + inmsgs[next].ph_dat;
	fp->fr_buf = fp->fr_dat - (inmsgs[next].ph_dat & (pgsz-1))
				+ PVMPAGEHDR;
	fp->fr_max = pvmfrgsiz;
	ff = inmsgs[next].ph_flag;

	src = inmsgs[next].ph_src;
	fp->fr_len = inmsgs[next].ph_len;
	fp->fr_max = pvmfrgsiz;

	if (debugmask & TDMPACKET) {
		sprintf(pvmtxt, "peer_recv() sdr t%x src t%x len %d dst t%x flag %d\n",
			sdr, src, fp->fr_len, inmsgs[next].ph_dst, ff);
		pvmlogerror(pvmtxt);
	}

	/*
	* if start of message, make new umbuf, add to frag pile
	*/
	if (ff & FFSOM) {
		rxup = midtobuf(umbuf_new());
		rxup->ub_cod = inmsgs[next].ph_cod;
		rxup->ub_enc = inmsgs[next].ph_enc;
		rxup->ub_wid = inmsgs[next].ph_wid;
		rxup->ub_crc = inmsgs[next].ph_crc;
		rxup->ub_src = src;
#ifdef IMA_CSPP
		if (pp && pp->p_node != current_node())
			rxup->ub_flag |= UB_DIFFNODE;
#endif
		LISTPUTBEFORE(rxfrag, rxup, ub_link, ub_rlink);
	}

	/* locate frag's message */

	for (rxup = rxfrag->ub_link; rxup != rxfrag; rxup = rxup->ub_link)
		if (rxup->ub_src == src)
			break;

	if (rxup == rxfrag) {	/* uh oh, no message for it */
		pvmlogerror("peer_recv() frag with no message\n");
		fr_unref(fp);

	} else {
		LISTPUTBEFORE(rxup->ub_frag, fp, fr_link, fr_rlink);
		rxup->ub_len += fp->fr_len;
	/*
	* if end of message, move to rxlist and count it
	*/
		if (ff & FFEOM) {
			LISTDELETE(rxup, ub_link, ub_rlink);
#ifdef	MCHECKSUM
			if (rxup->ub_crc != umbuf_crc(rxup)) {
				sprintf(pvmtxt,
				"peer_recv() message src t%x cod %d bad checksum\n",
						rxup->ub_src, rxup->ub_cod);
				pvmlogerror(pvmtxt);
				umbuf_free(rxup->ub_mid);

			} else {
#endif
				rxup->ub_codef = enctovec(rxup->ub_enc);
				LISTPUTBEFORE(pvmrxlist, rxup, ub_link, ub_rlink);
				(*gotem)++;
				/* XXX */
				if (rxup->ub_cod >= (int)TC_FIRST && rxup->ub_cod <= (int)TC_LAST) {
					rxup = rxup->ub_rlink;
					sbf = pvmmctl(rxup->ub_link);
				}
#ifdef	MCHECKSUM
			}
#endif
		}
	}
	inbp->mb_read = next;
	return sbf;
}


/*	peer_send()
*
*	Send fragment to another process.
*	Establish connection if necessary, exchange handshake message.
*
*	A destination on this host that is not the pvmd is reached through
*	its own inbox (peer_conn()); every other destination, and a peer for
*	which peer_conn() returns -1, goes through the pvmd's inbox.
*	The fragment data itself is not copied into the inbox: only a packet
*	header carrying its offset (ph_dat) within our outgoing buffer is
*	posted, so data living outside outmsgbuf is first copied into it.
*
*	Returns 1 if sent, 0 otherwise (peer not available yet, or the
*	destination inbox is full - caller retries), negative on error
*	(PvmNotImpl for a frag whose fr_u.dab is zero).
*/

int
peer_send(txup, txfp, dtid, code)
	struct umbuf *txup;		/* tx message or null */
	struct frag *txfp;		/* cur tx frag or null */
	int dtid;				/* dest */
	int code;				/* type code */
{
	char *cp = 0;				/* points to copy-databuf (if necessary) */
	int ff;
	int loc;					/* location of data in shared segment */
	int next;					/* frag being received */
	struct peer *pp;
	struct shmpkhdr *dmsgs;
	struct msgboxhdr *dboxp;	/* receiving box of peer */

	if (!txfp->fr_u.dab) {
		pvmlogerror("peer_send() PvmDataInPlace not implemented\n");
		return PvmNotImpl;
	}

	/* same host field as ours and not the daemon itself */
	if ((dtid & pvmtidhmask) == (pvmmytid & pvmtidhmask)
	&& (dtid & ~pvmtidhmask) != TIDPVMD)		/* to local task */
	{
		int new_connection;

		if (!(pp = peer_conn(dtid, &new_connection)))
			return 0;
		if (pp != (struct peer *)-1L) {
			dboxp = (struct msgboxhdr *)pp->p_buf;
			if (new_connection) {
				int sbf, l;
				struct msgboxhdr *inbp = (struct msgboxhdr *)pvminbox;
				static struct timeval ztv = { 0, 0 };

				/*
				* handshake: send TC_SHMAT, then poll until the peer
				* has stored its answer in our mb_attached
				*/
				sbf = pvm_setsbuf(pvm_mkbuf(PvmDataFoo));
				l = TDPROTOCOL;
				pvm_pkint(&l, 1, 1);
				sbf = pvm_setsbuf(sbf);
				inbp->mb_attached = 0;
	/* XXX glggg, could we just return to mroute here instead of calling it? */
				mroute(sbf, dtid, TC_SHMAT, &ztv);
				pvm_freebuf(sbf);
				while (!inbp->mb_attached)
					mroute(0, 0, 0, &ztv);		/* avoid deadlock */
				if (inbp->mb_attached != dtid) {
					sprintf(pvmtxt,
						"peer_send: mb_attached: expected %x, got %x\n",
						dtid, inbp->mb_attached);
					pvmlogerror(pvmtxt);
				}
			}

		} else
			dboxp = (struct msgboxhdr *)pvmdinbox;
	} else
		dboxp = (struct msgboxhdr *)pvmdinbox;

	/* unlocked early-out; checked again under the lock further down */
	if ((dboxp->mb_last + 1) % pvminboxsz == dboxp->mb_read)
		return 0;		/* full */

	/*
	* if page is private, copy and replace it with one in shared buf
	*/

	/* NOTE(review): an offset equal to outbufsz*pvmpgsz passes this test - confirm > vs >= */
	if ((loc = txfp->fr_dat - outmsgbuf) > outbufsz * pvmpgsz || loc < 0) {
		if (nbufsowned == outbufsz) {
			int i;

			/* we already hold every page: report it and flag our table entry */
			pvmlogerror("peer_send() Message(s) too long for shared buffer, deadlocked.\n");
			PAGELOCK(&((struct shmpghdr *)infopage)->pg_lock);
			for (i = 0; i < maxpidtid; i++)
				if (pidtids[i].pt_tid == pvmmytid) {
					pidtids[i].pt_cond = 1;
					break;
				}
			PAGEUNLOCK(&((struct shmpghdr *)infopage)->pg_lock);
		}

		/* keep asking da_new() until the buffer it returns lies inside outmsgbuf */
		cp = 0;
		do {
			if (cp)
				da_unref(cp);
			cp = da_new(MAXHDR + txfp->fr_len);
		} while ((loc = cp - outmsgbuf + MAXHDR) > outbufsz*pvmpgsz || loc < 0);

		BCOPY(txfp->fr_dat, cp + MAXHDR, txfp->fr_len);
		txfp->fr_dat = cp + MAXHDR;
		da_unref(txfp->fr_buf);
		txfp->fr_buf = cp;
	}

	/* first / last frag in the message's chain get the SOM / EOM flags */
	if (txfp->fr_rlink == txup->ub_frag)
		ff = FFSOM;
	else
		ff = 0;
	if (txfp->fr_link == txup->ub_frag)
		ff |= FFEOM;

	if (debugmask & TDMPACKET) {
		sprintf(pvmtxt, "peer_send() dst t%x len %d page %d flag %d\n",
			dtid, txfp->fr_len, loc/pgsz + 1, ff);
		pvmlogerror(pvmtxt);
	}

	/* packet headers follow the box header directly */
	dmsgs = (struct shmpkhdr *)(dboxp + 1);
	PAGELOCK(&dboxp->mb_lock);
	next = (dboxp->mb_last + 1) % pvminboxsz;
	/* if receive buffer full, must fail here and try again - sgi:jpb */
	if ( next == dboxp->mb_read ) {		/* full */
		PAGEUNLOCK(&dboxp->mb_lock);
		return 0;
	}
	dmsgs[next].ph_src = pvmmytid;
	dmsgs[next].ph_dst = dtid;
	dmsgs[next].ph_sdr = pvmmytid;
	dmsgs[next].ph_dat = loc;
	dmsgs[next].ph_len = txfp->fr_len;
	dmsgs[next].ph_flag = ff;
	/* message header fields travel on the first fragment only */
	if (ff & FFSOM) {
		dmsgs[next].ph_cod = code;
		dmsgs[next].ph_enc = txup->ub_enc;
		dmsgs[next].ph_wid = txup->ub_wid;
#ifdef	MCHECKSUM
		dmsgs[next].ph_crc = umbuf_crc(txup);
#else
		dmsgs[next].ph_crc = 0;
#endif
	} else {
		dmsgs[next].ph_cod = 0;
		dmsgs[next].ph_enc = 0;
		dmsgs[next].ph_wid = 0;
		dmsgs[next].ph_crc = 0;
	}
	/* extra reference on the data buffer for the posted packet */
	da_ref(txfp->fr_buf);
	/* publish: the slot becomes visible once mb_last moves */
	dboxp->mb_last = next;

	/* a peer (never the pvmd) that flagged itself asleep gets woken */
	if (dboxp != (struct msgboxhdr *)pvmdinbox && dboxp->mb_sleep) {
#if	defined(IMA_SUNMP) || defined(IMA_RS6KMP)
#ifdef	IMA_SUNMP
		cond_signal(&dboxp->mb_cond);
#endif
#ifdef	IMA_RS6KMP
		pthread_cond_signal(&dboxp->mb_cond);
#endif
#else
		peer_wake(pp);
#endif
		dboxp->mb_sleep = 0;
	}

	/* wake up pvmd */

	/* only when the packet just posted is the sole unread one in its inbox */
	if (dboxp == (struct msgboxhdr *)pvmdinbox
	&& (dboxp->mb_last + pvminboxsz - 1) % pvminboxsz == dboxp->mb_read) {
		PAGEUNLOCK(&dboxp->mb_lock);
		(void)prodpvmd();
	} else
		PAGEUNLOCK(&dboxp->mb_lock);

	return 1;
}


/*	node_mcast()
*
*	Multicast done inside the task instead of by the pvmd.
*
*	Called from mroute() in two steps:
*	  1. dtid == TIDPVMD (code TM_MCA): the message holds a count and
*	     that many tids.  They are remembered in a static list, and a
*	     fresh buffer containing TIDGID is left as the current receive
*	     buffer; the send buffer is set back to mid.  Returns 0.
*	  2. any other dtid (TIDGID): message mid is sent with mroute() to
*	     every remembered tid except our own, then the list is emptied.
*
*	Returns the first negative mroute() result if any send failed,
*	otherwise the result of the last send (0 if nothing was sent).
*
*	NOTE(review): the result of TALLOC is used unchecked - confirm how
*	allocation failure is meant to be reported here.
*/

int
node_mcast(mid, dtid, code)
	int mid;	/* message id */
	int dtid;	/* destination */
	int code;	/* type */
{
	static int *tids = 0;	/* intended recipients of multicast message */
	static int ntids = -1;	/* capacity of tids[] */
	static int ntask;		/* number of tids */
	static struct timeval ztv = { 0, 0 };

	int i;
	int cc = 0;
	int err = 0;			/* first failure seen while sending */
	int dummy;

	/* intercept multicast info */

	if (dtid == TIDPVMD) {
		int sbf = mid;

		pvm_setrbuf(mid);
		pvm_upkint(&ntask, 1, 1);
		/* grow the list only when this request needs more room */
		if (ntask > ntids) {
			if (tids)
				PVM_FREE(tids);
			tids = TALLOC(ntask, int, "tids");
			ntids = ntask;
		}
		pvm_upkint(tids, ntask, 1);
		/* sbf = pvm_setsbuf(pvm_mkbuf(PvmDataFoo)); */
		pvm_setsbuf(pvm_mkbuf(PvmDataFoo));
		dummy = TIDGID;
		pvm_pkint(&dummy, 1, 1);
		/* new buffer becomes the receive buffer, mid the send buffer again */
		pvm_setrbuf(pvm_setsbuf(sbf));
		return 0;
	}

	for (i = 0; i < ntask; i++)
		if (tids[i] != pvmmytid) {
			cc = mroute(mid, tids[i], code, &ztv);
			/* keep going, but do not let a later success hide this */
			if (cc < 0 && !err)
				err = cc;
		}

	ntask = 0;

	return err ? err : cc;
}


/*	msendrecv()
*
*	Single op to send a system message (usually to our pvmd) and get
*	the reply.
*
*	Sends the current send buffer to "other" with "code", then scans
*	the received-message list for a message from "other" carrying the
*	same code, pulling in more traffic whenever the scan reaches the
*	end of the list.  The match is unlinked and made the current
*	receive buffer; any previous receive buffer is freed.
*
*	For TM_MCA nothing is awaited and 1 is returned right after the send.
*
*	Returns message handle or negative if error (PvmNoBuf when there
*	is no send buffer).
*/

int
msendrecv(other, code)
	int other;				/* dst, src tid */
	int code;				/* message code */
{
	int cc;
	struct umbuf *up;
	struct umbuf *tail;		/* last message seen before waiting */

	if (pvmsbufmid <= 0)
		return PvmNoBuf;

	/* send code to other */
	if (debugmask & TDMMESSAGE) {
		sprintf(pvmtxt, "msendrecv() to t%x code %s\n", other,
				pvmnametag(code, (int *)0));
		pvmlogerror(pvmtxt);
	}
	cc = mroute(pvmsbufmid, other, code, (struct timeval *)0);
	if (cc < 0)
		return cc;

	if (code == TM_MCA)		/* for node_mcast() */
		return 1;

	/* recv code from other */
	up = pvmrxlist->ub_link;
	for (;;) {
		if (up == pvmrxlist) {
			/* end of list: wait for more, resume after the old tail */
			tail = up->ub_rlink;
			cc = mroute(0, 0, 0, (struct timeval *)0);
			if (cc < 0)
				return cc;
			up = tail->ub_link;
		}

		if (debugmask & TDMMESSAGE) {
			sprintf(pvmtxt, "msendrecv() from t%x code %s\n",
					up->ub_src, pvmnametag(up->ub_cod, (int *)0));
			pvmlogerror(pvmtxt);
		}
		if (up->ub_src == other && up->ub_cod == code)
			break;

		up = up->ub_link;
	}

	LISTDELETE(up, ub_link, ub_rlink);
	if (pvmrbufmid > 0)
		umbuf_free(pvmrbufmid);
	pvmrbufmid = 0;

	cc = pvm_setrbuf(up->ub_mid);
	return cc ? cc : up->ub_mid;
}


/*	catch_kill()
*
*	SIGTERM handler installed by pvmbeatask().
*	Puts back the handler that was in place before ours and, if that
*	was a real function, calls it.  Then marks our pid-tid table entry
*	ST_FINISH (unless it already says ST_EXIT), runs pvmendtask() and
*	exits with the signal number as status.  Does not return.
*/

static void
catch_kill(sig)
	int sig;
{
	void (*oldh)() = pvmoldtermhdlr;

	/* SIG_ERR means installing our handler failed; it is not callable */
	if (oldh == SIG_ERR)
		oldh = SIG_DFL;

	signal(SIGTERM, oldh);
	if (oldh && oldh != SIG_DFL && oldh != SIG_IGN)
		oldh(sig);
	/* XXX yes, i know the table isn't locked.  we're in a sighandler */
	/* mypidtid is -1 until we own a slot; never index with that */
	if (pidtids && mypidtid >= 0
	&& pidtids[mypidtid].pt_stat != ST_EXIT)
		pidtids[mypidtid].pt_stat = ST_FINISH;
	pvmendtask();
	exit(sig);
}


/*
*	read_int()
*
*	Fetch and return the int that val points at.  pvmbeatask() polls a
*	shared-memory word through this (presumably so that each poll is a
*	fresh load rather than a cached value).
*/
static int
read_int(val)
	int *val;
{
	int cur;

	cur = *val;
	return cur;
}


/*	pvmbeatask()
*
*	Initialize libpvm, config process as a task.
*	This is called as the first step of each libpvm function so no
*	explicit initialization is required.
*
*	Returns 0 if okay, else error code.
*
*	XXX needs work.  lots of inconsistent state is left on error return.
*/

int
pvmbeatask()
{
	struct shmpkhdr *inmsgs;
	struct pidtidhdr *pvminfo;
	struct msgboxhdr *dboxp;		/* receiving box of pvmd */
	int next;
	int altpid;						/* pid of ancestor forked by pvmd */
	int msgcnt;
	char *msgbuf;					/* my message buffer */
	int bufid;
	int i;
	int pid;
	int bid;
	int rc;
	char *p;
	int cc = 0;
	int sbf = 0, rbf = 0;			/* saved rx and tx message handles */

#ifdef LOG
	char fname[32];
#endif
	int key, firstkeytried;
	int mytid;

	if (pvmmytid != -1)
		return 0;

	if ((pvm_useruid = getuid()) == -1) {
		pvmlogerror("pvmbeatask() can't getuid()\n");
		return PvmSysErr;
	}

	if (p = getenv("PVMTASKDEBUG")) {
		debugmask = pvmxtoi(p);
		if (debugmask) {
			sprintf(pvmtxt,"task debug mask is 0x%x\n", debugmask);
			pvmlogerror(pvmtxt);
		}
	}

	pvmmyupid = getpid();

#ifdef LOG
#ifdef IMA_CSPP
	int scid = get_scid();

	if (scid > 1)
		sprintf(fname, "/tmp/pvmt.%d.%d", pvm_useruid, scid);
	else
#endif
		sprintf(fname, "/tmp/pvmt.%d", pvm_useruid);
		if ((logfp = fopen(fname, "a")) == NULL)
			pvmlogerror("pvmbeatask() can't open log file\n");
#endif

	/*
	* get expected pid from environment in case we were started by
	* the pvmd and someone forked again
	*/

	if (p = getenv("PVMEPID"))
		altpid = atoi(p);
	else
		altpid = 0;

	pgsz = sysconf(_SC_PAGESIZE);
	pvmpgsz = FRAGPAGE*pgsz;
	pvmfrgsiz = pvmpgsz - PVMPAGEHDR;
	pvminboxsz =
		(INBOXPAGE*pgsz - sizeof(struct msgboxhdr))/sizeof(struct shmpkhdr);

	/*
	*	initialize received-message list and fragment reassembly list
	*/

	rxfrag = umbuf_get();
	BZERO((char*)rxfrag, sizeof(struct umbuf));
	rxfrag->ub_link = rxfrag->ub_rlink = rxfrag;

	pvmrxlist = umbuf_get();
	BZERO((char*)pvmrxlist, sizeof(struct umbuf));
	pvmrxlist->ub_link = pvmrxlist->ub_rlink = pvmrxlist;

	peer_init();

	/*
	* SIGTERM handler to clean up our shared memory
	*/

	if (pvmsettermhdlr) {
		pvmoldtermhdlr = signal(SIGTERM, catch_kill);
		pvmsettermhdlr = 0;
	}

	/*
	* get pvmd's message buffer, check protocol revision
	*/

	key = pvmshmkey(0);
	if ((bufid = shmget((key_t)key, 0, PERMS)) == -1) {
		pvmlogperror("pvmbeatask() shmget: can't connect to pvmd");
		return PvmSysErr;
	}
	if ((pvmdinbox = (char *)shmat(bufid, 0, 0)) == (char *)-1L) {
		pvmlogperror("pvmbeatask() shmat pvmd");
		return PvmSysErr;
	}
	infopage = pvmdinbox + INBOXPAGE*pgsz;
	pvmdoutbuf = infopage + pgsz;

	pvminfo = (struct pidtidhdr *)(infopage + PVMPAGEHDR);

	while (read_int(&(pvminfo[0])) == 0) {
		sleep(1);
		if (debugmask & PDMMEM)
			pvmlogerror("Waiting for pvmd to set protocol\n");
	}
	if (pvminfo->i_proto != TDPROTOCOL) {
		sprintf(pvmtxt, "beatask() t-d protocol mismatch (%d/%d)\n",
			TDPROTOCOL, pvminfo[0]);
		pvmlogerror(pvmtxt);
		return PvmSysErr;
	}

	/*
	* send it a request for connection/task assignment
	*/

	dboxp = (struct msgboxhdr *)pvmdinbox;
	inmsgs = (struct shmpkhdr *)(dboxp + 1);
	PAGELOCK(&dboxp->mb_lock);
	while ((next = (dboxp->mb_last + 1) % pvminboxsz) == dboxp->mb_read) ;
	/* XXX yuck, overloading these fields */
	inmsgs[next].ph_src = pvmmyupid;
	inmsgs[next].ph_dst = altpid;
	inmsgs[next].ph_dat = -1;
	inmsgs[next].ph_sdr = 0;
	inmsgs[next].ph_len = 0;
	inmsgs[next].ph_flag = 0;
	inmsgs[next].ph_cod = 0;
	inmsgs[next].ph_enc = 0;
	inmsgs[next].ph_wid = 0;
	inmsgs[next].ph_crc = 0;
	dboxp->mb_last = next;
	PAGEUNLOCK(&dboxp->mb_lock);

	pvmdpid = pvminfo->i_dpid;
	(void)getdsock();

	PAGELOCK(&dboxp->mb_lock);
	if ((next - 1) % pvminboxsz == dboxp->mb_read) {
		PAGEUNLOCK(&dboxp->mb_lock);
		(void)prodpvmd();
	} else
		PAGEUNLOCK(&dboxp->mb_lock);

	/*
	* get global parameters from pvmd buffer
	*/

	pvmmyndf = pvminfo->i_ndf;
	shmbufsiz = pvminfo->i_bufsiz;
	outbufsz = (shmbufsiz - INBOXPAGE*pgsz)/pvmpgsz;
	nbufsowned = 0;
	pidtids = (struct pidtid *)(pvminfo + 1);
	maxpidtid = (pgsz - sizeof(struct pidtidhdr) - PVMPAGEHDR)/sizeof(struct pidtid);

	/*
	* wait for pvmd to write us an entry in pidtid table
	*/

	pid = altpid ? altpid : pvmmyupid;
	mytid = -1;
	while (mytid == -1) {
		int ntids;			/* number of entries in pid-tid table */

		PAGELOCK(&((struct shmpghdr *)infopage)->pg_lock);
		ntids = min(maxpidtid, ((struct shmpghdr *)infopage)->pg_ref);
		for (i = 0; i < ntids; i++)
			if (pidtids[i].pt_pid == pid) {
				mytid = pidtids[i].pt_tid;
				pvmmyptid = pidtids[i].pt_ptid;
				/* pidtids[i].pt_pid = pvmmyupid; */
				mypidtid = i;
				break;
			}
		PAGEUNLOCK(&((struct shmpghdr *)infopage)->pg_lock);
	}

	/*
	* create shared memory segment (and semaphore)
	* if we can't get the first key, keep trying others
	* XXX this could be moved back to after the protocol num check.
	*/

	firstkeytried = key = pvmshmkey(getpid());
	while (1) {
		myshmbufid = shmget((key_t)key, shmbufsiz, IPC_CREAT|IPC_EXCL|PERMS);
		if (myshmbufid == -1) {
			if (errno != EACCES && errno != EEXIST) {
				pvmlogperror("pvmbeatask() shmget");
				return PvmSysErr;
			}

		} else {
#ifdef	USERECVSEMAPHORE
			mysemid = semget((key_t)key, 1, IPC_CREAT|IPC_EXCL|PERMS);
			if (mysemid == -1) {
				if (errno != EACCES && errno != EEXIST) {
					pvmlogperror("pvmbeatask() semget");
					shmctl(myshmbufid, IPC_RMID, (struct shmid_ds *)0);
					return PvmSysErr;

				} else {
					shmctl(myshmbufid, IPC_RMID, (struct shmid_ds *)0);
				}

			} else {
				break;
			}
#else
			break;
#endif
		}

		key = nextpvmshmkey(key);
		if (key == firstkeytried) {
			pvmlogerror("pvmbeatask() can't find a free key!\n");
			return PvmSysErr;
		}
	}

#ifdef IMA_CSPP
	if ((pvminbox = (char *)shm_search(myshmbufid)) == (char *)-1L)
#else
	if ((pvminbox = (char *)shmat(myshmbufid, 0, 0)) == (char *)-1L)
#endif
	{
		pvmlogperror("pvmbeatask() shmat");
		shmctl(myshmbufid, IPC_RMID, (struct shmid_ds *)0);
		return PvmSysErr;
	}

	outmsgbuf = pvminbox + INBOXPAGE*pgsz;
	msgbufinit(pvminbox);
	/* XXX PAGELOCK(pvminfo); */
	pidtids[mypidtid].pt_key = key;
	pidtids[mypidtid].pt_stat = ST_SHMEM;
#ifdef IMA_CSPP
	pidtids[mypidtid].pt_node = current_node();
#endif
	/* XXX PAGEUNLOCK(pvminfo); */

	pvmmytid = mytid;

	/*
	*	Request task trace/output paramters from daemon
	*/

	sbf = pvm_setsbuf(pvm_mkbuf(PvmDataFoo));
	rbf = pvm_setrbuf(0);
	if ((cc = msendrecv(TIDPVMD, TM_GETOPT)) > 0) {
		pvm_upkint(&rc, 1, 1);	/* throw out tid, ptid */
		pvm_upkint(&rc, 1, 1);
		pvm_upkint(&pvmouttid, 1, 1);
		pvm_upkint(&pvmoutcod, 1, 1);
		pvm_upkint(&pvmtrctid, 1, 1);
		pvm_upkint(&pvmtrccod, 1, 1);
		pvmcouttid = pvmouttid;
		pvmcoutcod = pvmoutcod;
		pvmctrctid = pvmtrctid;
		pvmctrccod = pvmtrccod;
		cc = 0;
	}
	pvm_freebuf(pvm_setrbuf(rbf));
	pvm_freebuf(pvm_setsbuf(sbf));

	/* get trace mask from envar or zero it */

	if ((p = getenv("PVMTMASK")) && strlen(p) + 1 == TEV_MASK_LENGTH)
		BCOPY(p, pvmtrcmask, TEV_MASK_LENGTH);
	else
		TEV_INIT_MASK(pvmtrcmask);

	BCOPY(pvmtrcmask, pvmctrcmask, TEV_MASK_LENGTH);

	return cc;
}


/*	pvmendtask()
*
*	Undo what pvmbeatask() set up: free left-over messages, send
*	TC_TASKEXIT to every connected peer, mark our pid-tid entry
*	ST_EXIT, detach and remove our shared segment (and semaphore if
*	USERECVSEMAPHORE), detach from the pvmd segment, clean up peers
*	and close the pvmd socket.
*
*	Always returns 0.
*
*	XXX shouldn't clean up unless pvmmytid set?  or at least should check.
*/

int
pvmendtask()
{
	int i;
	struct umbuf *up;
	int sbf;
	static struct timeval ztv = { 0, 0 };
	struct peer *pp;
	extern struct peer *peers;

	/*
	* free any left-over messages.
	*/

	pvmsbufmid = 0;
	pvmrbufmid = 0;
	for (i = 1; i < pvmmidhsiz; i++)
		if (up = pvmmidh[i].m_umb)
			umbuf_free(up->ub_mid);

	/*
	* notify all connected tasks that we are exiting
	*/

	outta_here = 1;

	if (peers) {
		sbf = pvm_initsend(PvmDataFoo);
		pvm_pkint(&pvmmytid, 1, 1);

		for (pp = peers->p_link; pp != peers; pp = pp->p_link)
			if (pp->p_tid && pp->p_tid != pvmmytid)
				mroute(sbf, pp->p_tid, TC_TASKEXIT, &ztv);

		pvm_freebuf(sbf);
	}

	/*
	* the table pointer is set before our slot is known, so make sure
	* the index is usable before writing through it (a negative
	* mypidtid is taken to mean "no slot"; it is reset to -1 below)
	*/
	if (pidtids && mypidtid >= 0) {
		PAGELOCK(&((struct shmpghdr *)infopage)->pg_lock);
		pidtids[mypidtid].pt_stat = ST_EXIT;
		PAGEUNLOCK(&((struct shmpghdr *)infopage)->pg_lock);
	}

	shmdt(pvminbox);
	if (shmctl(myshmbufid, IPC_RMID, (struct shmid_ds *)0) == -1)
		pvmlogperror("pvmendtask() shmctl RMID");
#ifdef USERECVSEMAPHORE
	if (semctl(mysemid, 0, IPC_RMID) == -1)
		pvmlogperror("pvmendtask() semctl RMID");
	mysemid = -1;
#endif
	pvminbox = 0;
	shmdt(pvmdinbox);
	pvmdinbox = 0;
	infopage = 0;
	pvmdoutbuf = 0;
	pidtids = 0;
	mypidtid = -1;

	peer_cleanup();

	pvmmytid = -1;
	if (pvmdsock != -1) {
		(void)close(pvmdsock);
		pvmdsock = -1;
	}

	/* XXX free rxfrag and rxlist */
#ifdef LOG
	/* the log may never have been opened; don't fclose a null stream */
	if (logfp) {
		fclose(logfp);
		logfp = 0;
	}
#endif

	return 0;
}


/*	check_for_exit()
*
*	If peer struct for task id marked as exited, check for messages
*	in heap from the task.  If none (borrowing no pages), detach.
*
*	Does nothing when the peer list does not exist, when src is not
*	in it, or when the peer has not been marked as exited.
*/

void
check_for_exit(src)
	int src;
{
	extern struct peer *peers;
	struct peer *pp;
	struct umbuf *up;
	int i;

	/* no peer list (same guard as pvmendtask()) */
	if (!peers)
		return;

	/* locate the peer entry for src; the list head ends the walk */
	for (pp = peers->p_link; pp != peers; pp = pp->p_link)
		if (pp->p_tid == src)
			break;

	if (pp == peers || !pp->p_exited)
		return;

	/* any message still held from src keeps the peer attached */
	for (i = 1; i < pvmmidhsiz; i++)
		if ((up = pvmmidh[i].m_umb) && up->ub_src == src)
			return;

	peer_detach(pp);
}


/************************
 **  Libpvm Functions  **
 **                    **
 ************************/


int
pvm_getopt(what)
	int what;
{
	int rc = 0;
	int err = 0;
	int x;

	if (x = pvmtoplvl) {
		pvmtoplvl = 0;
		if (pvmmytid != -1 && TEV_DO_TRACE(TEV_GETOPT0)) {
			pvm_pkint(&what, 1, 1);
			TEV_FIN;
		}
	}

	switch (what) {
	case PvmRoute:
		rc = pvmrouteopt;
		break;

	case PvmDebugMask:
		rc = debugmask;
		break;

	case PvmAutoErr:
		rc = pvmautoerr;
		break;

	case PvmOutputTid:
		rc = pvmcouttid;
		break;

	case PvmOutputCode:
		rc = pvmcoutcod;
		break;

	case PvmTraceTid:
		rc = pvmctrctid;
		break;

	case PvmTraceCode:
		rc = pvmctrccod;
		break;

	case PvmFragSize:
		break;

	case PvmResvTids:
		rc = pvmrescode;
		break;

	case PvmSelfOutputTid:
		rc = pvmouttid;
		break;

	case PvmSelfOutputCode:
		rc = pvmoutcod;
		break;

	case PvmSelfTraceTid:
		rc = pvmtrctid;
		break;

	case PvmSelfTraceCode:
		rc = pvmtrccod;
		break;

	case PvmShowTids:
		rc = pvmshowtaskid;
		break;

	case PvmPollTime:
		rc = pvmpolltime;
		break;

	case PvmPollType:
		rc = pvmpolltype;
		break;

	default:
		err = 1;
		break;
	}

	if (x) {
		if (pvmmytid != -1 && TEV_DO_TRACE(TEV_GETOPT1)) {
			pvm_pkint(&rc, 1, 1);
			TEV_FIN;
		}
		pvmtoplvl = x;
	}

	if (err)
		return lpvmerr("pvm_getopt", PvmBadParam);
	return rc;
}


int
pvm_setopt(what, val)
	int what;
	int val;
{
	int rc = 0;
	int err = 0;
	int sbf, rbf;
	int x;
	char buf[16];

	if (x = pvmtoplvl) {
		pvmtoplvl = 0;
		if (pvmmytid != -1 && TEV_DO_TRACE(TEV_SETOPT0)) {
			pvm_pkint(&what, 1, 1);
			pvm_pkint(&val, 1, 1);
			TEV_FIN;
		}
	}

	switch (what) {
	case PvmRoute:
		switch (val) {
		case PvmDontRoute:
		case PvmAllowDirect:
		case PvmRouteDirect:
			rc = pvmrouteopt;
			pvmrouteopt = val;
			break;

		default:
			rc = PvmBadParam;
			err = 1;
			break;
		}
		break;

	case PvmDebugMask:
		rc = debugmask;
		debugmask = val;
		break;

	case PvmAutoErr:
		rc = pvmautoerr;
		pvmautoerr = val;
		break;

	case PvmOutputTid:
		if (val && val != pvmmytid
		&& (val != pvmouttid || pvmcoutcod != pvmoutcod)) {
			rc = PvmBadParam;
			err = 1;

		} else {
			rc = pvmcouttid;
			pvmcouttid = val;
		}
		break;

	case PvmOutputCode:
		if (pvmcouttid && pvmcouttid != pvmmytid && val != pvmoutcod) {
			rc = PvmBadParam;
			err = 1;

		} else {
			rc = pvmcoutcod;
			pvmcoutcod = val;
		}
		break;

	case PvmTraceTid:
		if (val && val != pvmmytid
		&& (val != pvmtrctid || pvmctrccod != pvmtrccod)) {
			rc = PvmBadParam;
			err = 1;

		} else {
			rc = pvmctrctid;
			pvmctrctid = val;
		}
		break;

	case PvmTraceCode:
		if (pvmctrctid && pvmctrctid != pvmmytid && val != pvmtrccod) {
			rc = PvmBadParam;
			err = 1;

		} else {
			rc = pvmctrccod;
			pvmctrccod = val;
		}
		break;

	case PvmFragSize:
/*
		if (val < MAXHDR + 4 || val > 1048576) {
*/
			rc = PvmBadParam;
			err = 1;
/*
		} else {
			rc = pvmfrgsiz;
			pvmfrgsiz = val;
		}
*/
		break;

	case PvmResvTids:
		rc = pvmrescode;
		pvmrescode = val;
		break;

	case PvmSelfOutputTid:
		sbf = pvm_setsbuf(pvm_mkbuf(PvmDataFoo));
		rbf = pvm_setrbuf(0);
		what = TS_OUTTID;
		pvm_pkint(&what, 1, 1);
		sprintf(buf, "%x", 0xffffffff & val);
		pvm_pkstr(buf);
		if ((rc = msendrecv(TIDPVMD, TM_SETOPT)) > 0) {
			pvm_freebuf(pvm_setrbuf(rbf));
			rc = pvmouttid;
			pvmouttid = val;
			pvmcouttid = pvmouttid;
			pvmcoutcod = pvmoutcod;

		} else {
			pvm_setrbuf(rbf);
			err = 1;
		}
		pvm_freebuf(pvm_setsbuf(sbf));
		break;

	case PvmSelfOutputCode:
		sbf = pvm_setsbuf(pvm_mkbuf(PvmDataFoo));
		rbf = pvm_setrbuf(0);
		what = TS_OUTCOD;
		pvm_pkint(&what, 1, 1);
		sprintf(buf, "%x", 0xffffffff & val);
		pvm_pkstr(buf);
		if ((rc = msendrecv(TIDPVMD, TM_SETOPT)) > 0) {
			pvm_freebuf(pvm_setrbuf(rbf));
			rc = pvmoutcod;
			pvmoutcod = val;
			pvmcouttid = pvmouttid;
			pvmcoutcod = pvmoutcod;

		} else {
			pvm_setrbuf(rbf);
			err = 1;
		}
		pvm_freebuf(pvm_setsbuf(sbf));
		break;

	case PvmSelfTraceTid:
		sbf = pvm_setsbuf(pvm_mkbuf(PvmDataFoo));
		rbf = pvm_setrbuf(0);
		what = TS_TRCTID;
		pvm_pkint(&what, 1, 1);
		sprintf(buf, "%x", 0xffffffff & val);
		pvm_pkstr(buf);
		if ((rc = msendrecv(TIDPVMD, TM_SETOPT)) > 0) {
			pvm_freebuf(pvm_setrbuf(rbf));
			rc = pvmtrctid;
			pvmtrctid = val;
			pvmctrctid = pvmtrctid;
			pvmctrccod = pvmtrccod;

		} else {
			pvm_setrbuf(rbf);
			err = 1;
		}
		pvm_freebuf(pvm_setsbuf(sbf));
		break;

	case PvmSelfTraceCode:
		sbf = pvm_setsbuf(pvm_mkbuf(PvmDataFoo));
		rbf = pvm_setrbuf(0);
		what = TS_TRCCOD;
		pvm_pkint(&what, 1, 1);
		sprintf(buf, "%x", 0xffffffff & val);
		pvm_pkstr(buf);
		if ((rc = msendrecv(TIDPVMD, TM_SETOPT)) > 0) {
			pvm_freebuf(pvm_setrbuf(rbf));
			rc = pvmtrccod;
			pvmtrccod = val;
			pvmctrctid = pvmtrctid;
			pvmctrccod = pvmtrccod;

		} else {
			pvm_setrbuf(rbf);
			err = 1;
		}
		pvm_freebuf(pvm_setsbuf(sbf));
		break;

	case PvmShowTids:
		rc = pvmshowtaskid;
		pvmshowtaskid = val;
		break;

	case PvmPollTime:
		rc = pvmpolltime;
		if (val < 0) {
			rc = PvmBadParam;
			err = 1;
			break;
		}
		pvmpolltime = val;
		break;

	case PvmPollType:
		rc = pvmpolltype;
		if ((val != PvmPollConstant) && (val != PvmPollSleep)) {
			rc = PvmBadParam;
			err = 1;
			break;
		}
		pvmpolltype = val;
		break;

	default:
		rc = PvmBadParam;
		err = 1;
		break;
	}

	if (x) {
		if (pvmmytid != -1 && TEV_DO_TRACE(TEV_SETOPT1)) {
			pvm_pkint(&rc, 1, 1);
			TEV_FIN;
		}
		pvmtoplvl = x;
	}

	if (err)
		return lpvmerr("pvm_setopt", rc);
	return rc;
}


/*	pvm_perror()
*
*	Write "<who>: <s>: <error text>" to stderr, where <who> is the
*	unix pid if we have no tid yet, else the tid in hex, and the text
*	is looked up in pvm_errlist from pvm_errno ("Unknown Error" when
*	out of range).  A null s prints as "(null)".
*
*	Always returns 0.
*/

int
pvm_perror(s)
	char *s;
{
	char *label;
	char *text;

	label = s ? s : "(null)";

	if (pvm_errno <= 0 && pvm_errno > -pvm_nerr)
		text = pvm_errlist[-pvm_errno];
	else
		text = "Unknown Error";

	if (pvmmytid != -1)
		fprintf(stderr, "libpvm [t%x]: ", pvmmytid);
	else
		fprintf(stderr, "libpvm [pid%d]: ", pvmmyupid);

	fprintf(stderr, "%s: %s\n", label, text);
	return 0;
}


/*	pvm_getfds()
*
*	Not available in this libpvm: fds is never touched and the
*	PvmNotImpl code is reported through lpvmerr() when negative.
*/

int
pvm_getfds(fds)		/* XXX this function kinda sucks */
	int **fds;			/* fd list return */
{
	int status = PvmNotImpl;

	if (status < 0)
		return lpvmerr("pvm_getfds", status);
	return status;
}


/*	pvm_start_pvmd()
*
*	Start a pvmd from this process and enroll in it.
*	Refuses (PvmDupHost) if the pvmd socket file already exists.
*	The pvmd is exec'd in a grandchild with its stdout connected to a
*	pipe; the first line it prints is exported as PVMSOCK=<line>
*	before BEATASK is attempted.  With block set, pvm_addhosts() on
*	the local host is retried while it returns PvmAlready.
*
*	Returns 0 if okay, else the (negative) result of lpvmerr().
*/

int
pvm_start_pvmd(argc, argv, block)
	int argc;		/* number of args to pass to pvmd (>= 0) */
	char **argv;	/* args for pvmd or null */
	int block;		/* if true, don't return until add hosts are started */
{
	char *sfn;
	struct stat sb;
	int cc;
	char *fn;			/* file to exec */
	char **av;
	int pfd[2];
	int n;
	int pid;
	FILE *ff;
	char buf[128];
	int x;

	if (x = pvmtoplvl) {
		pvmtoplvl = 0;
		if (pvmmytid != -1 && TEV_DO_TRACE(TEV_START_PVMD0)) {
			pvm_pkint(&argc, 1, 1);
			pvm_pkint(&block, 1, 1);
			for (cc = 0; cc < argc; cc++)
				pvm_pkstr(argv[cc]);
			TEV_FIN;
		}
	}

	if (argc < 0 || !argv)
		argc = 0;

	if ((pvm_useruid = getuid()) == -1) {
		pvmlogerror("can't getuid()\n");
		cc = PvmSysErr;
		goto bail;
	}

	if (!(sfn = pvmdsockfile())) {
		pvmlogerror("pvm_start_pvmd() pvmdsockfile() failed\n");
		cc = PvmSysErr;
		goto bail;
	}

	/* an existing socket file is taken to mean a pvmd is already there */
	if (stat(sfn, &sb) != -1) {
		cc = PvmDupHost;
		goto bail;
	}

#ifdef	IMA_TITN
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, pfd) == -1)
#else
	if (pipe(pfd) == -1)
#endif
	{
		cc = PvmSysErr;
		goto bail;
	}

	fn = pvmgetpvmd();

	/* av = { fn, argv[0..argc-1], 0 } */
	av = TALLOC(argc + 2, char *, "argv");
	if (argc > 0)
		BCOPY((char *)&argv[0], (char *)&av[1], argc * sizeof(char*));
	av[0] = fn;
	av[argc + 1] = 0;

	pid = fork();
	if (pid == -1) {
		/*
		* nothing was started: release everything and give up now
		* instead of falling into wait(), which could block on or
		* reap some unrelated child of the caller
		*/
		(void)close(pfd[0]);
		(void)close(pfd[1]);
		PVM_FREE(av);
		cc = PvmCantStart;
		goto bail;
	}
	if (!pid) {
		(void)close(pfd[0]);
	/* fork again so the pvmd is not the child - won't have to wait() for it */
		if (!fork()) {
			if (pfd[1] != 1)
				dup2(pfd[1], 1);
			for (n = getdtablesize(); n-- > 0; )
				if (n != 1)
					(void)close(n);
			(void)open("/dev/null", O_RDONLY, 0);	/* should be 0 */
			(void)open("/dev/null", O_WRONLY, 0);	/* should be 2 */
			(void)signal(SIGINT, SIG_IGN);
			(void)signal(SIGQUIT, SIG_IGN);
			(void)signal(SIGTSTP, SIG_IGN);
			execvp(av[0], av);
		}
		_exit(0);
	}
	(void)close(pfd[1]);
	(void)wait(0);
	PVM_FREE(av);

	if (!(ff = fdopen(pfd[0], "r"))) {
		cc = PvmSysErr;
		(void)close(pfd[0]);
		goto bail;
	}

	/* first line of pvmd output becomes the value of PVMSOCK */
	strcpy(buf, "PVMSOCK=");
	n = strlen(buf);
	if (!fgets(buf + n, sizeof(buf) - n - 1, ff)) {
		cc = PvmCantStart;
		fclose(ff);
		goto bail;
	}
	fclose(ff);
	if (strlen(buf + n) < 2) {
		cc = PvmCantStart;
		goto bail;
	}
	n = strlen(buf);
	if (buf[n - 1] == '\n')
		buf[n - 1] = 0;
	pvmputenv(STRALLOC(buf));
/*
	fprintf(stderr, "pvm_start_pvmd: %s\n", buf);
*/

	if (cc = BEATASK)
		goto bail;

	if (block) {
		struct pvmhostinfo *hip;
		int t = 1;

		/* hip is only usable when pvm_config() succeeded */
		if ((cc = pvm_config((int*)0, (int*)0, &hip)) < 0)
			goto bail;
		/* retry with a doubling delay (capped at 8s) while PvmAlready */
		while ((cc = pvm_addhosts(&hip[0].hi_name, 1, (int*)0)) == PvmAlready) {
			sleep(t);
			if (t < 8)
				t *= 2;
		}
		/* adding our own host is expected to end with PvmDupHost */
		if (cc != PvmDupHost)
			goto bail;
		cc = BEATASK;
	}

bail:

	if (x) {
		/* same enrolled check as on entry: no tracing without a tid */
		if (pvmmytid != -1 && TEV_DO_TRACE(TEV_START_PVMD1)) {
			pvm_pkint(&cc, 1, 1);
			TEV_FIN;
		}
		pvmtoplvl = x;
	}

	return (cc < 0 ? lpvmerr("pvm_start_pvmd", cc) : 0);
}


/*	tev_begin()
*
*	Start a trace event.  Initialize a message, write time and event kind.
*
*	A new buffer becomes the send buffer (the previous one is kept in
*	pvmtrcsbf) and seconds, microseconds, our tid and the event kind
*	are packed, in that order.
*
*	Always returns 1.
*/

int
tev_begin(kind)
	int kind;
{
	struct timeval now;
	int sec;
	int usec;

	gettimeofday(&now, (struct timezone *)0);
	/*
	* copy into real ints: the timeval fields may be wider than int,
	* so packing through (int *)&now.tv_sec could pick up the wrong
	* half of the value
	*/
	sec = (int)now.tv_sec;
	usec = (int)now.tv_usec;

	pvmtrcmid = pvm_mkbuf(PvmDataFoo);
	pvmtrcsbf = pvm_setsbuf(pvmtrcmid);
	pvm_pkint(&sec, 1, 1);
	pvm_pkint(&usec, 1, 1);
	pvm_pkint(&pvmmytid, 1, 1);
	pvm_pkint(&kind, 1, 1);
/*
	fprintf(stderr, "tev_begin() mid %d\n", pvmtrcmid);
*/
	return 1;
}


/*	tev_fin()
*
*	End a trace event.  Send message.
*
*	The message built since tev_begin() is routed to the trace sink
*	(unless we are the sink), the caller's send buffer is put back
*	and the trace buffer is freed.  Always returns 0.
*/

int
tev_fin()
{
	static struct timeval nowait = { 0, 0 };
	int savedroute;

/*
	fprintf(stderr, "tev_fin() mid %d\n", pvmtrcmid);
*/
	if (pvmmytid != pvmtrctid) {
		/* PvmRouteDirect is relaxed to PvmAllowDirect for this send */
		savedroute = pvmrouteopt;
		if (savedroute == PvmRouteDirect)
			pvmrouteopt = PvmAllowDirect;
		mroute(pvmtrcmid, pvmtrctid, pvmtrccod, &nowait);
		pvmrouteopt = savedroute;
	}

	/* back to the send buffer that was current before tev_begin() */
	pvm_setsbuf(pvmtrcsbf);
	pvmtrcsbf = 0;

	pvm_freebuf(pvmtrcmid);
	pvmtrcmid = 0;

	return 0;
}


/*	tev_do_trace()
*
*	Function form of the TEV_DO_TRACE() macro, so user code can test
*	whether to trace an event kind without importing the masks,
*	trace tid, etc.
*/

int
tev_do_trace(kind)
	int kind;
{
	int doit;

	doit = TEV_DO_TRACE(kind);
	return doit;
}


/*	pvm_precv()
*
*	Receive a message matching tid/tag and copy at most len items of
*	type dt from it into cp as raw bytes.
*	rtid and rtag are handed to pvm_bufinfo(); rlen, if non-null, gets
*	the length pvm_bufinfo() reports for the message.
*	The received buffer is not freed until the next successful call.
*
*	Returns 0 if okay, PvmNotImpl for PVM_STR, PvmBadParam for an
*	unknown type, else the error from pvm_recv().
*/

int
pvm_precv(tid, tag, cp, len, dt, rtid, rtag, rlen)
	int tid;
	int tag;
	void *cp;
	int len;
	int dt;
	int *rtid;
	int *rtag;
	int *rlen;
{
	static int held_mid = 0;	/* buffer kept from the previous call */

	int saved_rbf;
	int msglen;
	int cc = 0;

	/* turn the item count into a byte count */
	switch (dt) {
	case PVM_BYTE:
		len *= sizeof(char);
		break;
	case PVM_SHORT:
	case PVM_USHORT:
		len *= sizeof(short);
		break;
	case PVM_INT:
	case PVM_UINT:
		len *= sizeof(int);
		break;
	case PVM_LONG:
	case PVM_ULONG:
		len *= sizeof(long);
		break;
	case PVM_FLOAT:
		len *= sizeof(float);
		break;
	case PVM_CPLX:
		len *= sizeof(float) * 2;
		break;
	case PVM_DOUBLE:
		len *= sizeof(double);
		break;
	case PVM_DCPLX:
		len *= sizeof(double) * 2;
		break;
	case PVM_STR:
		cc = PvmNotImpl;
		break;
	default:
		cc = PvmBadParam;
		break;
	}

	/* unusable type: report it and leave before touching any buffer */
	if (cc != 0) {
		if (cc < 0)
			lpvmerr("pvm_precv", cc);
		return cc;
	}

	if (held_mid > 0) {
		pvm_freebuf(held_mid);
		held_mid = 0;
	}

	saved_rbf = pvm_setrbuf(0);
	cc = pvm_recv(tid, tag);
	if (cc > 0) {
		pvm_bufinfo(cc, &msglen, rtag, rtid);
		if (rlen)
			*rlen = msglen;
		/* never unpack more than the message holds */
		if (msglen < len)
			len = msglen;
		pvm_upkbyte((char *)cp, len, 1);
		held_mid = cc;
		cc = 0;
	}
	pvm_setrbuf(saved_rbf);

	if (cc < 0)
		lpvmerr("pvm_precv", cc);
	return cc;
}


/*	dynbuf()
*
*	Find dynamic buffer.
*
*	Returns the file-backed shared buffer of the peer entry for tid,
*	mapping the file named by PVMSHMFILE if there is no mapping yet.
*	An existing mapping is dropped and redone when len exceeds both
*	SHMBUFSIZE and the recorded length.  If the buffer's page header
*	still has references (previous msg not received), waits with
*	peer_wait() until they are gone.
*
*	Returns the buffer address, or (char *)-1 on failure.
*/

char *
dynbuf(tid, len)
	int tid;
	int len;
{
	struct peer *pp;
	int fd;
	char fname[32];
	struct shmpghdr *ph;
	char *mp;

	while (!(pp = peer_conn(tid, (int *)0)))
		;
	/* peer_conn can also hand back -1: not a usable peer */
	if (pp == (struct peer *)-1L)
		return (char *)-1L;

	/* name is needed by more than one branch below, so build it once */
	sprintf(fname, PVMSHMFILE, tid);

	if (len > SHMBUFSIZE && len > pp->p_dlen && pp->p_dbuf) {
		/* unmap all of it: it was mapped with max(p_dlen, SHMBUFSIZE) */
		munmap((caddr_t)pp->p_dbuf, max(pp->p_dlen, SHMBUFSIZE));
		pp->p_dbuf = 0;
	}

	if (!(ph = (struct shmpghdr *)pp->p_dbuf)) {
		if ((fd = open(fname, O_CREAT|O_RDWR, 0600)) == -1) {
			pvmlogperror(fname);
			return (char *)-1L;
		}
		mp = (char *)mmap(0, max(len,SHMBUFSIZE), PROT_READ|PROT_WRITE,
#if defined(IMA_SGIMP) || defined(IMA_SGIMP64)
			MAP_SHARED|MAP_AUTOGROW,
#else
			MAP_SHARED,
#endif
			fd, 0);
		if (mp == (char *)-1L) {
			/* log first (errno), then release the descriptor;
			 * p_dbuf stays 0 so a later call starts over */
			pvmlogperror(fname);
			(void)close(fd);
			return (char *)-1L;
		}
		pp->p_dbuf = mp;
#ifdef IMA_SUNMP
		/* fill buffer with 0's */
		lseek(fd, len - 1, SEEK_SET);
		write(fd, fname, 1);
#endif
		close(fd);
		pp->p_dlen = len;
	} else if (ph->pg_ref) {	/* previous msg has not been recv'd */
		while (ph->pg_ref)
			if (peer_wait() == -1)
				return (char *)-1L;
#ifdef IMA_SUNMP
	} else if (pp->p_dlen < len) {
		/* extend the backing file to len bytes */
		if ((fd = open(fname, O_CREAT|O_RDWR, 0600)) == -1) {
			pvmlogperror(fname);
			return (char *)-1L;
		}
		lseek(fd, len - 1, SEEK_SET);
		write(fd, fname, 1);
		close(fd);
		pp->p_dlen = len;
#endif
	}

	return pp->p_dbuf;
}

/*	pvm_psend()
*
*	Pack len items of type dt from cp as raw bytes into a fresh
*	PvmDataRaw buffer and send it to tid with tag.  The caller's
*	send buffer is put back afterwards and the temporary one freed.
*
*	Returns 0 if okay, PvmNotImpl for PVM_STR, PvmBadParam for an
*	unknown type, else the error from pvm_send().
*/

int
pvm_psend(tid, tag, cp, len, dt)
	int tid;
	int tag;
	void *cp;
	int len;
	int dt;
{
	int sbf;
	int cc = 0;

	/* turn the item count into a byte count */
	switch (dt) {
	case PVM_BYTE:
		len *= sizeof(char);
		break;
	case PVM_SHORT:
	case PVM_USHORT:
		len *= sizeof(short);
		break;
	case PVM_INT:
	case PVM_UINT:
		len *= sizeof(int);
		break;
	case PVM_LONG:
	case PVM_ULONG:
		len *= sizeof(long);
		break;
	case PVM_FLOAT:
		len *= sizeof(float);
		break;
	case PVM_CPLX:
		len *= sizeof(float) * 2;
		break;
	case PVM_DOUBLE:
		len *= sizeof(double);
		break;
	case PVM_DCPLX:
		len *= sizeof(double) * 2;
		break;
	case PVM_STR:
		cc = PvmNotImpl;
		break;
	default:
		cc = PvmBadParam;
		break;
	}

	/* unusable type: report it and leave before building a message */
	if (cc != 0) {
		if (cc < 0)
			lpvmerr("pvm_psend", cc);
		return cc;
	}

#if 0
	if ((tid & pvmtidhmask) == (pvmmytid & pvmtidhmask)
	&& (tid & ~pvmtidhmask) != TIDPVMD) {		/* to local task */
		char *dbuf;

		len += sizeof(struct shmpghdr);
		if ((dbuf = dynbuf(tid, len)) != (char *)-1L) {
			BCOPY(cp, dbuf, len);

		} else
			cc = PvmSysErr;

	} else
#endif /*0*/
	{
		sbf = pvm_setsbuf(pvm_mkbuf(PvmDataRaw));
		pvm_pkbyte((char *)cp, len, 1);
		cc = pvm_send(tid, tag);
		if (cc > 0)
			cc = 0;
		pvm_freebuf(pvm_setsbuf(sbf));
	}

	if (cc < 0)
		lpvmerr("pvm_psend", cc);
	return cc;
}

