/*
 * routines to build HeNCE programs
 *
 * $Id: build.c,v 1.2 1994/06/11 20:36:31 moore Exp $
 *
 * $Log: build.c,v $
 * Revision 1.2  1994/06/11  20:36:31  moore
 * make temp file name (session_key) longer to cope with AIX's larger pids
 * when checking for shared file systems, call proxy_copy_lock_file
 *   instead of proxy_copy_file
 * tighten up use of per_host[h].been_here to make sure that uninitialized
 *   values don't count as 'already been here'
 * get rid of some spurious debugging printfs
 * add some more messages to the pvm_sink window to make things (hopefully)
 *   less confusing
 *
 * Revision 1.1  1994/02/17  20:18:30  moore
 * Initial revision
 *
 */

#include <stdio.h>

#include <X11/Intrinsic.h>

#include <Errno.h>
#include <Malloc.h>
#include <Stat.h>
#include <String.h>

#include "arches.h"
#include "build.h"
#include "config.h"
#include "costmat.h"
#include "global.h"
#include "mode.h"
#include "proxy.h"
#include "pvmglue.h"


/*
 * XXX to do
 *
 * [ ] check for presence of {std,rb,dlist,hence,htypes}.h
 *    (probably in $PVM_ROOT/include)
 * [ ] check for presence of lib{xxx}.a
 *    (probably in $PVM_ROOT/lib/$PVM_ARCH)
 * [ ] try to compile each file on each machine; don't give up
 *     after just one.
 */

static void
set_cell_color (h, s, color)
int h, s, color;
{
    if (cm_SetColor (global.costMatrix, h, s, color))
	config_repaint_cells (h, s);
}

/*
 * per-host info (for this layer)
 *
 * + architecture (so we know how to make wrappers)
 */

struct per_host {
    char *arch;			/* architecture */
    int been_here;		/* flag sez we've already scribbled here */
} *per_host;

/*
 * per-subr info (for this layer)
 *
 * + filename of subr source file
 */

struct per_subr {
    char *filename;
} *per_subr;

static char session_key[100];

#define HOSTNAME(h) (global.costMatrix->hosts[h])
#define SUBRNAME(s) (global.costMatrix->subs[s])

/*
 * allocate the local state info that we need while building a HeNCE
 * program
 */

static void
alloc_local_state (cm)
struct cost_matrix *cm;
{
    int s, h;

    per_subr = TALLOC (cm->nsub, struct per_subr);
    for (s = 0; s < cm->nsub; ++s) {
	per_subr[s].filename = NULL;
    }

    per_host = TALLOC (cm->nhost, struct per_host);
    for (h = 0; h < cm->nhost; ++h) {
	per_host[h].arch = NULL;
	per_host[h].been_here = -1;
    }
    sprintf (session_key, ".htool.%d.%d", getpid (), time (0));
}

/*
 * free the local state info that was used while we were building
 * a HeNCE program
 */

static void
free_local_state (cm)
struct cost_matrix *cm;
{
    int s, h;
    struct queue *q, *nextq;

    if (per_subr) {
	for (s = 0 ; s < cm->nsub; ++s) {
	    if (per_subr[s].filename != NULL)
		FREE (per_subr[s].filename);
	}
	FREE (per_subr);
    }
    per_subr = NULL;
    
    if (per_host) {
	for (h = 0; h < cm->nhost; ++h) {
	    if (per_host[h].arch)
		FREE (per_host[h].arch);
	}
	FREE (per_host);
    }
    per_host = NULL;
    *session_key = '\0';
}

/*
 * make sure all required source files are present.  Remember their
 * filenames, sizes and mod times.
 *
 * XXX maybe dynamically determine whether a particular source file
 * is C or FORTRAN.
 */

static int
check_source_files (cm, language)
struct cost_matrix *cm;
int language;
{
    int errs = 0;
    int s;

    for (s = 0; s < cm->nsub; ++s) {
	char sourcefile[1024];
	struct stat buf;

	if (cm->subs[s] == NULL || cm->subs[s][0] == '\0')
	    continue;
	if (strcmp (cm->subs[s], "null") == 0) /* ARRRGGH! */
	    continue;
	sprintf (sourcefile, "%s.%s", cm->subs[s],
		 language == LANG_C ? "c" : "f");
	if (stat (sourcefile, &buf) < 0) {
	    msg_Format ("can't stat \"%s\": %s\n",
			sourcefile, strerror (errno));
	    ++errs;
	}
	else
	    per_subr[s].filename = STRDUP (sourcefile);
    }
    return -errs;
}


/*
 * copy the source file for 'subrnum' to the indicated host
 * return 0 on success, -1 on error.
 */

static int
copy_file (hostnum, subrnum)
int hostnum;
int subrnum;
{
    int x;

#if 0
    msg_Format ("copying %s to %s\n", per_subr[subrnum].filename,
		HOSTNAME(hostnum));
#endif

    x = proxy_copy_file (per_subr[subrnum].filename, /* local file name */
			 hostnum,
			 per_subr[subrnum].filename); /* remote filename */
    if (x < 0) {
	if (x == pFileExistsErr) {
#if 0
	    fprintf (stderr, "%s:%s already exists\n", HOSTNAME(hostnum),
		     per_subr[subrnum].filename);
#endif
	    msg_Format ("%s:%s already exists\n", HOSTNAME(hostnum),
			per_subr[subrnum].filename);
	    return 0;
	}
	msg_Format ("copy of %s to %s failed: %s\n",
		    per_subr[subrnum].filename,
		    HOSTNAME(hostnum),
		    proxy_strerror (x));
	set_cell_color (hostnum, subrnum, COLOR_BROKEN);
    }
    return x;
}

static char *
wrapper_name (subrnum, language)
int subrnum;
int language;
{
    static char tempfile[1024];

    sprintf (tempfile, "%sw_%s.c",
	     language == LANG_C ? "c" : "f",
	     global.costMatrix->subs[subrnum]);
    return tempfile;
}

/*
 * copy the wrapper source file for 'subrnum' to the indicated host
 * return 0 on success, -1 on error.
 *
 * checks modtime and size before copying, in case we already have
 * a current copy there.
 */

static int
copy_wrapper (hostnum, subrnum, language)
int hostnum;
int subrnum;
{
    /*
     * 1. generate a wrapper for this host's architecture.
     * 2. copy it to the remote host.
     *
     * XXX this is lame...shouldn't generate a wrapper more than
     * once for any particular architecture, but for now we
     * generate a wrapper separately for each subr on each host.
     */

    char tempfile[1024];
    int x;

    strcpy (tempfile, wrapper_name (subrnum, language));
    if (subdefs_write_wrapper (tempfile,
			       global.costMatrix->subs[subrnum],
			       language,
			       per_host[hostnum].arch) < 0) {
	msg_Format ("write wrapper (%s,%s,%s,%s) failed\n",
		    wrapper_name (subrnum, language),
		    global.costMatrix->subs[subrnum],
		    language == LANG_C ? "c" : "FORTRAN",
		    per_host[hostnum].arch);
	set_cell_color (hostnum, subrnum, COLOR_BROKEN);
	unlink (tempfile);
	return -1;
    }

    /*
     * copy this to the architecture-specific subdirectory
     * of the remote system
     */
    if ((x = proxy_copy_file (tempfile, hostnum, tempfile)) < 0) {
	msg_Format ("copy of %s to %s failed: %s",
		    tempfile, HOSTNAME(hostnum),
		    proxy_strerror (x));
    }
    unlink (tempfile);
    return x;
}

static int
copy_fmain (hostnum)
int hostnum;
{
    char tempfile[1024];
    char *fmain = "fmain.f";
    FILE *fp;
    int x;

    strcpy (tempfile, "fmain.f");
    if ((fp = fopen (tempfile, "w")) == NULL) {
	msg_Format ("can't create file \"%s\": %s\n", tempfile,
		    strerror (errno));
	return -1;
    }
    fprintf (fp, "C\tDummy FORTRAN main for HeNCE subroutine\n");
    fprintf (fp, "\tcall fslave\n");
    fprintf (fp, "\tstop\n");
    fprintf (fp, "\tend\n");
    fclose (fp);

    if ((x = proxy_copy_file (tempfile, hostnum, fmain)) < 0) {
	msg_Format ("copy of %s to %s failed: %s",
		    tempfile, HOSTNAME(hostnum),
		    proxy_strerror (x));
    }
    unlink (tempfile);
    return x;
}


static int
been_here (h, session_key)
int h;
char *session_key;
{
    int x;

    if (per_host[h].been_here < 0) {
	x = proxy_copy_lock_file (session_key, h, session_key);
	per_host[h].been_here = (x == pFileExistsErr);
	/*
	 * XXX if we've already been here, how to mark in the cost matrix?
	 * the result isn't necessarily successful, it depends on how
	 * the 'master' machine fared.  Need to figure out which is
	 * the master machine.
	 *
	 * But for now, simply mark it as done.
	 */
	if (per_host[h].been_here == 1) {
	    set_cell_color (h, -1, COLOR_DONE);
#if 1
	    msg_Format ("Build: not building on %s: it shares files with another machine.\n", HOSTNAME(h));
#endif
	}
    }
    return per_host[h].been_here == 1;
}

static int
queue_cmd (cmd, h, s)
char *cmd;
int h, s;
{
    char buf[200];
    int x;

    sprintf (buf, cmd, s);
    if ((x = proxy_queue_cmd (h, s, buf)) < 0) {
	msg_Format ("Error: can't queue cmd \"%s\" on %s: %s\n",
		    buf, HOSTNAME(h), proxy_strerror (x));
	set_cell_color (h, s, COLOR_BROKEN);
    }
    return x;
}

static int
queue_compile_cmd (h, s, language, debug)
int h, s;
int language;
int debug;
{
    char *cmd;

    cmd = arch_get_compile_cmd (per_host[h].arch,
				language,
				SUBRNAME(s),
				per_subr[s].filename,
				wrapper_name (s, language), debug);
#if 0
    fprintf (stderr, "queueing %s on %s\n", cmd, HOSTNAME(h));
#endif
    return proxy_queue_cmd (h, s, cmd);
}

static int
queue_install_cmd (h, s)
{
    int x;
    char buf[1024];

#if 1
    sprintf (buf, "rm -f %s/%s",
	     "$(HOME)/pvm3/bin/$(PVM_ARCH)",
	     SUBRNAME(s));
    if ((x = proxy_queue_cmd (h, s, buf)) < 0)
	return x;
    sprintf (buf, "cp %s %s/%s",
	     SUBRNAME(s),
	     "$(HOME)/pvm3/bin/$(PVM_ARCH)",
	     SUBRNAME(s));
    if ((x = proxy_queue_cmd (h, s, buf)) < 0)
	return x;
#else
    /*
     * this is supposed to work around a bug in some UNIXes
     * that won't let you rm a file that's being executed...
     * but it doesn't work here, since the mv may fail.
     * (and we shouldn't be executing this file anyway.)
     */
    sprintf (buf, "mv %s/%s %s/%s.old",
	     "$(HOME)/pvm3/bin/$(PVM_ARCH)",
	     SUBRNAME(s),
	     "$(HOME)/pvm3/bin/$(PVM_ARCH)",
	     SUBRNAME(s));
    if ((x = proxy_queue_cmd (h, s, buf)) < 0)
	return x;
    sprintf (buf, "cp %s %s/%s",
	     SUBRNAME(s),
	     "$(HOME)/pvm3/bin/$(PVM_ARCH)",
	     SUBRNAME(s));
    if ((x = proxy_queue_cmd (h, s, buf)) < 0)
	return x;
    sprintf (buf, "rm -f %s/%s.old",
	     "$(HOME)/pvm3/bin/$(PVM_ARCH)",
	     SUBRNAME(s));
    if ((x = proxy_queue_cmd (h, s, buf)) < 0)
	return x;
#endif
    return 0;
}

void
build_Everything (w, cli, cd)
Widget w;
XtPointer cli;
XtPointer cd;
{
    struct cost_matrix *cm = global.costMatrix;
    int h, s;
    int fail = 0;
    int x;
    FILE *fp;
    char *reason;
    int debug = (int) cli;

#if 0
    printf ("build_Everything: cli=%d, cd=%d\n", cli, cd);
#endif
    /*
     * XXX make sure:
     * + no windows are open,
     * + we aren't running or tracing a HeNCE program
     */

    /*
     * set build mode  (i.e. don't allow any changes to
     * graph or cost matrix while we are doing this).
     * (do this before configuring machine, and before
     * checking the graph)
     */
    set_mode (MODE_BUILD);

    msg_Format ("Starting build...\n");
    ts_append (global.pvm_sink, "(starting build)\n");

    /*
     * make sure graph is valid
     */
    if (global.graph == NULL || gr_Empty (global.graph)) {
	msg_Format ("Build failed: empty graph\n");
	fail = 1;
	goto abort2;
    }
    if (graph_critic (global.graph) != 0) {
	msg_Format ("Build failed: invalid HeNCE graph\n");
	fail = 1;
	goto abort2;
    }
    if (subdefs_check_graph (global.graph, defaults.subDefsFile) < 0) {
	msg_Format ("Build failed: errors in graph\n");
	fail = 1;
	goto abort2;
    }

    /*
     * make sure there's a valid cost matrix (before starting pvm)
     */
    if (cm_IsEmpty (global.costMatrix)) {
#if 0
	msg_Format ("Build failed: no cost matrix\n");
	fail = 1;
	goto abort2;
#else
	msg_Format ("Build: setting up a default cost matrix\n");
	if (global.costMatrix == NULL)
	    global.costMatrix = cm_New ();
	cm_BuildDefaultCostMatrix (global.graph, global.costMatrix);
	newmatrices (global.costMatrix);
#endif
    }
    /*
     * make sure pvm is running (before configuring hosts)
     */
    if (pvmglue_start_pvm () < 0) {
	msg_Format ("Build failed: can't start pvm\n");
	fail = 1;
	goto abort2;
    }

    /*
     * clear cost matrix colors
     */
    for (h = -1; h < global.costMatrix->nhost; ++h) {
	for (s = -1; s < global.costMatrix->nsub; ++s) {
	    if (h == -1 && s == -1)
		continue;
	    cm_SetColor (global.costMatrix, h, s, COLOR_NORMAL);
	}
    }
    config_repaint_cells (-1, -1);

    /*
     * make sure each subr in the graph is in the cost matrix
     * and has at least one host assigned to it; configure
     * required hosts
     */
    if (cm_AddHostsToPvm (global.graph, global.costMatrix) != 0) {
	msg_Format ("Build failed: can't configure pvm\n");
	config_repaint_cells (-1, -1);
	fail = 1;
	goto abort2;
    }

    /*
     * initialize per-host and per-subr state
     */
    alloc_local_state (global.costMatrix);

    /*
     * create an empty file containing the session key.  Later on
     * we will attempt to copy it to each system to see whether
     * we've already scribbled files on that system (e.g. if
     * it shares a common file systems with other systems)
     */
    if ((fp = fopen (session_key, "w")) != NULL)
	fclose (fp);
    
    /*
     * for each host, get its architecture
     */
    for (h = 0; h < global.costMatrix->nhost; ++h) {
	if (HOSTNAME(h) == NULL || *HOSTNAME(h) == '\0')
	    continue;
	if ((per_host[h].arch = pvmglue_get_arch (HOSTNAME(h))) == NULL) {
	    msg_Format ("Build error: can't get architecture for %s\n",
			HOSTNAME(h));
	    ++fail;
	}
	else
	    per_host[h].arch = STRDUP (per_host[h].arch);
    }
    if (fail > 0) {
	msg_Format ("Build failed: can't get architectures\n");
	goto abort2;
    }

    /*
     * make sure source files exist
     * XXX maybe remember mod times.
     */
    if (check_source_files (global.costMatrix, global.language) < 0) {
	msg_Format ("Build failed: missing source files\n");
	fail = 1;
	goto abort2;
    }

    /*
     * for each host {
     *     start proxyd
     *	   check proxyd version
     * }
     */
    if ((x = proxy_init (global.costMatrix->nhost)) < 0) {
	msg_Format ("Build failed: proxy_init: %s\n",
		    proxy_strerror (x));
	fail = 1;
	goto abort2;
    }
    for (h = 0; h < global.costMatrix->nhost; ++h) {
	if (HOSTNAME(h) == NULL || *HOSTNAME(h) == '\0')
	    continue;
	if ((x = proxy_start_server (h)) < 0) {
	    ++fail;
	    set_cell_color (h, -1, COLOR_BROKEN);
	    msg_Format ("Error starting proxyd on %s: %s\n",
			HOSTNAME(h), proxy_strerror (x));
	    continue;
	}
	else if ((x = proxy_check_server_version (h)) < 0) {
	    ++fail;
	    set_cell_color (h, -1, COLOR_BROKEN);
	    msg_Format ("Error checking proxyd version on %s: %s\n",
			HOSTNAME(h), proxy_strerror (x));
	    continue;
	    
	}
    }
    if (fail > 0) {
	msg_Format ("Build failed: can't start proxyd servers\n");
	fail = 1;
	goto abort;
    }

    /*
     * for each host {
     *     if we've already scribbled on this file system
     *         continue
     *     else {
     *         for each subr used by each host {
     *             copy source file to host
     *             generate and copy wrapper for that host
     *         }
     *     }
     * }
     *
     * (above stuff must be done serially, because the filesystems
     * might be shared; so we can detect whether a file is already
     * up-to-date.  Also, write a special token in each host so
     * we can tell if tho hosts are the same; if they are; only compile
     * on one of them.)
     */

    for (h = 0; h < global.costMatrix->nhost; ++h) {
	if (HOSTNAME(h) == NULL || *HOSTNAME(h) == '\0')
	    continue;
	/*
	 * if we're already scribbled here, skip to next host
	 *
	 * XXX bug...if machine share file systems, and the first machine
	 * of a particular architecture doesn't include all of the programs,
	 * the ones not included on the first machine will never be compiled.
	 */
	if (been_here (h, session_key) == 1)
	    continue;

	if (global.language == LANG_FORTRAN) {
	    if (copy_fmain (h) < 0) {
		reason = "fmain.f copy failed";
		goto hostfail;
	    }
	}

	for (s = 0; s < global.costMatrix->nsub; ++s) {
	    if (global.costMatrix->subs[s] == NULL ||
		global.costMatrix->subs[s][0] == '\0' ||
		strcmp (global.costMatrix->subs[s], "null") == 0)
		continue;
	    if (global.costMatrix->mat[s + h * global.costMatrix->nsub] <= 0)
		continue;
	    set_cell_color (h, s, COLOR_IDLE);
	    if (copy_file (h, s) < 0) {
		reason = "file copy failed";
		goto hostfail;
	    }
	    if (copy_wrapper (h, s, global.language) < 0) {
		reason = "wrapper copy failed";
		goto hostfail;
	    }
	    if (queue_compile_cmd (h, s, global.language, debug) < 0) {
		reason = "queue_compile_cmd failed";
		goto hostfail;
	    }
	    if (queue_install_cmd (h, s) < 0) {
		reason = "queue_install_cmd failed";
		goto hostfail;
	    }
	}
	continue;

    hostfail:
	++fail;
	set_cell_color (h, s, COLOR_BROKEN);
    }
    if (fail > 0) {
	msg_Format ("Build failed: %s\n", reason);
	fail = 1;
	goto abort;
    }
    if (proxy_run_cmds () < 0) {
	msg_Format ("Build failed.\n");
	fail = 1;
	goto abort;
    }


    /* FALL THROUGH */

 abort:
    /*
     * nuke obj files and session_key file on each host.
     * clean up per-host and per-subr state
     * set compose mode
     */
    for (h = 0; h < global.costMatrix->nhost; ++h) {
	char buf[200];

	if (HOSTNAME(h) == NULL || *HOSTNAME(h) == '\0')
	    continue;
	sprintf (buf, "rm -f %s", session_key);
	queue_cmd (buf, h, s);
    }

    proxy_run_cmds ();

    for (h = 0; h < global.costMatrix->nhost; ++h) {
	if (HOSTNAME(h) == NULL || *HOSTNAME(h) == '\0')
	    continue;

	if ((x = proxy_shutdown_server (h)) < 0) {
	    msg_Format ("proxyd_shutdown error on %s: %s\n",
			HOSTNAME(h), proxy_strerror (x));
	}
    }

    /* FALL THROUGH */

 abort2:
    if (*session_key)
	unlink (session_key);
    free_local_state (global.costMatrix);


    if (fail) {
	ts_append (global.info_sink, "(build failed)\n");
	ts_append (global.pvm_sink, "(build failed)\n");
    }
    else {
	ts_append (global.pvm_sink, "(build complete)\n");
	ts_append (global.info_sink, "(build complete)\n");
    }
    set_mode (MODE_COMPOSE);
}

/*
 * for each subr in cost matrix (that has a host assigned to it),
 * write wrappers for the local arch and the current language
 *
 * XXX should we update the cost matrix from the graph, or warn
 * the user if any nodes are in the graph but aren't in the cost matrix?
 */

void
build_WriteWrappers (x)
void *x;
{
    int s, h;
    struct cost_matrix *cm = global.costMatrix;

    if (cm == NULL)
	return;
    if (subdefs_check_graph (global.graph, defaults.subDefsFile) > 0) {
	msg_Format ("Can't generate wrappers: errors in graph\n");
	return;
    }
    for (s = 0; s < cm->nsub; ++s) {
	int need_wrapper = 0;

	if (cm->subs[s] == NULL || *(cm->subs[s]) == '\0')
	    continue;
	if (strcmp (cm->subs[s], "null") == 0) /* AAARRRRGGGH! */
	    continue;

	/* see if any host uses this subr */
	for (h = 0; h < cm->nhost; ++h) {
	    if (cm->hosts[h] == NULL || *(cm->hosts[h]) == '\0')
		continue;
	    if (cm->mat[s + h * cm->nsub] > 0) {
		need_wrapper = 1;
		break;
	    }
	}
	if (need_wrapper) {
	    char filename[1024];

	    sprintf (filename, "%sw_%s.c",
		     global.language == LANG_C ? "c" : "f",
		     cm->subs[s]);
	    subdefs_write_wrapper (filename, cm->subs[s], global.language,
				   ARCHSTR);
	    msg_Format ("wrote %s\n", filename);
	}
    }
}
