#!/usr/bin/env python
#
# dsh v0.1.3 (c) Andre Fachat
# distributed under GPL (this is too small to include a copy, go to
# www.gnu.org to get a copy or refer to your favorite GNU program for the
# file COPYING)
#
# This handy script uses the dshd daemon to find the currently least
# loaded machine in a cluster. It then distributes the command given 
# to this machine (via rsh or ssh). The directory where dsh is started must 
# be at the same place on the remote machine. 
# To avoid NFS problems a temporary file is created by the local
# process and the remote process waits for it to exist
# (needs the "waitfile" shell script). After completion the remote
# process removes the file and exits.
# The local process waits for the child to terminate and then waits
# for the temp file to disappear, to be sure all NFS stuff has been done.
#
# possible improvements:
# - catch SIGINT and send to remote process
# - own cmdline options for verbosity (print remote host name) etc
#

# 
# Defines
#

# Port on which dshd is queried:
#   8181 gives a list of machines (one each line) with respective load
#        and mem values
#   8282 gives one line with the best machine according to dshd
# All report lines have the same format, so we don't really care about
# the port.
#stport = 8181
stport = 8282

# Shell to use for remote execution.
shell = "rsh"

# Remote helper. It takes tmpfname and the command as parameters: it waits
# for the file to exist, execs the command, removes the file and then
# returns the right return value.
rmhelper = "dshexec"

# This can be changed as you like it best.
# It is only useful, however, if you use port 8181, where you get the list
# of the whole cluster. If you only get one (the best) machine from port
# 8282, that machine is always taken.
def eval(host):
        """Return the score of *host*; the host with the highest score wins.

        The score is the inverse of the (slightly offset) load, so a lower
        load gives a higher score. host.load and host.mem may be numbers or
        numeric strings (the report fields are stored as strings); both are
        converted with float(), which raises ValueError for non-numeric
        text.

        NOTE: the name shadows the eval() builtin; it is kept because the
        rest of the script calls it under this name.
        """
        load = float(host.load)
        # if low on memory, add some virtual load, so that the machine
        # scores worse (adding to the score itself would prefer it instead)
        if float(host.mem) < 1000000:
                load = load + 5.0
        # the 0.1 offset avoids a division by zero on an idle machine
        return 1.0/(load + 0.1)

#
# Code follows 
#

import string
import socket
import time
import sys
import posix
import os
import tempfile
import glob

# Name of this machine. It is compared with the host name reported by dshd
# to decide whether the command can simply be run locally.
try:
	localname=socket.gethostbyaddr(socket.gethostname())[0]
except (socket.error, socket.herror, socket.gaierror):
	# The host name cannot be resolved: fall back to the plain host name
	# instead of dying with a traceback before even printing the usage.
	localname=socket.gethostname()

# No command given, or help requested: print the usage and stop.
if len(sys.argv) < 2 or sys.argv[1] in ("-?", "-h", "--help"):
	sys.stdout.write("Usage: dsh command [command args]\n")
	sys.stdout.write(" dsh distributes the command to a least loaded system\n")
	sys.stdout.write(" according to the information given by dshd\n")
	sys.exit(0)

class Host:
	# One candidate machine. The class-level values are the defaults that
	# every fresh instance starts with; the report parser overwrites them
	# per instance.
	eval = 0		# score; the host with the highest value is chosen
	mem = 0.0
	load = 1000.0
	name = localname	# defaults to this machine

# Connect to dshd on the status port. If the daemon cannot be reached the
# command is run locally (local = 1).
# one could think of making this a unix socket, for security purposes
stsock=socket.socket(socket.AF_INET, socket.SOCK_STREAM, 6)	# 6 = TCP
local = 0
try:
	stsock.connect(("", stport))
except socket.error:
	local = 1
	# Do not leak the unused descriptor into the command that is
	# exec'ed locally later on.
	stsock.close()

if not local:
	# Read the complete report from dshd. The file object returned by
	# makefile() is still read after the socket object has been closed.
	fd = stsock.makefile("r")
	stsock.close()
	try:
		reports = fd.readlines()
	finally:
		fd.close()

	# Start with the Host defaults (this machine, score 0) and replace
	# them by the best scoring machine of the report.
	best = Host()
	for line in reports:
		# expected line format: <name> <load> <mem>
		fields = line.split()
		if len(fields) < 3:
			# blank or truncated line: nothing to evaluate
			continue
		x = Host()
		x.name = fields[0]
		x.load = fields[1]
		x.mem = fields[2]
		try:
			x.eval = eval(x)
		except ValueError:
			# non-numeric load/mem value: ignore this machine
			continue

		if x.eval > best.eval:
			best = x

	# The best machine is this one (or the report held no usable line):
	# no need to go remote.
	if best.name == localname:
		local = 1

# print "local=",local,", host=",best.name,", best=",best.eval

if not local:
	# create temporary lock file to work around NFS latency
	# remote process waits for this file to exist
	# -> all other files should exist on the NFS server / remote
	# machine  as well
	cwd = os.getcwd()
	# mkstemp() creates the file itself, atomically and under a fresh
	# name, so no racy mktemp() + shell "touch" is needed (which also
	# broke on directory names containing blanks or shell characters).
	tmpfd, tmpfname = tempfile.mkstemp(dir=cwd)
	os.close(tmpfd)
	# print "tmpfname=",tmpfname

	# remote command line: ( cd <cwd> ; <rmhelper> <tmpfname> <command...> )
	# NOTE(review): the arguments are passed on unquoted, so the remote
	# shell presumably re-splits them -- confirm before relying on
	# arguments that contain blanks.
	rargs=[shell, best.name, "(cd", cwd, ";",
		rmhelper, tmpfname ]
	rargs.extend(sys.argv[1:])
	rargs.append(")")

	# print "args=", rargs

	# sys.stderr.write("host: " + best.name + "\n")

	pid=os.fork()

	if pid == 0:
		# child: replace ourselves by the remote shell
		try:
			os.execvp(shell, rargs)
		except OSError:
			sys.stderr.write("dsh: cannot execute %s: %s\n"
				% (shell, sys.exc_info()[1]))
		# only reached when the exec failed; _exit() leaves the child
		# without running the cleanup code inherited from the parent
		os._exit(127)

	status=os.waitpid(pid, 0)[1]

	# Now wait till the lock file does not exist anymore, to be sure all
	# NFS stuff has been done. Poll with a short sleep instead of
	# spinning, and give up after a generous timeout: if the remote
	# helper never ran, nobody will ever remove the file.
	deadline = time.time() + 120.0
	while os.path.exists(tmpfname):
		if time.time() > deadline:
			try:
				os.unlink(tmpfname)
			except OSError:
				pass	# vanished in the meantime
			break
		time.sleep(0.1)

	# status is the encoded wait status, not the exit code: handing it to
	# sys.exit() unchanged turns e.g. exit code 1 (status 256) into 0.
	if os.WIFEXITED(status):
		sys.exit(os.WEXITSTATUS(status))
	if os.WIFSIGNALED(status):
		# shell convention for a command killed by a signal
		sys.exit(128 + os.WTERMSIG(status))
	sys.exit(1)

else:
	# sys.stderr.write("Local host\n")
	try:
		os.execvp(sys.argv[1], sys.argv[1:])
	except OSError:
		# command not found / not executable: report it without a
		# traceback, with the exit code shells use for this case
		sys.stderr.write("dsh: cannot execute %s: %s\n"
			% (sys.argv[1], sys.exc_info()[1]))
		sys.exit(127)

	

