#!/usr/bin/env python
#
# dshd v0.1.2 (c) Andre Fachat
# distributed under GPL (this is too small to include a copy, go to
# www.gnu.org to get a copy or refer to your favorite GNU program for the
# file COPYING)
#
# This daemon runs in the background on each computer in a cluster.
# The cluster is defined in the file etcname (see below) 
# The format is one machine per line with
#    machinename loadscale memscale
# where loadscale and memscale are multiplied with the respective 
# load and mem values before evaluation.
# The daemon sends its state information (load, mem) to all machines
# in the cluster. Then it tries to receive the information 
# from the other machines. If it does not receive a state info during
# maxloops loops it marks the machine as invalid and leaves it out of the
# reports - it might be down.
# 
# Telnetting to stport gives the state info of the complete cluster
# Telnetting to dport gives the state of the best machine. The load
# of this machine is locally increased (extraload) to handle the latency 
# between starting and the new state info to be received.
#
# This is not particularly an example of good programming.
# I am especially inexperienced with socket programming, so this might
# be improvable.
# Also there may be memory leaks that I did not find.
#
# Further possible improvements: 
# - cluster definition also by broadcast addresses
# - include memory value in evaluation
# - make evaluation function more flexible
#

#
# Defines
#
# Cluster definition file, one "machinename loadscale memscale" per line
etcname = "/usr/local/etc/dsh.cluster"

# UDP: the own status is sent from txport to rxport of every other machine
txport = 8181
rxport = 8282

# TCP: stport answers with the status of all machines,
#      dport answers with the status line of the best machine
stport = 8181
dport = 8282

# seconds to sleep at the start of each loop
waittime = 1.5

# number of loops a state packet from a machine is allowed to be missing
maxloops = 2

# presumably the size in bytes of the buffer for receiving a state packet
bufsize = 10000

# Score of a host: the machine with the HIGHEST score is reported as the
# best one. This can be changed as you like it best.
#
# host must provide the attributes load, extraload, loadscale, mem and
# memscale. The result is the float 1 / (scaled load + 0.1); the 0.1 keeps
# the division defined for an idle host.
#
# NOTE: the name shadows the builtin eval(); it is kept because the Host
# class calls the function under this name.
def eval(host):
	load = float(host.load + host.extraload) * host.loadscale
	# if low on memory, add some virtual load. The penalty belongs to the
	# load: adding it to the score itself would raise the score and make a
	# host that is short of memory look better than every other host.
	if ( host.mem * host.memscale ) < 1000000 :
		load = load + 5.0
	return 1.0/(load + 0.1)
	
#
# Code follows 
#

import string
import socket
import time
import getopt
import sys
import posix
import pprint

# Canonical name of this machine; it is the key of the own entry in the
# machine table and the default name of a new Host.
localname=socket.gethostbyaddr(socket.gethostname())[0]
background=0		# -b: fork to background after setup
printpid=0		# -p: print the pid of the background process

# Parse the command line. Options with an argument: -f -t -r
try:
	optlist, args = getopt.getopt(sys.argv[1:], '?f:t:r:bp')
except getopt.error, val:
	sys.stderr.write("Unknown option " + pprint.pformat(val) + "\n")
	sys.exit(1)

for opt, optarg in optlist:
	if opt == '-p':
		printpid=1
	elif opt == '-b':
		background=1
	elif opt == '-f':
		etcname=optarg
	elif opt == '-t' or opt == '-r':
		# a port that is not a number is a usage error, not a traceback
		try:
			port=string.atoi(optarg)
		except ValueError:
			sys.stderr.write("Invalid port number '" + optarg + "' for option " + opt + "\n")
			sys.exit(1)
		if opt == '-t':
			txport=port
		else:
			rxport=port
	elif opt == '-?':
		# NOTE(review): the help text describes -t/-r as the "full status"
		# and "best node report" ports, but the code above only changes the
		# UDP ports txport/rxport; the TCP ports stport/dport cannot be
		# changed from the command line. Text left as is - confirm intent.
		print "Usage: dshd [-f clusterdesc] [-b] [-p] [-t txport] [-r rxport]"
		print "  -f filename    = location of cluster file"
		print "  -b             = fork to background"
		print "  -p             = print own pid on startup (only if background)"
		print "  -t txport      = use other port than ",txport," for full status"
		print "  -r rxport      = use other port than ",rxport," for best node report"
		sys.exit(1)


class Host:
	name=localname
	load=1000.0
	extraload=0.0
	loadscale=1.0
	mem=0
	memscale=1.0
	loops=0
	eval=0.0
	def hostline(self):
		return string.join([self.name, " ", str(self.loadscale * (self.load + self.extraload)), " ", str(self.memscale * self.mem) ])
	def realline(self):
		return string.join([self.name, " ", str(self.load), " ", str(self.mem) ])
	def printhost(self):
		print self.hostline()
	def doeval(self):
		self.eval = eval(self)
		return self.eval

try:
	file=open(etcname, "r")
except IOError, var:
	sys.stderr.write("Error opening file " + etcname + " : " + var.strerror + "\n")
	sys.exit(1)

flist=file.readlines()
file.close()

machines={}
for i in flist:
	x = Host()
	ll=string.split(i)
	if len(ll)>1:
		x.loadscale=string.atof(ll[1])
		if len(ll)>2:
			x.memscale=string.atof(ll[2])
	x.name = socket.gethostbyaddr(ll[0])[0]
	machines[x.name] = x


# Refresh the state of the local machine from the /proc file system.
#
# x is the Host record to update: x.load is set to the first value of
# /proc/loadavg, x.mem to the free memory from /proc/meminfo (for now
# simply the free value). Returns x.
#
# Two layouts of /proc/meminfo are understood:
#   "Mem:  total used free ..."   third number is the free value
#   "MemFree:   n kB"             converted to bytes (n * 1024)
# The first matching line wins. If neither line is found x.mem keeps its
# previous value.
# NOTE(review): the "Mem:" values are assumed to be bytes, matching the
# limit used in eval() - confirm against proc(5) for the target kernel.
def getlocal(x):
	file=open("/proc/loadavg","r")
	line=file.readline()
	file.close()
	x.load=string.atof(string.split(line)[0])

	file=open("/proc/meminfo","r")
	lines=file.readlines()
	file.close()
	for line in lines:
		mm=string.split(line)
		if len(mm) > 3 and mm[0] == "Mem:":
			x.mem=string.atoi(mm[3])
			break
		if len(mm) > 1 and mm[0] == "MemFree:":
			x.mem=string.atoi(mm[1]) * 1024
			break
	return x

# UDP socket (protocol 17) the own status packets are sent from; it is
# bound to txport of the local machine.
txsock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM, 17)
try:
	txsock.bind((localname, txport))
except socket.error, var:
	sys.stderr.write(string.join(["UDP Socket error port ", repr(txport), ": ", var[1], "\n"], ""))
	sys.exit(1)
# NOTE(review): both options are set after bind(), so they cannot have an
# influence on the bind above - confirm whether that is intended.
for sockopt in (socket.SO_REUSEADDR, socket.SO_KEEPALIVE):
	txsock.setsockopt(socket.SOL_SOCKET, sockopt, 1)

# UDP socket (protocol 17) for the status packets of the other machines,
# bound to rxport of the local machine. It is non-blocking, so reading
# raises socket.error as soon as no packet is waiting.
rxsock=socket.socket(socket.AF_INET, socket.SOCK_DGRAM, 17)
try:
	rxsock.bind((localname, rxport))
except socket.error, var:
	sys.stderr.write(string.join(["UDP Socket error port ", repr(rxport), ": ", var[1], "\n"], ""))
	sys.exit(1)
rxsock.setblocking(0)

# TCP socket (protocol 6) answering with the status of all machines.
# It listens on stport on all interfaces and is non-blocking, so accept()
# raises socket.error when no connection is waiting.
# one could think of making this a unix socket, for security purposes
stsock=socket.socket(socket.AF_INET, socket.SOCK_STREAM, 6)
# Connections are closed from this side, which leaves them in TIME_WAIT
# for a while. Without SO_REUSEADDR (set before bind) a restarted daemon
# could not bind the port during that time.
stsock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
try:
	stsock.bind(("", stport))
except socket.error, var:
	sys.stderr.write("TCP Socket error port " + repr(stport) + ": "  + var[1] + "\n")
	sys.exit(1)
stsock.setblocking(0)
stsock.listen(5)

# one could think of making this a unix socket, for security purposes
dsock=socket.socket(socket.AF_INET, socket.SOCK_STREAM, 6)
try:
	dsock.bind(("", dport))
except socket.error:
	sys.stderr.write("TCP Socket error port " + repr(dport) + ": "  + var[1] + "\n")
	sys.exit(1)
dsock.setblocking(0)
dsock.listen(5)

# fork to go to background after all setup is done
if background:
	pid=posix.fork()
	if pid:
		if printpid:
			print pid
		sys.exit(0)

# True if the peer address (ip, port) resolves to the name of this machine.
# Raises socket.error if the address cannot be resolved.
# NOTE(review): the decision rests on a reverse name lookup of the peer
# address; comparing addresses instead would not depend on the resolver.
def _is_local_peer(addr):
	rmname = socket.gethostbyaddr(addr[0])[0]
	return rmname == localname or rmname == "localhost"

# Send the status lines of all machines with valid state (mem > 0).
def _send_status(newsock):
	report=""
	for i in machines.keys():
		if machines[i].mem > 0:
			report= string.join( (report, machines[i].hostline(), "\012"))
	newsock.send(report)

# Send the status line of the machine with the highest score. Once it has
# been handed out its extraload is raised, to cover the time until the
# real load of the new job shows up in the state packets.
def _send_best(newsock):
	y=machines[localname]
	y.doeval()
	for i in machines.keys():
		x=machines[i]
		if x.mem > 0:
			x.doeval()
			if x.eval > y.eval:
				y=x
	newsock.send(string.join((y.hostline(), "\012")))
	y.extraload = y.extraload+1

# Accept all connections waiting on the non-blocking socket lsock. Local
# clients get their answer from answer(newsock), all others are closed
# without an answer.
def _serve_local(lsock, answer):
	while 1:
		try:
			stconn = lsock.accept()
		except socket.error:
			return			# no (more) connection waiting
		newsock = stconn[0]		# new socket
		try:
			try:
				if _is_local_peer(stconn[1]):
					answer(newsock)
			except socket.error:
				# lookup or send failed: give up on this client only
				pass
		finally:
			newsock.close()

# Main loop: publish the own state, collect the state of the others and
# answer the local TCP requests, once every waittime seconds.
while 1:
	time.sleep(waittime)
	locinfo = getlocal(machines[localname])
	locstring = locinfo.realline()

	# fire and forget UDP packets with local info
	for i in machines.keys():
		x = machines[i]
		x.extraload = x.extraload * 0.8	# handed-out load fades away
		if i != localname :
			x.loops = x.loops + 1
			if x.loops > maxloops:
				x.mem = 0		# invalidate machine
				x.loops = 0
			try:
				txsock.sendto(locstring, (i,rxport))
			except socket.error:
				# best effort, the next loop sends again
				pass

	# receive UDP packets with state from other hosts
	while 1:
		try:
			rxval=rxsock.recvfrom(bufsize)
		except socket.error:
			break			# no (more) packet waiting
		try:
			rmname = socket.gethostbyaddr(rxval[1][0])[0]
		except socket.error:
			continue		# unknown sender, look at the next packet
		rmport = rxval[1][1]
		if rmport != txport:
			continue
		ll = string.split(rxval[0])
		# Anybody can send a packet to this port: ignore everything that
		# is not "name load mem" from a machine of the cluster.
		if len(ll) < 3 or ll[0] != rmname or not machines.has_key(rmname):
			continue
		try:
			rmload = string.atof(ll[1])
			rmmem = string.atof(ll[2])
		except ValueError:
			continue
		x = machines[rmname]
		x.load = rmload
		x.mem = rmmem
		x.loops = 0

	# send status report to local process connecting on stsock
	_serve_local(stsock, _send_status)

	# send name of best machine to local process on dsock
	_serve_local(dsock, _send_best)

