#!/usr/bin/python
#
# rpmwatcher -- fetch RPM updates over the net
#
# By Eric S. Raymond <esr@thyrsus.com>
#

import sys, time, os, string, socket, getopt, urllib, re, smtplib

action_log = []

def log_action(str):
    """Record a status message and optionally echo it.

    The message is always appended to the module-level action_log list
    (which may later be mailed as the run report).  It is also written
    to stderr when the global `verbose' flag is true.

    str -- the message text; no newline is added, so callers supply
           their own.  (The parameter name shadows the builtin; it is
           kept unchanged for interface compatibility.)
    """
    action_log.append(str)
    if not verbose:
        return
    sys.stderr.write(str)

def fatal(str):
    """Report a fatal error.

    The message is stored in the module-level action_log with a
    "Fatal error: " prefix and the bare message is written to stderr
    regardless of verbosity.  This function does NOT terminate the
    program; callers follow it with sys.exit() themselves.

    str -- the error text; callers supply the trailing newline.
    """
    logged_form = "Fatal error: " + str
    action_log.append(logged_form)
    sys.stderr.write(str)

def parse_rpm(rpm):
    """Split an RPM name into a (rpm, stem, version-build) tuple.

    rpm -- an RPM file name, path or URL; only its basename is matched
           against the module-level `rpm_pattern' regular expression.

    Returns a 3-tuple: the argument exactly as given, the package stem
    (pattern group 1), and a canonicalized "version-build" string.
    The canonical form exists so that two versions of one package can
    be ordered with a plain string comparison (see compare_rpm).

    If the name does not match rpm_pattern the error is reported via
    fatal() and the program exits with status 1.

    Known limitation: fields such as "18p7" or "0a16" can be ordered
    wrongly when the number of digits in the suffix goes up while the
    leading digit goes down.
    """
    parsed = rpm_pattern.search(os.path.basename(rpm))
    if not parsed:
        fatal("Ill-formed RPM name " + rpm + "\n")
        sys.exit(1)

    # Make sure the version has at least three dot-separated fields by
    # supplying "0" for a missing minor version and/or patchlevel.
    fields = string.split(parsed.group(2), '.')
    while len(fields) < 3:
        fields.append("0")

    # Zero-pad every field to width 6 so that a change in the number of
    # leading digits (9 -> 10) still sorts correctly as text.
    padded = []
    for field in fields:
        padded.append(string.zfill(field, 6))

    # The build level is padded to the same width.
    build = string.zfill(parsed.group(3), 6)
    canonical = string.join(padded, '.') + "-" + build
    return (rpm, parsed.group(1), canonical)

def compare_rpm(rpm1, rpm2):
    """Compare two parsed-RPM tuples and return a status token.

    rpm1, rpm2 -- sequences whose element 1 is the package stem and
                  whose element 2 is the canonical version-build string
                  (the layout returned by parse_rpm).

    Returns None when the stems differ (the packages are not
    comparable), otherwise 'same', 'newer' or 'older' describing rpm1
    relative to rpm2, using ordinary comparison of the canonical
    version-build strings.
    """
    if rpm1[1] != rpm2[1]:
        # Different packages: no meaningful ordering.
        return None
    ours, theirs = rpm1[2], rpm2[2]
    if ours == theirs:
        return 'same'
    if ours > theirs:
        return 'newer'
    return 'older'

def remove_duplicates(urls, name):
    """Prune a parsed-RPM list in place down to the newest versions.

    urls -- list of (rpm, stem, version-build) tuples in the layout
            produced by parse_rpm.  The list is modified in place;
            nothing is returned.
    name -- label for the list, used only in log messages.

    Every pair of entries is compared with compare_rpm.  When two
    entries are the same package and version, the one with the lower
    index is dropped; when one is newer, the older one is dropped.
    Entries for different packages never affect each other.

    Progress messages are logged only when the global `verbose' flag
    is set, and the surviving list is dumped when `debug' is set.
    """
    deletions = {}
    if verbose:
        log_action("Pruning " + name + " RPMs...\n")
    count = len(urls)
    for i in range(count):
        for j in range(count):
            status = compare_rpm(urls[i], urls[j])
            if status == 'same' and i < j:
                # Identical version listed twice: keep the later copy.
                if verbose:
                    log_action("Duplicate "+ urls[i][0] + " removed\n")
                deletions[i] = 1
            elif status == 'newer':
                if verbose:
                    log_action("Older "+urls[j][0]+" removed ("+urls[i][0]+")\n")
                deletions[j] = 1
    # Delete from the highest index downward so that the indices still
    # waiting to be deleted are not shifted by earlier deletions.
    for j in range(count - 1, -1, -1):
        if deletions.has_key(j):
            del urls[j]
    if debug:
        log_action("Collected " + name + " RPMs after pruning:\n")
        # Report the list that was actually pruned.  (This used to walk
        # the global `updates' list whatever list had been passed in.)
        for url in urls:
            log_action("    " + url[0] + "\n")


if __name__ == '__main__':
    # Main program.  Outline:
    #   1. parse options and the run control file;
    #   2. list installed RPMs (rpm -qa) and RPMs already fetched into
    #      the local directory;
    #   3. scan each watched page for RPM names/URLs;
    #   4. parse all three lists, prune each to its newest versions, and
    #      compare every update against the installed and fetched lists;
    #   5. print, download (-c) and/or mail (-m) the resulting need list.
    #
    # verbose, debug, rpm_pattern and updates must stay module-level
    # names: the helper functions above read them as globals.

    # Needed only here, for copying downloaded RPMs without a shell.
    import shutil

    # Defaults.  expanduser() is used instead of os.environ["HOME"] so
    # that an unset HOME does not abort with a KeyError.
    rcfile = os.path.expanduser("~/.rpmwatcher")
    localdir = os.path.expanduser("~/RPMS")
    architecture = "i[3456]86"

    # Process arguments
    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "vdf:cm:nl")
    except getopt.error:
        fatal(sys.argv[0] + ": " + str(sys.exc_info()[1]) + "\n"
              + "usage: " + sys.argv[0]
              + " [-vdlcn] [-f rcfile] [-m address]\n")
        sys.exit(1)
    # `long' must be initialized here: left unset, the later `if long:'
    # tests would find the builtin of the same name, which is always
    # true, making -l meaningless.
    fetchnew = mailto = copy = debug = verbose = long = None
    for (switch, val) in options:
        if switch == '-l':
            long = 1                        # per-RPM status notes
        elif switch == '-v':
            long = verbose = 1              # verbose implies long
        elif switch == '-d':
            debug = verbose = long = 1      # debug implies both
        elif switch == '-f':
            rcfile = val                    # alternate run control file
        elif switch == '-c':
            copy = 1                        # download needed RPMs
        elif switch == '-m':
            mailto = val                    # mail the action log here
        elif switch == '-n':
            fetchnew = 1                    # also want never-seen packages

    # Read the run control file
    watchlist = []
    ignores = {}        # watch URL -> list of ignore regexps for it
    lastwatch = None    # most recent `watch' URL; `ignore' attaches to it
    try:
        fp = open(rcfile)
    except IOError:
        fatal("Couldn't open run control file " + rcfile + ".\n")
        sys.exit(1)
    try:
        for line in fp.readlines():
            tokens = string.split(line)
            if not tokens:
                continue
            directive = tokens[0]
            if directive not in ('watch', 'local', 'architecture', 'ignore'):
                # Unrecognized lines are silently skipped.
                continue
            if len(tokens) < 2:
                fatal("The `" + directive + "' directive requires an argument\n")
                sys.exit(1)
            argument = tokens[1]
            if directive == 'watch':
                watchlist.append(argument)
                lastwatch = argument
                ignores[lastwatch] = []
            elif directive == 'local':
                localdir = argument
            elif directive == 'architecture':
                architecture = argument
            elif lastwatch is None:
                fatal("The `ignore' directive is illegal before the first `watch'\n")
                sys.exit(1)
            else:
                ignores[lastwatch].append(argument)
    finally:
        fp.close()
    if not os.path.isdir(localdir):
        fatal("Local directory " + localdir + " does not exist!\n")
        sys.exit(1)
    if debug:
        log_action("Watchlist: \n")
        for url in watchlist:
            log_action("    " + url + "\n")
        log_action("Local directory: " + localdir + "\n")
        log_action("Architecture: " + architecture + "\n")

    # Get the list of installed RPMs into the variable `installed'.
    # The output is read through a pipe rather than via a temporary
    # file with a predictable name in a shared directory.
    cmd = "rpm -qa"
    installed = []
    pipe = os.popen(cmd)
    for line in pipe.readlines():
        entry = string.strip(line)
        if entry:
            installed.append(entry)
    status = pipe.close()       # None on success, else the exit status
    if status:
        fatal("`" + cmd + "' run failure, status " + repr(status) + "\n")
        sys.exit(1)

    # Now list fetched RPMs from local directory into the variable `fetched'
    fetched = []
    for entry in os.listdir(localdir):
        if os.path.isfile(os.path.join(localdir, entry)):
            fetched.append(entry)

    # File-name tail shared by all the patterns below.  The leading dot
    # is outside the alternation so it applies to `noarch' as well as
    # to the architecture expression.
    trailer = r"(\.(" + architecture + r"|noarch)\.rpm)"

    # Fetch watchlist pages and assemble an RPM list from them.
    # Pages fetched over HTTP are scanned for absolute ftp:// links;
    # FTP directory listings are scanned for bare file names, which are
    # then joined to the listing's own URL.
    updates = []
    http_pattern = re.compile(r"\W(ftp://[A-Za-z0-9_./-]*" + trailer + r")\W")
    ftp_pattern = re.compile(r"\W([A-Za-z0-9_.-]*" + trailer + r")\W")

    for url in watchlist:
        if url[0:5] == "http:":
            prog = http_pattern
        elif url[0:4] == "ftp:":
            prog = ftp_pattern
        else:
            log_action("Page type of "+url+" is unknown, skipping.\n")
            continue
        if debug:
            log_action("Retrieving " + url + ":\n")
        try:
            (filename, headers) = urllib.urlretrieve(url)
        except Exception:
            # Best effort: an unreachable page must not stop the run.
            log_action("Couldn't read " + url + " (network or server error)\n")
            continue
        if debug:
            log_action("Filename " + filename + ":\n")
            log_action("Headers:\n" + string.join(headers.headers) + "\n")
        try:
            fp = open(filename)
        except IOError:
            # Reported with fatal() so the reason for exiting is always
            # visible, not only in verbose mode.
            fatal("Can't open local copy " + filename + "\n")
            sys.exit(1)
        if debug:
            log_action("Candidate lines:" + "\n")
        for line in fp.readlines():
            if string.find(line, ".rpm") == -1:
                continue
            match = prog.search(line)
            if debug:
                log_action(line + "\n")
                if match:
                    log_action("*** Matches: " + repr(match.group(1)) + "\n")
            if not match:
                continue
            found = match.group(1)
            rejected = None
            for regexp in ignores[url]:
                if re.search(regexp, found):
                    if debug:
                        log_action("*** Match rejected by `ignore "+ regexp + "'\n")
                    rejected = 1
                    break
            if rejected:
                continue
            if prog is http_pattern:
                candidate = found
            else:
                candidate = os.path.join(url, found)
            # Compare the URL string itself; testing the match object
            # against the list could never detect a duplicate.
            if candidate not in updates:
                updates.append(candidate)
        fp.close()
    # Discard the temporary page copies left behind by urlretrieve().
    urllib.urlcleanup()
    if debug:
        log_action("Collected updates:\n")
        for url in updates:
            log_action("    " + url + "\n")

    # Digest these lists into tuple forms for comparison
    rpm_pattern = re.compile(r"([\w+.-]*)-([^-]+)-([^.]*[0-9])" + trailer + "?")
    if debug:
        log_action("Template: " + rpm_pattern.pattern + "\n")
        log_action("Parsing installed RPM names...\n")
    installed = list(map(parse_rpm, installed))
    if debug:
        log_action("Parsing fetched RPM names...\n")
    fetched = list(map(parse_rpm, fetched))
    if debug:
        log_action("Parsing update RPM names...\n")
    updates = list(map(parse_rpm, updates))
    if debug:
        log_action("Name parsing complete.\n")

    remove_duplicates(installed, "installed")  # generally only the kernel
    remove_duplicates(updates, "updates")
    remove_duplicates(fetched, "fetched")

    # Now compare the updates to the installed and fetched lists.
    # Each table maps an update tuple to (status, local name), or to
    # (None, None) when no entry for the same package exists.  After
    # pruning there is at most one entry per package in each list, so
    # the first comparable entry is the only one.
    against_installed = {}
    against_fetched = {}
    for url in updates:
        against_installed[url] = (None, None)
        for local in installed:
            status = compare_rpm(url, local)
            if status:
                against_installed[url] = (status, local[0])
                break
        against_fetched[url] = (None, None)
        for local in fetched:
            status = compare_rpm(url, local)
            if status:
                against_fetched[url] = (status, local[0])
                break

    # We can deduce the proper action from these two statuses.  Each
    # status says how the update relates to the local copy ('newer'
    # means the update is newer than what we have).
    needed = []
    for url in updates:
        head = url[0] + ": \n   "
        (installed_status, installed_version) = against_installed[url]
        (fetched_status, fetched_version) = against_fetched[url]
        wanted = None   # true when this update belongs on the need list
        chatty = None   # true when the note is shown only with -v

        if installed_status is None and fetched_status is None:
            if fetchnew:
                note = "UPDATE, no relevant RPM either installed or fetched.\n"
                wanted = 1
            else:
                note = "no relevant RPM either installed or fetched.\n"
        elif installed_status is None and fetched_status == 'newer':
            note = ("UPDATE, newer than " + fetched_version
                    + " which is fetched but not installed.\n")
            wanted = 1
        elif installed_status is None and fetched_status == 'same':
            note = "this version has been fetched but not installed.\n"
        elif installed_status is None and fetched_status == 'older':
            note = ("newer version " + fetched_version
                    + " is fetched but not installed.\n")

        elif installed_status == 'newer' and fetched_status is None:
            note = ("UPDATE, newer than installed version " + installed_version
                    + " (no relevant fetched versions).\n")
            wanted = 1
        elif installed_status == 'newer' and fetched_status == 'newer':
            # This note is now subject to -l like every other UPDATE
            # note; it used to be emitted unconditionally.
            note = ("UPDATE, newer than both installed version "
                    + installed_version
                    + " and fetched version " + fetched_version + "\n")
            wanted = 1
        elif installed_status == 'newer':       # fetched is same or older
            note = "supersedes installed but not fetched version.\n"

        elif installed_status == 'same' and fetched_status is None:
            note = "already installed (no relevant fetched versions).\n"
            chatty = 1
        elif installed_status == 'same' and fetched_status == 'same':
            note = "already installed (fetched version same).\n"
            chatty = 1
        elif installed_status == 'same':        # fetched is newer or older
            note = ("already installed (fetched version " + fetched_version
                    + " is " + fetched_status + ").\n")

        elif installed_status == 'older' and fetched_status is None:
            note = ("older than installed version " + installed_version
                    + " (no relevant fetched versions).\n")
        elif installed_status == 'older':       # any fetched status
            note = ("newer version " + installed_version
                    + " already installed (fetched version " + fetched_version
                    + " is " + fetched_status + ").\n")
        else:
            # Unexpected status token; always reported.
            log_action(head + "internal error!\n")
            continue

        if long and (verbose or not chatty):
            log_action(head + note)
        if wanted:
            needed.append(url)

    # OK, now we have the need list.
    if mailto:
        if needed:
            log_action("\nRecommended updates:\n")
        else:
            log_action("\nNo recommended updates.\n")
    for update in needed:
        if copy:
            if verbose:
                log_action("Downloading " + update[0] + "...\n")
            target = os.path.join(localdir, os.path.basename(update[0]))
            try:
                # Download to a temporary file first so that a failed
                # transfer never leaves a partial RPM in localdir, then
                # copy it into place without going through a shell.
                (filename, headers) = urllib.urlretrieve(update[0])
                shutil.copy(filename, target)
            except Exception:
                log_action("Download of " + os.path.basename(update[0]) + " failed.\n")
        elif mailto:
            log_action(update[0] + "\n")
        else:
            sys.stdout.write(update[0] + "\n")
    if copy:
        # Discard the temporary download copies.
        urllib.urlcleanup()
    if verbose:
        log_action("Done.\n")

    # Maybe we need to mail the action log now
    if mailto:
        msg = "Subject: rpmwatcher report on new RPMs\n\n" + string.join(action_log, '')
        if needed and not copy:
            msg = msg + ("\nYou can download these updates "
                         "by running rpmwatcher from the command line.\n")
        msg = msg + "--\n\t\t\t\tThe RPM Watcher\n"
        # Work out the sender before connecting.  USER is often unset
        # under cron, so fall back to LOGNAME.
        user = os.environ.get('USER') or os.environ.get('LOGNAME')
        if not user:
            fatal(sys.argv[0] + ": neither USER nor LOGNAME is set.\n")
            sys.exit(1)
        hostname = socket.gethostbyaddr(socket.gethostname())[0]
        me = user + "@" + hostname
        try:
            server = smtplib.SMTP("localhost")
        except socket.error:
            fatal(sys.argv[0] + ": connect to localhost failed.\n")
            sys.exit(1)
        server.sendmail(me, [mailto], msg)
        server.quit()

# The following sets edit modes for GNU EMACS
# Local Variables:
# mode:python
# End:
