#!/usr/bin/env python


#     httphaps captures HTTP packets, and generates stats about them.
#     Copyright (C) 2003  Henrik Hansen, Jakob Frlich
#
#     This program is free software; you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation; either version 2 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program; if not, write to the Free Software
#     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


import pcap, socket, struct, sys, time, getopt
from threading import *


# ip : {bytes_from, syn_to, req_from, resp_from, content_from}
#
# bytes_from: bytes sent from ip
# syn_to: SYN requests sent to ip (packets where SYN bit is set)
# req_from: num of different HTTP requests sent from ip (GET, HEAD, etc.)
# resp_from: num of different HTTP responses sent from ip (404, 200, etc.)
# content_from: num of different content types sent from ip (text/html, image/png, etc.)
ip_stats = {}

# server : num of HTTP responses (from all IPs) whose Server header named that server
server_stats = {}


def process_stat(filename):
    """Write all statistics collected so far as one XML document.

    filename -- path of the file to (re)write; None or '-' selects
                standard output.

    The whole document is built in memory before the destination is
    opened, so the output file is only truncated once the new content is
    ready.  Text that originates from the network (host names, request
    methods, status codes, content types, server names) is XML-escaped.

    For every IP without a cached 'hostname' entry a reverse lookup is
    done and its result stored in ip_stats; 'unknown' is stored when the
    lookup fails, and in that case no <hostname> element is written.

    Standard output is flushed but never closed, because this function is
    called repeatedly during a capture.  If the file cannot be opened an
    error is printed and the program exits with status 1.
    """
    # FIXME: We should probably not rewrite the whole file every time
    lines = ["<?xml version='1.0'?>\n",
             "<httphaps>\n",
             "\t<ip_stats>\n"]

    # (section element, per-entry element, counter element)
    ip_sections = (('req_from', 'method', 'num_req'),
                   ('resp_from', 'status_code', 'num_resp'),
                   ('content_from', 'content_type', 'num_content'))

    for ip, stats in ip_stats.items():
        lines.append("\t\t<ip>\n")
        lines.append("\t\t\t%s\n" % _xml_text(ip))

        if 'hostname' not in stats:
            try:
                hostname = socket.gethostbyaddr(ip)[0]
            except socket.error:
                # socket.error also covers herror and gaierror
                hostname = 'unknown'
            stats['hostname'] = hostname

        if stats['hostname'] != "unknown":
            lines.append("\t\t\t<hostname>%s</hostname>\n"
                         % _xml_text(stats['hostname']))

        for tag in ('bytes_from', 'syn_to'):
            if tag in stats:
                lines.append("\t\t\t<%s>%s</%s>\n" % (tag, stats[tag], tag))

        for section, entry, count_tag in ip_sections:
            if section in stats:
                lines.extend(_xml_counter_section("\t\t\t", section, entry,
                                                  count_tag, stats[section]))

        lines.append("\t\t</ip>\n")

    lines.append("\t</ip_stats>\n")
    lines.extend(_xml_counter_section("\t", "server_stats", "server",
                                      "num_server_resp", server_stats))
    lines.append("</httphaps>\n")

    if filename is None or filename == '-':
        # Never close stdout: the next periodic dump needs it again.
        sys.stdout.writelines(lines)
        sys.stdout.flush()
        return

    try:
        outfile = open(filename, 'w')
    except IOError:
        print("Error opening %s for writing!" % filename)
        sys.exit(1)

    try:
        outfile.writelines(lines)
    finally:
        outfile.close()


def _xml_text(value):
    """Return str(value) with the XML special characters &, < and > escaped."""
    from xml.sax.saxutils import escape
    return escape(str(value))


def _xml_counter_section(indent, section, entry, count_tag, counters):
    """Return the XML lines of one <section> holding name -> count entries.

    Each entry is written as an <entry> element that contains the escaped
    name as text followed by a <count_tag> element with the count.
    """
    lines = ["%s<%s>\n" % (indent, section)]
    for name, num in counters.items():
        lines.append("%s\t<%s>\n" % (indent, entry))
        lines.append("%s\t\t%s\n" % (indent, _xml_text(name)))
        lines.append("%s\t\t<%s>%s</%s>\n" % (indent, count_tag, num, count_tag))
        lines.append("%s\t</%s>\n" % (indent, entry))
    lines.append("%s</%s>\n" % (indent, section))
    return lines
    

def decode_ip_packet(s):
    """Pull the fields this program uses out of a raw IP header.

    s -- the packet starting at the first byte of the IP header.

    Returns a dict with:
      total_len        -- bytes 2-3 read as an unsigned 16-bit value in
                          network byte order
      destination_addr -- bytes 16-19 converted to text by pcap.ntoa
      source_addr      -- bytes 12-15 converted to text by pcap.ntoa
      header_len       -- low nibble of the first byte (the IP header
                          length field, counted in 32-bit words)
    """
    total_len = struct.unpack('!H', s[2:4])[0]
    destination = pcap.ntoa(struct.unpack('i', s[16:20])[0])
    source = pcap.ntoa(struct.unpack('i', s[12:16])[0])
    first_byte = ord(s[0])
    return {
        'total_len': total_len,
        'destination_addr': destination,
        'source_addr': source,
        'header_len': first_byte & 0x0f,
    }


def decode_tcp_packet(s):
    """Pull the fields this program uses out of a raw TCP header.

    s -- the packet starting at the first byte of the TCP header.

    Returns a dict with:
      syn        -- byte 13 masked with 0x02; non-zero when that bit is set
      header_len -- high nibble of byte 12 (the TCP data offset, counted
                    in 32-bit words)
    """
    flags_byte = ord(s[13])
    offset_byte = ord(s[12])
    return {
        'syn': flags_byte & 0x02,
        'header_len': (offset_byte >> 4) & 0x0f,
    }


def decode_http_packet(s):
    """Classify a TCP payload as the start of an HTTP request or response.

    s -- the TCP payload as a string.

    Returns
      ("req", start_line)           when the first word of the first
                                    non-empty line is a known request method
      ("resp", start_line, header)  when that word starts with "HTTP" and
                                    the header block is terminated by an
                                    empty line inside this payload; header
                                    is the list of header lines
      None                          in every other case (empty payload, not
                                    HTTP, or a response whose header block
                                    is not terminated in this payload)
    """
    valid_req = ["OPTIONS", "GET", "HEAD", "POST", "PUT", "DELETE", "TRACE", "CONNECT"]

    # Leading empty lines are ignored.  HTTP ends lines with CRLF, so both
    # '\r' and '\n' have to be skipped, not only '\n'.
    start = 0
    while start < len(s) and s[start] in '\r\n':
        start += 1
    lines = s[start:].splitlines()

    if not lines:
        return None

    start_line = lines[0]
    words = start_line.split()
    if not words:
        # start line consists of whitespace only
        return None

    w = words[0]
    if w in valid_req:
        return ("req", start_line)

    if w.startswith("HTTP"):
        header = []
        for l in lines[1:]:
            # Header is finished, when an empty line is encountered
            if not l:
                return ("resp", start_line, header)
            header.append(l)

    return None


# Process the raw ethernet packet
def process_packet(pktlen, data, timestamp):
    """Update ip_stats and server_stats from one captured frame.

    pktlen    -- packet length as reported by the capture (unused)
    data      -- the raw frame; a 14-byte Ethernet header is assumed in
                 front of the IP header
    timestamp -- capture time (unused)

    Frames without data, or too short to contain the IP header and the
    TCP header bytes that are decoded, are ignored instead of raising.

    For every accepted frame the IP total length is added to the sender's
    'bytes_from'; a set SYN bit adds one to the receiver's 'syn_to'.  If
    the payload starts an HTTP message, the request method or the response
    status code is counted for the sender.  For responses the Content-Type
    media type (parameters such as "; charset=..." are dropped) is counted
    for the sender and the first word of the Server header is counted in
    server_stats.  Header names are matched case-insensitively, and a
    header or status line without a value is skipped.
    """
    if not data:
        return

    # 14 bytes of Ethernet header plus a minimal 20-byte IP header
    if len(data) < 14 + 20:
        return

    decoded_ip = decode_ip_packet(data[14:])

    # header_len is in 32 bit words
    start_tcp = 14 + (decoded_ip['header_len'] * 4)
    # decode_tcp_packet reads bytes 12 and 13 of the TCP header
    if len(data) < start_tcp + 14:
        return
    decoded_tcp = decode_tcp_packet(data[start_tcp:])
    start_http = start_tcp + (decoded_tcp['header_len'] * 4)
    decoded_http = decode_http_packet(data[start_http:])

    source = ip_stats.setdefault(decoded_ip['source_addr'], {})
    source['bytes_from'] = source.get('bytes_from', 0) + decoded_ip['total_len']

    if decoded_tcp['syn']:
        destination = ip_stats.setdefault(decoded_ip['destination_addr'], {})
        _count_one(destination, 'syn_to')

    if not decoded_http:
        return

    words_start = decoded_http[1].split()

    if decoded_http[0] == "req":
        _count_one(source.setdefault('req_from', {}), words_start[0])

    elif decoded_http[0] == "resp":
        # A status line may consist of the HTTP version only
        if len(words_start) > 1:
            _count_one(source.setdefault('resp_from', {}), words_start[1])

        for line in decoded_http[2]:
            fields = line.split(':', 1)
            if len(fields) != 2:
                continue
            name = fields[0].strip().lower()
            value = fields[1]

            if name == "content-type":
                media_type = value.split(';')[0].strip()
                if media_type:
                    _count_one(source.setdefault('content_from', {}), media_type)
            elif name == "server":
                tokens = value.split()
                if tokens:
                    _count_one(server_stats, tokens[0])


def _count_one(counters, key):
    """Add one to counters[key], starting at zero for a key not seen before."""
    counters[key] = counters.get(key, 0) + 1


def usage():
    """Print the command-line help text to standard output."""
    help_lines = (
        "Usage: httphaps [options] file",
        "",
        "    -i, --interval    interval in seconds.",
        "    -s, --snaplength  snaplength in bytes.",
        "    -t, --timeout     read timeout in milliseconds.",
        "    -d, --device      device.",
        "    -e, --exit        exit after <sec> seconds.",
        "    -c, --count       exit after <count> packets.  Overrides -e.",
        "    -r, --read \t      read packets from <file>.  Overrides -d.  Stdin is used if <file> is '-'.",
        "    --help            print this message to stdout and exit.",
        "    --version         print version information and exit.",
        "",
        "Please report bugs to hsh@freecode.dk.",
    )
    print("\n".join(help_lines))


def version():
    """Print the version and licence notice to standard output."""
    notice_lines = (
        "httphaps 0.1",
        "Copyright (C) 2003 Henrik S. Hansen and Jakob Frlich ",
        "httphaps comes with NO WARRANTY,",
        "to the extent permitted by law.",
        "You may redistribute copies of httphaps",
        "under the terms of the GNU General Public License.",
        "For more information about these matters,",
        "see the file named COPYING.",
    )
    print("\n".join(notice_lines))


if __name__ == "__main__":
    # Script entry point: parse the command line, open the capture source,
    # feed every packet to process_packet() and dump the statistics with
    # process_stat() every <interval> seconds and once more on shutdown.
    shortopts = "i:s:t:d:e:c:r:"
    # usage() documents --device; --interface is kept so existing
    # invocations keep working.
    longopts = ["interval=", "snaplength=", "timeout=", "device=", "interface=",
                "exit=", "count=", "read=", "help", "version"]

    try:
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        print("%s." % str(sys.exc_info()[1]).capitalize())
        usage()
        sys.exit(2)

    for o, a in opts:
        if o == "--version":
            version()
            sys.exit(0)
        elif o == "--help":
            usage()
            sys.exit(0)

    if len(args) != 1:
        print("httphaps takes exactly 1 argument.")
        usage()
        sys.exit(2)

    interval = 360
    snaplength = 1024
    timeout = 100
    device = None
    exit_after = None
    count = None
    capture_file = None

    try:
        for o, a in opts:
            if o in ("-i", "--interval"):
                interval = int(a)
            elif o in ("-s", "--snaplength"):
                snaplength = int(a)
            elif o in ("-t", "--timeout"):
                timeout = int(a)
            elif o in ("-d", "--device", "--interface"):
                device = a
            elif o in ("-e", "--exit"):
                exit_after = int(a)
            elif o in ("-c", "--count"):
                count = int(a)
            elif o in ("-r", "--read"):
                capture_file = a
    except ValueError:
        # int() rejected the argument of the option currently in o
        print("Option %s takes a numeric argument." % o)
        usage()
        sys.exit(2)

    p = pcap.pcapObject()

    if capture_file:
        # The name is handed over unchanged: libpcap itself treats "-" as
        # standard input, and open_offline() is given a file name here,
        # not a file object.
        try:
            p.open_offline(capture_file)
        except Exception:
            print("Error opening %s for reading." % capture_file)
            sys.exit(1)
    else:
        # The default device is only looked up when it is really needed,
        # i.e. for a live capture without -d.
        if device is None:
            try:
                device = pcap.lookupdev()
            except Exception:
                print("Device not found.  Are you root?")
                sys.exit(1)
        try:
            net, mask = pcap.lookupnet(device)
            p.open_live(device, snaplength, 0, timeout)
        except Exception:
            print("No such device.")
            sys.exit(1)

    p.setfilter("tcp port 80", 0, 0)
    start_time = time.time()

    # A count of 0 (or no -c at all) does not limit the capture.
    use_count = count is not None and count != 0

    try:
        timecount = time.time()

        while 1:
            # -c overrides -e
            if use_count:
                if count <= 0:
                    break
                count -= 1
            elif exit_after and (time.time() - start_time) >= exit_after:
                break

            # p.next() returns a (pktlen, data, timestamp) tuple
            pkt = p.next()
            # NOTE(review): when no packet is available (read timeout, end
            # of the capture file) next() presumably returns None or a
            # tuple without data, depending on the pylibpcap version --
            # both are handled here; confirm against the installed version.
            if pkt is None or not pkt[1]:
                if capture_file:
                    # nothing more to read from the file
                    break
            else:
                process_packet(*pkt)

            if (time.time() - timecount) >= interval:
                process_stat(args[0])
                timecount = time.time()
    except KeyboardInterrupt:
        print('Keyboard interrupt!')

    try:
        print('%d packets received, %d packets dropped, %d packets dropped by interface.' % p.stats())
    except Exception:
        # presumably raised when reading from a file, where libpcap keeps
        # no statistics; the final dump below must still happen
        print('Packet statistics not available.')
    print('Time spent: %s seconds.' % int(time.time() - start_time))
    print('Shutting down...')
    process_stat(args[0])
