# fishpond.com.au video scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import convertAuthor
from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML


def _fieldText(page, label, inCell):
    """Return the stripped text that follows `label` in `page`.

    Skips past `label`, then (when `inCell` is true) past the next "<td"
    opener, then past the next ">", and returns everything up to the
    following "<" after passing it through stripText.

    The caller must already have checked that `label` occurs in `page`.
    """
    rest = searchForPlus(page, label)

    if inCell:
        rest = searchForPlus(rest, "<td")

    rest = searchForPlus(rest, ">")
    end = string.find(rest, "<")
    return stripText(rest[0:end])


def _setActors(names):
    """Copy at most the first ten entries of `names` into the module
    globals actor1 .. actor10.  Slots beyond len(names) are left untouched.
    """
    scope = globals()
    count = len(names)

    if count > 10:
        count = 10

    for n in range(count):
        scope["actor%d" % (n + 1)] = names[n]


def extract():
    """Extract video details from the page held in the global `source`.

    Results are stored in module globals (title, value, listprice, image,
    format, mpaa, category, studio, date, copyDate, running, upc, comments,
    actor1..actor10, director, screenwriter, composer, sound, wide) rather
    than being returned.

    Returns early, after only the defaults have been set, when the page
    carries the "did not match any products" marker.  When the page is a
    search-result listing, the first listed product link is fetched and
    parsing continues on that page.

    Title, price and image extraction consume `source` as they go, so the
    order of those sections matters; the later fields are all looked up in
    whatever remains of `source` after the image section.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # No place extraction default to US
    # place = "United States"


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    format              = "DVD"
    color               = "Color"
    sound               = "Stereo"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # Main extraction
    if string.find(source, "> - did not match any products.") != -1:
        return

    # A search-result listing: follow the first product link and carry on
    # with the product page it points to.
    if string.find(source, ">Products meeting the search criteria<") != -1:
        source = searchForPlus(source, ">Products meeting the search criteria<")
        source = searchForPlus(source, "class=\"productListing-data\"")
        source = searchForPlus(source, "href=\"")
        i = string.find(source, "\"")
        url = stripText(source[0:i])
        http = HTTPConnection()
        http.resetReferer()
        http.blockForLoad()
        source = http.getContents(url)

    source = searchForPlus(source, "<!-- body_text //-->")


    # Find Title
    source = searchForPlus(source, "class=\"pageHeading\"")
    source = searchForPlus(source, ">")

    # The heading text may be wrapped in an <h1 ...> element; step inside it.
    if source.startswith("<h1"):
        source = searchForPlus(source, ">")

    i = string.find(source, "<")
    title = stripText(source[0:i])


    # Find Price
    i = string.find(source, "Our Price:")

    if i != -1:
        source = source[i:]
        source = searchFor(source, "$")
        i = string.find(source, "<")
        value = stripText(source[0:i])

        # An amount closed by </s> is struck through: keep it as the list
        # price and take the next "$" amount as the actual value.
        if source[i:].startswith("</s>"):
            listprice = value
            source = searchForPlus(source, "$")
            source = searchFor(source, "$")
            i = string.find(source, "<")
            value = stripText(source[0:i])


    # Find Image
    tag = "<a href=\"http://image."
    i = string.find(source, tag)

    if i == -1:
        tag = "<img src=\"http://image."
        i = string.find(source, tag)

    if i != -1:
        source = searchFor(source, tag)
        source = searchForPlus(source, "\"")
        i = string.find(source, "\"")
        image = stripText(source[0:i])

        # The site's placeholder picture is not a real cover image.
        if string.find(image, "dvd-no-image.gif") != -1:
            image = ""


    # Find Format
    if string.find(source, ">Format:") != -1:
        format = _fieldText(source, ">Format:", 0)

        i = string.find(format, "(")

        if i != -1:
            # The parenthesised part is stored as the rating.  Look for
            # the closing parenthesis after the opening one, and treat a
            # missing ")" as running to the end of the text so the slices
            # below never use -1 as an index.
            j = string.find(format, ")", i)

            if j == -1:
                j = len(format)

            mpaa = stripText(format[i+1:j])

            format = stripText(format[0:i]) + stripText(format[j+1:])

            i = string.find(format, ", ")

            if i != -1:
                category = stripText(format[i+1:])
                format = stripText(format[0:i])


    # Find Studio
    if string.find(source, ">Publisher:<") != -1:
        studio = _fieldText(source, ">Publisher:<", 0)


    # Find Release date
    # NOTE(review): this reads the global `studio` even when no Publisher
    # field was found above, so it relies on `studio` already being defined
    # by the caller -- confirm against the host environment.
    i = string.find(studio, ", ")

    if i != -1:
        date = stripText(studio[i+1:])
        studio = stripText(studio[0:i])

        # With full dates disabled keep only the last word of the date.
        if fullDateFormat == "false":
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])


    # Find Copyright date
    if string.find(source, ">Released Year:") != -1:
        copyDate = _fieldText(source, ">Released Year:", 0)


    # Find Runtime
    if string.find(source, ">RunTime:") != -1:
        running = _fieldText(source, ">RunTime:", 1)


    # Find UPC
    if string.find(source, ">UPC:<") != -1:
        upc = _fieldText(source, ">UPC:<", 0)


    # Find Comments
    tag = "<br>\n<p>"
    i = string.find(source, tag)

    if i == -1:
        tag = "<br><p>"
        i = string.find(source, tag)

    # Only accept a description marker within the first 3000 characters of
    # the remaining page.
    if i != -1 and i < 3000:
        comments = searchForPlus(source, tag)
        i = string.find(comments, "</td>")
        comments = stripText(comments[0:i])

        # Cut the description at the first embedded object, cast heading
        # or nested table, applying each cut to the already-trimmed text.
        for marker in ("<object ", "<B>Actors</B>", "<table"):
            i = string.find(comments, marker)

            if i != -1:
                comments = stripText(comments[0:i])

        # Line breaks in the raw HTML are not meaningful; only the markup
        # below produces line breaks in the stored comments.
        comments = string.replace(comments, "\n", " ")

        for markup, text in (("<br />", "\n"),
                             ("<br>", "\n"),
                             ("<p>", "\n"),
                             ("<P>", "\n"),
                             ("</p>", "\n"),
                             ("</P>", "\n"),
                             ("<I>", ""),
                             ("</I>", ""),
                             ("<b>", ""),
                             ("</b>", "")):
            comments = string.replace(comments, markup, text)

        # Drop every space that directly follows a line break; repeat so
        # that runs of spaces are removed completely.
        while string.find(comments, "\n ") != -1:
            comments = string.replace(comments, "\n ", "\n")


    # Find Actors
    actorList = []

    if string.find(source, "Cast:<") != -1:
        actors = searchForPlus(source, "Cast:<")
        actors = searchForPlus(actors, "<td")
        actors = searchForPlus(actors, ">")
        i = string.find(actors, "</td>")
        # Terminate the list with a separator so the last name is handled
        # by the same loop as the others.
        actors = stripText(actors[0:i]) + ", "

        while searchFor(actors, ", ") != None:
            i = string.find(actors, ", ")
            actor = stripText(actors[0:i])

            # Discard any markup trailing the name.
            i = string.find(actor, "<")

            if i != -1:
                actor = stripText(actor[0:i])

            actorList.append(convertAuthor(actor))
            actors = searchForPlus(actors, ", ")

    # Fall back to the "Actor(s):" layout when no cast table was found.
    if len(actorList) == 0:
        if string.find(source, "Actor(s):") != -1:
            actors = searchForPlus(source, "Actor(s):")
            i = string.find(actors, "</div>")
            actors = stripText(actors[0:i])

            while searchFor(actors, "&nbsp;-&nbsp;") != None:
                actors = searchForPlus(actors, "&nbsp;-&nbsp;")
                i = string.find(actors, "<")
                actor = stripText(actors[0:i])
                actor = stripHTML(actor)

                # Reorder "First Last" as "Last, First", splitting at the
                # last space.
                i = string.rfind(actor, " ")

                if i != -1:
                    actor = stripText(actor[i:]) + ", " + stripText(actor[0:i])

                actorList.append(actor)

    _setActors(actorList)


    # Find Director
    if string.find(source, ">Director:<") != -1:
        director = convertAuthor(_fieldText(source, ">Director:<", 1))


    # Find Screenwriter
    if string.find(source, ">Writer:<") != -1:
        screenwriter = convertAuthor(_fieldText(source, ">Writer:<", 1))


    # Find Composer
    if string.find(source, ">Composer:<") != -1:
        composer = convertAuthor(_fieldText(source, ">Composer:<", 1))


    # Find Sound
    if string.find(source, ">Audio:<") != -1:
        sound = _fieldText(source, ">Audio:<", 1)


    # Find Widescreen
    if string.find(source, ">Widescreen: <") != -1:
        # Anything other than an exact "Yes" counts as not widescreen.
        if _fieldText(source, ">Widescreen: <", 1) == "Yes":
            wide = "Y"
        else:
            wide = "N"




# Run the scraper, then always give the optional user exit script a chance
# to run, even when extraction raised.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # execfile is a function, not the exec statement: the namespace has
        # to be passed as an argument.  Writing "execfile(...) in globals()"
        # only performs a discarded membership test on the return value.
        execfile("scrapers/vwuserexit.py", globals())
