# Amazon video scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


# Ordered (old, new) substitutions applied to the synopsis text.  They are
# applied one after another, in exactly this order.
_SYNOPSIS_SUBSTITUTIONS = (
    ("\n", ""),
    ("<i>", ""),
    ("</i>", ""),
    ("<I>", ""),
    ("</I>", ""),
    ("<b>", ""),
    ("</b>", ""),
    ("i</font>", " "),
    ("</font>", ""),
    ("<br>", "\n"),
    ("<BR>", "\n"),
    ("<Br>", "\n"),
    ("<bR>", "\n"),
    ("<br />", "\n"),
)

# Ordered (old, new) substitutions applied to the "Features" text.  Compared
# with the synopsis list this one also folds tabs and double spaces and
# removes upper-case bold tags.
_FEATURES_SUBSTITUTIONS = (
    ("\n", ""),
    ("\011", " "),
    ("  ", " "),
    ("<i>", ""),
    ("</i>", ""),
    ("<I>", ""),
    ("</I>", ""),
    ("i</font>", " "),
    ("</font>", ""),
    ("<br>", "\n"),
    ("<BR>", "\n"),
    ("<Br>", "\n"),
    ("<bR>", "\n"),
    ("<b>", ""),
    ("</b>", ""),
    ("<B>", ""),
    ("</B>", ""),
    ("<br />", "\n"),
)


def _before(text, marker):
    """Return the part of text that precedes marker.

    When marker does not occur the whole text is returned.  Slicing with the
    raw -1 result of string.find would silently drop the last character.
    """
    end = string.find(text, marker)

    if end == -1:
        return text

    return text[0:end]


def _fieldAfter(text, label, opener, closer):
    """Return the stripped value that follows label and opener in text.

    The value ends just before closer.  The caller is expected to have
    checked that label occurs in text.  If opener is missing, searchForPlus
    presumably yields None and the following string.find raises, which
    aborts the extraction.
    """
    text = searchForPlus(text, label)
    text = searchForPlus(text, opener)
    return stripText(_before(text, closer))


def _stripFontTags(text):
    """Remove every opening "<font ...>" tag from text."""
    while searchFor(text, "<font ") is not None:
        start = string.find(text, "<font ")
        length = string.find(text[start:], ">")

        # Unterminated tag: drop just enough ("<fo") to break the match so
        # that the loop terminates.
        if length == -1:
            length = 2

        text = text[0:start] + text[start + length + 1:]

    return text


def _cleanMarkup(text, substitutions):
    """Apply the ordered substitutions to text, then strip font tags."""
    for old, new in substitutions:
        text = string.replace(text, old, new)

    return _stripFontTags(text)


def _linkedNames(html):
    """Return the names found in the links of html, as "Last, First".

    Each name is the text between a '">' and the following "</a>".  The last
    blank-separated word is moved to the front; single-word names are kept
    unchanged.
    """
    names = []

    while searchFor(html, "\">") is not None:
        html = searchForPlus(html, "\">")
        name = stripText(_before(html, "</a>"))
        name = string.replace(name, "<b>", "")
        name = string.replace(name, "</b>", "")

        split = string.rfind(name, " ")

        if split != -1:
            name = stripText(name[split:]) + ", " + stripText(name[0:split])

        names.append(name)

    return names


def _joinNames(names):
    """Join names with " / ", skipping the separator while the result is empty."""
    joined = ""

    for name in names:
        if joined != "":
            joined = joined + " / " + name
        else:
            joined = name

    return joined


def _creditNames(text, label):
    """Return the " / "-joined names from the table cell that follows label."""
    cell = searchForPlus(text, label)
    cell = stripText(_before(cell, "</td>"))

    # Anything after a double line break is not part of the name list.
    cut = string.find(cell, "<br /><br />")

    if cut != -1:
        cell = stripText(cell[0:cut])

    return _joinNames(_linkedNames(cell))


def extract():
    """Parse the page held in the global source into the result globals.

    Input is read from the module globals source (the page text) and
    fullDateFormat (the string "false" reduces the release date to the part
    after its last "/").  Results are written to module globals such as
    title, actor1..actor10, director, screenwriter, comments, listprice,
    salesrank, format, image, value, wide, color, sound, closedCap, date,
    upc, studio, copyDate, running, mpaa, category and series.

    source is consumed progressively while parsing, so the order of the
    sections below is significant.  When no title can be located an
    exception is deliberately provoked; otherwise a section whose label is
    absent simply leaves its global untouched (apart from the defaults set
    at the top).
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # No place extraction default to US
    # place = "United States"


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    color               = "Color"
    sound               = "Stereo"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # Marketplace pricing is looked up in the untouched page, before source
    # starts being consumed below.
    marketinfo = source


    # Find List price
    if string.find(marketinfo, ">LIST:<") != -1:
        listprice = searchForPlus(marketinfo, ">LIST:<")
        listprice = searchFor(listprice, "$")
        listprice = stripText(_before(listprice, "<"))


    # Find Ranking
    if string.find(marketinfo, ">All-Time Sales Rank:<") != -1:
        salesrank = _fieldAfter(marketinfo, ">All-Time Sales Rank:<", ">", "<")


    # Main extraction
    # Find Title
    if string.find(source, "<td class=\"fontxlarge\"") == -1:
        # Not a recognisable page: searching twice for a string that is not
        # there is this file's way of provoking an exception (the second
        # call presumably receives None from the first).
        source = searchFor(source, "force exception")
        source = searchFor(source, "force exception")
        return

    source = searchForPlus(source, "<td class=\"fontxlarge\"")
    source = searchForPlus(source, "<strong>")
    title = stripText(_before(source, "<"))


    # Find Format
    if string.find(source, "<nobr>") != -1:
        format = _fieldAfter(source, "<nobr>", "'>", "<")


    # Find Image
    pos = string.find(source, "/movies")

    if pos != -1:
        # Back up to 50 characters to reach the start of the tag.  The start
        # is clamped at 0: a negative start would slice from the END of the
        # text instead.
        source = source[max(pos - 50, 0):]
        source = searchForPlus(source, " src='")
        image = stripText(_before(source, "'"))

        # The placeholder picture means there is no image.
        if string.find(image, "dvd-no-image.gif") != -1:
            image = ""


    # Find Price
    pos = string.find(source, ">$")

    if pos != -1:
        source = source[pos + 1:]
        value = stripText(_before(source, "<"))


    # Find Comments
    comments = ""

    if string.find(source, ">Synopsis<") != -1:
        source = searchForPlus(source, ">Synopsis<")
        source = searchForPlus(source, "class=")
        source = searchForPlus(source, ">")
        synopsis = stripText(_before(source, "</td>"))
        comments = _cleanMarkup(synopsis, _SYNOPSIS_SUBSTITUTIONS)


    # Find Actors
    actorTag = ">Actors:"

    if string.find(source, actorTag) != -1:
        source = searchForPlus(source, actorTag)
        actorList = _linkedNames(stripText(_before(source, "</td>")))

        # Only the slots for which a name was found are assigned.
        if len(actorList) > 0:
            actor1 = actorList[0]

        if len(actorList) > 1:
            actor2 = actorList[1]

        if len(actorList) > 2:
            actor3 = actorList[2]

        if len(actorList) > 3:
            actor4 = actorList[3]

        if len(actorList) > 4:
            actor5 = actorList[4]

        if len(actorList) > 5:
            actor6 = actorList[5]

        if len(actorList) > 6:
            actor7 = actorList[6]

        if len(actorList) > 7:
            actor8 = actorList[7]

        if len(actorList) > 8:
            actor9 = actorList[8]

        if len(actorList) > 9:
            actor10 = actorList[9]


    # Find Directors
    if string.find(source, ">Directors:") != -1:
        director = ""
        director = _creditNames(source, ">Directors:")


    # Find Writers
    if string.find(source, ">Writers:") != -1:
        screenwriter = ""
        screenwriter = _creditNames(source, ">Writers:")


    # Find Comments part 2
    if string.find(source, ">Features:<") != -1:
        source = searchForPlus(source, ">Features:<")
        source = searchForPlus(source, "class=")
        source = searchForPlus(source, ">")
        features = stripText(_before(source, "</td>"))
        features = _cleanMarkup(features, _FEATURES_SUBSTITUTIONS)

        if comments != "":
            comments = comments + "\n\n" + features
        else:
            comments = features


    # Find Widescreen, color
    if string.find(source, ">Video:") != -1:
        video = _fieldAfter(source, ">Video:", "<tr>", "</tr>")

        if string.find(video, "Widescreen") != -1:
            wide = "Y"
        else:
            wide = "N"

        if string.find(video, "Color") != -1:
            color = "Color"

        # Checked last, so B&W wins when both markers are present.
        if string.find(video, "B&W") != -1:
            color = "B&W"


    # Find Sound
    if string.find(source, ">Audio:<") != -1:
        sound = _fieldAfter(source, ">Audio:<", ": ", "<")
        pos = string.find(sound, "[CC]")

        if pos != -1:
            sound = stripText(sound[0:pos])
            closedCap = "Y"


    # Find Release date
    if string.find(source, ">Release Date:") != -1:
        date = _fieldAfter(source, ">Release Date:", ">", "<")

        if fullDateFormat == "false":
            # Keep only what follows the last "/".
            pos = string.rfind(date, "/")

            if pos != -1:
                date = stripText(date[pos + 1:])


    # Find UPC
    if string.find(source, ">UPC Code:") != -1:
        upc = _fieldAfter(source, ">UPC Code:", ">", "<")


    # Find Studio
    if string.find(source, ">Studio:<") != -1:
        studio = _fieldAfter(source, ">Studio:<", "'>", "<li>")
        pos = string.find(studio, "<")

        if pos != -1:
            studio = stripText(studio[0:pos])


    # Find Copyright date
    if string.find(source, ">Production Year:") != -1:
        copyDate = _fieldAfter(source, ">Production Year:", ">", "<")


    # Find Running time
    if string.find(source, ">Length:") != -1:
        running = _fieldAfter(source, ">Length:", ">", "<")


    # Find Rating
    if string.find(source, ">Rating:") != -1:
        mpaa = _fieldAfter(source, ">Rating:", ">", "<")


    # Find Category
    if string.find(source, ">Category<") != -1:
        category = _fieldAfter(source, ">Category<", "\">", "<")


    # Find Series
    if string.find(source, ">Series<") != -1:
        series = _fieldAfter(source, ">Series<", "\">", "<")



# Run the extraction.  The optional user exit script runs afterwards whether
# or not the extraction raised, and any exception still propagates to the
# caller once it has finished.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # Pass the namespace as an argument.  The former
        # "execfile(...) in globals()" was only a membership test on
        # execfile's None result, not a namespace selection.
        execfile("scrapers/vwuserexit.py", globals())
