# Amazon video scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


def extract():
    """Scrape one Amazon video product page held in the global ``source``.

    The function takes no arguments and returns nothing: it reads the page
    HTML from the module-level ``source`` and stores every extracted field
    (title, actors, prices, image, comments, ...) in the module-level
    globals declared below.

    Flow, in order:
      * reset the per-record defaults;
      * if the page carries a "sai-identify" marker, fetch the linked
        product page and scrape that instead;
      * cut out the "Other Versions" box;
      * hand over to amVideo() when ": Video" occurs within the first
        1000 characters (old VHS page layout);
      * extract marketplace pricing, sales rank, image, title, rating,
        price, format, category, synopsis, keywords, dates, MPAA rating,
        studio, actors, director, attributes, running time and editorial
        review.

    NOTE: ``source`` is rebound to shorter and shorter tails of the page as
    sections are consumed, so the order of the sections below matters.

    NOTE(review): judging by how they are used here, searchFor(text, tag)
    returns ``text`` starting at ``tag`` (None when absent) and
    searchForPlus(text, tag) returns the text following ``tag`` -- confirm
    against scrapers.scrapers.  Many slices below use the result of
    string.find() without checking for -1, which silently drops the last
    character when the terminator is missing.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # No place extraction default to US
    # place = "United States"


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    format              = "DVD"
    color               = "Color"
    sound               = "Stereo"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # The page only points at the real product page: take the first
    # "<a href=" found from 50 characters before "/ASIN/" (searched after
    # the "sai-identify" marker) and load that page instead.
    i = string.find(source, "sai-identify")

    if i != -1:
        source = searchForPlus(source, "sai-identify")
        i = string.find(source, "/ASIN/")
        source = source[i-50:]
        source = searchForPlus(source, "<a href=")
        i = string.find(source, ">")
        url = stripText(source[0:i])
        http = HTTPConnection()
        http.resetReferer()
        http.blockForLoad()
        source = http.getContents(url)

        # Dump the fetched page to trace2.html in the working directory;
        # close the handle even if the write fails.
        t2 = open("trace2.html", "w")

        try:
            t2.write(source)
        finally:
            t2.close()


    # If Amazon lists other editions, remove them.
    # complicates price extraction
    i = string.find(source, ">Other Versions")

    if i != -1:
        j = string.find(source[i:], "</div></div>")
        source = source[0:i+1] + source[i+j:]


    i = string.find(source, ": Video")

    if i != -1 and i < 1000:
        # Old VHS (video) format page
        amVideo()
        return


    # Find marketplace pricing
    marketinfo = source
    i = string.find(marketinfo, ">From Our Marketplace Sellers:<")

    if i != -1:
        marketinfo = searchForPlus(marketinfo, ">From Our Marketplace Sellers:<")


    # Find List price
    i = string.find(marketinfo, "List Price:")

    if i != -1:
        listprice = searchForPlus(marketinfo, "List Price:")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])


    # Find New/Used together (text of the last offer-listing link)
    i = string.rfind(marketinfo, "/offer-listing/")

    if i != -1:
        usedinfo = stripText(marketinfo[i:])
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br />")
        usedinfo = stripText(usedinfo[0:i])

        # Combined "N used & new" summary: the same count and price are
        # recorded for both the used and the new fields.
        i = string.find(usedinfo, "used &amp; new")

        if i != -1:
            i = string.find(usedinfo, " ")
            usedcount = stripText(usedinfo[0:i])
            newcount = stripText(usedinfo[0:i])
            usedprice = searchForPlus(usedinfo, "used &amp; new")
            usedprice = searchFor(usedprice, ">$")
            usedprice = searchForPlus(usedprice, ">")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])
            newprice = stripText(usedprice[0:i])

        # Same summary with an unescaped ampersand
        i = string.find(usedinfo, "used & new")

        if i != -1:
            i = string.find(usedinfo, " ")
            usedcount = stripText(usedinfo[0:i])
            newcount = stripText(usedinfo[0:i])
            usedprice = searchForPlus(usedinfo, "used & new")
            usedprice = searchFor(usedprice, ">$")
            usedprice = searchForPlus(usedprice, ">")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])
            newprice = stripText(usedprice[0:i])

        i = string.find(usedinfo, "available offers<")

        if i != -1:
            i = string.find(usedinfo, " ")
            usedcount = stripText(usedinfo[0:i])
            newcount = stripText(usedinfo[0:i])
            usedprice = searchFor(usedinfo, ">$")
            usedprice = searchForPlus(usedprice, ">")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])
            newprice = stripText(usedprice[0:i])

        i = string.find(usedinfo, ">See all used<")

        if i != -1:
            usedprice = searchForPlus(usedinfo, ">See all used<")
            usedprice = searchFor(usedprice, ">$")
            usedprice = searchForPlus(usedprice, ">")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])

        i = string.find(usedinfo, ">See all new<")

        if i != -1:
            newprice = searchForPlus(usedinfo, ">See all new<")
            newprice = searchFor(newprice, ">$")
            newprice = searchForPlus(newprice, ">")
            i = string.find(newprice, "<")
            newprice = stripText(newprice[0:i])

        i = string.find(usedinfo, ">See all collectible<")

        if i != -1:
            collectibleprice = searchForPlus(usedinfo, ">See all collectible<")
            collectibleprice = searchFor(collectibleprice, ">$")
            collectibleprice = searchForPlus(collectibleprice, ">")
            i = string.find(collectibleprice, "<")
            collectibleprice = stripText(collectibleprice[0:i])


    # Find New
    i = string.find(marketinfo, "offers/new")

    if i != -1:
        usedinfo = searchForPlus(marketinfo, "offers/new")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "new<")

        if i != -1:
            i = string.find(usedinfo, "new<")
            newcount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "new<")
            usedinfo = searchFor(usedinfo, ">$")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            newprice = stripText(usedinfo[0:i])


    # Find Collectible
    i = string.find(marketinfo, "offers/collectible")

    if i != -1:
        usedinfo = searchForPlus(marketinfo, "offers/collectible")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "collectible<")

        if i != -1:
            i = string.find(usedinfo, "collectible<")
            collectiblecount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "collectible<")
            usedinfo = searchFor(usedinfo, ">$")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            collectibleprice = stripText(usedinfo[0:i])


    # Find Used
    i = string.find(marketinfo, "offers/used")

    if i != -1:
        usedinfo = searchForPlus(marketinfo, "offers/used")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "used<")

        if i != -1:
            i = string.find(usedinfo, "used<")
            usedcount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "used<")
            usedinfo = searchFor(usedinfo, ">$")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            usedprice = stripText(usedinfo[0:i])


    # Find Ranking
    i = string.find(marketinfo, ">Amazon.com Sales Rank")

    if i != -1:
        usedinfo = searchForPlus(marketinfo, ">Amazon.com Sales Rank")
        usedinfo = searchForPlus(usedinfo, "</b>")
        usedinfo = searchForPlus(usedinfo, "#")
        i = string.find(usedinfo, "<")
        salesrank = stripText(usedinfo[0:i])
        i = string.find(salesrank, " ")

        # Keep only the number, not the trailing "in ..." text
        if i != -1:
            salesrank = stripText(salesrank[0:i])


    # Find Buyer Waiting
    i = string.find(marketinfo, "buyer waiting!")

    if i != -1:
        buyerwaiting = "Y"


    # Find Image (first try)
    image = ""
    i = string.find(source, "registerImage(\"original_image\"")

    if i != -1:
        tempdata = searchForPlus(source, "registerImage(\"original_image\"")
        tempdata = searchForPlus(tempdata, "\"")
        i = string.find(tempdata, "\"")
        image = stripText(tempdata[0:i])

        i = string.find(image, "no-image")

        if i != -1:
            image = ""

        # Drop everything between the first and the last comma of the URL
        i = string.find(image, ",")

        if i != -1:
            j = string.rfind(image[i:], ",")
            image = stripText(image[0:i] + image[i+j+1:])

        # Rewrite the "_AA" size marker to "_SL"
        i = string.find(image, "_AA")

        if i != -1:
            image = stripText(image[0:i] + "_SL" + image[i+3:])


    # Find Title (source is cut down to the text after the title tag)
    tag = "class=\"asinTitle\""

    i = string.find(source, tag)

    if i == -1:
        tag = "class=\"sans\""

    source = searchForPlus(source, tag)
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripText(source[0:i])
    title = string.replace(title, "\t", " ")

    # Format markers embedded in the title override the DVD default
    i = string.rfind(title, "[HD DVD]")

    if i != -1:
        format = "HD DVD"

    i = string.rfind(title, "(Combo HD DVD")

    if i != -1:
        format = "HD DVD Combo"

    i = string.rfind(title, "[Blu-ray]")

    if i != -1:
        format = "Blu-ray Disc"


    # Find Copyright date: the last "(....)" group of the title, kept
    # only when it is exactly four characters long
    i = string.rfind(title, " (")

    if i != -1:
        copyDate = stripText(title[i+2:])
        i = string.find(copyDate, ")")
        copyDate = stripText(copyDate[0:i])

        if len(copyDate) != 4:
            copyDate = ""

        # The title itself ends at the first " ("
        i = string.find(title, " (")
        title = stripText(title[0:i])

    i = string.rfind(title, " [")

    if i != -1:
        title = stripText(title[0:i])


    # Find user rating
    tag = "/>See all "
    i = string.find(source, tag)

    if i != -1:
        temp = searchForPlus(source, tag)
        # Check the text that is actually parsed below (temp, not source):
        # a stars image located only before the tag would otherwise make
        # searchFor() come back empty and break the next find().
        i = string.find(temp, "/customer-reviews/ratings/stars")

        if i != -1:
            temp = searchFor(temp, "/customer-reviews/ratings/stars")
            i = string.find(temp, ".")
            temp = stripText(temp[0:i])
            temp = searchForPlus(temp, "stars-")
            # File name encodes the score as e.g. "4-5" -> "4.5"
            temp = string.replace(temp, "-", ".")

            if temp != "":
                rating = temp + " Stars"


    # Find Image
    if image == "":
        imageTag = "/images.amazon.com"
        i = string.find(source, imageTag)

        if i == -1 or i > 3000:
            imageTag = "images-amazon.com/images/P/"
            i = string.find(source, imageTag)

        if i != -1 and i < 50000:
            # Back up so the enclosing src=" attribute is included
            source = source[i-25:]
            source = searchForPlus(source, " src=\"")
            i = string.find(source, "\"")
            image = stripText(source[0:i])

            i = string.find(image, "dvd-no-image.gif")

            if i != -1:
                image = ""

            i = string.find(image, "no-img-lg.gif")

            if i != -1:
                image = ""

            if image != "":
                oldimage = image
                i = string.find(image, "._P")

                # Remove the "._P ... _SC" modifier section of the URL
                if i != -1:
                    j = string.find(image, "_SC")
                    image = image[0:i] + "." + image[j+3:]

                if oldimage != image:
                    image = string.replace(image, "_.", ".")


    # Find Price
    i = string.find(source, ">Price:<")

    if i == -1:
        i = string.find(source, "class=price")

    if i == -1:
        i = string.find(source, "class=\"price\"")

    if i == -1:
        i = string.find(source, ">List Price:")

    if i != -1:
        source = source[i:]

        source = searchFor(source, "$")
        i = string.find(source, "<")
        value = stripText(source[0:i])

        # Price can contain special charges, ignore
        i = string.find(value, "+")

        if i != -1:
            value = stripText(value[0:i])
    else:
        value = usedprice


    # Find Format
    i = string.find(source, "dvd-gray-medium.")

    if i != -1:
        format = "DVD"

    i = string.find(source, "vhs-gray-medium.")

    if i != -1:
        format = "VHS"

    i = string.find(source, "vhs-medium.")

    if i != -1:
        format = "VHS"


    # Find category: every link text in the Genres entry, joined by " : "
    category = ""
    i = string.rfind(source, ">Genres:<")

    if i != -1:
        temp = stripText(source[i:])
        temp = searchForPlus(temp, ">Genres:<")
        i = string.find(temp, "</li>")
        temp = stripText(temp[0:i])

        i = string.find(temp, "</td>")

        if i != -1:
            temp = stripText(temp[0:i])

        while searchFor(temp, "\">") is not None:
            temp = searchForPlus(temp, "\">")
            i = string.find(temp, "<")

            if category == "":
                category = stripText(temp[0:i])
            else:
                category = category + " : " + stripText(temp[0:i])


    # Find Comments (synopsis)
    comments = ""
    i = string.find(source, ">Plot Synopsis:<")

    if i != -1:
        temp = searchForPlus(source, ">Plot Synopsis:<")
        temp = searchForPlus(temp, ">")
        i = string.find(temp, "</li>")
        temp = stripText(temp[0:i])

        i = string.find(temp, "</td>")

        if i != -1:
            temp = stripText(temp[0:i])

        # No further slicing here: re-using the stale index cut off the
        # last character of the synopsis whenever "</td>" was absent.
        temp = stripText(temp)
        temp = string.replace(temp, "&#233;", "e")
        temp = string.replace(temp, "&#34;", "\"")
        comments = "Plot Synopsis\n" + stripText(temp)
    else:
        i = string.find(source, ">Plot Outline:<")

        if i != -1:
            temp = searchForPlus(source, ">Plot Outline:<")
            temp = searchForPlus(temp, ">")
            i = string.find(temp, "</td>")
            temp = stripText(temp[0:i])
            temp = string.replace(temp, "&#233;", "e")
            temp = string.replace(temp, "&#34;", "\"")
            comments = "Plot Outline\n" + stripText(temp)


    # Find keywords: every link text in the Plot Keywords entry, joined
    # by ", "
    keywords = ""
    i = string.find(source, ">Plot Keywords:<")

    if i != -1:
        temp = searchForPlus(source, ">Plot Keywords:<")
        i = string.find(temp, "</td>")
        temp = stripText(temp[0:i])
        i = string.find(temp, "</ul>")

        if i != -1:
            temp = stripText(temp[0:i])

        i = string.find(temp, "<span ")

        if i != -1:
            temp = stripText(temp[0:i])

        while searchFor(temp, "\">") is not None:
            temp = searchForPlus(temp, "\">")
            temp = string.replace(temp, "&#160;", " ")
            # NOTE(review): the search string on this line had degraded to
            # an empty literal, which inserts a space between every
            # character.  It sits right after the "&#160;" entity, so it
            # was presumably a raw non-breaking space -- confirm.
            temp = string.replace(temp, "\xa0", " ")
            temp = string.replace(temp, "  ", " ")
            temp = string.replace(temp, "  ", " ")
            i = string.find(temp, "<")

            # Skip the "Show all ..." expander link
            if temp.startswith("(Show all") or temp.startswith("Show all"):
                continue

            if keywords == "":
                keywords = stripText(temp[0:i])
            else:
                keywords = keywords + ", " + stripText(temp[0:i])


    # Find Release date
    date = ""
    i = string.find(source, ">Release Date:")

    if i != -1:
        source = searchForPlus(source, ">Release Date:")
        i = string.find(source, "<")
        date = stripText(source[0:i])

        # Short format keeps only the last word of the date
        if fullDateFormat == "false":
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])

        i = string.rfind(date, ".")

        if i != -1:
            date = stripText(date[0:i])


    # Find Rating (MPAA), taken from the file name of the rating image
    i = string.find(source, ">Rating<")

    if i != -1:
        tempData = searchForPlus(source, ">Rating<")
        i = string.find(tempData, "<img src=\"")

        # No image at all, or one too far from the label to belong to
        # it, means the title is not rated.
        if i == -1 or i > 25:
            mpaa = "NR"
        else:
            tempData = searchForPlus(tempData, "<img src=\"")
            i = string.find(tempData, "\"")
            tempData = stripText(tempData[0:i])

            i = string.rfind(tempData, "/")

            if i != -1:
                tempData = stripText(tempData[i+1:])
                i = string.find(tempData, ".")
                mpaa = stripText(tempData[0:i])
            else:
                mpaa = tempData

            mpaa = string.upper(mpaa)


    # Find Studio
    i = string.find(source, ">Studio:<")

    if i != -1:
        tempData = searchForPlus(source, ">Studio:<")
        tempData = searchForPlus(tempData, ">")
        i = string.find(tempData, "<li>")
        studio = stripText(tempData[0:i])
        i = string.find(studio, "<")

        if i != -1:
            studio = stripText(studio[0:i])


    # Find Actors
    actorTag = ">Actors:"
    i = string.find(source, actorTag)

    if i == -1:
        actorTag = ">Starring"
        i = string.find(source, actorTag)

    if i != -1:
        tempData = searchForPlus(source, actorTag)
        i = string.find(tempData, "\n")
        actors = stripText(tempData[0:i])
        actorList = []

        while searchFor(actors, "href") is not None:
            i = string.find(actors, "href")

            if i != -1:
                actors = searchForPlus(actors, "href")
                actors = searchForPlus(actors, ">")
                i = string.find(actors, "<")
                actor = stripText(actors[0:i])

                if actor == "See more":
                    continue

                # "First Last" -> "Last, First" (split at the last space)
                i = string.rfind(actor, " ")

                if i != -1:
                    actor = stripText(actor[i:]) + ", " + stripText(actor[0:i])

                actorList.append(actor)

        # Only the first ten actors have a slot
        if len(actorList) > 0:
            actor1 = actorList[0]

        if len(actorList) > 1:
            actor2 = actorList[1]

        if len(actorList) > 2:
            actor3 = actorList[2]

        if len(actorList) > 3:
            actor4 = actorList[3]

        if len(actorList) > 4:
            actor5 = actorList[4]

        if len(actorList) > 5:
            actor6 = actorList[5]

        if len(actorList) > 6:
            actor7 = actorList[6]

        if len(actorList) > 7:
            actor8 = actorList[7]

        if len(actorList) > 8:
            actor9 = actorList[8]

        if len(actorList) > 9:
            actor10 = actorList[9]


    # Find Director (first one listed)
    tag = ">Director:"
    i = string.find(source, tag)

    if i == -1:
        tag = ">Directors:"
        i = string.find(source, tag)

    if i != -1:
        tempData = searchForPlus(source, tag)
        tempData = searchForPlus(tempData, "\">")
        i = string.find(tempData, "<")
        director = stripText(tempData[0:i])

        # "First Last" -> "Last, First"
        i = string.rfind(director, " ")

        if i != -1:
            director = stripText(director[i:]) + ", " + stripText(director[0:i])


    # Find Attributes
    i = string.rfind(source, ">Format:")

    if i != -1:
        tempData = stripText(source[i:])
        tempData = searchForPlus(tempData, ">")
        i = string.find(tempData, "</li>")
        attrs = stripText(tempData[0:i])

        # Find Color
        i = string.find(attrs, "Black & White")

        if i != -1:
            color = "Black & White"

        # Find Sound: when several formats are listed, the last check
        # that matches wins
        i = string.find(attrs, "HiFi Sound")

        if i != -1:
            sound = "HiFi Sound"

        i = string.find(attrs, "Dolby")

        if i != -1:
            sound = "Dolby"

        i = string.find(attrs, "DTS Surround Sound")

        if i != -1:
            sound = "DTS Surround Sound"

        i = string.find(attrs, "AC-3")

        if i != -1:
            sound = "AC-3"

        # Find Closed caption
        i = string.find(attrs, "closed-captioned")

        if i != -1:
            closedCap = "Y"

        # Find Widescreen
        i = string.find(attrs, "widescreen")

        if i != -1:
            wide = "Y"

        i = string.find(attrs, "Widescreen")

        if i != -1:
            wide = "Y"


    # Find Release date, if I didn't get it earlier
    if date == "":
        i = string.find(source, "Release Date:")

        if i != -1:
            tempData = searchForPlus(source, "Release Date:")
            tempData = searchForPlus(tempData, ">")
            i = string.find(tempData, "<")
            date = stripText(tempData[0:i])

            if fullDateFormat == "false":
                i = string.rfind(date, " ")

                if i != -1:
                    date = stripText(date[i+1:])

            i = string.rfind(date, ".")

            if i != -1:
                date = stripText(date[0:i])


    # Find Running time.  This is its own section: it used to be nested
    # under the release-date fallback above and was therefore skipped
    # whenever the release date had already been found.
    i = string.find(source, "Run Time:")

    if i != -1:
        tempData = searchForPlus(source, "Run Time:")
        tempData = searchForPlus(tempData, ">")
        i = string.find(tempData, "<")
        running = stripText(tempData[0:i])

        i = string.find(running, " (")

        if i != -1:
            running = stripText(running[0:i])


    # Find Comments (Review)
    i = string.find(source, ">Editorial Reviews<")

    if i != -1:
        source = searchForPlus(source, ">Editorial Reviews<")
        source = searchForPlus(source, "\">")
        i = string.find(source, "</div>")
        tempcomment = source[0:i]

        # Cut the review at the first of several trailing separators
        i = string.find(tempcomment, "<hr ")

        if i != -1:
            tempcomment = stripText(tempcomment[0:i])

        i = string.find(tempcomment, "<p> <span class=\"h1\">")

        if i != -1:
            tempcomment = stripText(tempcomment[0:i])

        i = string.find(tempcomment, "<font")

        if i != -1 and i < 100:
            tempcomment = searchForPlus(tempcomment, "<font")

        i = string.find(tempcomment, "<span")

        if i != -1:
            j = string.find(tempcomment[i:], ">")
            tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        i = string.find(tempcomment, "<table ")

        if i != -1:
            j = string.find(tempcomment[i:], "</table>")
            tempcomment = tempcomment[0:i] + tempcomment[i+j+8:]

        i = string.find(tempcomment, "... <")

        if i != -1:
            tempcomment = stripText(tempcomment[0:i+3])

        i = string.find(tempcomment, "See all Editorial Reviews")

        if i != -1:
            tempcomment = stripText(tempcomment[0:i])

        i = string.find(tempcomment, "</p> <p> ")

        if i != -1:
            tempcomment = stripText(tempcomment[0:i])

        # Turn the remaining markup and entities into plain text.  The
        # whitespace replacements are repeated to collapse longer runs.
        tempcomment = string.replace(tempcomment, "</a>", "")
        tempcomment = string.replace(tempcomment, "</A>", "")
        tempcomment = string.replace(tempcomment, "<p>", "\n\n")
        tempcomment = string.replace(tempcomment, "<P>", "\n\n")
        tempcomment = string.replace(tempcomment, "</p>", "")
        tempcomment = string.replace(tempcomment, "</P>", "")
        tempcomment = string.replace(tempcomment, "<BR>", "\n")
        tempcomment = string.replace(tempcomment, "<br>", "\n")
        tempcomment = string.replace(tempcomment, "<br />", "\n")
        tempcomment = string.replace(tempcomment, "<i>", "")
        tempcomment = string.replace(tempcomment, "</i>", "")
        tempcomment = string.replace(tempcomment, "<I>", "")
        tempcomment = string.replace(tempcomment, "</I>", "")
        tempcomment = string.replace(tempcomment, "<b>", "")
        tempcomment = string.replace(tempcomment, "</b>", "")
        tempcomment = string.replace(tempcomment, "</font>", "")
        tempcomment = string.replace(tempcomment, "</span>", "")
        tempcomment = string.replace(tempcomment, "&copy;", "")
        tempcomment = string.replace(tempcomment, "&#145;", "'")
        tempcomment = string.replace(tempcomment, "&#146;", "'")
        tempcomment = string.replace(tempcomment, "&#151;", "-")
        tempcomment = string.replace(tempcomment, "&#169;", "")
        tempcomment = string.replace(tempcomment, "&#8217;", "'")
        tempcomment = string.replace(tempcomment, "&quot;", "\"")
        tempcomment = string.replace(tempcomment, "&#8211;", "-")
        tempcomment = string.replace(tempcomment, "&ccedil;", "")
        tempcomment = string.replace(tempcomment, "<strong>", "")
        tempcomment = string.replace(tempcomment, "</strong>", "")
        tempcomment = string.replace(tempcomment, "  ", " ")
        tempcomment = string.replace(tempcomment, "  ", " ")
        tempcomment = string.replace(tempcomment, "  ", " ")
        tempcomment = string.replace(tempcomment, "\n ", "\n")
        tempcomment = string.replace(tempcomment, "\n ", "\n")
        tempcomment = string.replace(tempcomment, "\n ", "\n")
        tempcomment = string.replace(tempcomment, "\n\n\n", "\n\n")
        tempcomment = string.replace(tempcomment, "\n\n\n", "\n\n")
        tempcomment = string.replace(tempcomment, "\n\n\n", "\n\n")

        # Strip the opening tags that carry attributes.  When a tag is
        # never closed only its opener is removed, so every pass shortens
        # the text and the loop is guaranteed to end.
        for opener in ("<a ", "<A ", "<img src", "<span"):
            while searchFor(tempcomment, opener) is not None:
                i = string.find(tempcomment, opener)
                j = string.find(tempcomment[i:], ">")

                if j == -1:
                    j = len(opener) - 1

                tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        # Append the review to any synopsis collected earlier
        if comments == "":
            comments = tempcomment
        else:
            comments = comments + "\n\n" + tempcomment





def amVideo():
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global copies,location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # No place extraction default to US
    # place = "United States"


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    color               = "Color"
    sound               = "Stereo"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"

    print "AmVideo"

    i = string.find(source, "sai-identify")

    if i != -1:
        source = searchForPlus(source, "sai-identify")
        i = string.find(source, "/ASIN/")
        source = source[i-50:]
        source = searchForPlus(source, "<a href=")
        i = string.find(source, ">")
        url = stripText(source[0:i])
        http = HTTPConnection()
        http.resetReferer();
        http.blockForLoad();
        source = http.getContents(url)


    # Find marketplace pricing
    marketinfo = source
    i = string.find(marketinfo, ">From Our Marketplace Sellers:<")

    if (i != -1):
        marketinfo = searchForPlus(marketinfo, ">From Our Marketplace Sellers:<")


    # Find List price
    i = string.find(marketinfo, "List Price:")

    if (i != -1):
        listprice = searchForPlus(marketinfo, "List Price:")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])


    # Find New/Used together
    i = string.find(marketinfo, "offering/list")

    if i != -1:
        j = string.find(marketinfo, ">see all offers<")

        if j != -1:
            marketinfo = stripText(marketinfo[j:])
            i = string.find(marketinfo, "offering/list")

        usedinfo = stripText(marketinfo[i:])
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "</td>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, ">Used &amp; new<")

        if (i != -1):
            usedprice = searchForPlus(usedinfo, ">Used &amp; new<")
            usedprice = searchFor(usedprice, ">$")
            usedprice = searchForPlus(usedprice, ">")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])
            newprice = stripText(usedprice[0:i])

        i = string.find(usedinfo, "used &amp; new")

        if (i != -1):
            usedcount = stripText(usedinfo[i-20:])
            usedcount = searchForPlus(usedcount, "b>")
            i = string.find(usedcount, " ")
            usedcount = stripText(usedcount[0:i])
            newcount = stripText(usedcount[0:i])
            usedprice = searchForPlus(usedinfo, "used &amp; new")
            usedprice = searchFor(usedprice, ">$")
            usedprice = searchForPlus(usedprice, ">")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])
            newprice = stripText(usedprice[0:i])

        i = string.find(usedinfo, ">See all used<")

        if (i != -1):
            usedprice = searchForPlus(usedinfo, ">See all used<")
            usedprice = searchFor(usedprice, ">$")
            usedprice = searchForPlus(usedprice, ">")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])

        i = string.find(usedinfo, ">See all new<")

        if (i != -1):
            newprice = searchForPlus(usedinfo, ">See all new<")
            newprice = searchFor(newprice, ">$")
            newprice = searchForPlus(newprice, ">")
            i = string.find(newprice, "<")
            newprice = stripText(newprice[0:i])

        i = string.find(usedinfo, ">See all collectible<")

        if (i != -1):
            collectibleprice = searchForPlus(usedinfo, ">See all collectible<")
            collectibleprice = searchFor(collectibleprice, ">$")
            collectibleprice = searchForPlus(collectibleprice, ">")
            i = string.find(collectibleprice, "<")
            collectibleprice = stripText(collectibleprice[0:i])


    # Find New
    i = string.find(marketinfo, "offers/new")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "offers/new")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "new<")

        if (i != -1):
            i = string.find(usedinfo, "new<")
            newcount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "new<")
            usedinfo = searchFor(usedinfo, ">$")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            newprice = stripText(usedinfo[0:i])


    # Find Collectible
    i = string.find(marketinfo, "offers/collectible")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "offers/collectible")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "collectible<")

        if (i != -1):
            i = string.find(usedinfo, "collectible<")
            collectiblecount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "collectible<")
            usedinfo = searchFor(usedinfo, ">$")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            collectibleprice = stripText(usedinfo[0:i])


    # Find Used
    i = string.find(marketinfo, "offers/used")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "offers/used")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "used<")

        if (i != -1):
            i = string.find(usedinfo, "used<")
            usedcount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "used<")
            usedinfo = searchFor(usedinfo, ">$")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            usedprice = stripText(usedinfo[0:i])


    # Find Ranking
    i = string.find(marketinfo, ">Amazon.com Sales Rank")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, ">Amazon.com Sales Rank")
        usedinfo = searchForPlus(usedinfo, "</b>")
        usedinfo = searchForPlus(usedinfo, "#")
        i = string.find(usedinfo, "<")
        salesrank = stripText(usedinfo[0:i])
        i = string.find(salesrank, " ")

        if i != -1:
            salesrank = stripText(salesrank[0:i])


    # Find Buyer Waiting
    i = string.find(marketinfo, "buyer waiting!")

    if (i != -1):
        buyerwaiting = "Y"


    # Find Title
    tag = "<b class=\"sans\">"
    i = string.find(source, tag)

    if (i == -1):
        tag = "<b class=sans>"

    source = searchForPlus(source, tag)
    i = string.find(source, "<")
    title = stripText(source[0:i])
    title = string.replace(title, "\t", " ")


    # Find Copyright date
    i = string.rfind(title, " (")

    if i != -1:
        copyDate = stripText(title[i+2:])
        i = string.find(copyDate, ")")
        copyDate = stripText(copyDate[0:i])
        i = string.rfind(title, " (")
        title = stripText(title[0:i])


    # Find Image
    i = string.find(source, "images.amazon.com")

    if (i != -1):
        source = source[i-25:]
        source = searchForPlus(source, " src=\"")
        i = string.find(source, "\"")
        image = stripText(source[0:i])

        i = string.find(image, "dvd-no-image.gif")

        if i != -1:
            image = ""

        i = string.find(image, "no-img-lg.gif")

        if i != -1:
            image = ""

        if image != "":
            oldimage = image
            i = string.find(image, "._P")

            if i != -1:
                j = string.find(image, "_SC")
                image = image[0:i] + "." + image[j+3:]

            if oldimage != image:
                image = string.replace(image, "_.", ".")


    # Find Price
    i = string.find(source, ">Price:<")

    if (i == -1):
        i = string.find(source, "class=price")

    if (i == -1):
        i = string.find(source, ">List Price:")

    if (i != -1):
        source = source[i:]

        source = searchFor(source, "$")
        i = string.find(source, "<")
        value = stripText(source[0:i])

        # Price can contain special charges, ignore
        i = string.find(value, "+")

        if (i != -1):
            value = stripText(value[0:i])
    else:
        value = usedprice


    # Find Release date
    date = ""
    i = string.find(source, ">Release Date:")

    if i != -1:
        source = searchForPlus(source, ">Release Date:")
        i = string.find(source, "<")
        date = stripText(source[0:i])

        if fullDateFormat == "false":
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])

        i = string.rfind(date, ".")

        if i != -1:
            date = stripText(date[0:i])


    # Find Format
    i = string.find(source, "dvd-gray-medium.gif")

    if i != -1:
        format = "DVD"

    i = string.find(source, "vhs-gray-medium.gif")

    if i != -1:
        format = "VHS"

    i = string.find(source, "vhs-medium.gif")

    if i != -1:
        format = "VHS"


    # Find Rating
    i = string.find(source, ">Rated:<")

    if i != -1:
        tempData = searchForPlus(source, ">Rated:<")
        tempData = searchForPlus(tempData, ">")
        i = string.find(tempData, "<li>")
        tempData = stripText(tempData[0:i])

        i = string.find(tempData, "alt=\"")

        if i != -1:
            tempData = searchForPlus(tempData, "alt=\"")
            i = string.find(tempData, "\"")
            mpaa = stripText(tempData[0:i])
        else:
            mpaa = tempData

        i = string.rfind(mpaa, "\\")

        if i != -1:
            mpaa = stripText(mpaa[0:i])


    # Find Studio
    i = string.find(source, ">Studio:<")

    if i != -1:
        tempData = searchForPlus(source, ">Studio:<")
        tempData = searchForPlus(tempData, ">")
        i = string.find(tempData, "<li>")
        studio = stripText(tempData[0:i])
        i = string.find(studio, "<")

        if i != -1:
            studio = stripText(studio[0:i])


    # Find Actors
    actorTag = ">Starring"
    i = string.find(source, actorTag)

    if i != -1:
        source = searchForPlus(source, actorTag)
        i = string.find(source, "<br>")
        actors = stripText(source[0:i])
        actorList = []

        while (searchFor(actors, ",&nbsp;") != None):
            i = string.find(source, "href")

            if i != -1:
                actors = searchForPlus(actors, "href")
                actors = searchForPlus(actors, ">")
                i = string.find(actors, "<")
                actor = stripText(actors[0:i])

                i = string.rfind(actor, " ")

                if (i != -1):
                    actor = stripText(actor[i:]) + ", " + stripText(actor[0:i])

                actorList.append(actor)

            actors = searchForPlus(actors, ",&nbsp;")

        if len(actorList) > 0:
            actor1 = actorList[0]

        if len(actorList) > 1:
            actor2 = actorList[1]

        if len(actorList) > 2:
            actor3 = actorList[2]

        if len(actorList) > 3:
            actor4 = actorList[3]

        if len(actorList) > 4:
            actor5 = actorList[4]

        if len(actorList) > 5:
            actor6 = actorList[5]

        if len(actorList) > 6:
            actor7 = actorList[6]

        if len(actorList) > 7:
            actor8 = actorList[7]

        if len(actorList) > 8:
            actor9 = actorList[8]

        if len(actorList) > 9:
            actor10 = actorList[9]


    # Find Director
    i = string.find(source, ">Director:")

    if i != -1:
        source = searchForPlus(source, ">Director:")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        director = stripText(source[0:i])

        i = string.rfind(director, " ")

        if (i != -1):
            director = stripText(director[i:]) + ", " + stripText(director[0:i])


    # Find Attributes
    i = string.find(source, ">Format:")

    if i != -1:
        source = searchForPlus(source, ">Format:")
        source = searchForPlus(source, ">")
        i = string.find(source, "<br>")
        attrs = stripText(source[0:i])

        # Find Color
        i = string.find(attrs, "Black & White")

        if i != -1:
            color = "Black & White"

        # Find Sound
        i = string.find(attrs, "HiFi Sound")

        if i != -1:
            sound = "HiFi Sound"

        # Find Sound
        i = string.find(attrs, "Dolby")

        if i != -1:
            sound = "Dolby"

        # Find Sound
        i = string.find(attrs, "DTS Surround Sound")

        if i != -1:
            sound = "DTS Surround Sound"

        # Find Closed caption
        i = string.find(attrs, "Closed-captioned")

        if i != -1:
            closedCap = "Y"

        # Find Widerscreen
        i = string.find(attrs, "Widescreen")

        if i != -1:
            wide = "Y"


    # Find Release date, if I didn't get it earlier
    if date == "":
        i = string.find(source, "Release Date:")

        if i != -1:
            source = searchForPlus(source, "Release Date:")
            source = searchForPlus(source, ">")
            i = string.find(source, "<")
            date = stripText(source[0:i])

            if fullDateFormat == "false":
                i = string.rfind(date, " ")

                if i != -1:
                    date = stripText(date[i+1:])

            i = string.rfind(date, ".")

            if i != -1:
                date = stripText(date[0:i])


        # Find Running time
        i = string.find(source, "Run Time:")

        if i != -1:
            source = searchForPlus(source, "Run Time:")
            source = searchForPlus(source, ">")
            i = string.find(source, "<")
            running = stripText(source[0:i])


    # Find Comments
    i = string.find(source, ">Editorial Reviews<")

    if (i != -1):
        comments = ""
        source = searchForPlus(source, ">Editorial Reviews<")
        i = string.find(source, "<hr ")
        tempcomments = source[0:i]

        while (searchFor(tempcomments, "<a name=") != None):
            tempcomments = searchForPlus(tempcomments, "<a name=")
            i = string.find(tempcomments, "<font")

            if (i != -1 and i < 100):
                tempcomments = searchForPlus(tempcomments, "<font")

            i = string.find(tempcomments, "<span")

            if (i != -1):
                tempcomments = searchForPlus(tempcomments, "<span")

            tempcomments = searchForPlus(tempcomments, ">")
            i = string.find(tempcomments, "\n")
            tempcomment = stripText(tempcomments[0:i])

            i = string.find(tempcomment, "<span")

            if (i != -1):
                j = string.find(tempcomment[i:], ">")
                tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

            i = string.find(tempcomment, "<table ")

            if i != -1:
                j = string.find(tempcomment[i:], "</table>")
                tempcomment = tempcomment[0:i] + tempcomment[i+j+8:]

            i = string.find(tempcomment, "... <")

            if i != -1:
                tempcomment = stripText(tempcomment[0:i+3])

            i = string.find(tempcomment, "</p> <p> ")

            if i != -1:
                tempcomment = stripText(tempcomment[0:i])

            tempcomment = string.replace(tempcomment, "</a>", "")
            tempcomment = string.replace(tempcomment, "</A>", "")
            tempcomment = string.replace(tempcomment, "<p>", "\n\n")
            tempcomment = string.replace(tempcomment, "<P>", "\n\n")
            tempcomment = string.replace(tempcomment, "<BR>", "\n")
            tempcomment = string.replace(tempcomment, "<br>", "\n")
            tempcomment = string.replace(tempcomment, "<i>", "")
            tempcomment = string.replace(tempcomment, "</i>", "")
            tempcomment = string.replace(tempcomment, "<I>", "")
            tempcomment = string.replace(tempcomment, "</I>", "")
            tempcomment = string.replace(tempcomment, "<b>", "")
            tempcomment = string.replace(tempcomment, "</b>", "")
            tempcomment = string.replace(tempcomment, "</font>", "")
            tempcomment = string.replace(tempcomment, "</span>", "")
            tempcomment = string.replace(tempcomment, "&copy;", "")
            tempcomment = string.replace(tempcomment, "&#145;", "'")
            tempcomment = string.replace(tempcomment, "&#146;", "'")
            tempcomment = string.replace(tempcomment, "&#151;", "-")
            tempcomment = string.replace(tempcomment, "&#169;", "")
            tempcomment = string.replace(tempcomment, "&#8217;", "'")
            tempcomment = string.replace(tempcomment, "&quot;", "\"")
            tempcomment = string.replace(tempcomment, "&#8211;", "-")
            tempcomment = string.replace(tempcomment, "&ccedil;", "")
            tempcomment = string.replace(tempcomment, "  ", " ")
            tempcomment = string.replace(tempcomment, "\n ", "\n")

            while (searchFor(tempcomment, "<a ") != None):
                i = string.find(tempcomment, "<a ")
                j = string.find(tempcomment[i:], ">")

                if j == -1:
                    j = 2

                tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

            while (searchFor(tempcomment, "<A ") != None):
                i = string.find(tempcomment, "<A ")
                j = string.find(tempcomment[i:], ">")
                tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

            while (searchFor(tempcomment, "<img src") != None):
                i = string.find(tempcomment, "<img src")
                j = string.find(tempcomment[i:], ">")
                tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

            while (searchFor(tempcomment, "<span") != None):
                i = string.find(tempcomment, "<span")
                j = string.find(tempcomment[i:], ">")
                tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

            if comments != "":
                comments = comments + tempcomment
            else:
                comments = tempcomment


    # Find user rating
    i = string.find(source, ">Average Customer Review:<")

    if i != -1:
        temp = searchForPlus(source, ">Average Customer Review:<")
        temp = searchForPlus(temp, "<img src=\"")
        i = string.find(temp, "\"")
        temp = stripText(temp[0:i])
        i = string.rfind(temp, "/")

        if i != -1:
            temp = stripText(temp[i+1:])

        i = string.find(temp, ".")

        if i != -1:
            temp = stripText(temp[0:i])

        if temp.startswith("stars-") == 1:
            temp = searchForPlus(temp, "stars-")

        temp = string.replace(temp, "-", ".")

        if temp != "":
            rating = temp + " Stars"


    extractCategory()




def extractCategory():
    """Build the category global from the "Browse for" list in source.

    Takes no arguments and returns nothing.  source is advanced past the
    parts that were examined.  category is only replaced when the heading
    was found and the first list item is followed by another "<li>".
    """
    global title,author,format,bookclub,first,signed,read,date,publisher,place,isbn
    global value,category,copies,condition,rating,comments,source,image
    global fullDateFormat

    # Find category, the heading is written with or without a leading space
    heading = ">Browse for"
    found = string.find(source, heading)

    if found == -1:
        heading = "> Browse for"
        found = string.find(source, heading)

    if found == -1:
        return

    # Take the first list item after the heading
    source = searchForPlus(source, heading)
    source = searchForPlus(source, "<li>")
    itemEnd = string.find(source, "<li>")
    tempCatg = source[0:itemEnd]

    # Step over list items that are promotions or director listings,
    # checked one after the other against the current item
    for unwanted in ("Deals Under ", "DVDs Under ", "Directors<"):
        if string.find(tempCatg, unwanted) != -1:
            source = searchForPlus(source, "<li>")
            tempCatg = source[0:string.find(source, "<li>")]

    # Nothing usable when the first item had no following "<li>"
    if itemEnd == -1:
        return

    category = ""
    ignored = ("DVD", "VHS", "Genres", "Amazon.com Outlet", "DVD Outlet")

    # Each link text in the item is one level of the category path
    while (searchFor(tempCatg, "href=") != None):
        tempCatg = searchForPlus(tempCatg, "href=")
        tempCatg = searchForPlus(tempCatg, ">")
        cut = string.find(tempCatg, "<")
        level = stripText(tempCatg[0:cut])

        if level not in ignored:
            if category == "":
                category = level
            else:
                category = category + " : " + level

        tempCatg = tempCatg[cut:]




# Run the scraper; the optional user exit script is executed afterwards in
# this script's namespace whether or not the extraction raised.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # execfile takes the namespace as an argument; unlike the exec
        # statement it has no "in" form, so "execfile(...) in globals()"
        # would merely evaluate a membership test on its None result.
        execfile("scrapers/vwuserexit.py", globals())
