# Amazon UK video scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


def _stripOpeningTags(text, opener):
    """Remove every tag in text that starts with opener.

    Each match is cut from the opener up to and including the next ">".
    When a tag has no closing ">" only the opener itself is removed, so
    the loop always makes progress instead of spinning forever.

    Returns the cleaned text.
    """
    while (searchFor(text, opener) != None):
        i = string.find(text, opener)
        j = string.find(text[i:], ">")

        if j == -1:
            # Unterminated tag, drop just the opener
            j = len(opener) - 1

        text = text[0:i] + text[i+j+1:]

    return text


def extract():
    """Extract video details from the Amazon UK page held in the global source.

    Reads the globals source (page HTML) and fullDateFormat, and stores
    everything it finds in the field globals declared below (title,
    actor1..actor10, director, prices, tracks, comments, ...).

    Flow:
      * if the page contains a "sai-identify" marker, the first /ASIN/
        link after it is fetched and replaces source
      * pages flagged as ineligible for Marketplace selling are ignored
      * pages containing class="sans"> are handed over to am2()
      * otherwise the older page layout is parsed here, and
        extractCategory() is called at the end

    source is consumed as parsing proceeds, so the order of the sections
    below matters. Returns nothing.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global copies,location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # No place extraction default to US
    # place = "United States"


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    format              = "DVD"
    color               = "Color"
    sound               = "Mono"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # Follow the product link when the page carries a "sai-identify" marker
    i = string.find(source, "sai-identify")

    if i != -1:
        source = searchForPlus(source, "sai-identify")
        i = string.find(source, "/ASIN/")

        # Only follow a link when one is actually present
        if i != -1:
            # Back up to pick up the enclosing <a href=, but never before
            # the start of the page (a negative start would wrap around)
            source = source[max(i - 50, 0):]
            source = searchForPlus(source, "<a href=")
            i = string.find(source, ">")
            url = stripText(source[0:i])
            http = HTTPConnection()
            http.resetReferer()
            http.blockForLoad()
            source = http.getContents(url)

            # Debugging aid: keep a copy of the fetched page. A failure to
            # write it must not abort the lookup.
            try:
                t2 = open("trace2.html", "w")

                try:
                    t2.write(source)
                finally:
                    t2.close()
            except IOError:
                pass


    i = string.find(source, "ineligible for Amazon.co.uk Marketplace selling.")

    if (i != -1):
        # Not found
        return


    # Alternate format?
    i = string.find(source, "class=\"sans\">")

    if i != -1:
        am2()
        return


    # Find marketplace pricing
    marketinfo = source


    # Find List price
    i = string.find(marketinfo, ">List Price:")

    if (i != -1):
        listprice = searchForPlus(marketinfo, ">List Price:")
        listprice = searchFor(listprice, "&pound;")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])


    i = string.find(marketinfo, ">From Our Marketplace Sellers:<")

    if (i != -1):
        marketinfo = searchForPlus(marketinfo, ">From Our Marketplace Sellers:<")


    # Find New
    i = string.find(marketinfo, "sdp_new")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_new")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "new<")

        if (i != -1):
            newcount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "new<")
            usedinfo = searchFor(usedinfo, ">")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            newprice = stripText(usedinfo[0:i])


    # Find Collectible
    i = string.find(marketinfo, "sdp_coll")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_coll")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "collectable<")

        if (i != -1):
            collectiblecount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "collectable<")
            usedinfo = searchFor(usedinfo, ">")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            collectibleprice = stripText(usedinfo[0:i])


    # Find Used
    i = string.find(marketinfo, "sdp_used")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_used")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "used<")

        if (i != -1):
            usedcount = stripText(usedinfo[0:i])

            # Deliberately broad: any failure while picking out the price
            # is treated as "no used price" rather than aborting the lookup
            try:
                usedinfo = searchForPlus(usedinfo, "used<")
                usedinfo = searchFor(usedinfo, ">")
                usedinfo = searchForPlus(usedinfo, ">")
                i = string.find(usedinfo, "<")
                usedprice = stripText(usedinfo[0:i])
            except:
                usedprice = ""


    # Find Ranking
    i = string.find(marketinfo, ">Amazon.co.uk Sales Rank")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, ">Amazon.co.uk Sales Rank")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<")
        salesrank = stripText(usedinfo[0:i])


    # Find Buyer Waiting
    i = string.find(marketinfo, "buyer waiting!")

    if (i != -1):
        buyerwaiting = "Y"


    # Find Title
    source = searchForPlus(source, "size=+1><b>")
    i = string.find(source, "<")
    title = stripText(source[0:i])


    # Find Copyright date, a trailing "[....]" on the title
    i = string.rfind(title, "[")

    if i != -1:
        copyDate = stripText(title[i+1:])
        title = stripText(title[0:i])
        i = string.find(copyDate, "]")
        copyDate = stripText(copyDate[0:i])


    # Find Image
    i = string.find(source, "images-eu.amazon.com")

    if (i != -1):
        # Back up to the enclosing href without wrapping past the start
        image = source[max(i - 50, 0):]
        image = searchForPlus(image, " href=\"")
        i = string.find(image, "\"")
        image = stripText(image[0:i])
        image = string.replace(image, ".LZZZ", ".MZZZ")

        # Discard anything that does not look like a cover image
        i = string.find(image, "ZZZ")

        if i == -1:
            image = ""

        i = string.find(image, "dvd-")

        if i != -1:
            image = ""

        i = string.find(image, "truck-icon")

        if i != -1:
            image = ""


    # Find Price
    i = string.find(source, ">Our Price:")

    if (i == -1):
        i = string.find(source, ">List Price:")

    if (i != -1):
        source = source[i:]

        source = searchFor(source, "&pound;")
        i = string.find(source, "<")
        value = stripText(source[0:i])

        # Price can contain special charges, ignore
        i = string.find(value, "+")

        if (i != -1):
            value = stripText(value[0:i])


    # Find Format
    i = string.find(source, "dvd-gray-medium-")

    if i != -1:
        format = "DVD"

    i = string.find(source, "vhs-medium-")

    if i != -1:
        format = "VHS"


    # Find Release date
    i = string.find(source, ">Release Date:")

    if i != -1:
        source = searchForPlus(source, ">Release Date:")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        date = stripText(source[0:i])

        # Keep only the last word of the date when full dates are off
        if fullDateFormat == "false":
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])

        i = string.rfind(date, ".")

        if i != -1:
            date = stripText(date[0:i])


    # Find Rating
    i = string.find(source, ">Classification:<")

    if i != -1:
        source = searchForPlus(source, ">Classification:<")
        source = searchForPlus(source, ">")
        i = string.find(source, "<br>")
        tempData = stripText(source[0:i])

        # Prefer the alt text of a rating image when there is one
        i = string.find(tempData, "alt=\"")

        if i != -1:
            tempData = searchForPlus(tempData, "alt=\"")
            i = string.find(tempData, "\"")
            mpaa = stripText(tempData[0:i])
        else:
            mpaa = tempData

        i = string.rfind(mpaa, "\\")

        if i != -1:
            mpaa = stripText(mpaa[0:i])


    # Find Actors
    actorTag = ">Starring:"
    i = string.find(source, actorTag)

    if i != -1:
        source = searchForPlus(source, actorTag)
        i = string.find(source, "<br>")
        actors = stripText(source[0:i])
        actorList = []

        # NOTE(review): the loop is driven by the ",&nbsp;" separator, so a
        # name that follows the last separator looks like it is never
        # collected - confirm against a real page before changing
        while (searchFor(actors, ",&nbsp;") != None):
            i = string.find(actors, "<a href")

            if i != -1:
                actors = searchForPlus(actors, "<a href")
                actors = searchForPlus(actors, ">")
                i = string.find(actors, "<")
                actor = stripText(actors[0:i])

                # Store as "Last, First"
                i = string.rfind(actor, " ")

                if (i != -1):
                    actor = stripText(actor[i:]) + ", " + stripText(actor[0:i])

                actorList.append(actor)

            actors = searchForPlus(actors, ",&nbsp;")

        if len(actorList) > 0:
            actor1 = actorList[0]

        if len(actorList) > 1:
            actor2 = actorList[1]

        if len(actorList) > 2:
            actor3 = actorList[2]

        if len(actorList) > 3:
            actor4 = actorList[3]

        if len(actorList) > 4:
            actor5 = actorList[4]

        if len(actorList) > 5:
            actor6 = actorList[5]

        if len(actorList) > 6:
            actor7 = actorList[6]

        if len(actorList) > 7:
            actor8 = actorList[7]

        if len(actorList) > 8:
            actor9 = actorList[8]

        if len(actorList) > 9:
            actor10 = actorList[9]


    # Find Director
    i = string.find(source, ">Director:")

    if i != -1:
        source = searchForPlus(source, ">Director:")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        director = stripText(source[0:i])

        # Store as "Last, First"
        i = string.rfind(director, " ")

        if (i != -1):
            director = stripText(director[i:]) + ", " + stripText(director[0:i])


    # Find Color
    i = string.find(source, "&#149;&nbsp;Color")

    if i != -1:
        color = "Color"

    i = string.find(source, "&#149;&nbsp;Black")

    if i != -1:
        color = "B&W"


    # Find Closed captioned
    i = string.find(source, "Closed-captioned")

    if i != -1:
        closedCap = "Y"


    # Find Widescreen
    i = string.find(source, "Widescreen")

    if i != -1:
        wide = "Y"

    # Find Writer
    i = string.find(source, ">Story:<")

    if i != -1:
        source2 = searchForPlus(source, ">Story:<")
        source2 = searchForPlus(source2, ">")
        i = string.find(source2, "<")
        writer = stripText(source2[0:i])


    # Find Screenplay, will overwrite with screenwriter
    i = string.find(source, ">Screenplay:<")

    if i != -1:
        source2 = searchForPlus(source, ">Screenplay:<")
        source2 = searchForPlus(source2, ">")
        i = string.find(source2, "<")
        screenwriter = stripText(source2[0:i])


    # Find Screenwriter
    i = string.find(source, ">Screen Writer:<")

    if i != -1:
        source2 = searchForPlus(source, ">Screen Writer:<")
        source2 = searchForPlus(source2, ">")
        i = string.find(source2, "<")
        screenwriter = stripText(source2[0:i])


    # Find Director of photography
    i = string.find(source, ">Director of Photography:<")

    if i != -1:
        source2 = searchForPlus(source, ">Director of Photography:<")
        source2 = searchForPlus(source2, ">")
        i = string.find(source2, "<")
        photographer = stripText(source2[0:i])


    # Find Composer
    i = string.find(source, ">Music Contributor(s):<")

    if i != -1:
        source2 = searchForPlus(source, ">Music Contributor(s):<")
        source2 = searchForPlus(source2, ">")
        i = string.find(source2, "<")
        composer = stripText(source2[0:i])


    # Find Category
    i = string.find(source, "<b>Subject / Style:</b>")

    if i != -1:
        source = searchForPlus(source, "<b>Subject / Style:</b>")
        i = string.find(source, "<")
        category = stripText(source[0:i])


    # Find Tracks
    i = string.find(source, ">Table of Contents<")

    if i != -1:
        source = searchForPlus(source, ">Table of Contents<")
        i = string.find(source, "</ol>")
        tracks = stripText(source[0:i])
        trackList = []

        while (searchFor(tracks, "<li>") != None):
            tracks = searchForPlus(tracks, "<li>")
            i = string.find(tracks, "</li>")

            if (i == -1):
                track = tracks
            else:
                track = stripText(tracks[0:i])

            # Only the first line of an entry is kept
            i = string.find(track, "<br>")

            if (i != -1):
                track = stripText(track[0:i])

            trackList.append(track)

        if len(trackList) > 0:
            track1 = trackList[0]

        if len(trackList) > 1:
            track2 = trackList[1]

        if len(trackList) > 2:
            track3 = trackList[2]

        if len(trackList) > 3:
            track4 = trackList[3]

        if len(trackList) > 4:
            track5 = trackList[4]

        if len(trackList) > 5:
            track6 = trackList[5]

        if len(trackList) > 6:
            track7 = trackList[6]

        if len(trackList) > 7:
            track8 = trackList[7]

        if len(trackList) > 8:
            track9 = trackList[8]

        if len(trackList) > 9:
            track10 = trackList[9]

        if len(trackList) > 10:
            track11 = trackList[10]

        if len(trackList) > 11:
            track12 = trackList[11]

        if len(trackList) > 12:
            track13 = trackList[12]

        if len(trackList) > 13:
            track14 = trackList[13]

        if len(trackList) > 14:
            track15 = trackList[14]

        if len(trackList) > 15:
            track16 = trackList[15]

        if len(trackList) > 16:
            track17 = trackList[16]

        if len(trackList) > 17:
            track18 = trackList[17]

        if len(trackList) > 18:
            track19 = trackList[18]

        if len(trackList) > 19:
            track20 = trackList[19]


    # Find Comments
    i = string.find(source, ">Reviews<")

    if (i != -1):
        comments = ""
        source = searchForPlus(source, ">Reviews<")
        source = searchForPlus(source, "<br> ")
        i = string.find(source, "<p align=right>")
        tempcomments = source[0:i]

        # Cut the review off at whichever end marker is present
        i = string.find(tempcomments, "</form>")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "<hr noshade")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        # Skip past a leading font/span wrapper
        i = string.find(tempcomments, "<font")

        if (i != -1):
            tempcomments = searchForPlus(tempcomments, "<font")

        i = string.find(tempcomments, "<span")

        if (i != -1):
            tempcomments = searchForPlus(tempcomments, "<span")

        tempcomments = searchForPlus(tempcomments, ">")
        i = string.find(tempcomments, "\n")
        tempcomment = stripText(tempcomments[0:i])
        i = string.find(tempcomment, "<span")

        if (i != -1):
            j = string.find(tempcomment[i:], ">")
            tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        # Drop the blank that follows paragraph and line break tags
        while (searchFor(tempcomment, "<p> ") != None):
            i = string.find(tempcomment, "<p> ")
            tempcomment = tempcomment[0:i+3] + tempcomment[i+4:]

        while (searchFor(tempcomment, "<P> ") != None):
            i = string.find(tempcomment, "<P> ")
            tempcomment = tempcomment[0:i+3] + tempcomment[i+4:]

        while (searchFor(tempcomment, "<br> ") != None):
            i = string.find(tempcomment, "<br> ")
            tempcomment = tempcomment[0:i+4] + tempcomment[i+5:]

        while (searchFor(tempcomment, "<BR> ") != None):
            i = string.find(tempcomment, "<BR> ")
            tempcomment = tempcomment[0:i+4] + tempcomment[i+5:]

        # Simple tags and entities, replaced in this order
        replacements = [
            ("</a>", ""),
            ("</A>", ""),
            ("<p>", "\n\n"),
            ("<P>", "\n\n"),
            ("<BR>", "\n"),
            ("<br>", "\n"),
            ("<i>", ""),
            ("</i>", ""),
            ("<I>", ""),
            ("</I>", ""),
            ("<b>", ""),
            ("</b>", ""),
            ("</font>", ""),
            ("</span>", ""),
            ("&copy;", ""),
            ("&#133;", "..."),
            ("&#145;", "'"),
            ("&#169;", ""),
            ("&#8217;", "'"),
            ("&quot;", "\""),
            ("&#8211;", "-"),
            ("&#146;", "'"),
            ("&#150;", "-"),
            ("<blockquote>", ""),
            ("</blockquote>", ""),
            ("<ul>", ""),
            ("</ul>", ""),
            ("<li>", "    "),
            ("</li>", ""),
        ]

        for old, new in replacements:
            tempcomment = string.replace(tempcomment, old, new)

        # Tags that carry attributes are removed up to their closing ">"
        tempcomment = _stripOpeningTags(tempcomment, "<a ")
        tempcomment = _stripOpeningTags(tempcomment, "<A ")
        tempcomment = _stripOpeningTags(tempcomment, "<img src")
        tempcomment = _stripOpeningTags(tempcomment, "<span")

        comments = tempcomment


    extractCategory()


def extractCategory():
    """Build the category field from the "Browse for" links on the page.

    Looks in the global source for a "<b>Browse for" heading (with or
    without a blank after the <b>), takes the text between the next two
    <br> tags and joins the link labels found there with " : ". The
    labels "DVD" and "Categories" are left out.

    source is advanced past the heading as a side effect. category is
    only replaced when the heading and the closing <br> are both found.
    """
    global source, category

    # The heading comes in two spellings, try the plain one first
    heading = "<b>Browse for"

    if string.find(source, heading) == -1:
        heading = "<b> Browse for"

        if string.find(source, heading) == -1:
            return

    source = searchForPlus(source, heading)
    source = searchForPlus(source, "<br>")
    end = string.find(source, "<br>")

    if end == -1:
        return

    links = source[0:end]
    category = ""

    while searchFor(links, "href=") != None:
        # The label is the text between the link's ">" and the next "<"
        links = searchForPlus(links, "href=")
        links = searchForPlus(links, ">")
        end = string.find(links, "<")
        label = stripText(links[0:end])
        links = links[end:]

        if label in ("DVD", "Categories"):
            continue

        if category != "":
            category = category + " : "

        category = category + label






def am2():
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global copies,location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source


    print "Alternate Amazon format"


    # If Amazon lists other editions, remove them.
    # complicates price extraction
    i = string.find(source, "<b>Other Versions")

    if i != -1:
        j = string.find(source[i:], "</div></div>")
        source = source[0:i+1] + source[i+j:]

    # Find marketplace pricing
    marketinfo = source


    # Find List price
    i = string.find(marketinfo, "List Price:")

    if (i != -1):
        listprice = searchForPlus(marketinfo, "List Price:")
        listprice = searchForPlus(listprice, "class=\"listprice\"")
        listprice = searchForPlus(listprice, ">")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])
    else:
        i = string.find(marketinfo, ">RRP:<")

        if i != -1:
            listprice = searchForPlus(marketinfo, ">RRP:<")
            listprice = searchForPlus(listprice, "class=\"listprice\"")
            listprice = searchForPlus(listprice, ">")
            i = string.find(listprice, "<")
            listprice = stripText(listprice[0:i])


    # Find New/Used together
    i = string.rfind(marketinfo, "/offer-listing/")

    if i != -1:
        usedinfo = stripText(marketinfo[i:])
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br />")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "used & new")

        if i != -1:
            i = string.find(usedinfo, " ")
            usedcount = stripText(usedinfo[0:i])
            newcount = stripText(usedinfo[0:i])
            usedprice = searchForPlus(usedinfo, "used & new")
            usedprice = searchForPlus(usedprice, "class=\"price\"")
            usedprice = searchForPlus(usedprice, ">")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])
            newprice = stripText(usedprice[0:i])


    # Find Ranking
    salesrank = ""
    i = string.find(marketinfo, ">Amazon.co.uk Sales Rank")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, ">Amazon.co.uk Sales Rank")
        usedinfo = searchForPlus(usedinfo, ">")
        usedinfo = searchForPlus(usedinfo, "#")
        i = string.find(usedinfo, "<")
        salesrank = stripText(usedinfo[0:i])

        i = string.find(salesrank, " ")

        if i != -1:
            salesrank = stripText(salesrank[0:i])


    # Find Buyer Waiting
    i = string.find(marketinfo, "buyer waiting!")

    if (i != -1):
        buyerwaiting = "Y"


    # Find Image (first try)
    image = ""
    i = string.find(source, "registerImage(\"original_image\"")

    if i != -1:
        tempdata = searchForPlus(source, "registerImage(\"original_image\"")
        tempdata = searchForPlus(tempdata, "\"")
        i = string.find(tempdata, "\"")
        image = stripText(tempdata[0:i])

        i = string.find(image, "no-image")

        if i != -1:
            image = ""

        i = string.find(image, ",")

        if i != -1:
            j = string.rfind(image[i:], ",")
            image = stripText(image[0:i] + image[i+j+1:])

        i = string.find(image, "_AA")

        if i != -1:
            image = stripText(image[0:i] + "_SL" + image[i+3:])


    # Find Title
    source = searchForPlus(source, "<b class=\"sans\">")
    source = searchForPlus(source, "\"btAsinTitle\"")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripText(source[0:i])

    i = string.rfind(title, "[HD DVD]")

    if i != -1:
        format = "HD DVD"
        title = stripText(title[0:i])

    i = string.rfind(title, "[Blu-ray]")

    if i != -1:
        format = "Blu-ray Disc"
        title = stripText(title[0:i])


    # Find Copyright date
    i = string.rfind(title, "[")

    if i != -1:
        copyDate = stripText(title[i+1:])
        title = stripText(title[0:i])
        i = string.find(copyDate, "]")
        copyDate = stripText(copyDate[0:i])


    # Find user rating
    tag = "/>See all "
    i = string.find(source, tag)

    if i != -1:
        temp = searchForPlus(source, tag)
        i = string.find(source, "/customer-reviews/ratings/stars")

        if i != -1:
            temp = searchFor(temp, "/customer-reviews/ratings/stars")
            i = string.find(temp, ".")
            temp = stripText(temp[0:i])
            temp = searchForPlus(temp, "stars-")
            temp = string.replace(temp, "-", ".")

            if temp != "":
                rating = temp + " Stars"


    # Find Image
    if image == "":
        tag = "/images.amazon.com"
        i = string.find(source, tag)

        if i == -1 or i > 3000:
            tag = "images-amazon.com/"
            i = string.find(source, tag)

        if i != -1 and i < 3000:
            image = source[i-50:]
            image = searchForPlus(image, " src=\"")
            i = string.find(image, "\"")
            image = stripText(image[0:i])

            if string.find(image, "-rating-") != -1:
                source = searchForPlus(source, tag)
                i = string.find(source, tag)

                if i != -1:
                    image = source[i-50:]
                    image = searchForPlus(image, " src=\"")
                    i = string.find(image, "\"")
                    image = stripText(image[0:i])
        
            image = string.replace(image, ".LZZZ", ".MZZZ")
            image = string.replace(image, ".TZZZ", ".MZZZ")

            i = string.find(image, "_PE")

            if i != -1:
                j = string.find(image[i+1:], "_")
                image = stripText(image[0:i] + image[i+j+1:])

            i = string.find(image, "ZZZ")

            if i == -1:
                image = ""

            i = string.find(image, "dvd-")

            if i != -1:
                image = ""

            i = string.find(image, "truck-icon")

            if i != -1:
                image = ""


    # Find Price
    tag = ">Our Price:<"
    i = string.find(source, tag)

    if i == -1:
        tag = ">Price:<"
        i = string.find(source, tag)

    if i == -1:
        tag = "List Price:<"
        i = string.find(source, tag)

    if i == -1:
        tag = ">RRP:<"
        i = string.find(source, tag)

    if i != -1:
        temp = searchForPlus(source, tag)
        temp = searchForPlus(temp, "class=\"price\"")
        temp = searchForPlus(temp, ">")
        i = string.find(temp, "<")
        value = stripText(temp[0:i])

        # Price can contain special charges, ignore
        i = string.find(value, "+")

        if (i != -1):
            value = stripText(value[0:i])
    else:
        value = usedprice


    # Find Format
    i = string.find(source, "dvd-gray-medium-")

    if i != -1:
        format = "DVD"

    i = string.find(source, "vhs-medium-")

    if i != -1:
        format = "VHS"


    # Find Categories
    category = ""
    i = string.find(source, "by category<")

    if i != -1:
        str = searchForPlus(source, "by category<")
        str = searchForPlus(str, "<li>")
        i = string.find(str, "</li>")
        categories = stripText(str[0:i])
        i = string.find(categories, "<font")

        if i != -1:
            categories = stripText(categories[0:i])

        while (searchFor(categories, "href=") != None):
            categories = searchForPlus(categories, "href=")
            categories = searchForPlus(categories, ">")
            i = string.find(categories, "<")
            temp = stripText(categories[0:i])

            if temp == "DVD":
                continue;

            if temp == "Categories":
                continue;

            if (category != ""):
                category = category + " : "

            category = category + temp
            categories = categories[i:]


    # Find Release date
    source = searchForPlus(source, ">Product details<")
    i = string.find(source, "Release Date:<")

    if i != -1:
        date = searchForPlus(source, "Release Date:<")
        date = searchForPlus(date, ">")
        i = string.find(date, "<")
        date = stripText(date[0:i])

        if fullDateFormat == "false":
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])

        i = string.rfind(date, ".")

        if i != -1:
            date = stripText(date[0:i])


    # Find Rating
    i = string.find(source, ">Classification:<")

    if i != -1:
        mpaa = searchForPlus(source, ">Classification:<")
        mpaa = searchForPlus(mpaa, ">")
        i = string.find(mpaa, "</li>")
        mpaa = stripText(mpaa[0:i])

        i = string.find(mpaa, "alt=\"")

        if i != -1:
            mpaa = searchForPlus(mpaa, "alt=\"")
            i = string.find(mpaa, "\"")
            mpaa = stripText(mpaa[0:i])
        else:
            mpaa = "NR"

        i = string.rfind(mpaa, "\\")

        if i != -1:
            mpaa = stripText(mpaa[0:i])


    # Find Studio
    i = string.find(source, ">Studio:<")

    if i != -1:
        studio = searchForPlus(source, ">Studio:<")
        studio = searchForPlus(studio, ">")
        i = string.find(studio, "<")
        studio = stripText(studio[0:i])


    # Find Color
    i = string.find(source, ">Format:")

    if i != -1:
        color = searchForPlus(source, ">Format:")
        color = searchForPlus(color, ">")
        i = string.find(color, "<")
        color = stripText(color[0:i])

        i = string.find(color, "Black & White")

        if i != -1:
            color = "Black & White"
        else:
            color = "Color"


    # Find Running time
    i = string.find(source, ">Run Time:<")

    if i != -1:
        running = searchForPlus(source, ">Run Time:<")
        running = searchForPlus(running, ">")
        i = string.find(running, "<")
        running = stripText(running[0:i])


    # Find Actors
    actorTag = ">Actors:<"
    i = string.find(source, actorTag)

    if i != -1:
        source = searchForPlus(source, actorTag)
        i = string.find(source, "</li>")
        actors = stripText(source[0:i])
        actorList = []

        while (searchFor(actors, "<a href") != None):
            i = string.find(actors, "<a href")

            if i != -1:
                actors = searchForPlus(actors, "<a href")
                actors = searchForPlus(actors, ">")
                i = string.find(actors, "<")
                actor = stripText(actors[0:i])

                if actor == "See more":
                    continue

                i = string.rfind(actor, " ")

                if (i != -1):
                    actor = stripText(actor[i:]) + ", " + stripText(actor[0:i])

                actorList.append(actor)

            #actors = searchForPlus(actors, ">, <")

        if len(actorList) > 0:
            actor1 = actorList[0]

        if len(actorList) > 1:
            actor2 = actorList[1]

        if len(actorList) > 2:
            actor3 = actorList[2]

        if len(actorList) > 3:
            actor4 = actorList[3]

        if len(actorList) > 4:
            actor5 = actorList[4]

        if len(actorList) > 5:
            actor6 = actorList[5]

        if len(actorList) > 6:
            actor7 = actorList[6]

        if len(actorList) > 7:
            actor8 = actorList[7]

        if len(actorList) > 8:
            actor9 = actorList[8]

        if len(actorList) > 9:
            actor10 = actorList[9]


    # Find Director
    tag = ">Director:"
    i = string.find(source, tag)

    if i == -1:
        tag = ">Directors:"
        i = string.find(source, tag)

    if i != -1:
        director = searchForPlus(source, tag)
        director = searchForPlus(director, "\">")
        i = string.find(director, "<")
        director = stripText(director[0:i])

        i = string.rfind(director, " ")

        if (i != -1):
            director = stripText(director[i:]) + ", " + stripText(director[0:i])


    # Find Comments
    i = string.find(source, ">Reviews<")

    if (i != -1):
        comments = ""
        source = searchForPlus(source, ">Reviews<")
        source = searchForPlus(source, "<div")
        source = searchForPlus(source, ">")
        i = string.find(source, "</div>")
        tempcomments = source[0:i]

        i = string.find(tempcomments, "</form>")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "<hr noshade")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "See all Reviews<")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "<font")

        if (i != -1):
            tempcomments = searchForPlus(tempcomments, "<font")

        i = string.find(tempcomments, "<span")

        if (i != -1):
            tempcomments = searchForPlus(tempcomments, "<span")

        tempcomments = string.replace(tempcomments, "\n", "")

        while (searchFor(tempcomments, "  ") != None):
            i = string.find(tempcomments, "  ")
            tempcomments = tempcomments[0:i+1] + tempcomments[i+2:]

        while (searchFor(tempcomments, "<p> ") != None):
            i = string.find(tempcomments, "<p> ")
            tempcomments = tempcomments[0:i+3] + tempcomments[i+4:]

        while (searchFor(tempcomments, "<P> ") != None):
            i = string.find(tempcomments, "<P> ")
            tempcomments = tempcomments[0:i+3] + tempcomments[i+4:]

        while (searchFor(tempcomments, "<br> ") != None):
            i = string.find(tempcomments, "<br> ")
            tempcomments = tempcomments[0:i+4] + tempcomments[i+5:]

        while (searchFor(tempcomments, "<BR> ") != None):
            i = string.find(tempcomments, "<BR> ")
            tempcomments = tempcomments[0:i+4] + tempcomments[i+5:]

        tempcomments = string.replace(tempcomments, "</a>", "")
        tempcomments = string.replace(tempcomments, "</A>", "")
        tempcomments = string.replace(tempcomments, "<p>", "\n\n")
        tempcomments = string.replace(tempcomments, "<P>", "\n\n")
        tempcomments = string.replace(tempcomments, "<BR>", "\n")
        tempcomments = string.replace(tempcomments, "<br>", "\n")
        tempcomments = string.replace(tempcomments, "<br />", "\n")
        tempcomments = string.replace(tempcomments, "<i>", "")
        tempcomments = string.replace(tempcomments, "</i>", "")
        tempcomments = string.replace(tempcomments, "<I>", "")
        tempcomments = string.replace(tempcomments, "</I>", "")
        tempcomments = string.replace(tempcomments, "<b>", "")
        tempcomments = string.replace(tempcomments, "</b>", "")
        tempcomments = string.replace(tempcomments, "</font>", "")
        tempcomments = string.replace(tempcomments, "</span>", "")
        tempcomments = string.replace(tempcomments, "&copy;", "")
        tempcomments = string.replace(tempcomments, "&#133;", "...")
        tempcomments = string.replace(tempcomments, "&#145;", "'")
        tempcomments = string.replace(tempcomments, "&#169;", "")
        tempcomments = string.replace(tempcomments, "&#8217;", "'")
        tempcomments = string.replace(tempcomments, "&quot;", "\"")
        tempcomments = string.replace(tempcomments, "&#8211;", "-")
        tempcomments = string.replace(tempcomments, "&#146;", "\'")
        tempcomments = string.replace(tempcomments, "&#150;", "-")
        tempcomments = string.replace(tempcomments, "<blockquote>", "")
        tempcomments = string.replace(tempcomments, "</blockquote>", "")
        tempcomments = string.replace(tempcomments, "<ul>", "")
        tempcomments = string.replace(tempcomments, "</ul>", "")
        tempcomments = string.replace(tempcomments, "<li>", "    ")
        tempcomments = string.replace(tempcomments, "</li>", "")

        while (searchFor(tempcomments, "\n ") != None):
            i = string.find(tempcomments, "\n ")
            tempcomments = tempcomments[0:i+1] + tempcomments[i+2:]

        while (searchFor(tempcomments, "<a ") != None):
            i = string.find(tempcomments, "<a ")
            j = string.find(tempcomments[i:], ">")

            if j == -1:
                j = 2

            tempcomments = tempcomments[0:i] + tempcomments[i+j+1:]

        while (searchFor(tempcomments, "<A ") != None):
            i = string.find(tempcomments, "<A ")
            j = string.find(tempcomments[i:], ">")
            tempcomments = tempcomments[0:i] + tempcomments[i+j+1:]

        while (searchFor(tempcomments, "<img src") != None):
            i = string.find(tempcomments, "<img src")
            j = string.find(tempcomments[i:], ">")
            tempcomments = tempcomments[0:i] + tempcomments[i+j+1:]

        while (searchFor(tempcomments, "<span") != None):
            i = string.find(tempcomments, "<span")
            j = string.find(tempcomments[i:], ">")
            tempcomments = tempcomments[0:i] + tempcomments[i+j+1:]

        comments = tempcomments








# Run the scraper, then execute the optional user exit script if it is
# present.  The user exit sits in the finally clause, so it runs whether
# extract() returns normally or raises.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # execfile is a function, so the namespace is passed as its second
        # argument.  Writing `execfile(path) in globals()` would only
        # evaluate the membership test `None in globals()` after the call
        # and never hand the dictionary to execfile.  Passing globals()
        # runs the user exit in this module's namespace, where extract()
        # stores the fields it declares global.
        execfile("scrapers/vwuserexit.py", globals())
