# Amazon DE video scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


def _removeTags(text, opener):
    """Remove every tag beginning with opener from text.

    Each occurrence is cut from the opener through the next ">".  When no
    closing ">" follows, only the opener itself is removed so that the loop
    always makes progress instead of spinning on an unterminated tag.
    """
    while (searchFor(text, opener) != None):
        i = string.find(text, opener)
        j = string.find(text[i:], ">")

        if j == -1:
            # Unterminated tag: drop just the opener characters
            j = len(opener) - 1

        text = text[0:i] + text[i+j+1:]

    return text


def extract():
    """Scrape an Amazon.de video page and store the fields in module globals.

    Reads the page HTML from the global ``source`` (and the global
    ``fullDateFormat`` flag) and assigns the extracted values to the globals
    declared below.  Fields whose marker text is not present on the page are
    left untouched, apart from the defaults set at the top.

    ``source`` is consumed as parsing proceeds (the title, price and comments
    steps each discard the text before their marker), so the order of the
    sections below matters.  ``marketinfo`` keeps a snapshot of the page taken
    before that trimming starts.

    Relies on the imported helpers as used here: ``searchFor`` returns None
    when the marker is absent; ``searchForPlus`` presumably returns the text
    following the marker -- confirm against scrapers.scrapers.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # No place extraction default to US
    # place = "United States"


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    color               = "Color"
    # sound               = "Mono"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # A page containing "sai-identify" is not the product page itself:
    # follow the first /ASIN/ link on it and parse the fetched page instead.
    i = string.find(source, "sai-identify")

    if i != -1:
        source = searchForPlus(source, "sai-identify")
        i = string.find(source, "/ASIN/")
        # Back up 50 characters so the enclosing <a href= is included
        source = source[i-50:]
        source = searchForPlus(source, "<a href=")
        i = string.find(source, ">")
        url = stripText(source[0:i])
        http = HTTPConnection()
        http.resetReferer()
        http.blockForLoad()
        source = http.getContents(url)


    i = string.find(source, "ineligible for Amazon.de Marketplace selling.")

    if (i != -1):
        # Not found
        return


    # Find marketplace pricing
    marketinfo = source


    # Find List price
    i = string.find(marketinfo, ">Statt:")

    if (i != -1):
        listprice = searchForPlus(marketinfo, ">Statt:")
        listprice = searchFor(listprice, "EUR")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])


    # Narrow the search to the marketplace section when the page has one
    i = string.find(marketinfo, ">From Our Marketplace Sellers:<")

    if (i != -1):
        marketinfo = searchForPlus(marketinfo, ">From Our Marketplace Sellers:<")


    # Find New
    i = string.find(marketinfo, "sdp_new")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_new")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "new<")

        if (i != -1):
            # Text before "new<" is the count, the price follows it
            newcount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "new<")
            usedinfo = searchFor(usedinfo, ">")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            newprice = stripText(usedinfo[0:i])


    # Find Collectible
    i = string.find(marketinfo, "sdp_coll")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_coll")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "collectable<")

        if (i != -1):
            collectiblecount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "collectable<")
            usedinfo = searchFor(usedinfo, ">")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            collectibleprice = stripText(usedinfo[0:i])


    # Find Used
    i = string.find(marketinfo, "sdp_used")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_used")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "used<")

        if (i != -1):
            usedcount = stripText(usedinfo[0:i])

            # Best effort: a count without a parsable price leaves the
            # used price blank rather than aborting the whole extraction
            try:
                usedinfo = searchForPlus(usedinfo, "used<")
                usedinfo = searchFor(usedinfo, ">")
                usedinfo = searchForPlus(usedinfo, ">")
                i = string.find(usedinfo, "<")
                usedprice = stripText(usedinfo[0:i])
            except:
                usedprice = ""


    # Find Ranking
    salesrank = ""
    i = string.find(marketinfo, ">Amazon.de-Verkaufsrang")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, ">Amazon.de-Verkaufsrang")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<")
        salesrank = stripText(usedinfo[0:i])


    # Find Buyer Waiting
    i = string.find(marketinfo, "buyer waiting!")

    if (i != -1):
        buyerwaiting = "Y"


    # Find Image (first try)
    image = ""
    i = string.find(source, "registerImage(\"original_image\"")

    if i != -1:
        tempdata = searchForPlus(source, "registerImage(\"original_image\"")
        tempdata = searchForPlus(tempdata, "\"")
        i = string.find(tempdata, "\"")
        image = stripText(tempdata[0:i])

        i = string.find(image, "no-image")

        if i != -1:
            image = ""

        # Cut out everything from the first comma through the last comma
        i = string.find(image, ",")

        if i != -1:
            j = string.rfind(image[i:], ",")
            image = stripText(image[0:i] + image[i+j+1:])

        # Replace the "_AA" marker in the image URL with "_SL"
        i = string.find(image, "_AA")

        if i != -1:
            image = stripText(image[0:i] + "_SL" + image[i+3:])


    # Find Title
    # From here on source only holds the page text after the title
    source = searchForPlus(source, "<b class=\"sans\">")
    source = searchForPlus(source, "\"btAsinTitle\"")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripText(source[0:i])


    # Find Copyright date
    copyDate = ""
    i = string.find(marketinfo, ">Produktion:")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, ">Produktion:")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<")
        copyDate = stripText(usedinfo[0:i])


    # Find Image
    # Fallback when the first try found nothing: use an image host reference
    # within the first 3000 characters after the title
    if image == "":
        tag = "images.amazon.com"
        i = string.find(source, tag)

        if i == -1 or i > 3000:
            tag = "images-amazon.com/"
            i = string.find(source, tag)

        if i != -1 and i < 3000:
            image = source[i-50:]
            image = searchForPlus(image, " src=\"")
            i = string.find(image, "\"")
            image = stripText(image[0:i])
            image = string.replace(image, ".LZZZ", ".MZZZ")

            # Reject URLs without "ZZZ" and known non-cover graphics
            i = string.find(image, "ZZZ")

            if i == -1:
                image = ""

            i = string.find(image, "dvd-")

            if i != -1:
                image = ""

            i = string.find(image, "truck-icon")

            if i != -1:
                image = ""


    # Find Price
    i = string.find(source, ">Jetzt:")

    if (i == -1):
        i = string.find(source, ">Amazon-Preis:")

    if (i != -1):
        # source is trimmed again: later sections only see text after the price
        source = source[i:]

        source = searchFor(source, "EUR")
        i = string.find(source, "<")
        value = stripText(source[0:i])

        # Price can contain special charges, ignore
        i = string.find(value, "+")

        if (i != -1):
            value = stripText(value[0:i])


    # Find Format
    i = string.find(source, ">DVD-Erscheinungstermin:<")

    if i != -1:
        format = "DVD"

    i = string.find(source, "gray-medium-vhs")

    if i != -1:
        format = "VHS"


    # Find Rating
    i = string.find(source, ">FSK:<")

    if i != -1:
        mpaa = searchForPlus(source, ">FSK:<")
        mpaa = searchForPlus(mpaa, "> ")
        i = string.find(mpaa, "Jahren")
        # Keep the text up to and including "Jahren" (6 characters).
        # NOTE(review): when "Jahren" is absent i is -1 and only the first
        # five characters are kept -- confirm whether that is intended.
        mpaa = stripText(mpaa[0:i+6])
    else:
        mpaa = "Keine Freigabe"


    # Find Actors
    i = string.find(source, ">Darsteller:")

    if i != -1:
        actors = searchForPlus(source, ">Darsteller:")
        i = string.find(actors, "</li>")
        actors = stripText(actors[0:i])
        actorList = []

        while (searchFor(actors, "href=\"") != None):
            actors = searchForPlus(actors, "href=\"")
            actors = searchForPlus(actors, "\">")
            i = string.find(actors, "<")
            actor = stripText(actors[0:i])

            # Turn "First Last" into "Last, First" (split at the last space)
            i = string.rfind(actor, " ")

            if (i != -1):
                actor = stripText(actor[i:]) + ", " + stripText(actor[0:i])

            actorList.append(actor)

        # Only the first ten actors have a field to go into
        if len(actorList) > 0:
            actor1 = actorList[0]

        if len(actorList) > 1:
            actor2 = actorList[1]

        if len(actorList) > 2:
            actor3 = actorList[2]

        if len(actorList) > 3:
            actor4 = actorList[3]

        if len(actorList) > 4:
            actor5 = actorList[4]

        if len(actorList) > 5:
            actor6 = actorList[5]

        if len(actorList) > 6:
            actor7 = actorList[6]

        if len(actorList) > 7:
            actor8 = actorList[7]

        if len(actorList) > 8:
            actor9 = actorList[8]

        if len(actorList) > 9:
            actor10 = actorList[9]


    # Find Director
    i = string.find(source, ">Regisseur")

    if i != -1:
        director = searchForPlus(source, ">Regisseur")
        director = searchForPlus(director, "\">")
        i = string.find(director, "<")
        director = stripText(director[0:i])

        # Same "Last, First" reordering as for the actors
        i = string.rfind(director, " ")

        if (i != -1):
            director = stripText(director[i:]) + ", " + stripText(director[0:i])


    # Find Author
    i = string.find(source, ">Buch:<")

    if i != -1:
        author = searchForPlus(source, ">Buch:<")
        author = searchForPlus(author, "href")
        author = searchForPlus(author, ">")
        i = string.find(author, "<")
        author = stripText(author[0:i])


    # Find Composer
    i = string.find(source, ">Musik:<")

    if i != -1:
        composer = searchForPlus(source, ">Musik:<")
        composer = searchForPlus(composer, "href")
        composer = searchForPlus(composer, ">")
        i = string.find(composer, "<")
        composer = stripText(composer[0:i])


    # Find Sound
    i = string.find(source, "Sprachen:")

    if i != -1:
        sound = searchForPlus(source, "Sprachen:")
        i = string.find(sound, "<br>")
        sound = stripText(sound[0:i])
        sound = searchForPlus(sound, "</b>")
        # Keep only the text before the first parenthesis
        i = string.find(sound, "(")
        sound = stripText(sound[0:i])


    # Find Closed captioned
    i = string.find(source, "Untertitel:")

    if i != -1:
        closedCap = "Y"


    # Find Release date
    i = string.find(source, ">DVD-Erscheinungstermin:<")

    if i != -1:
        date = searchForPlus(source, ">DVD-Erscheinungstermin:<")
        date = searchForPlus(date, ">")
        i = string.find(date, "<")
        date = stripText(date[0:i])

        # Without full date format keep only the last word of the date
        if fullDateFormat == "false":
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])


    # Find running time
    i = string.find(source, ">Spieldauer:<")

    if i != -1:
        running = searchForPlus(source, ">Spieldauer:<")
        running = searchForPlus(running, ">")
        i = string.find(running, "<")
        running = stripText(running[0:i])


    # Find widescreen
    i = string.find(source, ">Bildseitenformat:<")

    if i != -1:
        wide = searchForPlus(source, ">Bildseitenformat:<")
        wide = searchForPlus(wide, ">")
        i = string.find(wide, "<")
        wide = stripText(wide[0:i])

        # Anything other than exactly "4:3" counts as widescreen
        if wide == "4:3":
            wide = "N"
        else:
            wide = "Y"



    # Find Studio
    i = string.find(source, ">Studio:<")

    if i != -1:
        studio = searchForPlus(source, ">Studio:<")
        studio = searchForPlus(studio, ">")
        i = string.find(studio, "<")
        studio = stripText(studio[0:i])


    # Find Comments
    i = string.find(source, ">Rezensionen<")

    if (i != -1):
        comments = ""
        source = searchForPlus(source, ">Rezensionen<")
        source = searchForPlus(source, "<div ")
        source = searchForPlus(source, ">")
        i = string.find(source, "</div>")
        tempcomments = source[0:i]

        # Cut the review text at the first image or nested div
        i = string.find(tempcomments, "<img src=")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "<div")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        tempcomment = tempcomments

        # Drop the single space following paragraph and line break tags
        while (searchFor(tempcomment, "<p> ") != None):
            i = string.find(tempcomment, "<p> ")
            tempcomment = tempcomment[0:i+3] + tempcomment[i+4:]

        while (searchFor(tempcomment, "<P> ") != None):
            i = string.find(tempcomment, "<P> ")
            tempcomment = tempcomment[0:i+3] + tempcomment[i+4:]

        while (searchFor(tempcomment, "<br> ") != None):
            i = string.find(tempcomment, "<br> ")
            tempcomment = tempcomment[0:i+4] + tempcomment[i+5:]

        while (searchFor(tempcomment, "<BR> ") != None):
            i = string.find(tempcomment, "<BR> ")
            tempcomment = tempcomment[0:i+4] + tempcomment[i+5:]

        # Convert simple markup and entities to plain text
        tempcomment = string.replace(tempcomment, "</a>", "")
        tempcomment = string.replace(tempcomment, "</A>", "")
        tempcomment = string.replace(tempcomment, "<p>", "\n\n")
        tempcomment = string.replace(tempcomment, "<P>", "\n\n")
        tempcomment = string.replace(tempcomment, "<BR>", "\n")
        tempcomment = string.replace(tempcomment, "<br>", "\n")
        tempcomment = string.replace(tempcomment, "<br />", "\n")
        tempcomment = string.replace(tempcomment, "<i>", "")
        tempcomment = string.replace(tempcomment, "</i>", "")
        tempcomment = string.replace(tempcomment, "<I>", "")
        tempcomment = string.replace(tempcomment, "</I>", "")
        tempcomment = string.replace(tempcomment, "<b>", "")
        tempcomment = string.replace(tempcomment, "</b>", "")
        tempcomment = string.replace(tempcomment, "</font>", "")
        tempcomment = string.replace(tempcomment, "</span>", "")
        tempcomment = string.replace(tempcomment, "&copy;", "")
        tempcomment = string.replace(tempcomment, "&#133;", "...")
        tempcomment = string.replace(tempcomment, "&#145;", "'")
        tempcomment = string.replace(tempcomment, "&#169;", "")
        tempcomment = string.replace(tempcomment, "&#8217;", "'")
        tempcomment = string.replace(tempcomment, "&quot;", "\"")
        tempcomment = string.replace(tempcomment, "&#8211;", "-")
        tempcomment = string.replace(tempcomment, "&#146;", "\'")
        tempcomment = string.replace(tempcomment, "&#150;", "-")
        tempcomment = string.replace(tempcomment, "<blockquote>", "")
        tempcomment = string.replace(tempcomment, "</blockquote>", "")
        tempcomment = string.replace(tempcomment, "<ul>", "")
        tempcomment = string.replace(tempcomment, "</ul>", "")
        tempcomment = string.replace(tempcomment, "<li>", "    ")
        tempcomment = string.replace(tempcomment, "</li>", "")

        # Remove a space at the start of a line
        while (searchFor(tempcomment, "\n ") != None):
            i = string.find(tempcomment, "\n ")
            tempcomment = tempcomment[0:i+1] + tempcomment[i+2:]

        # Collapse runs of four newlines down to two
        while (searchFor(tempcomment, "\n\n\n\n") != None):
            i = string.find(tempcomment, "\n\n\n\n")
            tempcomment = tempcomment[0:i+2] + tempcomment[i+4:]

        # Strip the remaining tags that carry attributes
        tempcomment = _removeTags(tempcomment, "<a ")
        tempcomment = _removeTags(tempcomment, "<A ")
        tempcomment = _removeTags(tempcomment, "<img ")
        tempcomment = _removeTags(tempcomment, "<span")

        comments = tempcomment


    extractCategory()


def extractCategory():
    """Build the global ``category`` from the page's "Browse for" links.

    Looks in the global ``source`` for a "Browse for" heading (with or
    without a space after the <b> tag), takes the text between the next two
    <br> tags and joins the text of every link in it with " : ", skipping the
    entries "DVD" and "Categories".  ``source`` is advanced past the heading
    as a side effect; when the heading or the closing <br> is missing,
    ``category`` is left unchanged.
    """
    global title,author,format,bookclub,first,signed,read,date,publisher,place,isbn
    global value,category,copies,condition,rating,comments,source,image
    global fullDateFormat

    # Find category
    heading = "<b>Browse for"
    pos = string.find(source, heading)

    if pos == -1:
        # Second spelling of the heading
        heading = "<b> Browse for"
        pos = string.find(source, heading)

    if pos == -1:
        return

    source = searchForPlus(source, heading)
    source = searchForPlus(source, "<br>")
    pos = string.find(source, "<br>")

    if pos == -1:
        return

    links = source[0:pos]
    category = ""

    while searchFor(links, "href=") is not None:
        links = searchForPlus(links, "href=")
        links = searchForPlus(links, ">")
        end = string.find(links, "<")
        name = stripText(links[0:end])

        if not (name == "DVD" or name == "Categories"):
            if category != "":
                category = category + " : "

            category = category + name

        # Continue with the text after this link's label
        links = links[end:]



# Run the scraper; whether or not it raises, give the optional user exit
# script a chance to run.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # Pass the namespace as an argument.  Writing
        # "execfile(...) in globals()" would merely test whether execfile's
        # return value is a key of globals().
        execfile("scrapers/vwuserexit.py", globals())
