# Amazon CA video scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


def extract():
    """Scrape video details from the Amazon.ca page held in ``source``.

    Reads the module-level ``source`` (page HTML) and ``fullDateFormat``
    and stores whatever it finds in module-level fields: marketplace
    pricing (``listprice``, ``newprice``/``newcount``,
    ``collectibleprice``/``collectiblecount``, ``usedprice``/``usedcount``),
    ``salesrank``, ``buyerwaiting``, ``image``, ``title``, ``format``,
    ``copyDate``, ``value``, ``date``, ``mpaa``, ``studio``, ``color``,
    ``running``, ``actor1`` .. ``actor10``, ``director``, ``sound``,
    ``closedCap``, ``wide`` and ``comments``.  It finishes by calling
    extractCategory().  Nothing is returned.

    ``source`` is consumed destructively: several steps replace it with
    the text following a marker, so the order of the sections matters.

    NOTE(review): searchFor, searchForPlus and stripText come from
    scrapers.scrapers and are not visible here.  From their use below,
    searchFor appears to return None when the marker is absent, and
    searchForPlus presumably returns the text after the marker -- confirm
    against that module.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global copies,location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # No place extraction default to US
    # place = "United States"


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    format              = "DVD"
    color               = "Color"
    sound               = "Mono"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # If the page carries a "sai-identify" marker, pull the link containing
    # "/ASIN/" that follows it, fetch that URL and scrape the fetched page
    # instead.
    i = string.find(source, "sai-identify")

    if i != -1:
        source = searchForPlus(source, "sai-identify")
        i = string.find(source, "/ASIN/")
        # Back up 50 characters so the "<a href=" that opens the link is
        # included in the text searched next.
        # NOTE(review): if "/ASIN/" is missing or sits within the first 50
        # characters, i-50 is negative and this slices from the END of the
        # text -- confirm whether that can happen in practice.
        source = source[i-50:]
        source = searchForPlus(source, "<a href=")
        i = string.find(source, ">")
        url = stripText(source[0:i])
        http = HTTPConnection()
        http.resetReferer()
        http.blockForLoad()
        source = http.getContents(url)


    i = string.find(source, " ineligible for Amazon Marketplace selling.")

    if (i != -1):
        # Not found
        return


    # Find marketplace pricing
    marketinfo = source


    # Find List price
    i = string.find(marketinfo, ">List Price:")

    if (i != -1):
        listprice = searchForPlus(marketinfo, ">List Price:")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])


    # Narrow the search to the marketplace sellers section when present
    i = string.find(marketinfo, ">From Our Marketplace Sellers:<")

    if (i != -1):
        marketinfo = searchForPlus(marketinfo, ">From Our Marketplace Sellers:<")


    # Find New
    i = string.find(marketinfo, "sdp_new")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_new")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "new<")

        if (i != -1):
            # Text before "new<" is the count, the "CDN$" text after it
            # is the price
            i = string.find(usedinfo, "new<")
            newcount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "new<")
            usedinfo = searchFor(usedinfo, ">CDN$")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            newprice = stripText(usedinfo[0:i])


    # Find Collectible
    i = string.find(marketinfo, "sdp_coll")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_coll")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "collectable<")

        if (i != -1):
            i = string.find(usedinfo, "collectable<")
            collectiblecount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "collectable<")
            usedinfo = searchFor(usedinfo, ">CDN$")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            collectibleprice = stripText(usedinfo[0:i])


    # Find Used
    i = string.find(marketinfo, "sdp_used")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_used")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "used<")

        if (i != -1):
            i = string.find(usedinfo, "used<")
            usedcount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "used<")
            usedinfo = searchFor(usedinfo, ">CDN$")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            usedprice = stripText(usedinfo[0:i])


    # Find Ranking
    salesrank = ""
    i = string.find(marketinfo, ">Amazon.ca Sales Rank")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, ">Amazon.ca Sales Rank")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<")
        salesrank = stripText(usedinfo[0:i])


    # Find Buyer Waiting
    i = string.find(marketinfo, "buyer waiting!")

    if (i != -1):
        buyerwaiting = "Y"


    # Find Image (first try): the URL passed to registerImage("original_image", ...)
    image = ""
    i = string.find(source, "registerImage(\"original_image\"")

    if i != -1:
        tempdata = searchForPlus(source, "registerImage(\"original_image\"")
        tempdata = searchForPlus(tempdata, "\"")
        i = string.find(tempdata, "\"")
        image = stripText(tempdata[0:i])

        # A "no-image" URL is discarded
        i = string.find(image, "no-image")

        if i != -1:
            image = ""

        # Rewrite the first "_AA" in the URL to "_SL"
        i = string.find(image, "_AA")

        if i != -1:
            image = stripText(image[0:i] + "_SL" + image[i+3:])




    # Find Title (from here on source starts after the title's <b class="sans">)
    source = searchForPlus(source, "<b class=\"sans\">")
    i = string.find(source, "<")
    title = stripText(source[0:i])

    # Format markers embedded in the title override the DVD default
    i = string.rfind(title, "[HD DVD]")

    if i != -1:
        format = "HD DVD"

    i = string.rfind(title, "(Combo HD DVD")

    if i != -1:
        format = "HD DVD Combo"

    i = string.rfind(title, "[Blu-ray]")

    if i != -1:
        format = "Blu-ray Disc"


    # Find Copyright date: the text inside the LAST " (...)" of the title,
    # kept only when it is exactly four characters long
    i = string.rfind(title, " (")

    if i != -1:
        copyDate = stripText(title[i+2:])
        i = string.find(copyDate, ")")
        copyDate = stripText(copyDate[0:i])

        if len(copyDate) != 4:
            copyDate = ""

        # NOTE(review): the title is cut at the FIRST " (" although the
        # date came from the last one -- confirm this is intended.
        i = string.find(title, " (")
        title = stripText(title[0:i])

    # Drop a trailing " [...]" qualifier from the title
    i = string.rfind(title, " [")

    if i != -1:
        title = stripText(title[0:i])


    # Find Image (second try, only when the first try found nothing)
    if image == "":
        i = string.find(source, "images.amazon.")

        if (i != -1):
            # Back up 15 characters so the ' src="' attribute that
            # precedes the host name is included
            image = source[i-15:]
            image = searchForPlus(image, " src=\"")
            i = string.find(image, "\"")
            image = stripText(image[0:i])

            # URLs containing "dvd-" or "truck-icon" are discarded
            i = string.find(image, "dvd-")

            if i != -1:
                image = ""

            i = string.find(image, "truck-icon")

            if i != -1:
                image = ""


    # Find Price: first label present among "Our Price:", "Price:" and
    # "List Price:"
    i = string.find(source, ">Our Price:")

    if i == -1:
        i = string.find(source, ">Price:<")

    if i == -1:
        i = string.find(source, ">List Price:")

    if i != -1:
        source = source[i:]

        source = searchFor(source, "$")
        i = string.find(source, "<")
        value = stripText(source[0:i])

        # Price can contain special charges, ignore
        i = string.find(value, "+")

        if (i != -1):
            value = stripText(value[0:i])

        # amazon.ca has space between $ and amount
        value = string.replace(value, " ", "")


    # Find Release date (source now starts after the Product Details heading)
    source = searchForPlus(source, ">Product Details<")
    i = string.find(source, "Release Date:<")

    if i != -1:
        date = searchForPlus(source, "Release Date:<")
        date = searchForPlus(date, ">")
        i = string.find(date, "<")
        date = stripText(date[0:i])

        # With fullDateFormat "false" keep only the last space-separated
        # word of the date
        if fullDateFormat == "false":
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])

        # Cut the date at its last "."
        i = string.rfind(date, ".")

        if i != -1:
            date = stripText(date[0:i])


    # Find Rating
    i = string.find(source, ">Classification:<")

    if i != -1:
        mpaa = searchForPlus(source, ">Classification:<")
        mpaa = searchForPlus(mpaa, ">")
        i = string.find(mpaa, "</li>")
        mpaa = stripText(mpaa[0:i])

        # The rating is taken from an alt="..." attribute; without one the
        # title is recorded as not rated
        i = string.find(mpaa, "alt=\"")

        if i != -1:
            mpaa = searchForPlus(mpaa, "alt=\"")
            i = string.find(mpaa, "\"")
            mpaa = stripText(mpaa[0:i])
        else:
            mpaa = "NR"

        # Cut the rating at its last backslash
        i = string.rfind(mpaa, "\\")

        if i != -1:
            mpaa = stripText(mpaa[0:i])


    # Find Studio
    i = string.find(source, ">Studio:<")

    if i != -1:
        studio = searchForPlus(source, ">Studio:<")
        studio = searchForPlus(studio, ">")
        i = string.find(studio, "<")
        studio = stripText(studio[0:i])


    # Find Color (from the first "Format:" entry)
    i = string.find(source, ">Format:")

    if i != -1:
        color = searchForPlus(source, ">Format:")
        color = searchForPlus(color, ">")
        i = string.find(color, "<")
        color = stripText(color[0:i])

        i = string.find(color, "Black & White")

        if i != -1:
            color = "Black & White"
        else:
            color = "Color"


    # Find Running time
    i = string.find(source, ">Run Time:<")

    if i != -1:
        running = searchForPlus(source, ">Run Time:<")
        running = searchForPlus(running, ">")
        i = string.find(running, "<")
        running = stripText(running[0:i])


    # Find Actors ("Actors:" label, falling back to "Starring")
    actorTag = ">Actors:"
    i = string.find(source, actorTag)

    if i == -1:
        actorTag = ">Starring"
        i = string.find(source, actorTag)

    if i != -1:
        # Only the rest of the label's line is examined
        tempData = searchForPlus(source, actorTag)
        i = string.find(tempData, "\n")
        actors = stripText(tempData[0:i])
        actorList = []

        # Each link on that line is one actor name
        while (searchFor(actors, "href") != None):
            i = string.find(actors, "href")

            if i != -1:
                actors = searchForPlus(actors, "href")
                actors = searchForPlus(actors, ">")
                i = string.find(actors, "<")
                actor = stripText(actors[0:i])

                if actor == "See more":
                    continue

                # Reorder "First Last" as "Last, First", splitting at the
                # last space
                i = string.rfind(actor, " ")

                if (i != -1):
                    actor = stripText(actor[i:]) + ", " + stripText(actor[0:i])

                actorList.append(actor)

        # Only the first ten names have a field to go into
        if len(actorList) > 0:
            actor1 = actorList[0]

        if len(actorList) > 1:
            actor2 = actorList[1]

        if len(actorList) > 2:
            actor3 = actorList[2]

        if len(actorList) > 3:
            actor4 = actorList[3]

        if len(actorList) > 4:
            actor5 = actorList[4]

        if len(actorList) > 5:
            actor6 = actorList[5]

        if len(actorList) > 6:
            actor7 = actorList[6]

        if len(actorList) > 7:
            actor8 = actorList[7]

        if len(actorList) > 8:
            actor9 = actorList[8]

        if len(actorList) > 9:
            actor10 = actorList[9]


    # Find Director (only the first linked name is used)
    tag = ">Director:"
    i = string.find(source, tag)

    if i == -1:
        tag = ">Directors:"
        i = string.find(source, tag)

    if i != -1:
        tempData = searchForPlus(source, tag)
        tempData = searchForPlus(tempData, "\">")
        i = string.find(tempData, "<")
        director = stripText(tempData[0:i])

        # Reorder "First Last" as "Last, First"
        i = string.rfind(director, " ")

        if (i != -1):
            director = stripText(director[i:]) + ", " + stripText(director[0:i])


    # Find Attributes (from the LAST "Format:" entry, up to its </li>)
    i = string.rfind(source, ">Format:")

    if i != -1:
        tempData = stripText(source[i:])
        tempData = searchForPlus(tempData, ">")
        i = string.find(tempData, "</li>")
        attrs = stripText(tempData[0:i])

        # Find Color
        i = string.find(attrs, "Black & White")

        if i != -1:
            color = "Black & White"

        # Find Sound; when several keywords match, the last check wins
        i = string.find(attrs, "HiFi Sound")

        if i != -1:
            sound = "HiFi Sound"

        # Find Sound
        i = string.find(attrs, "Dolby")

        if i != -1:
            sound = "Dolby"

        # Find Sound
        i = string.find(attrs, "DTS Surround Sound")

        if i != -1:
            sound = "DTS Surround Sound"

        # Find Sound
        i = string.find(attrs, "AC-3")

        if i != -1:
            sound = "AC-3"

        # Find Closed caption
        i = string.find(attrs, "closed-captioned")

        if i != -1:
            closedCap = "Y"

        # Find Widescreen (either capitalisation)
        i = string.find(attrs, "widescreen")

        if i != -1:
            wide = "Y"

        i = string.find(attrs, "Widescreen")

        if i != -1:
            wide = "Y"


    # Find Comments
    i = string.find(source, ">Product Description<")

    if i != -1:
        comments = ""
        source = searchForPlus(source, ">Product Description<")
        source = searchForPlus(source, ">")

        # The description ends at the earliest of "<hr ", "</form>",
        # "</div>" or "<p align=right>"
        i = string.find(source, "<hr ")
        tempcomments = source[0:i]

        i = string.find(tempcomments, "</form>")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "</div>")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        # tempcomments is a prefix of source, so an index found in source
        # either falls inside it or leaves it untouched
        i = string.find(source, "<p align=right>")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        # Remove the first <span ...> and <div ...> opening tags
        tempcomment = stripText(tempcomments)
        i = string.find(tempcomment, "<span")

        if (i != -1):
            j = string.find(tempcomment[i:], ">")
            tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        i = string.find(tempcomment, "<div ")

        if i != -1:
            j = string.find(tempcomment[i:], ">")
            tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        # Stop at an ellipsis that is directly followed by markup
        i = string.find(tempcomment, "...<")

        if (i != -1):
            tempcomment = stripText(tempcomment[0:i+3])

        # Turn simple markup and entities into plain text
        tempcomment = string.replace(tempcomment, "\n", "")
        tempcomment = string.replace(tempcomment, "</a>", "")
        tempcomment = string.replace(tempcomment, "</A>", "")
        tempcomment = string.replace(tempcomment, "<p>", "\n\n")
        tempcomment = string.replace(tempcomment, "<P>", "\n\n")
        tempcomment = string.replace(tempcomment, "<BR>", "\n")
        tempcomment = string.replace(tempcomment, "<br />", "\n")
        tempcomment = string.replace(tempcomment, "<br>", "\n")
        tempcomment = string.replace(tempcomment, "<i>", "")
        tempcomment = string.replace(tempcomment, "</i>", "")
        tempcomment = string.replace(tempcomment, "<I>", "")
        tempcomment = string.replace(tempcomment, "</I>", "")
        tempcomment = string.replace(tempcomment, "<b>", "")
        tempcomment = string.replace(tempcomment, "</b>", "")
        tempcomment = string.replace(tempcomment, "</font>", "")
        tempcomment = string.replace(tempcomment, "</span>", "")
        tempcomment = string.replace(tempcomment, "&copy;", "")
        tempcomment = string.replace(tempcomment, "&#146;", "'")
        tempcomment = string.replace(tempcomment, "&#169;", "")
        tempcomment = string.replace(tempcomment, "&#224;", "")
        tempcomment = string.replace(tempcomment, "&#234;", "")
        tempcomment = string.replace(tempcomment, "&#241;", "")
        tempcomment = string.replace(tempcomment, "&#8217;", "'")
        tempcomment = string.replace(tempcomment, "&quot;", "\"")
        tempcomment = string.replace(tempcomment, "&#8211;", "-")
        tempcomment = string.replace(tempcomment, "&ccedil;", "")
        tempcomment = string.replace(tempcomment, "  ", " ")
        tempcomment = string.replace(tempcomment, "\n ", "\n")
        tempcomment = string.replace(tempcomment, "<ul>", "\n")
        tempcomment = string.replace(tempcomment, "<li>", "\n    ")
        tempcomment = string.replace(tempcomment, "</ul>", "")

        # Remove every space that directly follows a newline
        while (searchFor(tempcomment, "\n ") != None):
            i = string.find(tempcomment, "\n ")
            tempcomment = tempcomment[0:i+1] + tempcomment[i+2:]

        # Strip the remaining opening tags of these kinds, in this order.
        # When a tag has no closing ">" (find returns -1), remove just the
        # tag text itself; otherwise the string would be rebuilt unchanged
        # and the loop would never terminate.
        for openTag in ("<a ", "<A ", "<img src", "<span"):
            while (searchFor(tempcomment, openTag) != None):
                i = string.find(tempcomment, openTag)
                j = string.find(tempcomment[i:], ">")

                if j == -1:
                    j = len(openTag) - 1

                tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        if comments != "":
            comments = comments + "\n\n" +  tempcomment
        else:
            comments = tempcomment


    extractCategory()


def extractCategory():
    """Build ``category`` from the category links remaining in ``source``.

    Looks for a " by subject:<" marker (skipped twice) or, failing that,
    a "<b> Browse for" marker, then takes the text between the next two
    "<br>" tags and joins the text of every link in it with " : ".
    Links labelled "DVD", "Categories" or "Gift Sets" are left out.

    Updates the module-level ``source`` as it advances; ``category`` is
    only assigned once the link section has been located.  Returns
    nothing.
    """
    global title,author,format,bookclub,first,signed,read,date,publisher,place,isbn
    global value,category,copies,condition,rating,comments,source,image
    global fullDateFormat

    # Advance source to the start of the category listing
    marker = " by subject:<"
    pos = string.find(source, marker)

    if pos != -1:
        # The marker is skipped twice before the listing is read
        source = searchForPlus(source, marker)
        source = searchForPlus(source, marker)
    else:
        marker = "<b> Browse for"
        pos = string.find(source, marker)

        if pos != -1:
            source = searchForPlus(source, marker)

    # Neither marker present: leave category untouched
    if pos == -1:
        return

    # The links sit between the next two <br> tags
    source = searchForPlus(source, "<br>")
    pos = string.find(source, "<br>")

    if pos == -1:
        return

    links = source[0:pos]
    skipped = ("DVD", "Categories", "Gift Sets")
    category = ""

    # Walk the links one at a time, appending each wanted label
    while searchFor(links, "href=") != None:
        links = searchForPlus(links, "href=")
        links = searchForPlus(links, ">")
        pos = string.find(links, "<")
        label = stripText(links[0:pos])

        if label not in skipped:
            if category != "":
                category = category + " : "

            category = category + label

        # Continue after the label just read
        links = links[pos:]



# Run the scraper.  Whether or not extraction succeeds, run the optional
# user exit script if one is installed.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # Pass the module namespace explicitly so the user exit script
        # executes against this module's globals.
        execfile("scrapers/vwuserexit.py", globals())
