# Amazon CA music scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import convertArtist
from    jarray import zeros, array
from    java.lang import String


# Ordered (old, new) pairs applied to the product description once whitespace
# has been collapsed.  Order matters: paragraph and line-break tags are turned
# into newlines before the "newline followed by a space" clean-up runs.
_COMMENT_REPLACEMENTS = (
    ("</a>", ""),
    ("</A>", ""),
    ("<p>", "\n\n"),
    ("<P>", "\n\n"),
    ("</p>", ""),
    ("<BR>", "\n"),
    ("<br>", "\n"),
    ("<br />", "\n"),
    ("<i>", ""),
    ("</i>", ""),
    ("<I>", ""),
    ("</I>", ""),
    ("<b>", ""),
    ("</b>", ""),
    ("</font>", ""),
    ("</span>", ""),
    ("&copy;", ""),
    ("&#145;", "'"),
    ("&#169;", ""),
    ("&#8217;", "'"),
    ("&quot;", "\""),
    ("&#8211;", "-"),
    ("&#146;", "'"),
    ("<blockquote>", ""),
    ("</blockquote>", ""),
)


def _upTo(text, stopper):
    """Return stripText() of `text` up to, not including, the first `stopper`.

    NOTE: when `stopper` is absent, find() yields -1 and the slice drops only
    the final character; callers are expected to pass text that contains it.
    """
    return stripText(text[0:string.find(text, stopper)])


def _fetchPage(url):
    """Download `url` through a fresh HTTPConnection and return its contents."""
    http = HTTPConnection()
    http.resetReferer()
    http.blockForLoad()
    return http.getContents(url)


def _priceAfterLink(text, marker):
    """Return the text of the element that follows `marker` in `text`."""
    price = searchForPlus(text, marker)
    price = searchFor(price, ">")
    price = searchForPlus(price, ">")
    return _upTo(price, "<")


def _applyDateFormat(date, fullDateFormat):
    """Keep only the last space-separated word when full dates are disabled.

    `fullDateFormat` is compared against the string "false"; any other value
    leaves `date` untouched.
    """
    if fullDateFormat == "false":
        i = string.rfind(date, " ")

        if i != -1:
            date = stripText(date[i+1:])

    return date


def _writeTrace(text):
    """Best-effort dump of the fetched track listing to trace2.html.

    The trace is a debugging aid only, so a failure to write it must never
    abort the scrape, and the file handle is always closed.
    """
    try:
        trace = open("trace2.html", "w")

        try:
            trace.write(text)
        finally:
            trace.close()
    except (IOError, UnicodeError):
        pass


def _formatDiskId(text, fallback):
    """Return `text` as a two-digit disc id, or `fallback` if it is not a number."""
    try:
        return "%02d" % int(text)
    except (TypeError, ValueError):
        return "%02d" % fallback


def _expandTrackListing(disks, marker):
    """Return the track listing HTML to parse for one listing section.

    If the section only links to a separate "N tracks on this disc" page,
    that page is downloaded and its `marker` section is used instead.  A
    listing without any ">Disc: " heading gets a synthetic disc 1 heading so
    that single and multi disc listings parse the same way.
    """
    i = string.find(disks, " tracks on this disc</a>")

    if i != -1:
        # Look back for the link that wraps the text; clamp at 0 so a match
        # near the start does not turn into a negative (wrap-around) index.
        disks = stripText(disks[max(i - 200, 0):])
        disks = searchForPlus(disks, "<a href=\"")
        url = _upTo(disks, "\">")
        disks = _fetchPage(url)
        disks = searchForPlus(disks, marker)
        disks = _upTo(disks, "<hr no")
        _writeTrace(disks)

    if string.find(disks, ">Disc: ") == -1:
        disks = ">Disc: 1<" + disks

    return disks


def _parseTrackListing(disks):
    """Parse track listing HTML into sorted title and artist entries.

    Returns a (trackList, artistList, diskCount) tuple.  Entries have the
    form "DD-TT-text" (two-digit disc and track numbers); an artist entry is
    produced for every track, with empty text when the track names none.
    """
    trackList = []
    artistList = []
    diskCount = 0

    while searchFor(disks, ">Disc: ") != None:
        diskCount = diskCount + 1
        disks = searchForPlus(disks, ">Disc: ")
        diskid = _formatDiskId(_upTo(disks, "<"), diskCount)

        # Tracks of this disc end at the closing table tag when there is one.
        i = string.find(disks, "</table>")

        if i != -1:
            tracks = stripText(disks[0:i])
        else:
            tracks = disks

        trackIndex = 1

        # Each track is introduced by its number followed by ". ".
        while searchFor(tracks, ". ") != None:
            tracks = searchForPlus(tracks, ". ")

            if tracks[0:1] == "<":
                # Skip a leading sample link so the title text comes first.
                if string.find(tracks, "<A HREF=") == 0:
                    i = string.find(tracks, ">")
                    tracks = stripText(tracks[i+1:])

            i = string.find(tracks, "<")

            if i != -1:
                trackid = "%02d" % trackIndex
                track = stripText(tracks[0:i])
                j = string.find(track, ">")

                if j != -1:
                    track = stripText(track[j+1:])

                # "Title - Artist" carries a per-track artist.
                j = string.find(track, " - ")
                trackArtist = ""

                if j != -1:
                    trackArtist = stripText(track[j+3:])
                    track = stripText(track[0:j])

                trackList.append(diskid + "-" + trackid + "-" + track)
                trackIndex = trackIndex + 1

                if trackArtist != "":
                    trackArtist = convertArtist(trackArtist)
                    artistList.append(diskid + "-" + trackid + "-" + trackArtist)
                else:
                    artistList.append(diskid + "-" + trackid + "-")

            # With i == -1 this keeps `tracks` as is; the searchForPlus()
            # call at the top of the loop is what guarantees progress.
            tracks = tracks[i+1:]

    trackList.sort()
    artistList.sort()
    return trackList, artistList, diskCount


def _replaceAll(text, old, new):
    """Replace `old` with `new` repeatedly until `old` no longer occurs.

    `new` must not contain `old`, otherwise this would never terminate.
    """
    while string.find(text, old) != -1:
        text = string.replace(text, old, new)

    return text


def _stripOpenTags(text, opener):
    """Remove every tag in `text` that starts with `opener`, up to its ">".

    An unterminated tag loses only the `opener` text itself, which guarantees
    the loop always makes progress.
    """
    i = string.find(text, opener)

    while i != -1:
        j = string.find(text[i:], ">")

        if j == -1:
            text = text[0:i] + text[i+len(opener):]
        else:
            text = text[0:i] + text[i+j+1:]

        i = string.find(text, opener)

    return text


def _cleanComments(text):
    """Turn the raw product description HTML into plain text comments."""
    # Cut the description at the first piece of trailing page furniture.
    for stopper in ("</form>", "<hr noshade", "<span class=\"h1\">"):
        i = string.find(text, stopper)

        if i != -1:
            text = text[0:i]

    # Drop the first <span ...> tag before looking for the "... <" cut-off,
    # because removing it can change whether that cut-off matches.
    i = string.find(text, "<span")

    if i != -1:
        j = string.find(text[i:], ">")

        if j != -1:
            text = text[0:i] + text[i+j+1:]

    i = string.find(text, "... <")

    if i != -1:
        text = stripText(text[0:i+3])

    text = string.replace(text, "\n", "")

    # Collapse runs of spaces, then spaces directly after paragraph/line tags.
    text = _replaceAll(text, "  ", " ")
    text = _replaceAll(text, "<p> ", "<p>")
    text = _replaceAll(text, "<P> ", "<P>")
    text = _replaceAll(text, "<br> ", "<br>")
    text = _replaceAll(text, "<BR> ", "<BR>")

    for old, new in _COMMENT_REPLACEMENTS:
        text = string.replace(text, old, new)

    text = _replaceAll(text, "\n ", "\n")

    for opener in ("<a ", "<A ", "<img src", "<span"):
        text = _stripOpenTags(text, opener)

    return text


def extract():
    """Scrape an Amazon.ca music product page into the module level globals.

    Reads the page text from the global `source` (which must be set before
    the call) and consumes it section by section, so `source` holds only the
    unparsed remainder afterwards.  `fullDateFormat` is read but never set,
    and `image` is read before being set when the page has no registered
    original image, so both are expected to exist beforehand as well.

    Returns nothing; all results are stored in the globals declared below.
    Returns early, with only the defaults set, when the page reports the
    item as ineligible for Marketplace selling.

    NOTE(review): searchFor/searchForPlus/stripText come from
    scrapers.scrapers; this code assumes searchFor returns None when there is
    no match and that searchForPlus returns the text after the match.
    """
    global title,artist,label,composer,date,copyDate,running,artistLF
    global value,valueDate,copies,upc,isbn,lccn,dewey,userNumber
    global format,series,sound,rating,condition,category
    global location,keywords,played,pflag,eflag,comments
    global dateEntered,dataSource,cart,ordered
    global diskCount,artistURL,conductor
    global trackTitles,trackArtists,trackWriters,trackTimes
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    sound               = "Stereo"
    format              = "Audio CD"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    artistLF            = ""


    if string.find(source, "The item you indicated is ineligible for Amazon Marketplace selling.") != -1:
        return


    # An "identify" page only links to the product page: follow the link.
    if string.find(source, "sai-identify") != -1:
        source = searchForPlus(source, "sai-identify")
        i = string.find(source, "/ASIN/")
        # Back up to include the start of the enclosing link; clamp at 0 so
        # the slice never wraps around to the end of the page.
        source = source[max(i - 50, 0):]
        source = searchForPlus(source, "<a href=")
        url = _upTo(source, ">")
        source = _fetchPage(url)


    # Find marketplace pricing
    marketinfo = source

    if string.find(marketinfo, ">From Our Marketplace Sellers:<") != -1:
        marketinfo = searchForPlus(marketinfo, ">From Our Marketplace Sellers:<")


    # Find List price ("RRP:" is only tried when "List Price:" is absent)
    for tag in ("List Price:", "RRP:"):
        if string.find(marketinfo, tag) != -1:
            listprice = searchForPlus(marketinfo, tag)
            listprice = searchForPlus(listprice, "class=\"listprice\"")
            listprice = searchForPlus(listprice, ">")
            listprice = _upTo(listprice, "<")
            break


    # Find New/Used together
    i = string.find(marketinfo, "/offer-listing/")

    if i != -1:
        usedinfo = stripText(marketinfo[i:])
        usedinfo = searchForPlus(usedinfo, ">")
        usedinfo = _upTo(usedinfo, "</div>")

        # A combined "used & new" listing gives one count and one price,
        # which are reported for both the new and the used fields.
        if string.find(usedinfo, "used &amp; new") != -1:
            usedcount = _upTo(usedinfo, " ")
            newcount = usedcount
            usedprice = _priceAfterLink(usedinfo, "used &amp; new")
            newprice = usedprice

        if string.find(usedinfo, "used & new") != -1:
            usedcount = _upTo(usedinfo, " ")
            newcount = usedcount
            usedprice = searchForPlus(usedinfo, "used & new")
            usedprice = searchForPlus(usedprice, "class=\"price\"")
            usedprice = searchForPlus(usedprice, ">")
            usedprice = _upTo(usedprice, "<")
            newprice = usedprice

        # Separate per-condition links override the combined values.
        if string.find(usedinfo, ">See all used<") != -1:
            usedprice = _priceAfterLink(usedinfo, ">See all used<")

        if string.find(usedinfo, ">See all new<") != -1:
            newprice = _priceAfterLink(usedinfo, ">See all new<")

        if string.find(usedinfo, ">See all collectible<") != -1:
            collectibleprice = _priceAfterLink(usedinfo, ">See all collectible<")


    # Find Ranking
    if string.find(marketinfo, ">Amazon.ca Sales Rank:") != -1:
        rankinfo = searchForPlus(marketinfo, ">Amazon.ca Sales Rank:")
        rankinfo = searchForPlus(rankinfo, "#")
        salesrank = _upTo(rankinfo, " ")


    # Find Buyer Waiting
    if string.find(marketinfo, "buyer waiting!") != -1:
        buyerwaiting = "Y"


    # Find Image (first try)
    if string.find(source, "registerImage(\"original_image\"") != -1:
        source = searchForPlus(source, "registerImage(\"original_image\"")
        source = searchForPlus(source, "\"")
        image = _upTo(source, "\"")

        if string.find(image, "no-image-") != -1:
            image = ""

        image = string.replace(image, "_SCLZZ", "_SCMZZ")
        image = string.replace(image, "_AA400_", "_AA_")
        image = string.replace(image, "_AA140_", "_AA_")
        image = string.replace(image, "_AA280_", "_AA_")

        # Cut everything between a comma and the following "_AA" marker.
        i = string.find(image, ",")

        if i != -1:
            j = string.find(image[i:], "_AA")

            if j != -1:
                image = stripText(image[0:i] + image[i+j:])


    # Find Title
    source = searchForPlus(source, "<b class=\"sans\">")
    source = searchForPlus(source, "\"btAsinTitle\"")
    source = searchForPlus(source, ">")
    title = _upTo(source, "<")


    # Find Artist
    tag = "&field-artist"

    if string.find(source, tag) == -1:
        tag = "&field-keywords="

    if string.find(source, tag) != -1:
        source = searchForPlus(source, tag)
        source = searchForPlus(source, ">")
        artist = convertArtist(_upTo(source, "<"))


    # Find user rating
    tag = "/>See all "

    if string.find(source, tag) != -1:
        temp = searchForPlus(source, tag)

        # Look for the stars image in the text that is actually parsed.
        if string.find(temp, "/customer-reviews/ratings/stars") != -1:
            temp = searchFor(temp, "/customer-reviews/ratings/stars")
            temp = _upTo(temp, ".")

            if string.find(temp, "stars-") != -1:
                temp = searchForPlus(temp, "stars-")
                temp = string.replace(temp, "-", ".")

                if temp != "":
                    rating = temp + " Stars"


    # Find Image (fallback: an image link within the next 3000 characters)
    if image == "":
        tag = "images.amazon.com"
        i = string.find(source, tag)

        if i == -1 or i > 3000:
            tag = "images-amazon.com/"
            i = string.find(source, tag)

        if i != -1 and i < 3000:
            # Back up to include the start of the src attribute; clamp at 0
            # so a match near the start does not wrap around.
            image = source[max(i - 50, 0):]
            image = searchForPlus(image, " src=\"")
            image = _upTo(image, "\"")
            image = string.replace(image, ".LZZZ", ".MZZZ")
            image = string.replace(image, ".TZZZ", ".MZZZ")

            i = string.find(image, "_PE")

            if i != -1:
                j = string.find(image[i+1:], "_")
                image = stripText(image[0:i] + image[i+j+1:])

            if string.find(image, "ZZZ") == -1:
                image = ""


    # Find Price: first matching label wins, else fall back to the used price
    for tag in (">Our Price:<", ">Price:<", "List Price:<", ">RRP:<"):
        if string.find(source, tag) != -1:
            temp = searchForPlus(source, tag)
            temp = searchForPlus(temp, "class=\"price\"")
            temp = searchForPlus(temp, ">")
            value = _upTo(temp, "<")
            break
    else:
        value = usedprice


    # Find Format
    source = searchForPlus(source, ">Product Details<")
    details = _upTo(source, "</ul>")

    while searchFor(details, "<li>") != None:
        details = searchForPlus(details, "<li>")
        temp = _upTo(details, "</li>")

        # The format entry is the first item ending in ")" that is not a link.
        if temp.endswith(")"):
            if string.find(temp, "http:") != -1:
                continue

            if string.find(temp, "<b>") != -1:
                temp = searchForPlus(temp, "<b>")
                format = _upTo(temp, "<")

                # Find Date: the parenthesised text shortly after the format
                i = string.find(temp, "(")

                if i != -1 and i <= 100:
                    temp = searchForPlus(temp, "(")
                    date = _applyDateFormat(_upTo(temp, ")"), fullDateFormat)

            break


    # Find Composer (stored as "Last, First ...")
    if string.find(source, ">Composer:<") != -1:
        composer = searchForPlus(source, ">Composer:<")
        composer = searchForPlus(composer, "\">")
        composer = _upTo(composer, "<")

        i = string.rfind(composer, " ")

        if i != -1:
            composer = stripText(composer[i+1:]) + ", " + stripText(composer[0:i])


    # Find Label
    if string.find(source, ">Label:<") != -1:
        label = searchForPlus(source, ">Label:<")
        label = searchForPlus(label, ">")
        label = _upTo(label, "<")


    # Find Date (2nd attempt)
    if string.find(source, ">Released:") != -1:
        source = searchForPlus(source, ">Released:")
        date = _applyDateFormat(_upTo(source, "<"), fullDateFormat)


    # Find Tracks.  Both section kinds are parsed the same way; when a page
    # has both, the later "Listen to Samples" result replaces the earlier one.
    for marker in (">Track Listings<", ">Listen to Samples<"):
        if string.find(source, marker) != -1:
            source = searchForPlus(source, marker)
            disks = _upTo(source, "</div>")
            disks = _expandTrackListing(disks, marker)
            trackList, artistList, diskCount = _parseTrackListing(disks)
            trackTitles = array(trackList, String)
            trackArtists = array(artistList, String)


    # Find Comments
    if string.find(source, ">Product Description<") != -1:
        comments = ""
        source = searchForPlus(source, ">Product Description<")
        source = searchForPlus(source, "<div")
        source = searchForPlus(source, ">")
        i = string.find(source, "</div>")
        comments = _cleanComments(source[0:i])


    # Find category
    if string.find(source, ">Look for similar items by subject<") != -1:
        source = searchForPlus(source, ">Look for similar items by subject<")
        source = searchForPlus(source, "type=\"checkbox\"")
        source = searchForPlus(source, "value=\"")
        category = _upTo(source, "\"")





# Run the scraper, then always give the optional user exit script a chance to
# run, even when extract() raised.  The script is executed in this module's
# global namespace so it can read and adjust the extracted values.
try:
    extract()
finally:
    if os.path.exists("scrapers/awuserexit.py"):
        # Pass the namespace as an argument: "execfile(...) in globals()" is
        # only a discarded membership test, not the "exec ... in" statement.
        execfile("scrapers/awuserexit.py", globals())
