# Amazon music scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import convertArtist
from    jarray import zeros, array
from    java.lang import String


def _textBefore(text, terminator):
    """Return the part of text that precedes the first terminator.

    When terminator does not occur the whole of text is returned, instead of
    slicing with the -1 reported by string.find (which would silently drop
    the last character).
    """
    end = string.find(text, terminator)

    if end == -1:
        return text

    return text[0:end]


def _cleanField(text):
    """Strip text, replace &nbsp; entities with spaces, then strip again."""
    return stripText(string.replace(stripText(text), "&nbsp;", " "))


def _fieldAfter(page, marker, anchor=None):
    """Return the cleaned text of the element that follows marker in page.

    The value is whatever lies between the next ">" and the following "<".
    When anchor is given, the search first skips past anchor (looked up after
    marker) before locating that ">".  Callers check beforehand that marker
    occurs in page.
    """
    rest = searchForPlus(page, marker)

    if anchor is not None:
        rest = searchForPlus(rest, anchor)

    rest = searchForPlus(rest, ">")
    return _cleanField(_textBefore(rest, "<"))


def _lastNameFirst(name):
    """Move the last word of name to the front: "A B C" becomes "C, A B"."""
    split = string.rfind(name, " ")

    if split == -1:
        return name

    return stripText(name[split+1:]) + ", " + stripText(name[0:split])


def _removeTags(text, opener):
    """Delete every tag in text that starts with opener, through its ">".

    A tag with no closing ">" is cut off together with the rest of the text;
    without that check the removal would never make progress and the loop
    would not terminate.
    """
    start = string.find(text, opener)

    while start != -1:
        end = string.find(text, ">", start)

        if end == -1:
            return text[0:start]

        text = text[0:start] + text[end+1:]
        # Search from the beginning again: joining the two halves can form a
        # new occurrence that begins before the cut.
        start = string.find(text, opener)

    return text


def _removeHeadSections(text):
    """Delete every <HEAD>...</HEAD> section from text.

    If the closing tag is missing only the opening "<HEAD>" is removed.
    """
    start = string.find(text, "<HEAD>")

    while start != -1:
        end = string.find(text, "</HEAD>", start)

        if end == -1:
            text = text[0:start] + text[start+6:]
        else:
            text = text[0:start] + text[end+7:]

        start = string.find(text, "<HEAD>")

    return text


def _commentsToText(markup):
    """Turn the album note markup into plain text with newlines."""
    for opener in ("<a ", "<IMG "):
        markup = _removeTags(markup, opener)

    markup = _removeHeadSections(markup)

    for opener in ("<body ", "<table", "<td", "<th", "<font"):
        markup = _removeTags(markup, opener)

    # Order matters: "<p><p>" has to be handled before the single "<p>".
    replacements = [
        ("<BR>", "\n"),
        ("<br>", "\n"),
        ("<p><p>", "\n\n"),
        ("<p>", "\n\n"),
        ("<P>", "\n\n"),
        ("</p>", "\n"),
        ("<center>", ""),
        ("</center>", ""),
        ("<HTML>", ""),
        ("</TD>", ""),
        ("</td>", ""),
        ("</TH>", ""),
        ("</th>", ""),
        ("<tr>", ""),
        ("</tr>", "\n"),
        ("<TR>", ""),
        ("</TR>", "\n"),
        ("</TABLE>", "\n"),
        ("</table>", "\n"),
        ("<b>", ""),
        ("</b>", ""),
        ("<i>", ""),
        ("</i>", ""),
        ("</font>", ""),
        ("</FONT>", ""),
    ]

    for old, new in replacements:
        markup = string.replace(markup, old, new)

    return markup


def _tidyTrackArtist(text):
    """Flatten line breaks, tabs and &nbsp; in text and collapse spaces."""
    text = stripText(text)

    for junk in ("<BR>", "\n", "\t", "&nbsp;"):
        text = string.replace(text, junk, " ")

    # Repeat until no run of spaces is left, however long it was.
    while string.find(text, "  ") != -1:
        text = string.replace(text, "  ", " ")

    return stripText(text)


def _parseTracks(disks):
    """Parse the track details section of the page.

    Returns a tuple (diskCount, titles, artists, writers).  The three lists
    are sorted and hold one "DD-TT-text" entry per track, where DD is the
    two-digit disc number and TT the two-digit track number on that disc.
    """
    # A page without disc headings is treated as a single disc.
    if string.find(disks, ">Disc: ") == -1:
        disks = ">Disc: 1<" + disks

    titles = []
    artists = []
    writers = []
    count = 0

    while searchFor(disks, ">Disc: ") is not None:
        count = count + 1
        disks = searchForPlus(disks, ">Disc: ")
        diskText = stripText(_textBefore(disks, "<"))

        # %d needs a number; convert explicitly rather than relying on the
        # interpreter coercing a string.  A heading that is not a plain
        # number falls back to the running disc count.
        try:
            diskNumber = int(diskText)
        except ValueError:
            diskNumber = count

        diskid = "%02d" % diskNumber

        # NOTE(review): tracks runs to the end of the section, so it is not
        # limited to the current disc, and the "> by <" / performer lookups
        # below scan ahead past the current track - confirm against a
        # multi-disc page.
        tracks = searchForPlus(disks, ">Individual Track Details:<")
        trackIndex = 1

        while searchFor(tracks, ".</FONT") is not None:
            tracks = searchForPlus(tracks, ".</FONT")
            prefix = diskid + "-" + ("%02d" % trackIndex) + "-"

            track = searchForPlus(tracks, "CLASS=\"piecename\">")
            track = stripText(_textBefore(track, "<"))

            trackComposer = ""

            if string.find(tracks, "> by <") != -1:
                trackComposer = searchForPlus(tracks, "> by <")
                trackComposer = searchForPlus(trackComposer, ">")
                trackComposer = stripText(_textBefore(trackComposer, "<"))
                trackComposer = _lastNameFirst(trackComposer)

            trackArtist = ""

            if string.find(tracks, ">Performer:<") != -1:
                trackArtist = searchForPlus(tracks, ">Performer:<")
                trackArtist = searchForPlus(trackArtist, ">")
                trackArtist = _tidyTrackArtist(_textBefore(trackArtist, "<FONT "))
            elif string.find(tracks, ">Orchestra/Ensemble:<") != -1:
                trackArtist = searchForPlus(tracks, ">Orchestra/Ensemble:<")
                trackArtist = searchForPlus(trackArtist, ">")
                trackArtist = _tidyTrackArtist(_textBefore(trackArtist, "<"))

            titles.append(prefix + track)
            trackIndex = trackIndex + 1

            if trackArtist != "":
                trackArtist = convertArtist(trackArtist)

            # Every track gets an entry in each list, even when empty.
            artists.append(prefix + trackArtist)
            writers.append(prefix + trackComposer)

            if string.find(tracks, "CLASS=\"text\"") != -1:
                tracks = searchForPlus(tracks, "CLASS=\"text\"")

    titles.sort()
    artists.sort()
    writers.sort()
    return (count, titles, artists, writers)


def extract():
    """Scrape album details out of the page held in the global source.

    Reads the globals source (the page markup) and fullDateFormat, and stores
    what it finds in the result globals declared below (title, image, date,
    label, composer, conductor, artist, sound, running, value, format,
    comments, diskCount and the track arrays).  Fields whose marker is absent
    from the page are left untouched, apart from the defaults set at the
    start.  source is advanced as the page is consumed, so the order of the
    sections below is significant.
    """
    global title,artist,label,composer,date,copyDate,running,artistLF
    global value,valueDate,copies,upc,isbn,lccn,dewey,userNumber
    global format,series,sound,rating,condition,category
    global location,keywords,played,pflag,eflag,comments
    global dateEntered,dataSource,cart,ordered
    global diskCount,artistURL,conductor
    global trackTitles,trackArtists,trackWriters,trackTimes
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    sound               = "Stereo"
    format              = "CD"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    artistLF            = ""

    noline = "CLASS=\"noline\""


    # Find Title
    source = searchForPlus(source, ">Album Details:<")
    source = searchForPlus(source, "\">")
    title = stripText(_textBefore(source, "<"))


    # Find Image
    if string.find(source, "<!-- Coverart") != -1:
        source = searchForPlus(source, "<!-- Coverart")
        source = searchForPlus(source, " SRC=\"")
        image = "http://www.arkivmusic.com" + stripText(_textBefore(source, "\""))

        # The placeholder picture counts as "no image".
        if string.find(image, "music-no-image") != -1:
            image = ""


    # Find Date
    if string.find(source, ">Release Date:<") != -1:
        date = _fieldAfter(source, ">Release Date:<")

        # Without the full date format only the part after the last "/" is kept.
        if fullDateFormat == "false":
            slash = string.rfind(date, "/")

            if slash != -1:
                date = stripText(date[slash+1:])


    # Find Label
    if string.find(source, ">Label:<") != -1:
        # Look for the value after the label marker, as the other fields do.
        label = _fieldAfter(source, ">Label:<", noline)


    # Find Composer
    if string.find(source, ">Composer:") != -1:
        composer = _lastNameFirst(_fieldAfter(source, ">Composer:", noline))


    # Find Conductor
    if string.find(source, ">Conductor:") != -1:
        conductor = _fieldAfter(source, ">Conductor:", noline)


    # Find Artist (orchestra/ensemble takes precedence over performer)
    artist = ""

    if string.find(source, ">Orchestra/Ensemble:") != -1:
        artist = _fieldAfter(source, ">Orchestra/Ensemble:", noline)
    elif string.find(source, ">Performer:") != -1:
        artist = _fieldAfter(source, ">Performer:", noline)


    # Find Sound
    if string.find(source, ">Recorded in:") != -1:
        sound = _fieldAfter(source, ">Recorded in:")

        if string.find(source, ">Spars Code:") != -1:
            spars = _fieldAfter(source, ">Spars Code:")

            if spars != "n/a":
                sound = sound + " (" + spars + ")"


    # Find Running Time
    if string.find(source, ">Length:") != -1:
        running = _fieldAfter(source, ">Length:")


    # Find Price and Format
    if string.find(source, "<!-- forsale:") != -1:
        # The price is read before source is advanced for the format.
        priced = searchFor(searchForPlus(source, "<!-- forsale:"), "$")

        if priced is not None:
            value = stripText(_textBefore(priced, "<"))

        source = searchForPlus(source, "<!-- forsale:")
        source = searchForPlus(source, "CLASS=\"format\">")
        format = stripText(_textBefore(source, "<"))


    # Find Comments
    comments = ""

    if string.find(source, "<!-- ALBUM NOTE-->") != -1:
        note = searchForPlus(source, "<!-- ALBUM NOTE-->")
        note = stripText(_textBefore(note, "<!-- ALBUM DETAIL SECTION ENDS HERE -->"))

        if string.find(note, "CLASS=\"listdata\">") != -1:
            note = searchForPlus(note, "CLASS=\"listdata\">")
            comments = _commentsToText(_textBefore(note, "</SPAN>"))


    # Find Tracks
    if string.find(source, "<!-- TRACK DETAILS SECTION BEGINS HERE -->") != -1:
        source = searchForPlus(source, "<!-- TRACK DETAILS SECTION BEGINS HERE -->")
        section = stripText(_textBefore(source, "<!-- TRACK DETAILS SECTION ENDS HERE -->"))

        diskCount, trackList, artistList, composerList = _parseTracks(section)

        trackTitles = array(trackList, String)
        trackArtists = array(artistList, String)
        trackWriters = array(composerList, String)





# Run the scraper.  Whether or not extraction succeeds, the optional user exit
# script is then executed, if present, in this script's global namespace so it
# can see and adjust the extracted values.
try:
    extract()
finally:
    if os.path.exists("scrapers/awuserexit.py"):
        # Pass the namespace as an argument: "execfile(...) in globals()" only
        # tested whether execfile's None result is a key of globals().
        execfile("scrapers/awuserexit.py", globals())
