# CD Universe music scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import convertArtist
from    jarray import zeros, array
from    java.lang import String


def extract():
    """Scrape one CD Universe product page into the module-level record fields.

    The page HTML is read from the global ``source``.  Results are written to
    the globals declared below (title, artist, label, dates, prices, track
    arrays, comments, rating, ...).  Only the defaults at the top of the
    function and ``keywords`` are assigned unconditionally; every other field
    is assigned only when its marker text is found on the page.

    Sections are parsed strictly in page order.  Several of them rebind
    ``source`` to the text that follows the match, so later sections only see
    the remainder of the page and the global ``source`` is left trimmed when
    the function returns.

    Globals read: ``source``, ``fullDateFormat``, ``comments`` (the scraped
    notes/review text is appended to the existing ``comments`` value).
    """
    global title,artist,label,composer,date,copyDate,running,artistLF
    global value,valueDate,copies,upc,isbn,lccn,dewey,userNumber
    global format,series,sound,rating,condition,category
    global location,keywords,played,pflag,eflag,comments
    global dateEntered,dataSource,cart,ordered
    global diskCount,artistURL,conductor
    global trackTitles,trackArtists,trackWriters,trackTimes
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    sound               = "Stereo"
    format              = "CD"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    artistLF            = ""
    conductor           = ""


    # Find marketplace pricing.  These lookups use their own reference to the
    # full page so they are unaffected by the trimming of source further down.
    marketinfo = source


    # Find List price
    i = string.find(marketinfo, ">List Price<")

    if i != -1:
        listprice = searchForPlus(marketinfo, ">List Price<")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])


    # Find Ranking
    i = string.find(marketinfo, ">All Time Sales Rank<")

    if i != -1:
        usedinfo = searchForPlus(marketinfo, ">All Time Sales Rank<")
        usedinfo = searchForPlus(usedinfo, ">&nbsp;&nbsp;")
        i = string.find(usedinfo, "&")
        salesrank = stripText(usedinfo[0:i])


    # Main Extraction
    #
    # NOTE(review): this used to read
    #     soyrce = searchForPlus(source, "<table cellPadding=\"4\"")
    # i.e. the trimmed page was stored in a misspelled, never-used local, so
    # the page has in practice never been trimmed here.  The dead assignment
    # is dropped (assumes searchForPlus has no side effects -- confirm).  It is
    # deliberately NOT turned into "source = ...": the 6000 character limit in
    # the image search below is measured against the untrimmed page.


    # Find Image.  The cover art path is only trusted when it appears near
    # the top of the page (within the first 6000 characters).
    tag = "/CDUCoverArt/"
    i = string.find(source, tag)

    if i == -1 or i > 6000:
        tag = "/CDUCoverart/"
        i = string.find(source, tag)

    if i == -1 or i > 6000:
        tag = "/MuzeAudioArt/"
        i = string.find(source, tag)

    if i != -1 and i < 6000:
        # Back up 100 characters so the search for src=" below starts ahead
        # of the art path.  The start is clamped at 0: a negative start would
        # slice relative to the END of the page instead.
        image = stripText(source[max(i - 100, 0):])
        image = searchForPlus(image, "src=\"")
        i = string.find(image, "\"")
        image = stripText(image[0:i])

        # A placeholder picture is treated as "no image".
        if string.find(image, "no-image-avail") != -1:
            image = ""


    # Find Title
    i = string.find(source, "size=3><strong>")

    if i != -1:
        source = searchForPlus(source, "size=3><strong>")
        i = string.find(source, "<")
        title = stripText(source[0:i])

        # Drop a trailing media suffix from the title.
        if title.endswith(" CD"):
            title = stripText(title[0: len(title)-3])
            format = "CD"


    # Find Artist
    i = string.find(source, "size=\"2\"><b>")

    if i != -1:
        source = searchForPlus(source, "size=\"2\"><b>")

        if source.startswith("<a href"):
            # Linked artist: the link's HT_Search_Info query parameter is
            # stored as artistLF after undoing the URL encoding of ",", "&"
            # and " ".
            i = string.find(source, "&HT_Search_Info=")

            if i != -1:
                artistLF = searchForPlus(source, "&HT_Search_Info=")
                i = string.find(artistLF, "\"")
                artistLF = stripText(artistLF[0:i])
                i = string.find(artistLF, "&cart=")

                if i != -1:
                    artistLF = stripText(artistLF[0:i])

                artistLF = string.replace(artistLF, "%2C", ",")
                artistLF = string.replace(artistLF, "%26", "&")
                artistLF = string.replace(artistLF, "+", " ")

            source = searchForPlus(source, "\">")
            i = string.find(source, "<")
            artist = stripText(source[0:i])
            artist = convertArtist(artist)
        else:
            # Plain text artist, used as is.
            i = string.find(source, "<")
            artist = stripText(source[0:i])


    # Find Price: the first of sale, regular or "our" price found on the page.
    i = string.find(source, ">Sale Price:")

    if i == -1:
        i = string.find(source, ">Regular Price:")

    if i == -1:
        i = string.find(source, ">Our Price:")

    if i != -1:
        source = source[i:]
        source = searchFor(source, "$")
        i = string.find(source, "<")
        value = stripText(source[0:i])


    # Find keywords: every link text in the Category row, comma separated.
    # The first one also becomes the category.
    keywords = ""
    i = string.find(source, ">Category<")

    if i != -1:
        temp = searchForPlus(source, ">Category<")
        i = string.find(temp, "</tr>")
        temp = stripText(temp[0:i])

        while searchFor(temp, "<a href") is not None:
            temp = searchForPlus(temp, "<a href")
            temp = searchForPlus(temp, "\">")
            i = string.find(temp, "<")

            if keywords == "":
                keywords = stripText(temp[0:i])
                category = keywords
            else:
                keywords = keywords + ", " + stripText(temp[0:i])


    # Find Label
    i = string.find(source, ">Label<")

    if i != -1:
        label = searchForPlus(source, ">Label<")
        label = searchForPlus(label, "\">")
        i = string.find(label, "<")
        label = stripText(label[0:i])


    # Find Copyright date
    i = string.find(source, ">Orig Year<")

    if i != -1:
        copyDate = searchForPlus(source, ">Orig Year<")
        copyDate = searchForPlus(copyDate, "<td>")
        i = string.find(copyDate, "<")
        copyDate = stripText(copyDate[0:i])


    # Find Release date
    i = string.find(source, ">Street Date<")

    if i != -1:
        date = searchForPlus(source, ">Street Date<")
        date = searchForPlus(date, "<td>")
        i = string.find(date, "<")
        date = stripText(date[0:i])

        # Without full date format only the last space separated word of the
        # street date is kept.
        if fullDateFormat == "false":
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])


    # Find Sound
    i = string.find(source, ">Mono/Stereo<")

    if i != -1:
        sound = searchForPlus(source, ">Mono/Stereo<")
        sound = searchForPlus(sound, "<td>")
        i = string.find(sound, "<")
        sound = stripText(sound[0:i])


    # Find Running time
    i = string.find(source, ">Recording Time<")

    if i != -1:
        running = searchForPlus(source, ">Recording Time<")
        running = searchForPlus(running, "<td>")
        i = string.find(running, "</td>")
        running = stripText(running[0:i])
        running = string.replace(running, "<nobr>", "")
        running = string.replace(running, "</nobr>", "")


    # Find Tracks.  Entries are built as "<disc>-<track>-<text>" with two
    # digit disc and track numbers, so a plain sort orders them by disc and
    # then by track.
    i = string.find(source, "> Track Listing")

    if i != -1:
        source = searchForPlus(source, "> Track Listing")
        source = searchForPlus(source, "<tr><td colspan=\"4\"")
        i = string.find(source, "</table>")
        disks = stripText(source[0:i])

        # A listing without a disc 1 header gets one, so the disc loop below
        # handles both layouts.
        if string.find(disks, ">DISC 1:<") == -1:
            disks = ">DISC 1:<" + disks

        trackList = []
        artistList = []
        diskCount = 0

        while searchFor(disks, ">DISC ") is not None:
            diskCount = diskCount + 1
            disks = searchForPlus(disks, ">DISC ")
            i = string.find(disks, ":")
            # %d needs a number; the disc id scraped from the page is text,
            # so convert it explicitly instead of relying on the interpreter
            # to coerce a string.
            diskid = "%02d" % int(stripText(disks[0:i]))

            # This disc's rows run up to the next disc header.
            i = string.find(disks, ">DISC")
            tracks = stripText(disks[0:i])
            trackIndex = 1

            while searchFor(tracks, "<td><b>") is not None:
                tracks = searchForPlus(tracks, "<td><b>")
                tracks = searchForPlus(tracks, "<td>")
                i = string.find(tracks, "<")

                trackid = "%02d" % trackIndex
                track = stripText(tracks[0:i])

                # "Title - Artist" is split into title and track artist,
                # unless the part after " - " starts with "(", in which case
                # the whole text stays the title.
                i = string.find(track, " - ")
                trackArtist = ""

                if i != -1:
                    trackArtist = stripText(track[i+3:])

                    if trackArtist.startswith("("):
                        trackArtist = ""
                    else:
                        track = stripText(track[0:i])

                trackList.append(diskid + "-" + trackid + "-" + track)
                trackIndex = trackIndex + 1

                if trackArtist != "":
                    trackArtist = convertArtist(trackArtist)
                    artistList.append(diskid + "-" + trackid + "-" + trackArtist)
                else:
                    artistList.append(diskid + "-" + trackid + "-")

        trackList.sort()
        trackTitles = array(trackList, String)
        artistList.sort()
        trackArtists = array(artistList, String)


    # Find Classical Works.  When this section is present its entries
    # replace the track arrays built from the track listing above.
    i = string.find(source, ">Detailed Work Information<")

    if i != -1:
        source = searchForPlus(source, ">Detailed Work Information<")
        source = searchForPlus(source, "<table")
        i = string.find(source, "</table>")
        works = stripText(source[0:i])

        trackList = []
        artistList = []
        writerList = []
        timeList = []
        # All works are filed under disc 1.
        diskid = "01"
        trackIndex = 1

        while searchFor(works, ".</b>") is not None:
            works = searchForPlus(works, ".</b>")
            i = string.find(works, "<td COLSPAN=\"2\">")
            work = stripText(works[0:i])
            trackid = "%02d" % trackIndex
            work = searchForPlus(work, "<span class=h3>")
            i = string.find(work, "<")
            track = stripText(work[0:i])

            # Running time of the work, given in parentheses.
            trackTime = ""
            i = string.find(work, "&nbsp;(")

            if i != -1:
                trackTime = searchForPlus(work, "&nbsp;(")
                i = string.find(trackTime, ")")
                trackTime = stripText(trackTime[0:i])

            # Composer, performer and conductor: the text of the first link
            # following the respective heading.
            trackWriter = ""
            i = string.find(work, ">Composer")

            if i != -1:
                trackWriter = searchForPlus(work, ">Composer")
                trackWriter = searchForPlus(trackWriter, "<a href")
                trackWriter = searchForPlus(trackWriter, "\">")
                i = string.find(trackWriter, "<")
                trackWriter = stripText(trackWriter[0:i])

            trackArtist = ""
            i = string.find(work, ">Performer")

            if i != -1:
                trackArtist = searchForPlus(work, ">Performer")
                trackArtist = searchForPlus(trackArtist, "<a href")
                trackArtist = searchForPlus(trackArtist, "\">")
                i = string.find(trackArtist, "<")
                trackArtist = stripText(trackArtist[0:i])

            trackConductor = ""
            i = string.find(work, ">Conductor")

            if i != -1:
                trackConductor = searchForPlus(work, ">Conductor")
                trackConductor = searchForPlus(trackConductor, "<a href")
                trackConductor = searchForPlus(trackConductor, "\">")
                i = string.find(trackConductor, "<")
                trackConductor = stripText(trackConductor[0:i])

            trackList.append(diskid + "-" + trackid + "-" + track)
            artistList.append(diskid + "-" + trackid + "-" + trackArtist)
            writerList.append(diskid + "-" + trackid + "-" + trackWriter)
            timeList.append(diskid + "-" + trackid + "-" + trackTime)

            # Collect each conductor once, separated by " / ".
            if trackConductor != "":
                if string.find(conductor, trackConductor) == -1:
                    if conductor == "":
                        conductor = trackConductor
                    else:
                        conductor = conductor + " / " + trackConductor

            trackIndex = trackIndex + 1

        trackList.sort()
        trackTitles = array(trackList, String)
        artistList.sort()
        trackArtists = array(artistList, String)
        writerList.sort()
        trackWriters = array(writerList, String)
        timeList.sort()
        trackTimes = array(timeList, String)


    # Find Comments: the Notes block followed by the Review block.
    tempcomment = ""
    i = string.find(source, "> Notes")

    if i != -1:
        source = searchForPlus(source, "> Notes")
        source = searchForPlus(source, "<div ")
        source = searchForPlus(source, ">")
        i = string.find(source, "</div")
        tempcomment = source[0:i]

    i = string.find(source, "> Review")

    if i != -1:
        source = searchForPlus(source, "> Review")
        source = searchForPlus(source, "<div ")
        source = searchForPlus(source, ">")
        i = string.find(source, "</div")

        if tempcomment == "":
            tempcomment = stripText(source[0:i])
        else:
            tempcomment = tempcomment + "<P>" + stripText(source[0:i])

    if tempcomment != "":
        # Cut the text off at the customer reviews and at the small grey
        # footer, when present.
        i = string.find(tempcomment, "> Customer Reviews")

        if i != -1:
            tempcomment = tempcomment[0:i+1]

        i = string.find(tempcomment, "<br><br><font size=1 color=#666666>")

        if i != -1:
            tempcomment = tempcomment[0:i]

        # Remove tags that carry attributes, from the opening text through
        # the closing ">".
        for opener in ("<img src", "<span class=", "<hr "):
            while searchFor(tempcomment, opener) is not None:
                i = string.find(tempcomment, opener)

                if i == -1:
                    break

                j = string.find(tempcomment[i:], ">")

                if j == -1:
                    # Unterminated tag (e.g. cut in half by the truncation
                    # above).  Nothing would be removed and the loop would
                    # never end, so drop the dangling fragment instead.
                    tempcomment = tempcomment[0:i]
                    break

                tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        # Line breaks in the HTML source carry no meaning; tabs become spaces.
        tempcomment = string.replace(tempcomment, "\n", "")
        tempcomment = string.replace(tempcomment, "\r", "")
        tempcomment = string.replace(tempcomment, "\t", " ")

        # Turn the remaining markup into plain text, applied in this order.
        for markup, plain in (
                ("</a>", ""),
                ("</A>", ""),
                ("<p>", "\n\n"),
                ("<P>", "\n\n"),
                ("</p>", ""),
                ("</P>", ""),
                ("<BR>", "\n"),
                ("<br>", "\n"),
                ("<br />", "\n"),
                ("<i>", ""),
                ("</i>", ""),
                ("<I>", ""),
                ("</I>", ""),
                ("<b>", ""),
                ("</b>", ""),
                ("</font>", ""),
                ("</span>", ""),
                ("&copy;", ""),
                ("&quot;", "\""),
                ("&ccedil;", ""),
                ("<strong>", ""),
                ("</strong>", ""),
                ):
            tempcomment = string.replace(tempcomment, markup, plain)

        # Strip the spaces that directly follow a line break, one at a time.
        while searchFor(tempcomment, "\n ") is not None:
            i = string.find(tempcomment, "\n ")
            tempcomment = tempcomment[0:i+1] + tempcomment[i+2:]

        comments = comments + tempcomment


    # Find user rating: the text between the heading and the word "out".
    i = string.find(source, ">Average Rating:<")

    if i != -1:
        temp = searchForPlus(source, ">Average Rating:<")
        temp = searchForPlus(temp, ">")
        i = string.find(temp, "out")
        rating = stripText(temp[0:i])





# Run the scraper.  The optional user exit script is executed afterwards even
# when extract() raises; the exception still propagates once it has run.
try:
    extract()
finally:
    if os.path.exists("scrapers/awuserexit.py"):
        # execfile() without explicit dictionaries runs the script in the
        # calling namespace, so it sees the fields set by extract().  The
        # former trailing "in globals()" was only a discarded membership test
        # on execfile's return value and never selected a namespace.
        execfile("scrapers/awuserexit.py")
