# freedb scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML
from    scrapers.scrapers import convertArtist
from    jarray import zeros, array
from    java.lang import String


def extract():
    """Fill the scraper's global result fields from a freedb page.

    The page text is read from the global ``source`` and is consumed as
    parsing proceeds: after each field is located, ``source`` is replaced
    by the text that follows the marker, so fields are expected in the
    order artist/title, total time, year, genre, comments, track table.

    If the text contains "# xmcd" it is treated as a raw freedb record and
    the work is delegated to extractRaw().

    Results are written to module globals (artist, title, running, date,
    category, comments, diskCount, trackTitles, trackArtists, trackTimes,
    plus a set of fixed defaults).  Nothing is returned.

    NOTE(review): searchFor, searchForPlus, stripText, stripHTML and
    convertArtist come from scrapers.scrapers.  From the way they are used
    here, searchForPlus appears to return the text after the marker and
    searchFor the text starting at the marker -- confirm against that
    module.
    """
    global title,artist,label,composer,date,copyDate,running,artistLF
    global value,valueDate,copies,upc,isbn,lccn,dewey,userNumber
    global format,series,sound,rating,condition,category
    global location,keywords,played,pflag,eflag,comments
    global dateEntered,dataSource,cart,ordered
    global diskCount,artistURL,conductor
    global trackTitles,trackArtists,trackWriters,trackTimes
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source


    # Raw format?
    if string.find(source, "# xmcd") != -1:
        extractRaw()
        return


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    sound               = "Stereo"
    format              = "CD"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    artistLF            = ""

    # Set to "Y" to turn "First Last" into "Last, First"
    switchArtist        = "N"
    buyerwaiting        = "N"
    available           = "Y"


    # Find Artist / Title
    # The heading holds "artist / title"; without a slash the whole
    # heading is the title and the artist becomes "Various".
    source = searchForPlus(source, "<h2>")
    i = string.find(source, "<")
    artist = stripHTML(source[0:i])

    i = string.rfind(artist, "/")

    if i != -1:
        title = artist[i+1:]
        artist = artist[0:i]
    else:
        title = artist
        artist = "Various"

    artist = convertArtist(artist)

    if switchArtist == "Y":
        i = string.rfind(artist, " ")

        if (i != -1):
            artist = stripText(artist[i+1:]) + ", " + stripText(artist[0:i])


    # Find running time
    if string.find(source, "total time:") != -1:
        source = searchForPlus(source, "total time:")
        i = string.find(source, "<")
        running = stripText(source[0:i])


    # Release date
    if string.find(source, "year:") != -1:
        source = searchForPlus(source, "year:")
        i = string.find(source, "<")
        date = stripText(source[0:i])


    # Category
    if string.find(source, "genre:") != -1:
        source = searchForPlus(source, "genre:")
        i = string.find(source, "<")
        category = stripText(source[0:i])


    # Comments
    if string.find(source, "<pre>") != -1:
        source = searchForPlus(source, "<pre>")
        i = string.find(source, "<")
        comments = stripText(source[0:i])


    # Find Tracks
    # No multi disk sets, freedb lists CDs separately
    if string.find(source, "<table") != -1:
        source = searchFor(source, "<table")
        i = string.find(source, "</table>")
        tracks = stripHTML(source[0:i])
        trackList = []
        artistList = []
        timeList = []
        diskCount = 1
        trackCount = 0

        # Each track row is located by the ".<" that follows its number.
        while (string.find(tracks, ".<") != -1):
            i = string.find(tracks, ".<")

            # Back up a few characters so the ">" in front of the track
            # number is included.  Clamp at zero: a negative start would
            # slice from the END of the string and drop the rows.
            start = i - 5

            if start < 0:
                start = 0

            tracks = tracks[start:]
            tracks = searchForPlus(tracks, ">")
            i = string.find(tracks, ".<")
            trackNum = stripText(tracks[0:i])

            tracks = searchForPlus(tracks, "=top>")
            i = string.find(tracks, "<")
            trackTime = stripText(tracks[0:i])

            tracks = searchForPlus(tracks, "<td>")
            i = string.find(tracks, "<")
            track = stripText(tracks[0:i])

            # Step to the next cell, if any, so the next ".<" search
            # starts past this row.
            if string.find(tracks, "=top>") != -1:
                tracks = searchFor(tracks, "=top>")

            # A track listed as "artist / title" carries its own artist
            i = string.find(track, " / ")

            trackArtist = ""

            if (i != -1):
                trackArtist = stripText(track[0:i])
                track = stripText(track[i+3:])

            # The scraped track number is text; "%02d" needs an integer.
            # Fall back to the running position when it is not numeric.
            trackCount = trackCount + 1

            try:
                trackNumber = string.atoi(trackNum)
            except (ValueError, TypeError):
                trackNumber = trackCount

            diskid = "%02d" % diskCount
            trackNum = "%02d" % trackNumber

            # Entries are keyed "disk-track-value"
            trackList.append(diskid + "-" + trackNum + "-" + track)
            timeList.append(diskid + "-" + trackNum + "-" + trackTime)

            if trackArtist != "":
                trackArtist = convertArtist(trackArtist)
                artistList.append(diskid + "-" + trackNum + "-" + trackArtist)
            else:
                artistList.append(diskid + "-" + trackNum + "-")

        trackList.sort()
        trackTitles = array(trackList, String)

        artistList.sort()
        trackArtists = array(artistList, String)

        timeList.sort()
        trackTimes = array(timeList, String)




def extractRaw():
    """Fill the scraper's global result fields from a raw freedb record.

    The record text is read from the global ``source``.  Track frame
    offsets and the disc length are taken from the comment header; artist
    and title from DTITLE=, copyright date from DYEAR=, category from
    DGENRE= and track titles from the TTITLEn= lines.  ``source`` is
    consumed from DTITLE= onward, so those keys are expected in that
    order.

    Results are written to module globals (running, artist, title,
    copyDate, category, diskCount, trackTitles, trackArtists, trackTimes,
    plus a set of fixed defaults).  Nothing is returned.

    Track times are computed from the difference between consecutive
    frame offsets at 75 frames per second; a track whose offsets are
    missing or not numeric gets an empty time instead of raising.

    NOTE(review): searchFor, searchForPlus, stripText, stripHTML and
    convertArtist come from scrapers.scrapers.  From the way they are used
    here, searchForPlus appears to return the text after the marker and
    searchFor the text starting at the marker -- confirm against that
    module.
    """
    global title,artist,label,composer,date,copyDate,running,artistLF
    global value,valueDate,copies,upc,isbn,lccn,dewey,userNumber
    global format,series,sound,rating,condition,category
    global location,keywords,played,pflag,eflag,comments
    global dateEntered,dataSource,cart,ordered
    global diskCount,artistURL,conductor
    global trackTitles,trackArtists,trackWriters,trackTimes
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # Parenthesised so the statement prints the same text under the
    # Python 2 print statement and also parses as a function call.
    print("FreeDB raw format")


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    sound               = "Stereo"
    format              = "CD"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    artistLF            = ""

    # Set to "Y" to turn "First Last" into "Last, First"
    switchArtist        = "N"
    buyerwaiting        = "N"
    available           = "Y"


    # Find track times
    # frameList collects the END frame of each track: the start offsets of
    # tracks 2..n, followed (below) by a frame derived from the disc
    # length.  startFrame holds the start offset of the first track.
    # A local scratch variable is used so the global "running" is only
    # ever set to a real running time.
    frameList = []
    startFrame = ""

    if string.find(source, "Track frame offsets:") != -1:
        offsets = searchForPlus(source, "Track frame offsets:")
        i = string.find(offsets, "Disc length:")

        if i != -1:
            offsets = offsets[0:i]

        offsets = stripText(offsets)

        # Offsets are listed one per "#" comment line
        offsets = searchForPlus(offsets, "#")
        i = string.find(offsets, "#")

        if i != -1:
            startFrame = stripText(offsets[0:i])
        else:
            startFrame = stripText(offsets)

        while (string.find(offsets, "#") != -1):
            offsets = searchForPlus(offsets, "#")
            i = string.find(offsets, "#")

            # Without a following "#" the rest of the text is the value;
            # slicing with -1 would chop its last character.
            if i != -1:
                frame = stripText(offsets[0:i])
            else:
                frame = stripText(offsets)

            if frame != "":
                frameList.append(frame)


    # Find running time
    if string.find(source, "Disc length:") != -1:
        running = searchForPlus(source, "Disc length:")
        i = string.find(running, "\n")
        running = stripText(running[0:i])
        i = string.find(running, " ")

        # Keep only the number in front of the unit text
        if i != -1:
            running = stripText(running[0:i])

        try:
            secs = string.atoi(running)
        except (ValueError, TypeError):
            running = running + " seconds"
        else:
            # End frame of the last track.  The extra 75 frames (one
            # second) is kept from the original implementation --
            # presumably rounding/padding, TODO confirm.
            frameList.append("%d" % ((secs * 75) + 75))
            mins, secs = divmod(secs, 60)
            running = ("%02d" % mins) + ":" + ("%02d" % secs)

    # Find Artist / Title
    # DTITLE holds "artist / title"; without a slash the whole value is
    # the title and the artist becomes "Various".
    source = searchForPlus(source, "DTITLE=")
    i = string.find(source, "\n")
    artist = stripHTML(source[0:i])

    i = string.rfind(artist, "/")

    if i != -1:
        title = artist[i+1:]
        artist = artist[0:i]
    else:
        title = artist
        artist = "Various"

    artist = convertArtist(artist)

    if switchArtist == "Y":
        i = string.rfind(artist, " ")

        if (i != -1):
            artist = stripText(artist[i+1:]) + ", " + stripText(artist[0:i])


    # Find copyright date
    if string.find(source, "DYEAR=") != -1:
        copyDate = searchForPlus(source, "DYEAR=")
        i = string.find(copyDate, "\n")
        copyDate = stripText(copyDate[0:i])


    # Find Category
    if string.find(source, "DGENRE=") != -1:
        category = searchForPlus(source, "DGENRE=")
        i = string.find(category, "\n")
        category = stripText(category[0:i])


    # Find Tracks
    # No multi disk sets, freedb lists CDs separately
    if string.find(source, "TTITLE0=") != -1:
        source = searchFor(source, "TTITLE0=")
        i = string.find(source, "EXTD=")
        tracks = stripHTML(source[0:i]) + "\n"
        trackList = []
        artistList = []
        trackTimeList = []
        diskCount = 1
        trackCount = 1

        # Loop on the TTITLE key itself rather than on "=", so a title
        # that contains "=" cannot produce a phantom extra track.
        while (string.find(tracks, "TTITLE") != -1):
            tracks = searchFor(tracks, "TTITLE")
            i = string.find(tracks, "=")
            key = stripText(tracks[0:i])
            tracks = searchForPlus(tracks, "=")
            i = string.find(tracks, "\n")
            track = stripText(tracks[0:i])

            # Tracks can span multiple lines, you could see multiple
            # TTITLE2= lines for example. Get all track title lines
            while (string.find(tracks, "TTITLE") != -1):
                nexttrack = searchFor(tracks, "TTITLE")
                i = string.find(nexttrack, "=")
                nextkey = stripText(nexttrack[0:i])

                if key != nextkey:
                    break

                # Continue from the continuation line's own "=", not from
                # the first "=" in the remaining text, which could be
                # inside the title just read.
                tracks = searchForPlus(nexttrack, "=")
                i = string.find(tracks, "\n")
                track = track + stripText(tracks[0:i])

            # Raw records carry no per-track artist
            trackArtist = ""

            diskid = "%02d" % diskCount
            trackid = "%02d" % trackCount

            trackList.append(diskid + "-" + trackid + "-" + track)
            artistList.append(diskid + "-" + trackid + "-" + trackArtist)

            # Track length = end frame - start frame, at 75 frames/second.
            # There may be fewer offsets than titles (header missing or
            # truncated); such tracks get an empty time.
            trackTime = ""

            if trackCount <= len(frameList):
                endFrame = frameList[trackCount-1]

                try:
                    fromF = string.atoi(startFrame)
                    toF = string.atoi(endFrame)
                    secs = divmod(toF - fromF, 75)[0]
                    mins, secs = divmod(secs, 60)
                    trackTime = ("%d" % mins) + ":" + ("%02d" % secs)
                except (ValueError, TypeError):
                    trackTime = ""

                # This track's end is the next track's start
                startFrame = endFrame

            # Entries are keyed "disk-track-value"
            trackTimeList.append(diskid + "-" + trackid + "-" + trackTime)

            trackCount = trackCount + 1

        trackList.sort()
        trackTitles = array(trackList, String)

        artistList.sort()
        trackArtists = array(artistList, String)

        # Only formatted per-track entries are published; unused raw
        # frame values never reach trackTimes.
        trackTimeList.sort()
        trackTimes = array(trackTimeList, String)



# Run the scraper.  Whether or not extraction succeeds, run the optional
# user exit script, if present, in this module's namespace.
try:
    extract()
finally:
    if os.path.exists("scrapers/awuserexit.py"):
        # Pass the namespace as an argument; "execfile(...) in globals()"
        # only tested whether execfile's None result was a key of globals().
        execfile("scrapers/awuserexit.py", globals())
