# fishpond.com.au music scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import convertArtist
from    jarray import zeros, array
from    java.lang import String


def extract():
    """Scrape a fishpond.com.au music product page into the result globals.

    Takes no arguments and returns nothing.  The page HTML is read from the
    global ``source``, which is re-sliced as extraction moves down the page,
    so the order of the sections below matters.  Results are published
    through the globals declared below; optional fields whose markers are
    absent from the page are left untouched.

    If the page carries one of the "no usable product page" markers, an
    exception is deliberately provoked so the lookup fails.

    NOTE(review): searchFor / searchForPlus / stripText / convertArtist come
    from scrapers.scrapers and are not visible here.  Judging from their use
    in this function, searchFor appears to return None when the marker is
    missing, and searchForPlus presumably returns the text following the
    marker -- confirm against that module.
    """
    global title,artist,label,composer,date,copyDate,running,artistLF
    global value,valueDate,copies,upc,isbn,lccn,dewey,userNumber
    global format,series,sound,rating,condition,category
    global location,keywords,played,pflag,eflag,comments
    global dateEntered,dataSource,cart,ordered
    global diskCount,artistURL,conductor
    global trackTitles,trackArtists,trackWriters,trackTimes
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    sound               = "Stereo"
    format              = "CD"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    artistLF            = ""


    # Bail out on pages that are not a single product page.  The first
    # searchFor leaves None in source (marker never matches); the second call
    # on that None is presumably what raises -- hence "force exception".
    for marker in ("> - did not match any products.<",
                   "Products meeting the search criteria"):
        if string.find(source, marker) != -1:
            source = searchFor(source, "force exception")
            source = searchFor(source, "force exception")
            return


    # Main extraction
    source = searchForPlus(source, "<!-- body_text //-->")


    # Find Artist
    if string.find(source, "Music_result.php?author=") != -1:
        source = searchForPlus(source, "Music_result.php?author=")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        artist = stripText(source[0:i])
        artist = convertArtist(artist)


    # Find Title
    source = searchForPlus(source, "class=\"pageHeading\"")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripText(source[0:i])

    # Drop a trailing "[...]" suffix from the title, if there is one.
    i = string.rfind(title, "[")

    if i != -1:
        title = stripText(title[0:i])


    # Find Price
    i = string.find(source, "Our Price:")

    if i != -1:
        source = source[i:]
        source = searchFor(source, "$")
        i = string.find(source, "<")
        value = stripText(source[0:i])


    # Find Attributes
    # Works on a private copy (attrs) so source is not advanced here.
    if string.find(source, ">Format:") != -1:
        attrs = searchForPlus(source, ">Format:")
        attrs = searchForPlus(attrs, "<td ")
        attrs = searchForPlus(attrs, "<br>")
        i = string.find(attrs, "</td>")
        attrs = stripText(attrs[0:i])

        # Find Release date
        i = string.find(attrs, "<")
        date = stripText(attrs[0:i])

        if fullDateFormat == "false":
            # Keep only the text after the last space (presumably the year).
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])

        # Find Label
        attrs = searchForPlus(attrs, "<br>")
        i = string.find(attrs, "<")
        label = stripText(attrs[0:i])

        # Find Format (two <br>-separated entries further on)
        attrs = searchForPlus(attrs, "<br>")
        attrs = searchForPlus(attrs, "<br>")
        i = string.find(attrs, "<")
        format = stripText(attrs[0:i])


    # Find Image
    # Three alternative markers are tried in order; only the first one that
    # is present on the page is used.
    if string.find(source, "<a href=\"http://images.") != -1:
        source = searchFor(source, "<a href=\"http://images.")
        source = searchForPlus(source, "\"")
        i = string.find(source, "\"")
        image = stripText(source[0:i])

        # Insert the "-250x250" size suffix in front of the file extension.
        i = string.rfind(image, ".")

        if i != -1:
            image = stripText(image[0:i]) + "-250x250" + stripText(image[i:])

        if string.find(image, "dvd-no-image") != -1:
            image = ""
    elif string.find(source, "<img src=\"http://images.") != -1:
        source = searchFor(source, "<img src=\"http://images.")
        source = searchForPlus(source, "\"")
        i = string.find(source, "\"")
        image = stripText(source[0:i])

        if string.find(image, "dvd-no-image") != -1:
            image = ""
    elif string.find(source, "<img src=\"http://image.") != -1:
        source = searchFor(source, "<img src=\"http://image.")
        source = searchForPlus(source, "\"")
        i = string.find(source, "\"")
        image = stripText(source[0:i])

        if string.find(image, "no-image") != -1:
            image = ""


    # Find Tracks
    tag = ">listen to"
    i = string.find(source, tag)

    if i == -1:
        tag = "class=\"productListing-data\""
        i = string.find(source, tag)

    if i != -1:
        source = searchForPlus(source, tag)
        i = string.find(source, "</table>")
        disks = stripText(source[0:i])

        # Single-disc listings have no disc header; add one so the loop
        # below handles both layouts the same way.
        if string.find(disks, ">Disc 1<") == -1:
            disks = ">Disc 1<" + disks

        trackList = []
        artistList = []
        diskCount = 0

        while searchFor(disks, ">Disc ") is not None:
            diskCount = diskCount + 1
            disks = searchForPlus(disks, ">Disc ")
            i = string.find(disks, "<")

            # Convert the disc label explicitly: "%02d" needs a number, and
            # formatting the raw string relies on interpreter-specific
            # coercion.  Fall back to the running count if it is not numeric.
            try:
                diskNumber = int(stripText(disks[0:i]))
            except (TypeError, ValueError):
                diskNumber = diskCount

            diskid = "%02d" % diskNumber

            # This disc's tracks run up to the next disc header, if any.
            i = string.find(disks, ">Disc ")

            if i != -1:
                tracks = stripText(disks[0:i])
            else:
                tracks = stripText(disks)

            trackIndex = 1

            while searchFor(tracks, "class='productListing-data'") is not None:
                trackid = "%02d" % trackIndex
                tracks = searchForPlus(tracks, "class='productListing-data'")
                tracks = searchForPlus(tracks, ">")
                i = string.find(tracks, "<")
                track = stripText(tracks[0:i])

                # "Title - Artist" entries carry a per-track artist.
                i = string.find(track, " - ")
                trackArtist = ""

                if i != -1:
                    trackArtist = stripText(track[i+3:])
                    track = stripText(track[0:i])

                trackList.append(diskid + "-" + trackid + "-" + track)
                trackIndex = trackIndex + 1

                if trackArtist != "":
                    trackArtist = convertArtist(trackArtist)
                    artistList.append(diskid + "-" + trackid + "-" + trackArtist)
                else:
                    artistList.append(diskid + "-" + trackid + "-")

        # Entries are prefixed "disc-track-", so sorting orders them by
        # disc and then track before they are handed over as Java arrays.
        trackList.sort()
        trackTitles = array(trackList, String)
        artistList.sort()
        trackArtists = array(artistList, String)


    # Find UPC
    if string.find(source, ">UPC:<") != -1:
        upc = searchForPlus(source, ">UPC:<")
        upc = searchForPlus(upc, ">")
        i = string.find(upc, "<")
        upc = stripText(upc[0:i])


    # Find Sound
    if string.find(source, ">Mono/Stereo:") != -1:
        sound = searchForPlus(source, ">Mono/Stereo:")
        sound = searchForPlus(sound, ">")
        i = string.find(sound, "<")
        sound = stripText(sound[0:i])


    # Find Comments
    comments = ""

    if string.find(source, ">Performer Notes") != -1:
        comments = searchForPlus(source, ">Performer Notes")
        comments = searchForPlus(comments, "<ul>")
        i = string.find(comments, "</ul>")
        comments = stripText(comments[0:i])

        comments = string.replace(comments, "<br>", "\n")
        comments = string.replace(comments, "<li>", "")
        comments = string.replace(comments, "</li>", "")

    if string.find(source, ">Reviews:") != -1:
        reviews = searchForPlus(source, ">Reviews:")
        i = string.find(reviews, "<!--")
        reviews = "Reviews\n" + stripText(reviews[0:i])

        reviews = string.replace(reviews, "<p>", "\n")
        reviews = string.replace(reviews, "<br>", "\n")
        reviews = string.replace(reviews, "</b>", "")

        # Reviews are appended after any performer notes.
        if comments == "":
            comments = reviews
        else:
            comments = comments + "\n\n" + reviews




# Run the scraper, then always give the optional user-exit script a chance to
# run -- even when extract() raised -- before any exception propagates.
try:
    extract()
finally:
    if os.path.exists("scrapers/awuserexit.py"):
        # execfile() is a function, not the "exec ... in ..." statement: the
        # former trailing "in globals()" was only a discarded membership
        # test.  Called without namespace arguments it runs the script in
        # the environment of this call, so the namespaces used are unchanged.
        execfile("scrapers/awuserexit.py")
