# Borders scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import convertArtist
from    jarray import zeros, array
from    java.lang import String


def extract():
    """Scrape a Borders product page into the module-level result fields.

    Reads the page HTML from the global ``source`` (and the global
    ``fullDateFormat`` flag) and assigns whatever it finds -- title, artist,
    label, date, prices, UPC, category, comments, track lists -- to the
    globals declared below.  Nothing is returned.  If the product
    information marker is missing from the page the function returns early
    with only the defaults assigned.

    ``source`` is consumed progressively: most sections cut it down to the
    text following the field they just read, so the order of the sections
    below matters.

    NOTE(review): searchFor, searchForPlus, stripText and convertArtist are
    imported helpers whose implementation is not visible here.  Their use in
    this function suggests searchFor returns the text starting at the marker
    (None when the marker is absent) and searchForPlus the text following the
    marker -- confirm against scrapers.scrapers.
    """
    global title,artist,label,composer,date,copyDate,running,artistLF
    global value,valueDate,copies,upc,isbn,lccn,dewey,userNumber
    global format,series,sound,rating,condition,category
    global location,keywords,played,pflag,eflag,comments
    global dateEntered,dataSource,cart,ordered
    global diskCount,artistURL,conductor
    global trackTitles,trackArtists,trackWriters,trackTimes
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    sound               = "Stereo"
    format              = "CD"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    artistLF            = ""


    # Without the product information marker there is nothing to scrape
    if string.find(source, "<!---START OF ROW WITH PRODUCT INFORMATION--->") == -1:
        return


    # Find optional fields, pricing info etc.
    # The whole page is searched here, before source is cut down below.
    marketinfo = source

    # Find List price
    i = string.find(marketinfo, "List Price:")

    if i != -1:
        listprice = searchForPlus(marketinfo, "List Price:")
        i = string.find(listprice, "$")

        if i != -1:
            listprice = searchFor(listprice, "$")
            i = string.find(listprice, "<")
            listprice = stripText(listprice[0:i])
        else:
            listprice = ""


    # Main extraction
    # Find Image
    source = searchForPlus(source, "<!---START OF ROW WITH PRODUCT INFORMATION--->")
    i = string.find(source, "web_images")

    if i != -1:
        source = searchForPlus(source, "<img src=\"")
        i = string.find(source, "\"")
        tempimage = stripText(source[0:i])

        # Placeholder artwork is not a real cover, leave image untouched
        if string.find(tempimage, "placeholder") == -1:
            image = "http://www.bordersstores.com" + tempimage


    # Find Title
    source = searchForPlus(source, "class=\"titledetail\"")
    source = searchForPlus(source, "<b>")
    i = string.find(source, "</b>")
    title = stripText(source[0:i])


    # Find Attributes
    source = searchForPlus(source, ">Bibliographic Data:")
    source = searchForPlus(source, "</b>")
    i = string.find(source, "<br>")
    # Keep the "<br>" itself: the label search below stops at the first "<"
    attributes = stripText(source[0:i+4])


    # Find Format
    i = string.find(attributes, ",")

    if i != -1:
        format = stripText(attributes[0:i])
        attributes = searchForPlus(attributes, ",")


        # Find Running time
        i = string.find(attributes, ",")
        running = stripText(attributes[0:i])

        if string.find(running, "inutes") == -1:
            # No running time, reset
            running = ""
        else:
            # Running time found, step on
            attributes = searchForPlus(attributes, ",")


        # Find Label
        i = string.find(attributes, "<")
        label = stripText(attributes[0:i])
        # At this point label also contains the date

        # Find Publication Date, date follows LAST comma in label
        i = string.rfind(label, ",")

        if i != -1:
            date = stripText(label[i+1:])
            label = stripText(label[0:i])

            if fullDateFormat == "false":
                # Keep only the text after the last space of the date
                i = string.rfind(date, " ")

                if i != -1:
                    date = stripText(date[i+1:])

            copyDate = date


    # Find Artist
    artist = ""
    i = string.find(source, ">Performer:")

    if i != -1:
        source = searchForPlus(source, ">Performer:")
        source = searchForPlus(source, "</b>")
        i = string.find(source, "<")
        artist = stripText(source[0:i])
        artist = convertArtist(artist)

        # Only the text before the first "/" is kept as the artist.
        # NOTE(review): the artist has already been through convertArtist
        # once at this point and is converted a second time below; verify
        # that this is what convertArtist expects.
        if i != -1:
            slash = string.find(artist, "/")

            if slash != -1:
                artist = stripText(artist[0:slash])
                artist = convertArtist(artist)


    # Find Composer
    i = string.find(source, ">Composer:")

    if i != -1:
        source = searchForPlus(source, ">Composer:")
        source = searchForPlus(source, "</b>")
        i = string.find(source, "<")
        composer = stripText(source[0:i])

        # Move the last word to the front: "First Last" -> "Last, First"
        i = string.rfind(composer, " ")

        if i != -1:
            composer = stripText(composer[i+1:]) + ", " + stripText(composer[0:i])


    # Find Conductor
    conductor = ""
    i = string.find(source, ">Conductor:")

    if i != -1:
        source = searchForPlus(source, ">Conductor:")
        source = searchForPlus(source, "</b>")
        i = string.find(source, "<")
        conductor = stripText(source[0:i])

    # Find Artist (Part II)
    # An ensemble replaces the artist; the previous artist becomes the
    # conductor when no explicit conductor was listed.
    i = string.find(source, ">Ensemble:")

    if i != -1:
        if conductor == "":
            conductor = artist

        source = searchForPlus(source, ">Ensemble:")
        source = searchForPlus(source, "</b>")
        i = string.find(source, "<")
        artist = stripText(source[0:i])


    # Find Price
    i = string.find(source, ">List Price:")

    if i != -1:
        source = searchForPlus(source, ">List Price:")
        i = string.find(source, "$")

        if i != -1:
            source = searchFor(source, "$")
            i = string.find(source, "<")
            value = stripText(source[0:i])

    # A Borders price, when present, overrides the list price as the value
    i = string.find(source, ">Borders Price:")

    if i != -1:
        source = searchForPlus(source, ">Borders Price:")
        source = searchForPlus(source, "</b>")
        i = string.find(source, "<")
        value = stripText(source[0:i])


    # Find UPC
    i = string.find(source, ">UPC:</B>")

    if i != -1:
        source = searchForPlus(source, ">UPC:</B>")
        i = string.find(source, "<")
        upc = stripText(source[0:i])


    #Find Category
    i = string.find(source, ">Shelf Location:<")

    if i != -1:
        source = searchForPlus(source, ">Shelf Location:<")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        category = stripText(source[0:i])

        # Keep only what follows the last "&gt;" separator
        i = string.rfind(category, "&gt;")

        if i != -1:
            category = stripText(category[i+4:])

        category = string.replace(category, "\t", "")
        category = string.replace(category, "\n", "")
        category = string.replace(category, "&nbsp;", " ")
        category = stripText(category)


    # Find Comments
    comments = ""
    i = string.find(source, ">Review:<")

    if i != -1:
        # Work on a copy so source is left untouched for the searches below
        comments = searchFor(source, ">Review:<")
        comments = searchForPlus(comments, ">")
        i = string.find(comments, "<B>")
        comments = stripText(comments[0:i])
        comments = string.replace(comments, "<P>", "")
        comments = string.replace(comments, "</B>", "")
        comments = string.replace(comments, "\n", " ")
        comments = string.replace(comments, "\t", " ")
        comments = stripText(comments)

    i = string.find(source, "<B>Description:<")

    if i != -1:
        # Each description found is appended to the comments
        while searchFor(source, "<B>Description:<") is not None:
            source = searchFor(source, "<B>Description:<")
            i = string.find(source, "<br>")
            tempcomments = stripText(source[0:i])
            tempcomments = string.replace(tempcomments, "\n", " ")
            tempcomments = string.replace(tempcomments, "<p>", "\n\n")
            tempcomments = string.replace(tempcomments, "</P>", "")
            tempcomments = string.replace(tempcomments, "<P>", "\n\n")
            tempcomments = string.replace(tempcomments, "<i>", "")
            tempcomments = string.replace(tempcomments, "</i>", "")
            tempcomments = string.replace(tempcomments, "<I>", "")
            tempcomments = string.replace(tempcomments, "</I>", "")
            tempcomments = string.replace(tempcomments, "<B>", "")
            tempcomments = string.replace(tempcomments, "</B>", "")
            tempcomments = stripText(tempcomments)

            # Cut out opening anchor tags, from "<a " through the closing "\">"
            while searchFor(tempcomments, "<a ") is not None:
                i = string.find(tempcomments, "<a ")
                j = string.find(tempcomments[i:], "\">")
                tempcomments = tempcomments[0:i] + tempcomments[i+j+2:]

            if comments != "":
                comments = stripText(comments + "\n\n" + tempcomments)
            else:
                comments = stripText(tempcomments)

            # Step past the current match so the next search moves on
            source = source[1:]


    # Find Tracks
    i = string.find(source, ">Track List:")

    if i != -1:
        disks = searchForPlus(source, ">Track List:")
        disks = searchForPlus(disks, "START=\"1\">")

        # Single disc listings carry no disc header, supply one
        if string.find(disks, ">Disc ") == -1:
            disks = ">Disc 1:<" + disks

        trackList = []
        artistList = []
        diskCount = 0

        while searchFor(disks, ">Disc ") is not None:
            diskCount = diskCount + 1
            disks = searchForPlus(disks, ">Disc ")
            i = string.find(disks, ":<")

            # The disc number is scraped as text, but "%02d" needs a number.
            # Fall back to the running disc count if it cannot be parsed.
            try:
                diskid = "%02d" % int(stripText(disks[0:i]))
            except (ValueError, TypeError):
                diskid = "%02d" % diskCount

            i = string.find(disks, "</OL>")
            tracks = stripText(disks[0:i])
            trackIndex = 1

            while searchFor(tracks, "<LI>") is not None:
                tracks = searchForPlus(tracks, "<LI>")
                i = string.find(tracks, "<")

                if i != -1:
                    track = stripText(tracks[0:i])
                else:
                    # Last item of a list whose <LI> entries are not closed:
                    # the title runs to the end of the list text
                    track = stripText(tracks)

                if i != -1 or track != "":
                    # "Title - Artist", unless the text after " - " opens
                    # with a parenthesis
                    i = string.find(track, " - ")
                    trackArtist = ""

                    if i != -1:
                        if track[i+3:i+4] != "(":
                            trackArtist = stripText(track[i+3:])
                            track = stripText(track[0:i])

                    trackid = "%02d" % trackIndex
                    trackList.append(diskid + "-" + trackid + "-" + track)
                    trackIndex = trackIndex + 1

                    if trackArtist != "":
                        trackArtist = convertArtist(trackArtist)
                        artistList.append(diskid + "-" + trackid + "-" + trackArtist)
                    else:
                        artistList.append(diskid + "-" + trackid + "-")

                tracks = tracks[1:]

        # Entries are prefixed "disc-track-", so sorting orders them
        trackList.sort()
        trackTitles = array(trackList, String)
        artistList.sort()
        trackArtists = array(artistList, String)



# Run the scraper, then give the optional user exit script a chance to run
# whether or not extraction succeeded.
try:
    extract()
finally:
    if os.path.exists("scrapers/awuserexit.py"):
        # execfile with no explicit namespace runs the script in the current
        # namespace, so the user exit sees the fields set by extract().
        execfile("scrapers/awuserexit.py")
