# Barnes and Noble scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML
from    scrapers.scrapers import convertArtist
from    scrapers.scrapers import convertComposer
from    jarray import zeros, array
from    java.lang import String


def extract():
    """Scrape one product page held in the global ``source``.

    Reads the HTML text in the module-level ``source`` (and the
    ``fullDateFormat`` setting) and stores every field it can find in
    module-level globals: image, category, title, artist, listprice, value,
    format, running, date, copyDate, salesrank, label, upc, diskCount,
    trackTitles/trackArtists/trackTimes, comments and rating, plus a handful
    of fixed defaults.  Many of the names in the ``global`` statements below
    are declared but never assigned in this function.

    The function takes no arguments and returns None.  From the "Find Title"
    step onwards ``source`` itself is re-bound to a trimmed remainder of the
    page, so the order of the steps matters.

    NOTE(review): searchFor / searchForPlus / stripText / stripHTML /
    convertArtist come from scrapers.scrapers.  The comments below assume
    that searchForPlus returns the text following the match and that
    searchFor returns None when nothing matches (the ``!= None`` loop tests
    rely on that) -- confirm against that module.
    """
    global title,artist,label,composer,date,copyDate,running,artistLF
    global value,valueDate,copies,upc,isbn,lccn,dewey,userNumber
    global format,series,sound,rating,condition,category
    global location,keywords,played,pflag,eflag,comments
    global dateEntered,dataSource,cart,ordered
    global diskCount,artistURL,conductor
    global trackTitles,trackArtists,trackWriters,trackTimes
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    sound               = "Stereo"
    format              = "CD"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    artistLF            = ""

    # switchArtist is a local flag; it is never changed from "N" in this
    # function, so the "Last, First" swap further down is currently disabled.
    switchArtist        = "N"
    buyerwaiting        = "N"
    available           = "Y"


    # Find optional fields, pricing info etc.
    # (marketinfo is a local that is not referenced again in this function.)
    marketinfo = source


    # Main extraction
    # Find Image
    i = string.find(source, "<div id=\"product-image\"")

    if i != -1:
        temp = searchForPlus(source, "<div id=\"product-image\"")
        temp = searchForPlus(temp, "src=\"")
        i = string.find(temp, "\"")
        image = stripText(temp[0:i])

        # Discard the URL when it contains one of these markers
        # (presumably placeholder / "no image" graphics -- confirm).
        i = string.find(image, "cleardot")

        if i != -1:
            image = ""

        i = string.find(image, "ruledot")

        if i != -1:
            image = ""

        i = string.find(image, "orange_dot.gif")

        if i != -1:
            image = ""

        i = string.find(image, "ImageNA")

        if i != -1:
            image = ""


    # Find Category
    # Text of the first link after the "Related Music Styles" heading.
    tag = ">Related Music Styles<"
    i = string.find(source, tag)

    if i != -1:
        category = searchForPlus(source, tag)
        category = searchForPlus(category, "<a href")
        category = searchForPlus(category, "\">")
        i = string.find(category, "<")
        category = stripText(category[0:i])


    # Find Title
    # From here on the global `source` is overwritten with the remainder of
    # the page, so every later lookup only sees text after the title.
    source = searchForPlus(source, "<div id=\"product-info\"")
    source = searchForPlus(source, "<h2")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripHTML(source[0:i])

    if title == "The page you are looking for is currently unavailable.":
        title = ""
        # NOTE(review): "force exception" is a marker that will not occur in
        # the page; presumably the first call returns None and the second
        # call then raises, aborting the scrape.  If that holds, the `return`
        # below is only a fallback -- confirm searchFor's behaviour.
        source = searchFor(source, "force exception")
        source = searchFor(source, "force exception")
        return


    # Find Artist
    # The "?CTR=" link only counts when it starts within the first 200
    # characters after the title; otherwise the artist falls back to
    # "Various Artists".
    i = string.find(source, "?CTR=")

    if i != -1 and i < 200:
        source = searchForPlus(source, "?CTR=")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        artist = stripHTML(source[0:i])
        artist = convertArtist(artist)

        # Disabled: switchArtist is hard-coded to "N" above.  When enabled it
        # would move the last word in front: "First Last" -> "Last, First".
        if switchArtist == "Y":
            i = string.rfind(artist, " ")

            if (i != -1):
                artist = stripText(artist[i+1:]) + ", " + stripText(artist[0:i])
    else:
        artist = "Various Artists"


    # Find List price
    # Uses searchFor (not searchForPlus) and cuts at the first space.
    i = string.find(source, "class=\"list-price\"")

    if i != -1:
        listprice = searchFor(source, "class=\"list-price\"")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, " ")
        listprice = stripText(listprice[0:i])


    # Find Retail Price (value)
    # Falls back to the list price when no "Online price" text is present.
    i = string.find(source, "Online price")

    if i != -1:
        # Start 25 characters before the label.
        # NOTE(review): when i < 25 the start index is negative and the slice
        # is taken relative to the end of `source` instead.
        value = stripText(source[i-25:])
        value = searchFor(value, "$")
        i = string.find(value, "<")
        value = stripText(value[0:i])
    else:
        value = listprice


    # Find Format
    # Overrides the "CD" default only when a class="format" element exists.
    i = string.find(source, "class=\"format\"")

    if i != -1:
        format = searchForPlus(source, "class=\"format\"")
        format = searchForPlus(format, ">")
        i = string.find(format, "<")
        format = stripText(format[0:i])


    # Find Running time
    i = string.find(source, ">Total Running Time:<")

    if i != -1:
        running = searchForPlus(source, ">Total Running Time:<")
        running = searchForPlus(running, ">")
        i = string.find(running, "<")
        running = stripText(running[0:i])


    # Find release date
    i = string.find(source, "class=\"ReleaseDate\"")

    if i != -1:
        date = searchForPlus(source, "class=\"ReleaseDate\"")
        date = searchForPlus(date, ":")
        i = string.find(date, "<")
        date = stripText(date[0:i])

        # With fullDateFormat == "false" keep only what follows the last "/"
        # (presumably the year of an m/d/y date -- confirm).
        if fullDateFormat == "false":
            i = string.rfind(date, "/")

            if i != -1:
                date = stripText(date[i+1:])


    # Find copyright date
    # The value is taken after the second "\">" following the label.
    i = string.find(source, ">Original Release Date:")

    if i != -1:
        copyDate = searchForPlus(source, ">Original Release Date:")
        copyDate = searchForPlus(copyDate, "\">")
        copyDate = searchForPlus(copyDate, "\">")
        i = string.find(copyDate, "<")
        copyDate = stripText(copyDate[0:i])

        # Same shortening as for the release date above.
        if fullDateFormat == "false":
            i = string.rfind(copyDate, "/")

            if i != -1:
                copyDate = stripText(copyDate[i+1:])


    # Find Sales Rank
    i = string.find(source, "class=\"saleRank\"")

    if i != -1:
        salesrank = searchForPlus(source, "class=\"saleRank\"")
        salesrank = searchForPlus(salesrank, ":")
        i = string.find(salesrank, "<")
        salesrank = stripText(salesrank[0:i])


    # Find Label
    i = string.find(source, ">Label:")

    if i != -1:
        label = searchForPlus(source, ">Label:")
        i = string.find(label, "<")
        label = stripText(label[0:i])


    # Find UPC
    # The page marks the number with class="isbn"; it is stored in `upc`.
    i = string.find(source, "class=\"isbn\"")

    if i != -1:
        upc = searchForPlus(source, "class=\"isbn\"")
        upc = searchForPlus(upc, ":")
        upc = searchForPlus(upc, ">")
        i = string.find(upc, "<")
        upc = stripText(upc[0:i])

        # Left-pad with zeros to at least 12 characters.
        while len(upc) < 12:
            upc = "0" + upc


    # Find Tracks
    # diskCount is always (re)set; the track arrays are only assigned when
    # the tracks tab is present.
    i = string.find(source, "id=\"tab-tracks\"")
    diskCount = 0

    if i != -1:
        disks = searchForPlus(source, "id=\"tab-tracks\"")
        i = string.find(disks, "</tr></table></div>")
        disks = stripText(disks[0:i])

        # Single-disc listings carry no ">Disc" heading; add a synthetic one
        # so the per-disc loop below runs exactly once.
        if string.find(disks, ">Disc") == -1:
            disks = ">Disc 1<" + disks

        trackList = []
        artistList = []
        timeList = []

        # Each pass consumes one ">Disc" heading from `disks`.
        while (searchFor(disks, ">Disc") != None):
            diskCount = diskCount + 1
            disks = searchForPlus(disks, ">Disc")
            i = string.find(disks, "<")
            # NOTE(review): "%02d" is applied to the text returned by
            # stripText.  CPython raises TypeError for a str operand here, so
            # this relies on the hosting runtime accepting a numeric string
            # -- confirm.
            diskid = "%02d" % stripText(disks[0:i])
            trackNum = 0

            # Limit this disc's track text to what precedes the next
            # class="DiscNumber" marker, if there is one.
            i = string.find(disks, "class=\"DiscNumber\"")

            if i != -1:
                tracks = stripText(disks[0:i])
            else:
                tracks = disks

            # Each "><b>" starts one entry; trackNum counts every entry,
            # including the ones skipped below.
            while (searchFor(tracks, "><b>") != None):
                trackNum = trackNum + 1
                tracks = searchForPlus(tracks, "><b>")

                # Entries beyond the 30th are consumed but not recorded.
                if trackNum > 30:
                    continue

                i = string.find(tracks, "<")
                track = stripText(tracks[0:i])

                # Skip the "Track List" heading (its number is still used up).
                if track == "Track List":
                    continue

                # Entries are keyed "DD-TT-text" so the plain string sort at
                # the end orders them by disc and then by track number.
                trackList.append(diskid + "-" + "%02d" % trackNum + "-" + track)

                # Per-track artist: only accepted when the " / " separator
                # starts within 100 characters of the track title.  First try
                # a linked artist ("> / <" then the link text) ...
                i = string.find(tracks, "> / <")

                if i != -1 and i <= 100:
                    tracks = searchForPlus(tracks, "> / <")
                    tracks = searchForPlus(tracks, "\">")
                    i = string.find(tracks, "<")
                    trackArtist = stripText(tracks[0:i])
                else:
                    # ... then a plain-text artist after "> / ".
                    i = string.find(tracks, "> / ")

                    if i != -1 and i <= 100:
                        trackArtist = searchForPlus(tracks, "> / ")
                        i = string.find(trackArtist, "<")
                        trackArtist = stripText(trackArtist[0:i])

                        # If the text contains ":" drop its last
                        # space-separated word (presumably a trailing m:ss
                        # running time -- confirm).
                        i = string.find(trackArtist, ":")

                        if i != -1:
                            i = string.rfind(trackArtist, " ")
                            trackArtist = stripText(trackArtist[0:i])
                    else:
                        trackArtist = ""

                artistList.append(diskid + "-" + "%02d" % trackNum + "-" + trackArtist)

                # Track time: the text after "</a> " (again only within 100
                # characters); keep its last word and blank it unless it
                # contains ":".
                i = string.find(tracks, "</a> ")

                if i != -1 and i <= 100:
                    tracks = searchForPlus(tracks, "</a> ")
                    i = string.find(tracks, "<")
                    trackTime = stripText(tracks[0:i])
                    i = string.rfind(trackTime, " ")

                    if i != -1:
                        trackTime = stripText(trackTime[i:])

                    i = string.find(trackTime, ":")

                    if i == -1:
                        trackTime = ""
                else:
                    trackTime = ""

                timeList.append(diskid + "-" + "%02d" % trackNum + "-" + trackTime)

        # Publish the three lists as sorted java.lang.String arrays.
        trackList.sort()
        trackTitles = array(trackList, String)

        artistList.sort()
        trackArtists = array(artistList, String)
        timeList.sort()
        trackTimes = array(timeList, String)


    # Find Comments
    comments = ""
    # rfind serves only as an existence check; the text is taken via
    # searchForPlus below.
    i = string.rfind(source, "id=\"tab-edreviews\"")

    if i != -1:
        comment = searchForPlus(source, "id=\"tab-edreviews\"")
        comment = searchForPlus(comment, "<h3>")
        i = string.find(comment, "</div></div>")
        comment = stripText(comment[0:i])

        # `comments` was reset to "" just above, so only the first branch can
        # run here; the else branch would append to existing text.
        if comments == "":
            comments = stripComments(comment);
        else:
            comments = comments + "\n\n" + stripComments(comment);


    # Find Rating
    # Taken from the alt text of the first image after "Average Rating:".
    i = string.find(source, "Average Rating:")

    if i != -1:
        rating = searchForPlus(source, "Average Rating:")
        rating = searchForPlus(rating, "alt=\"")
        i = string.find(rating, "\"")
        rating = stripText(rating[0:i])

        # Drop the fixed lead-in sentence so only the remainder is stored.
        if rating.startswith("Customer Rating for this product is") == 1:
            rating = stripText(searchForPlus(rating, "Customer Rating for this product is"))


    # Find Sound
    # Same value as the default assigned at the top of the function.
    sound = "Stereo"



# Closers tried, in order, for tags whose attributes normally end in a quote.
_QUOTED_TAG_CLOSERS = ("\">", ">")

# Closer for tags that are simply cut at the next ">".
_PLAIN_TAG_CLOSERS = (">",)

# Literal substitutions applied by stripComments(), in this exact order.
# Order matters: e.g. "</h3><p>" has to be handled before "</h3>" and "<p>",
# and the final "\n " rule trims the space left at the start of a line.
_COMMENT_REPLACEMENTS = (
    ("\n", ""),
    ("<h3>", "\n\n"),
    ("</h3><p>", "\n"),
    ("</h3>", "\n"),
    ("</SPAN>", ""),
    ("<TABLE>", ""),
    ("<TD>", ""),
    ("</TD>", ""),
    ("<TR>", ""),
    ("</TR>", "\n"),
    ("</A>", ""),
    # Lower-case "<a ...>" openers are stripped further down, so their
    # closing tags have to go as well.
    ("</a>", ""),
    ("<strong>", ""),
    ("</strong>", ""),
    ("</font>", ""),
    ("</div>", "\n\n"),
    ("\t", ""),
    ("  ", " "),
    ("<p>", "\n\n"),
    ("</p>", ""),
    ("</P>", ""),
    ("<P>", "\n\n"),
    ("<br>", "\n"),
    ("<BR>", "\n"),
    ("<br />", "\n"),
    ("<i>", ""),
    ("</i>", ""),
    ("<b>", ""),
    ("</b>", ""),
    ("<B>", ""),
    ("</B>", ""),
    ("<I>", ""),
    ("</I>", ""),
    ("&#151;", "-"),
    # Windows-1252 ellipsis (byte 0x85 and its numeric entity).  The search
    # string must never be empty: replacing "" inserts "..." between every
    # character of the text.
    ("\x85", "..."),
    ("&#133;", "..."),
    ("<em>", ""),
    ("</em>", ""),
    ("<EM>", ""),
    ("</EM>", ""),
    ("&#58;", ":"),
    ("<ul>", "\n"),
    ("</ul>", "\n\n"),
    ("<UL>", "\n"),
    ("</UL>", "\n\n"),
    ("<LI>", "\n    "),
    ("</LI>", ""),
    ("<li>", "\n    "),
    ("</li>", ""),
    ("&#8217;", "'"),
    ("<br/>", "\n"),
    ("\n ", "\n"),
)


def _stripTags(text, opener, closers):
    """Remove every tag that starts with `opener` from `text`.

    Each tag is cut from `opener` up to and including the first closer from
    `closers` that occurs after it; the closers are tried in order, so a
    later one is only a fallback.  A tag with no closer at all is
    unterminated markup: the text is truncated at that tag, which also
    guarantees that the loop terminates.
    """
    start = text.find(opener)

    while start != -1:
        resume = -1

        for closer in closers:
            end = text.find(closer, start)

            if end != -1:
                resume = end + len(closer)
                break

        if resume == -1:
            return text[0:start]

        text = text[0:start] + text[resume:]
        start = text.find(opener)

    return text


def stripComments(tempcomment):
    """Convert an HTML review fragment into plain text.

    tempcomment -- HTML text of the review section.

    Returns the text with the known tags removed or turned into line breaks,
    a few character entities decoded, runs of spaces collapsed and at most
    one blank line between paragraphs.

    Steps, in order:
      1. cut the text at the first "</td></tr>", if present (that prefix is
         passed through stripText from scrapers.scrapers);
      2. collapse runs of spaces;
      3. drop <div ...>, <h3 ...> and <strong ...> opening tags;
      4. apply the literal substitutions in _COMMENT_REPLACEMENTS;
      5. drop <TD ...>, <SPAN ...>, <A ...>, <a ...>, <font ...> and
         <img src...> tags;
      6. collapse three or more consecutive newlines into two.

    Tag removal is case-sensitive and only covers the spellings listed.
    """
    i = tempcomment.find("</td></tr>")

    if i != -1:
        tempcomment = stripText(tempcomment[0:i])

    while tempcomment.find("  ") != -1:
        tempcomment = tempcomment.replace("  ", " ")

    for opener in ("<div ", "<h3 ", "<strong "):
        tempcomment = _stripTags(tempcomment, opener, _QUOTED_TAG_CLOSERS)

    for old, new in _COMMENT_REPLACEMENTS:
        tempcomment = tempcomment.replace(old, new)

    for opener in ("<TD ", "<SPAN ", "<A ", "<a "):
        tempcomment = _stripTags(tempcomment, opener, _QUOTED_TAG_CLOSERS)

    for opener in ("<font ", "<img src"):
        tempcomment = _stripTags(tempcomment, opener, _PLAIN_TAG_CLOSERS)

    while tempcomment.find("\n\n\n") != -1:
        tempcomment = tempcomment.replace("\n\n\n", "\n\n")

    return tempcomment


# Run the scraper.  Whether or not extract() raises, give an optional
# user-exit script the chance to run afterwards.
try:
    extract()
finally:
    if os.path.exists("scrapers/awuserexit.py"):
        # execfile() without namespace arguments runs the script in the
        # calling scope, i.e. this module's namespace, so the user exit sees
        # the fields extract() stored.  (A trailing "in globals()" belongs to
        # the exec statement; after an execfile() call it is only a discarded
        # membership test.)
        execfile("scrapers/awuserexit.py")
