# Barnes and Noble scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import convertAuthor
from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML


def extract():
    """Scrape one Barnes and Noble product page into the module globals.

    Reads the page HTML from the global ``source`` and stores every field it
    finds (image, category, title, credits, format, prices, dates, rating,
    comments, product details, tracks) in the matching global declared below.
    A field whose marker is not found is generally left as it was; the
    defaults and resets assigned below are the exceptions.

    ``source`` itself is rebound several times along the way (at the title,
    the MPAA rating and the performance credits), so later lookups only see
    what those calls returned. The order of the sections therefore matters.

    ``fullDateFormat`` is compared against the string "false"; in that case
    only the part of a date after its last "/" is kept.

    NOTE(review): searchFor, searchForPlus, stripText and stripHTML come from
    scrapers.scrapers and are not visible here. The loops below rely on
    searchFor returning None when the marker is missing; presumably searchFor
    returns the text starting at the marker and searchForPlus the text after
    it - confirm against that module.

    NOTE(review): most end markers are located with string.find and used
    directly as a slice end; when the end marker is missing the index is -1
    and the slice silently drops the last character instead.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global copies,location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # The page offers no place to extract; the "United States" default is
    # left disabled.
    # place = "United States"


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    color               = "Color"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # Find optional fields, pricing info etc.
    # marketinfo is a local that is not referenced again in this function.
    marketinfo = source

    # Main extraction
    # Find Image: the src attribute of the first image inside the
    # product-image div.
    i = string.find(source, "<div id=\"product-image\"")

    if i != -1:
        temp = searchForPlus(source, "<div id=\"product-image\"")
        temp = searchForPlus(temp, "src=\"")
        i = string.find(temp, "\"")
        image = stripText(temp[0:i])

        # Blank the image when its URL contains one of these file names
        # (presumably placeholder graphics rather than real cover art).
        i = string.find(image, "cleardot")

        if i != -1:
            image = ""

        i = string.find(image, "ruledot")

        if i != -1:
            image = ""

        i = string.find(image, "orange_dot.gif")

        if i != -1:
            image = ""

        i = string.find(image, "ImageNA")

        if i != -1:
            image = ""


    # Find Category: text of the first link after the genres heading.
    tag = ">Related DVD Genres<"
    i = string.find(source, tag)

    if i != -1:
        category = searchForPlus(source, tag)
        category = searchForPlus(category, "<a href")
        category = searchForPlus(category, "\">")
        i = string.find(category, "<")
        category = stripText(category[0:i])


    # Find Title
    # From here on the global source is rebound to what these calls return,
    # so every later lookup works on that text only.
    source = searchForPlus(source, "<div id=\"product-info\"")
    source = searchForPlus(source, "<h2")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripHTML(source[0:i])

    if title == "The page you are looking for is currently unavailable.":
        title = ""
        # Deliberately abort: presumably the first call returns None for the
        # missing marker and the second call then raises on that None -
        # confirm against searchFor. The return below is only a fallback.
        source = searchFor(source, "force exception")
        source = searchFor(source, "force exception")
        return


    # Find Director
    i = string.find(source, "Director:")

    if i != -1:
        director = searchForPlus(source, "Director:")
        director = searchForPlus(director, "\">")
        i = string.find(director, "<")
        director = stripHTML(director[0:i])

        # Split at the last space and swap: "First Last" -> "Last, First".
        i = string.rfind(director, " ")

        if (i != -1):
            director = stripText(director[i+1:]) + ", " + stripText(director[0:i])


    # Find Actors
    i = string.find(source, "Cast:")

    if i != -1:
        actors = searchForPlus(source, "Cast:")
        i = string.find(actors, "</em>")
        # NOTE(review): stripHTML runs before the loop below searches for
        # "<a href="; if stripHTML removes tags the loop never finds a link -
        # confirm what stripHTML does.
        actors = stripHTML(actors[0:i])
        actorList = []

        # One name per link; each pass advances actors past the link found.
        while (searchFor(actors, "<a href=") != None):
            actors = searchForPlus(actors, "<a href=")
            actor = searchForPlus(actors, ">")
            i = string.find(actor, "<")
            actor = stripText(actor[0:i])

            # Same "Last, First" reordering as for the director.
            i = string.rfind(actor, " ")

            if (i != -1):
                actor = stripText(actor[i+1:]) + ", " + stripText(actor[0:i])

            actorList.append(actor)

        # Copy up to ten names into the fixed actorN globals; slots beyond
        # the number of names found are not touched.
        if len(actorList) > 0:
            actor1 = actorList[0]

        if len(actorList) > 1:
            actor2 = actorList[1]

        if len(actorList) > 2:
            actor3 = actorList[2]

        if len(actorList) > 3:
            actor4 = actorList[3]

        if len(actorList) > 4:
            actor5 = actorList[4]

        if len(actorList) > 5:
            actor6 = actorList[5]

        if len(actorList) > 6:
            actor7 = actorList[6]

        if len(actorList) > 7:
            actor8 = actorList[7]

        if len(actorList) > 8:
            actor9 = actorList[8]

        if len(actorList) > 9:
            actor10 = actorList[9]


    # Find Format: alt text of the first "/icons/" image, otherwise a
    # Blu-ray / HD-DVD paragraph within the first 2500 characters of the
    # remaining source.
    i = string.find(source, "/icons/")

    if i != -1:
        format = searchForPlus(source, "/icons/")
        format = searchForPlus(format, "alt=\"")
        i = string.find(format, "\"")
        format = stripHTML(format[0:i])
    else:
        i = string.find(source, "<p>Blu-ray")

        if i != -1 and i<2500:
            format = "Blu-ray Disc"
        else:
            i = string.find(source, "<p>HD-DVD")

            if i != -1 and i<2500:
                format = "HD DVD"


    # Find List price: text from the "$" up to the next space.
    listprice = ""
    i = string.find(source, "class=\"list-price\"")

    if i != -1:
        listprice = searchFor(source, "class=\"list-price\"")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, " ")
        listprice = stripText(listprice[0:i])


    # Find Retail Price (value); falls back to the list price.
    i = string.find(source, "Online price")

    if i != -1:
        # Start 25 characters before the label.
        # NOTE(review): if i < 25 the slice start is negative and counts
        # from the end of source.
        value = stripText(source[i-25:])
        value = searchFor(value, "$")
        i = string.find(value, "<")
        value = stripText(value[0:i])
    else:
        value = listprice


    # Find Release Date
    i = string.find(source, "class=\"ReleaseDate\"")

    if i != -1:
        date = searchForPlus(source, "class=\"ReleaseDate\"")
        date = searchForPlus(date, ":")
        i = string.find(date, "<")
        date = stripHTML(date[0:i])

        # Keep only the text after the last "/" unless full dates are wanted.
        if fullDateFormat == "false":
            i = string.rfind(date, "/")

            if i != -1:
                date = stripText(date[i+1:])


    # Find Copyright Date (the original release date), handled like the
    # release date above.
    i = string.find(source, "class=\"originalReleaseDate\"")

    if i != -1:
        copyDate = searchForPlus(source, "class=\"originalReleaseDate\"")
        copyDate = searchForPlus(copyDate, ":")
        i = string.find(copyDate, "<")
        copyDate = stripHTML(copyDate[0:i])

        if fullDateFormat == "false":
            i = string.rfind(copyDate, "/")

            if i != -1:
                copyDate = stripText(copyDate[i+1:])


    # Find MPAA rating from the file name of the rating image.
    i = string.find(source, ">Rating:")

    if i != -1:
        # source is rebound again here; everything below searches only the
        # text these calls return.
        source = searchForPlus(source, ">Rating:")
        source = searchForPlus(source, "src=\"")
        i = string.find(source, "\"")

        mpaaimg = stripText(source[0:i])
        mpaa = "Not Rated"

        i = string.find(mpaaimg, "/g.gif")

        if i != -1:
            mpaa = "G (MPAA)"

        i = string.find(mpaaimg, "/pg.gif")

        if i != -1:
            mpaa = "PG (MPAA)"

        i = string.find(mpaaimg, "/pg13.gif")

        if i != -1:
            mpaa = "PG-13 (MPAA)"

        i = string.find(mpaaimg, "/r.gif")

        if i != -1:
            mpaa = "R (MPAA)"

        i = string.find(mpaaimg, "/nr.gif")

        if i != -1:
            mpaa = "NR"


    # Find Comments: the editorial reviews tab followed by the overview tab,
    # each cleaned up by stripComments and joined with a blank line.
    comments = ""
    # rfind is only used as a presence test; the text itself is taken with
    # searchForPlus.
    i = string.rfind(source, "id=\"tab-edreviews\"")

    if i != -1:
        comment = searchForPlus(source, "id=\"tab-edreviews\"")
        comment = searchForPlus(comment, "<h3>")
        i = string.find(comment, "</div></div>")
        comment = stripText(comment[0:i])

        # comments was reset to "" just above, so the first branch is the
        # one taken here.
        if comments == "":
            comments = stripComments(comment);
        else:
            comments = comments + "\n\n" + stripComments(comment);

    i = string.rfind(source, "id=\"tab-overview\">")

    if i != -1:
        comment = searchForPlus(source, "id=\"tab-overview\">")
        comment = searchForPlus(comment, "<h3>")
        i = string.find(comment, "</div></div>")
        comment = stripText(comment[0:i])

        if comments == "":
            comments = stripComments(comment);
        else:
            comments = comments + "\n\n" + stripComments(comment);


    # Find Attributes: the text from the last "Product Details" heading up to
    # the next "</div>".
    i = string.rfind(source, ">Product Details<")

    if i != -1:
        attrs = stripText(source[i:])
        i = string.find(attrs, "</div>")
        attrs = stripText(attrs[0:i])


        # Find UPC
        i = string.find(attrs, "UPC:")

        if i != -1:
            upc = searchForPlus(attrs, "UPC:")
            upc = searchForPlus(upc, "\">")
            i = string.find(upc, "<")
            upc = stripText(upc[0:i])

            # Left-pad with zeros to at least 12 characters.
            while len(upc) < 12:
                upc = "0" + upc


        # Find release date; when present this overrides the value taken
        # from the ReleaseDate element above.
        i = string.find(attrs, ">Release Date:")

        if i != -1:
            date = searchForPlus(attrs, ">Release Date:")
            i = string.find(date, "<")
            date = stripText(date[0:i])

            if fullDateFormat == "false":
                i = string.rfind(date, "/")

                if i != -1:
                    date = stripText(date[i+1:])


        # Find Studio (labelled "Source:" on the page)
        i = string.find(attrs, ">Source:")

        if i != -1:
            studio = searchForPlus(attrs, ">Source:")
            i = string.find(studio, "<")
            studio = stripText(studio[0:i])


        # Find Format; when present this overrides the icon-based value and
        # keeps only the text before the first "&nbsp;" or space.
        i = string.find(attrs, ">Format:")

        if i != -1:
            format = searchForPlus(attrs, ">Format:")
            i = string.find(format, "<")
            format = stripText(format[0:i])
            i = string.find(format, "&nbsp;")

            if i != -1:
                format = stripText(format[0:i])

            i = string.find(format, " ")

            if i != -1:
                format = stripText(format[0:i])


        # Sales Rank
        # Unlike its neighbours this one searches source, not attrs.
        i = string.find(source, "Sales Rank:")

        if i != -1:
            salesrank = searchForPlus(source, "Sales Rank:")
            i = string.find(salesrank, "<")
            salesrank = stripText(salesrank[0:i])


        # Find widescreen, color
        i = string.find(attrs, ">Presentation:")

        if i != -1:
            attr = searchForPlus(attrs, ">Presentation:")
            i = string.find(attr, "<")
            attr = stripText(attr[0:i])

            if string.find(attr, "Wide") != -1:
                wide = "Y"
            else:
                wide="N"

            if string.find(attr, "B&W") != -1:
                color = "B&W"
            else:
                color="Color"


        # Find Sound
        i = string.find(attrs, ">Sound:")

        if i != -1:
            sound = searchForPlus(attrs, ">Sound:")
            i = string.find(sound, "<")
            sound = stripText(sound[0:i])

            # Keep only the entry after the last comma.
            i = string.rfind(sound, ",")

            if i != -1:
                sound = stripText(sound[i+1:])


        # Find Running Time
        i = string.find(attrs, ">Time:")

        if i != -1:
            running = searchForPlus(attrs, ">Time:")
            i = string.find(running, "<")
            running = stripText(running[0:i])


    # Find Tracks: the list items that follow the "available separately"
    # text, up to the closing "</ul>".
    i = string.find(source, "which may be available separately:<")

    if i != -1:
        tracks = searchForPlus(source, "which may be available separately:<")
        i = string.find(tracks, "</ul>")
        tracks = stripText(tracks[0:i])
        trackList = []

        # Each pass advances tracks past the "<li>" it just consumed.
        while (searchFor(tracks, "<li>") != None):
            tracks = searchForPlus(tracks, "<li>")

            # Linked items: skip past the opening anchor tag.
            if tracks.startswith("<a href") == 1:
                tracks = searchForPlus(tracks, "\">")

            i = string.find(tracks, "<")

            if (i == -1):
                track = tracks
            else:
                track = stripText(tracks[0:i])

            # Items naming a TV series or a season are not recorded.
            i = string.find(track, " [TV Series]")

            if i != -1:
                continue

            i = string.find(track, "- Season ")

            if i != -1:
                continue

            # Keep only the text after the first ": ".
            i = string.find(track, ": ")

            if i != -1:
                track = stripText(track[i+1:])

            trackList.append(track)


        # Copy up to twenty titles into the fixed trackN globals; slots
        # beyond the number of titles found are not touched.
        if len(trackList) > 0:
            track1 = trackList[0]

        if len(trackList) > 1:
            track2 = trackList[1]

        if len(trackList) > 2:
            track3 = trackList[2]

        if len(trackList) > 3:
            track4 = trackList[3]

        if len(trackList) > 4:
            track5 = trackList[4]

        if len(trackList) > 5:
            track6 = trackList[5]

        if len(trackList) > 6:
            track7 = trackList[6]

        if len(trackList) > 7:
            track8 = trackList[7]

        if len(trackList) > 8:
            track9 = trackList[8]

        if len(trackList) > 9:
            track10 = trackList[9]

        if len(trackList) > 10:
            track11 = trackList[10]

        if len(trackList) > 11:
            track12 = trackList[11]

        if len(trackList) > 12:
            track13 = trackList[12]

        if len(trackList) > 13:
            track14 = trackList[13]

        if len(trackList) > 14:
            track15 = trackList[14]

        if len(trackList) > 15:
            track16 = trackList[15]

        if len(trackList) > 16:
            track17 = trackList[16]

        if len(trackList) > 17:
            track18 = trackList[17]

        if len(trackList) > 18:
            track19 = trackList[18]

        if len(trackList) > 19:
            track20 = trackList[19]

    
    # Second attempt at credits: when the cast/crew tab is present its values
    # overwrite the actors and director found earlier.
    i = string.find(source, "id=\"tab-castcrew\"")

    if i != -1:

        # Find Actors
        i = string.find(source, ">Performance Credits<")

        if i != -1:
            # source is rebound once more; the remaining credit lookups
            # search only the text returned here.
            source = searchForPlus(source, ">Performance Credits<")
            i = string.find(source, "<h3>")
            # NOTE(review): as in the first cast lookup, stripHTML runs
            # before the loop searches for table and link tags - confirm
            # what stripHTML does.
            actors = stripHTML(source[0:i])

            actorList = []

            while (searchFor(actors, "<tr><td style=\"") != None):
                actors = searchForPlus(actors, "<td style=\"")
                actors = searchForPlus(actors, "<a href=")
                actor = searchForPlus(actors, ">")
                i = string.find(actor, "<")
                actor = stripText(actor[0:i])
                # Name reordering is delegated to convertAuthor here.
                actor = convertAuthor(actor)
                actorList.append(actor)

            if len(actorList) > 0:
                actor1 = actorList[0]

            if len(actorList) > 1:
                actor2 = actorList[1]

            if len(actorList) > 2:
                actor3 = actorList[2]

            if len(actorList) > 3:
                actor4 = actorList[3]

            if len(actorList) > 4:
                actor5 = actorList[4]

            if len(actorList) > 5:
                actor6 = actorList[5]

            if len(actorList) > 6:
                actor7 = actorList[6]

            if len(actorList) > 7:
                actor8 = actorList[7]

            if len(actorList) > 8:
                actor9 = actorList[8]

            if len(actorList) > 9:
                actor10 = actorList[9]

        # Each crew lookup below starts 150 characters before the role label
        # and takes the text of the first class="underline" element found
        # from there.
        # NOTE(review): if the label sits within the first 150 characters the
        # slice start is negative and counts from the end of source.

        # Find Director
        i = string.find(source, ">Director")

        if i != -1:
            director = stripText(source[i-150:])
            director = searchForPlus(director, "class=\"underline\"")
            director = searchForPlus(director, ">")
            i = string.find(director, "<")
            director = stripHTML(director[0:i])
            director = convertAuthor(director)

        # Find Composer
        i = string.find(source, ">Score Composer<")

        if i != -1:
            composer = stripText(source[i-150:])
            composer = searchForPlus(composer, "class=\"underline\"")
            composer = searchForPlus(composer, ">")
            i = string.find(composer, "<")
            composer = stripHTML(composer[0:i])
            composer = convertAuthor(composer)

        # Find Photographer
        i = string.find(source, ">Cinematographer<")

        if i != -1:
            photographer = stripText(source[i-150:])
            photographer = searchForPlus(photographer, "class=\"underline\"")
            photographer = searchForPlus(photographer, ">")
            i = string.find(photographer, "<")
            photographer = stripHTML(photographer[0:i])
            photographer = convertAuthor(photographer)

        # Find Screenwriter
        i = string.find(source, ">Screenwriter<")

        if i != -1:
            screenwriter = stripText(source[i-150:])
            screenwriter = searchForPlus(screenwriter, "class=\"underline\"")
            screenwriter = searchForPlus(screenwriter, ">")
            i = string.find(screenwriter, "<")
            screenwriter = stripHTML(screenwriter[0:i])
            screenwriter = convertAuthor(screenwriter)

        # Find Editor
        i = string.find(source, ">Editor<")

        if i != -1:
            editor = stripText(source[i-150:])
            editor = searchForPlus(editor, "class=\"underline\"")
            editor = searchForPlus(editor, ">")
            i = string.find(editor, "<")
            editor = stripHTML(editor[0:i])
            editor = convertAuthor(editor)

        # Find Author
        i = string.find(source, ">Source Author<")

        if i != -1:
            author = stripText(source[i-150:])
            author = searchForPlus(author, "class=\"underline\"")
            author = searchForPlus(author, ">")
            i = string.find(author, "<")
            author = stripHTML(author[0:i])
            author = convertAuthor(author)
        



def _stripOpenTags(text, opener, closer):
    """Remove every opening tag in text that starts with opener.

    A tag runs from opener up to and including the next closer. When closer
    is the quote-plus-bracket form and it does not occur after the tag, a
    bare ">" is accepted instead. A tag with no terminator at all is dropped
    together with everything after it, so the loop always makes progress.

    Returns the text with the tags removed; text between tags is kept.
    """
    start = text.find(opener)

    while start != -1:
        end = text.find(closer, start)
        width = len(closer)

        if end == -1 and closer != ">":
            # e.g. an unquoted attribute: fall back to the plain bracket.
            end = text.find(">", start)
            width = 1

        if end == -1:
            # Unterminated tag: cut it off rather than loop on it forever.
            text = text[0:start]
        else:
            text = text[0:start] + text[end + width:]

        # Search from the beginning again, as removals can join fragments.
        start = text.find(opener)

    return text


def stripComments(tempcomment):
    """Convert an HTML review/overview fragment into plain text.

    tempcomment is the raw HTML fragment. Everything from the first
    "</td></tr>" on is discarded, runs of spaces are collapsed, known tags
    and entities are removed or turned into newlines/indentation, and
    leading and trailing newlines are trimmed.

    Returns the cleaned text; it may be empty.
    """
    cut = tempcomment.find("</td></tr>")

    if cut != -1:
        tempcomment = stripText(tempcomment[0:cut])

    # Collapse runs of spaces down to a single space.
    while tempcomment.find("  ") != -1:
        tempcomment = tempcomment.replace("  ", " ")

    # These attribute-carrying tags go before the literal replacements so
    # that their closing tags can be mapped to newlines afterwards.
    for opener in ("<div ", "<h3 ", "<strong "):
        tempcomment = _stripOpenTags(tempcomment, opener, "\">")

    # Applied strictly in this order: longer patterns such as "</h3><p>"
    # must be handled before the shorter ones they contain.
    replacements = (
        ("\n", ""),
        ("<h3>", "\n\n"),
        ("</h3><p>", "\n"),
        ("</h3>", "\n"),
        ("</SPAN>", ""),
        ("<TABLE>", ""),
        ("<TD>", ""),
        ("</TD>", ""),
        ("<TR>", ""),
        ("</TR>", "\n"),
        ("</A>", ""),
        ("<strong>", ""),
        ("</strong>", ""),
        ("</font>", ""),
        ("</div>", "\n\n"),
        ("\t", ""),
        ("  ", " "),
        ("<p>", "\n\n"),
        ("</p>", ""),
        ("</P>", ""),
        ("<P>", "\n\n"),
        ("<br>", "\n"),
        ("<BR>", "\n"),
        ("<br />", "\n"),
        ("<i>", ""),
        ("</i>", ""),
        ("<b>", ""),
        ("</b>", ""),
        ("<B>", ""),
        ("</B>", ""),
        ("<I>", ""),
        ("</I>", ""),
        ("&#151;", "-"),
        # Windows-1252 ellipsis character, written as an escape so the
        # pattern cannot be lost to an encoding change. An empty pattern
        # here would insert "..." between every character.
        ("\x85", "..."),
        ("<em>", ""),
        ("</em>", ""),
        ("<EM>", ""),
        ("</EM>", ""),
        ("&#58;", ":"),
        ("<ul>", "\n"),
        ("</ul>", "\n\n"),
        ("<UL>", "\n"),
        ("</UL>", "\n\n"),
        ("<LI>", "\n    "),
        ("</LI>", ""),
        ("<li>", "\n    "),
        ("</li>", ""),
        ("&#8217;", "'"),
        ("<br/>", "\n"),
        ("Scene Index</a>", ""),
        ("View Trailer</a>", ""),
        ("Full Product Details</a>", ""),
        ("\n ", "\n"),
    )

    for old, new in replacements:
        tempcomment = tempcomment.replace(old, new)

    for opener in ("<TD ", "<SPAN ", "<A ", "<a "):
        tempcomment = _stripOpenTags(tempcomment, opener, "\">")

    tempcomment = _stripOpenTags(tempcomment, "<font ", ">")
    tempcomment = _stripOpenTags(tempcomment, "<img src", ">")

    # Never leave more than one blank line in a row.
    while tempcomment.find("\n\n\n") != -1:
        tempcomment = tempcomment.replace("\n\n\n", "\n\n")

    # Trim newlines one character at a time; slicing (not indexing) keeps
    # the rest of the text and is safe on a string that is only newlines.
    while tempcomment.startswith("\n"):
        tempcomment = tempcomment[1:]

    while tempcomment.endswith("\n"):
        tempcomment = tempcomment[0:len(tempcomment)-1]

    return tempcomment



# Run the scraper, then always give the optional user exit script a chance
# to run, even when extract() raised.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # Called without namespace arguments, execfile() runs the file in
        # the current namespace, which at this level is the script's own
        # globals - the ones extract() has just filled in. A trailing
        # "in globals()" would only be a discarded membership test on the
        # call's return value, so it is not written here.
        execfile("scrapers/vwuserexit.py")
