# Barnes and Noble scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML


def extract():
    """Scrape one product page held in the global ``source`` into book fields.

    Inputs are the globals ``source`` (the page text) and ``fullDateFormat``;
    neither is assigned in this file, so they are presumably supplied by the
    hosting application before the script runs (confirm against the caller).
    Results are written to the globals declared below; nothing is returned.

    ``source`` is consumed as the function goes: it is re-bound past the
    product-info heading, then to the author section, and finally past the
    "Product Details" heading, so the order of the sections below matters.

    NOTE(review): the helpers come from scrapers.scrapers and are not visible
    here.  From the way they are used, searchForPlus presumably returns the
    text following the first occurrence of the marker, and searchFor the text
    from the marker onward (None when the marker is absent) -- confirm.

    NOTE(review): most slices of the form ``text[0:i]`` use the result of
    string.find unchecked; when the terminator is missing i is -1 and the
    slice merely drops the last character.

    When the scraped title is the "Looking for suggestions?" text, two
    consecutive searchFor calls for a marker that cannot occur are made,
    evidently to make the helper raise and abort the extraction.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""

    # No place extraction default to US
    # place = "United States"



    # Main extraction
    # Find Image
    # image is only assigned when the product-image div is present.
    i = string.find(source, "<div id=\"product-image\"")

    if i != -1:
        temp = searchForPlus(source, "<div id=\"product-image\"")
        temp = searchForPlus(temp, "src=\"")
        i = string.find(temp, "\"")
        image = stripText(temp[0:i])

        # Discard the URL when it names one of these graphics (presumably
        # spacer / "no image available" placeholders).
        i = string.find(image, "cleardot")

        if i != -1:
            image = ""

        i = string.find(image, "ruledot")

        if i != -1:
            image = ""

        i = string.find(image, "orange_dot.gif")

        if i != -1:
            image = ""

        i = string.find(image, "ImageNA")

        if i != -1:
            image = ""


    # Find Title
    # The title is the text of the first <h2 ...> inside the product-info
    # div, up to the next "<".  source is re-bound at each step.
    source = searchForPlus(source, "<div id=\"product-info\"")
    source = searchForPlus(source, "<h2")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripHTML(source[0:i])

    i = string.find(title, "Looking for suggestions? Browse Subjects.")

    if i != -1:
        # Not a product page: clear the title and deliberately provoke an
        # exception (the second call receives the result of the first failed
        # search).  The return only matters if no exception is raised.
        title = ""
        source = searchFor(source, "force exception")
        source = searchFor(source, "force exception")
        return

    # Find Author
    # Every "?ATH=" link before the closing </h2> is treated as one author.
    authorList = []
    i = string.find(source, "?ATH=")

    if i != -1:
        source = searchFor(source, "?ATH=")
        i = string.find(source, "</h2>")
        authors = stripText(source[0:i])

        while (searchFor(authors, "?ATH=") != None):
            authors = searchForPlus(authors, "?ATH=")
            authors = searchForPlus(authors, "\">")
            i = string.find(authors, "<")
            author = stripText(authors[0:i])
            i = string.rfind(author, " ")

            # Re-order "First [Middle] Last" as "Last, First [Middle]" by
            # splitting at the last space; single-word names are kept as is.
            if (i != -1):
                author = stripText(author[i:]) + ", " + stripText(author[0:i])

            authorList.append(author)


    # Copy up to six authors into the fixed author fields; the rest are
    # ignored.
    if len(authorList) > 0:
        author = authorList[0]

    if len(authorList) > 1:
        author2 = authorList[1]

    if len(authorList) > 2:
        author3 = authorList[2]

    if len(authorList) > 3:
        author4 = authorList[3]

    if len(authorList) > 4:
        author5 = authorList[4]

    if len(authorList) > 5:
        author6 = authorList[5]


    # Find Publication date
    i = string.find(source, ">Pub. Date:")

    if i != -1:
        date = searchForPlus(source, ">Pub. Date:")
        i = string.find(date, "<")
        date = stripText(date[0:i])

        # With fullDateFormat set to the string "false", keep only the text
        # after the last space (presumably the year).
        if fullDateFormat == "false":
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])


    # Series
    i = string.find(source, ">Series:")

    if i != -1:
        series = searchForPlus(source, ">Series:")
        series = searchForPlus(series, "\">")
        i = string.find(series, "<")
        series = stripText(series[0:i])


    # Reading level
    i = string.find(source, ">Age Range:")

    if i != -1:
        readinglevel = searchForPlus(source, ">Age Range:")
        i = string.find(readinglevel, "<")
        readinglevel = stripText(readinglevel[0:i])


    # Find List price
    # Taken from the "$" following the list-price class, up to the next space.
    listprice = ""
    i = string.find(source, "class=\"list-price\"")

    if i != -1:
        listprice = searchFor(source, "class=\"list-price\"")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, " ")
        listprice = stripText(listprice[0:i])


    # Find Retail Price (value)
    i = string.find(source, "Online price")

    if i != -1:
        # Start 25 characters before the label (the reason for this window is
        # not evident from this file).  The start is clamped to 0: a negative
        # start would make the slice count from the END of the page and miss
        # the price altogether.
        value = stripText(source[max(i - 25, 0):])
        value = searchFor(value, "$")
        i = string.find(value, "<")
        value = stripText(value[0:i])
    else:
        # No online price on the page: fall back to the list price.
        value = listprice


    # Find Comments
    # Editorial reviews first, then the overview; when both exist they are
    # joined with a blank line.
    comments = ""
    i = string.rfind(source, "id=\"tab-edreviews\"")

    if i != -1:
        comment = searchForPlus(source, "id=\"tab-edreviews\"")
        comment = searchForPlus(comment, "<h3>")
        i = string.find(comment, "</div></div>")
        comment = stripText(comment[0:i])

        if comments == "":
            comments = stripComments(comment)
        else:
            comments = comments + "\n\n" + stripComments(comment)

    i = string.rfind(source, "id=\"tab-overview\">")

    if i != -1:
        comment = searchForPlus(source, "id=\"tab-overview\">")
        comment = searchForPlus(comment, "<h3>")
        i = string.find(comment, "</div></div>")
        comment = stripText(comment[0:i])

        if comments == "":
            comments = stripComments(comment)
        else:
            comments = comments + "\n\n" + stripComments(comment)



    # Find book attributes
    # When the heading exists, source is advanced past it, so the attribute
    # lookups (and the category lookup further down) search only what follows.
    i = string.find(source, ">Product Details<")

    if i != -1:
        source = searchForPlus(source, ">Product Details<")

        # Find ISBN
        i = string.find(source, "ISBN:")

        if i != -1:
            isbn = searchForPlus(source, "ISBN:")
            isbn = searchForPlus(isbn, "\">")
            i = string.find(isbn, "<")
            isbn = stripText(isbn[0:i])


        # Find Format
        i = string.find(source, ">Format:")

        if i != -1:
            format = searchForPlus(source, ">Format:")
            i = string.find(format, "<")
            format = stripText(format[0:i])

            # A comma splits the value into "<format>, <pages>".
            i = string.find(format, ",")

            if i != -1:
                pages = stripText(format[i+1:])
                format = stripText(format[0:i])

                # Keep the page count only when it is followed by "pp";
                # anything else after the comma is discarded.
                i = string.find(pages, "pp")

                if i != -1:
                    pages = stripText(pages[0:i])
                else:
                    pages = ""


        # Find Publisher
        i = string.find(source, ">Publisher:")

        if i != -1:
            publisher = searchForPlus(source, ">Publisher:")
            i = string.find(publisher, "<")
            publisher = stripText(publisher[0:i])


        # Sales Rank
        i = string.find(source, "Sales Rank:")

        if i != -1:
            salesrank = searchForPlus(source, "Sales Rank:")
            i = string.find(salesrank, "<")
            salesrank = stripText(salesrank[0:i])


    # Find Category
    # Uses the text of the first link after the "Related Subjects" heading.
    i = string.find(source, ">Related Subjects<")

    if i != -1:
        category = searchForPlus(source, ">Related Subjects<")
        category = searchForPlus(category, "<a href")
        category = searchForPlus(category, "\">")
        i = string.find(category, "<")
        category = stripText(category[0:i])


# Literal substitutions applied, in this order, by stripComments.  The order
# is significant: e.g. "</h3><p>" must be handled before "</h3>" and "<p>".
_COMMENT_REPLACEMENTS = (
    ("\n", ""),
    ("<h3>", "\n\n"),
    ("</h3><p>", "\n"),
    ("</h3>", "\n"),
    ("</SPAN>", ""),
    ("<TABLE>", ""),
    ("<TD>", ""),
    ("</TD>", ""),
    ("<TR>", ""),
    ("</TR>", "\n"),
    ("</A>", ""),
    ("<strong>", ""),
    ("</strong>", ""),
    ("</font>", ""),
    ("<div>", "\n"),
    ("</div>", "\n\n"),
    ("<center>", ""),
    ("</center>", ""),
    ("\t", ""),
    ("  ", " "),
    ("<p>", "\n\n"),
    ("</p>", ""),
    ("</P>", ""),
    ("<P>", "\n\n"),
    ("<br>", "\n"),
    ("<BR>", "\n"),
    ("<br />", "\n"),
    ("<i>", ""),
    ("</i>", ""),
    ("<b>", ""),
    ("</b>", ""),
    ("<B>", ""),
    ("</B>", ""),
    ("<I>", ""),
    ("</I>", ""),
    ("<big>", ""),
    ("</big>", ""),
    ("&#151;", "-"),
    # The search text for this entry was an empty string, which makes
    # replace() insert "..." between every character of the comment.
    # NOTE(review): \x85 is the Windows-1252 ellipsis, assumed here by analogy
    # with the &#151; (Windows-1252 dash) entry above -- confirm against the
    # pristine file.
    ("\x85", "..."),
    ("<em>", ""),
    ("</em>", ""),
    ("<EM>", ""),
    ("</EM>", ""),
    ("&#58;", ":"),
    ("<ol>", "\n"),
    ("</ol>", "\n\n"),
    ("<ul>", "\n"),
    ("</ul>", "\n\n"),
    ("<UL>", "\n"),
    ("</UL>", "\n\n"),
    ("<LI>", "\n    "),
    ("</LI>", ""),
    ("<li>", "\n    "),
    ("</li>", ""),
    ("&#8217;", "'"),
    ("&#8220;", "\""),
    ("&#8221;", "\""),
    ("&#8212;", "-"),
    ("<br/>", "\n"),
    ("More Reviews and Recommendations", ""),
    ("More About the Author", ""),
)


def _removeTags(text, opener, closer, truncate):
    # Delete every span running from `opener` through the next `closer`.
    #
    # When an opener has no closer after it: with truncate set, the text is
    # cut off at the opener; otherwise only the opener's first character is
    # dropped.  Either way the text gets shorter on every pass, so the loop
    # always terminates.  truncate is a plain 0/1 flag so the helper does not
    # rely on the True/False builtins, which early Jython releases lack.
    start = text.find(opener)

    while start != -1:
        end = text.find(closer, start)

        if end != -1:
            text = text[:start] + text[end + len(closer):]
        elif truncate:
            text = text[:start]
        else:
            text = text[:start] + text[start + 1:]

        # Search again from the beginning: joining the two pieces can create
        # a new opener that straddles the cut.
        start = text.find(opener)

    return text


def stripComments(tempcomment):
    """Reduce an HTML review/overview fragment to plain text.

    tempcomment -- the HTML fragment (a string).

    Returns the fragment with the known tags removed or turned into line
    breaks, a handful of character entities decoded, and runs of spaces and
    blank lines collapsed.  Tags that are not listed are left in place.

    The tag loops for "<div ", "<TD ", "<SPAN ", "<A ", "<a " and "<strong "
    delete up to the next '">', i.e. they expect the tag to end in a quoted
    attribute; "<font " and "<img src" delete up to the next ">".

    NOTE(review): the loops previously tested for a match with searchFor and
    then located it with string.find; only find is used now, on the
    assumption that both match the same way.
    """
    # Everything from the first "</td></tr>" onward is dropped.
    i = tempcomment.find("</td></tr>")

    if i != -1:
        tempcomment = stripText(tempcomment[0:i])

    # Collapse runs of spaces down to a single space.
    while tempcomment.find("  ") != -1:
        tempcomment = tempcomment.replace("  ", " ")

    tempcomment = tempcomment.replace("<div class=\"header\">", "<BR>")
    tempcomment = tempcomment.replace("<div><P>", "<BR>")

    tempcomment = _removeTags(tempcomment, "<div ", "\">", 0)

    for old, new in _COMMENT_REPLACEMENTS:
        tempcomment = tempcomment.replace(old, new)

    # Remove indentation at the start of lines, then limit consecutive
    # newlines to three.
    while tempcomment.find("\n ") != -1:
        tempcomment = tempcomment.replace("\n ", "\n")

    while tempcomment.find("\n\n\n\n") != -1:
        tempcomment = tempcomment.replace("\n\n\n\n", "\n\n\n")

    for opener in ("<TD ", "<SPAN ", "<A ", "<a ", "<strong "):
        tempcomment = _removeTags(tempcomment, opener, "\">", 0)

    # An unterminated "<font " used to leave the text unchanged and spin
    # forever; it is now cut off at the tag, exactly as the image case
    # already was.
    tempcomment = _removeTags(tempcomment, "<font ", ">", 1)
    tempcomment = _removeTags(tempcomment, "<img src", ">", 1)

    return tempcomment


def separateUsedPrice(url):
    """Fetch the page at url and return the price found after its Buy button.

    url -- address of the page to download.

    The page is searched for the marker alt="Buy", then for the next "$";
    the text from there up to the following "<" is passed through stripText
    and returned.

    NOTE(review): searchFor presumably returns None when a marker is missing,
    in which case the next step raises -- confirm that callers expect this.
    """
    connection = HTTPConnection()
    connection.resetReferer()
    connection.blockForLoad()

    page = connection.getContents(url)
    fromBuyButton = searchFor(page, "alt=\"Buy\"")
    fromDollar = searchFor(fromBuyButton, "$")

    end = string.find(fromDollar, "<")
    return stripText(fromDollar[0:end])


# Script entry point: run the scrape.  Because of the finally clause the
# optional user exit below is attempted whether extract() returns normally or
# raises; an exception from extract() still propagates afterwards.
try:
    extract()
finally:
    # Optional hook: only executed when the file exists (the path is relative
    # to the current working directory).
    if os.path.exists("scrapers/userexit.py"):
        # NOTE(review): this parses as `(execfile(...)) in globals()` --
        # execfile is called with the path alone and its return value is then
        # membership-tested against globals(), the result being discarded.
        # Presumably `execfile(path, globals())` was intended; confirm before
        # changing it, since the namespaces the hook runs in could differ.
        execfile("scrapers/userexit.py") in globals()
