# Borders scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripNewLines


def _splitNames(text, suffix):
    """Split a " / "-separated list of names into a list of strings.

    text   -- raw text holding one or more names separated by " / "
    suffix -- text appended to every name, e.g. " (Editor)"; may be ""

    Every name is passed through stripText.  Names that end up empty are
    skipped, so an empty credit line does not produce a blank author.
    """
    names = []

    # Prefix a separator so that every name, including the first one,
    # follows a " / " and can be handled by the same loop.
    remaining = " / " + stripText(text)

    # searchFor is used the way the rest of this scraper uses it: a None
    # result means the separator does not occur any more.
    while searchFor(remaining, " / ") is not None:
        remaining = searchForPlus(remaining, " / ")
        i = string.find(remaining, " / ")

        if i == -1:
            name = stripText(remaining)
        else:
            name = stripText(remaining[0:i])

        if name:
            names.append(name + suffix)

    return names


def _takeNames(text, tag, suffix):
    """Consume one credit section (author, editor, translator) from text.

    Returns a tuple (rest, names).  When tag does not occur in text the
    text is returned untouched together with an empty list.  Otherwise
    rest is the text following the "</b>" that closes the tag, and names
    are the " / "-separated entries found before the next "<br>".
    """
    if string.find(text, tag) == -1:
        return text, []

    text = searchForPlus(text, tag)
    text = searchForPlus(text, "</b>")
    i = string.find(text, "<br>")

    return text, _splitNames(text[0:i], suffix)


def _firstLine(text):
    """Return text up to, but not including, its first "\\n" or "\\r".

    Both terminators are located in the same string that is being cut.
    When neither occurs the whole text is returned.
    """
    end = len(text)

    for terminator in ("\n", "\r"):
        pos = string.find(text, terminator)

        if pos != -1 and pos < end:
            end = pos

    return text[0:end]


def _cleanDescription(text):
    """Turn one line of description markup into plain comment text.

    Paragraph and line-break tags become newlines, italic and bold tags
    are dropped, and anchor tags are removed while their link text is
    kept.
    """
    # Order matters: the variants with a trailing space must be replaced
    # before the bare tags so that the space is swallowed too.
    replacements = (
        ("<p> ", "\n\n"),
        ("<p>", "\n\n"),
        ("<P> ", "\n\n"),
        ("<P>", "\n\n"),
        ("<BR>", "\n"),
        ("<i>", ""),
        ("</i>", ""),
        ("<I>", ""),
        ("</I>", ""),
        ("<B>", ""),
        ("</B>", ""),
    )

    for old, new in replacements:
        text = string.replace(text, old, new)

    text = stripText(text)

    # Remove opening anchor tags.  The same search drives both the loop
    # condition and the slice index, so the two can never disagree.
    i = string.find(text, "<a ")

    while i != -1:
        j = string.find(text[i:], "\">")
        # If the closing "\">" is missing j is -1 and only the "<" is
        # dropped, which is enough to stop this tag matching again.
        text = text[0:i] + text[i+j+2:]
        i = string.find(text, "<a ")

    # The matching closing tags carry no text of their own.
    text = string.replace(text, "</a>", "")

    return text


def extract():
    """Extract book details from a Borders product page.

    Input is taken from module globals: source holds the page text and
    fullDateFormat selects the date style (when it equals the string
    "false" only the part of the date after its last space is kept).

    Results are written to module globals such as title, author,
    author2..author6, isbn, publisher, format, pages, date, value,
    listprice, category, comments and image.  Fields that cannot be
    located are simply not assigned, apart from the defaults set at the
    top of the function.

    The global source is consumed while parsing: it is repeatedly
    replaced by the text following the last item found, so the order of
    the sections below is significant.

    Returns None.  If the product-information marker is missing, only
    the defaults and the list price are set.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""

    # No place extraction default to US
    # place = "United States"


    # Find optional fields, pricing info etc.
    # The list price is looked up in the untouched page text, before
    # source starts being consumed by the main extraction.
    marketinfo = source
    i = string.find(marketinfo, ">List Price:<")

    if i != -1:
        listprice = searchForPlus(marketinfo, ">List Price:<")
        listprice = searchForPlus(listprice, ">")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])

        if listprice == "N/A":
            listprice = ""


    # Main extraction
    i = string.find(source, "<!---START OF ROW WITH PRODUCT INFORMATION--->")

    if i == -1:
        return


    # Find Image
    source = searchForPlus(source, "<!---START OF ROW WITH PRODUCT INFORMATION--->")
    i = string.find(source, "web_images")

    if i != -1:
        source = searchForPlus(source, "<img src=\"")
        i = string.find(source, "\"")
        tempimage = stripText(source[0:i])

        # Placeholder artwork is not a real cover, so it is ignored
        if string.find(tempimage, "placeholder") == -1:
            image = "http://www.bordersstores.com" +  tempimage


    # Find Title
    source = searchForPlus(source, "class=\"titledetail\"")
    source = searchForPlus(source, "<b>")
    i = string.find(source, "</b>")
    title = stripText(source[0:i])


    # Find Format
    source = searchForPlus(source, ">Bibliographic Data:")
    source = searchForPlus(source, "</b>")
    i = string.find(source, ",")
    format = stripText(source[0:i])


    # Find Pages
    source = searchForPlus(source, ",")
    i = string.find(source, "<")
    pages = stripText(source[0:i])

    # Just the numbers
    i = string.find(pages, "Pages")

    if i != -1:
        pages = stripText(pages[0:i])


    # Find Publisher
    i = string.find(source, "<")
    publisher = stripText(source[0:i])
    # At this point publisher also contains the date and maybe page count

    # Step over page count
    i = string.find(publisher, "Pages")

    if i != -1:
        publisher = stripText(searchForPlus(publisher, ","))


    # Find Publication Date, date follows LAST comma in publisher
    i = string.rfind(publisher, ",")

    if i != -1:
        date = stripText(publisher[i+1:])
        publisher = stripText(publisher[0:i])

        if fullDateFormat == "false":
            # Keep only what follows the last space of the date
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])

    # Remove company-form suffixes from publisher, in this order
    for ending in (", Incorporated", ", Limited", ", The"):
        i = string.find(publisher, ending)

        if i != -1:
            publisher = stripText(publisher[0:i])


    # Find Author; a book without an author line may be credited to its
    # editor instead, in which case the editor is listed without a suffix
    tag = "<b>Author:"

    if string.find(source, tag) == -1:
        tag = "<b>Editor:"

    source, authorList = _takeNames(source, tag, "")

    # Add in editors, if any
    source, editors = _takeNames(source, "<b>Editor:", " (Editor)")

    # Add in translators, if any
    source, translators = _takeNames(source, "<b>Translator:", " (Translator)")

    authorList = authorList + editors + translators


    # Only the first six names have a field to go into
    if len(authorList) == 0:
        author = "No Author"
    else:
        author = authorList[0]

    if len(authorList) > 1:
        author2 = authorList[1]

    if len(authorList) > 2:
        author3 = authorList[2]

    if len(authorList) > 3:
        author4 = authorList[3]

    if len(authorList) > 4:
        author5 = authorList[4]

    if len(authorList) > 5:
        author6 = authorList[5]


    # Find Price
    i = string.find(source, " Price:<")

    if i != -1:
        source = searchForPlus(source, " Price:<")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        value = stripText(source[0:i])


    # Find ISBN
    source = searchForPlus(source, ">ISBN:<")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    isbn = stripText(source[0:i])


    # Find Category, falling back to the shelf location
    tag = ">Subject:<"
    i = string.find(source, tag)

    if i == -1:
        tag = ">Shelf Location:<"
        i = string.find(source, tag)

    if i != -1:
        source = searchForPlus(source, tag)
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        category = stripText(source[0:i])
        category = string.replace(category, "\t", "")
        category = string.replace(category, "\n", "")
        category = string.replace(category, "&nbsp;", " ")
        category = string.replace(category, "&gt;", ":")
        i = string.find(category, " -- Shelf")

        if i != -1:
            category = stripText(category[0:i])


    # Find Comments; every description block on the page contributes one
    # paragraph, separated by a blank line
    comments = ""
    descTag = "<B>Description:<"

    if string.find(source, descTag) != -1:
        found = searchFor(source, descTag)

        while found is not None:
            source = found

            # A description occupies the rest of the line it starts on
            tempcomments = _cleanDescription(stripText(_firstLine(source)))

            if comments != "":
                comments = comments + "\n\n" + tempcomments
            else:
                comments = tempcomments

            # Step past the current match so the next search moves on
            source = source[1:]
            found = searchFor(source, descTag)



# Run the extraction when the script is loaded.  The optional user exit
# script is executed afterwards even if extract() raised, so that local
# customisations always get a chance to run; the exception, if any, still
# propagates once the hook has finished.
try:
    extract()
finally:
    if os.path.exists("scrapers/userexit.py"):
        # Pass the namespace explicitly.  The former
        # "execfile(...) in globals()" was only a membership test on the
        # result of execfile, not a namespace argument.
        execfile("scrapers/userexit.py", globals())
