# Library of Congress scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML


def extract():
    """Parse a Library of Congress result page into the module-level fields.

    Reads the page text from the global ``source`` and consumes it while
    scanning: ``source`` is rebound to the remaining text after every
    marker that is matched, so the sections below must run in this order.

    Always assigned: the fixed defaults listed under "Defaults", plus
    ``title``, ``place``, ``publisher``, ``date`` and ``author``.
    Assigned only when the corresponding marker/value is found:
    ``category``, ``isbn`` and ``value``.  All other names in the
    ``global`` declarations are left untouched by this function.

    NOTE(review): ``searchFor``/``searchForPlus``/``stripText``/``stripHTML``
    come from ``scrapers.scrapers``; this code assumes ``searchFor`` returns
    None when the marker is absent and that ``searchForPlus`` returns the
    text following the marker -- confirm against that module.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""


    # Main extraction
    # Find Title
    source = searchFor(source, "/table")
    source = searchForPlus(source, "<font size=+1>")
    i = string.find(source, "</font>")
    title = stripText(source[0:i])

    # Remove trailing comma, if any
    i = string.rfind(title, ",")

    if len(title) == (i + 1):
        title = title[0:i]

    # Remove trailing period, if present
    i = string.rfind(title, ".")

    if i != -1:
        if len(title) == (i + 1):
            title = title[0:i]


    # Find Place
    # The imprint follows either an "</em><br>" marker or a "<DD>" tag.
    i = string.find(source, "</em><br>")

    if i != -1:
        source = searchForPlus(source, "</em><br>")
    else:
        source = searchForPlus(source, "<DD>")

    # Skip a second "</em><br>" marker when one is still ahead.
    if searchFor(source, "</em><br>") is not None:
        source = searchForPlus(source, "</em><br>")

    # Place ends at " :" or, failing that, at the first comma.
    i = string.find(source, " :")

    if i == -1:
        i = string.find(source, ",")

    place = stripText(source[0:i])


    # Find Publisher
    # Step over the two-character separator that ended the place.
    source = source[i+2:]
    i = string.find(source, "<BR>")
    imprint = stripText(source[0:i])

    # The last comma of the imprint splits publisher (left) from date (right).
    i = string.rfind(imprint, ",")
    publisher = stripText(imprint[0:i])

    # Check publisher for URL
    # (j is used here so that i keeps the comma position for the date below.)
    j = string.find(publisher, "\">")

    if j != -1:
        publisher = stripText(publisher[j+2:])

    j = string.find(publisher, " :")

    if j != -1:
        publisher = stripText(publisher[0:j])


    # Find Publication Date
    date = stripText(imprint[i+1:])

    # Keep only the text inside square brackets, if any.
    i = string.find(date, "[")

    if i != -1:
        date = stripText(date[i+1:])

    i = string.find(date, "]")

    if i != -1:
        date = stripText(date[0:i])

    # Check date for trailing html
    i = string.find(date, "</a>")

    if i != -1:
        date = searchForPlus(date, "</a>")
        date = stripText(date)

    # Remove trailing period, if any
    i = string.rfind(date, ".")

    if len(date) == (i + 1):
        date = date[0:i]

    # if date in format cyyyy, strip c
    if len(date) == 5:
        i = string.find(date, "c")

        if i == 0:
            date = stripText(date[1:])


    # Find Category
    # Optional: category is only assigned when a Subjects section exists.
    i = string.find(source, ">Subjects:<")

    if i != -1:
        source = searchForPlus(source, ">Subjects:<")
        source = searchForPlus(source, "\">")
        i = string.find(source, "</")
        category = stripText(source[0:i])

        # Cut the category at its last period, if it has one.
        i = string.rfind(category, ".")

        if i != -1:
            category = stripText(category[0:i])


    # Find Author
    source = searchForPlus(source, ">Search for other works by:<")
    source = searchForPlus(source, "HREF=")
    source = searchForPlus(source, "\">")
    i = string.find(source, "</A>")
    author = stripText(source[0:i])

    # Remove trailing date, if any
    # (the text after the last comma is treated as dates when it has a "-")
    i = string.rfind(author, ",")

    if i != -1:
        tail = author[i:]
        j = string.rfind(tail, "-")

        if j != -1:
            author = stripText(author[0:i])

    # Remove , pseud if present.
    # (everything from the second comma onwards is dropped)
    if string.count(author, ",") > 1:
        i = string.find(author, ",")
        i = string.find(author, ",", i+1)
        author = author[0:i]

    # Remove trailing period, if present
    author = stripHTML(author)
    i = string.rfind(author, ".")

    if i != -1:
        x = i - 2

        if x >= 2:
            # A space two characters before the period means the period
            # follows a single character (e.g. an initial), so it is kept.
            if author[x:x+1] != ' ':
                if len(author) == (i + 1):
                    author = author[0:i]
        else:
            if len(author) == (i + 1):
                author = author[0:i]


    # Find ISBN
    source = searchForPlus(source, ">ISBN/ISSN<")
    i = string.find(source, "<br></td>")

    if i != -1:
        source = searchForPlus(source, "<br></td>")
    else:
        source = searchForPlus(source, "-</td>")

    source = searchForPlus(source, "<td align=center>")
    i = string.find(source, "<")

    # Step inside a leading <B> tag when the cell content is bold.
    if source[i:i+3] == "<B>":
        source = source[i+3:]
        i = string.find(source, "<")

    candidate = stripText(source[0:i])

    # A lone "-" in the cell means no ISBN; leave isbn unassigned then.
    if candidate != "-":
        isbn = candidate


    # Find Value, if present
    i = string.find(source, "$")

    if i != -1:
        source = source[i:]
        i = string.find(source, "<")
        value = stripText(source[0:i])

        # Drop a parenthesised suffix.  The "(" position is located in the
        # stripped value, so the cut is applied to that same string.
        paren = string.find(value, "(")

        if paren != -1:
            value = stripText(value[0:paren])



# Run the extraction; whether or not it raises, give the optional user-exit
# script (if the file exists) a chance to run afterwards.
try:
    extract()
finally:
    if os.path.exists("scrapers/userexit.py"):
        # Execute the user-exit script in this module's global namespace,
        # i.e. the same namespace extract() writes its fields into.
        execfile("scrapers/userexit.py", globals())
