# cokesbury.com scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


def extract():
    """Scrape one cokesbury.com page into the module-level result globals.

    The page HTML is read from the global ``source``.  When that page is a
    search-results listing and it contains a ``titlelink`` entry, the link
    is followed and the page it points to is downloaded and scraped
    instead.

    The title is always assigned.  Every other scraped field is assigned
    only when its marker text is present in the page; otherwise the global
    keeps whatever value it had before the call (except ``author``, which
    falls back to "Not specified").

    NOTE(review): ``source`` is overwritten at each step with the result of
    ``searchForPlus`` -- presumably the text following the marker; confirm
    against scrapers.scrapers.  If so, later look-ups only see the rest of
    the page, which makes the order of the sections below significant.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""

    # The place of publication is not extracted from the page.  Uncomment
    # the line below to default it to the US.
    # place = "United States"


    # Get detail page, if necessary
    i = string.find(source, ">Search Results<")

    if i != -1:
        source = searchForPlus(source, ">Search Results")
        i = string.find(source, "class=\"titlelink\"")

        # Only follow a result link when one was actually found.  The code
        # backs up 100 characters before the class attribute (presumably to
        # reach the start of the enclosing anchor tag -- TODO confirm); the
        # start is clamped to 0 because a negative start would slice from
        # the END of the page instead.
        if i != -1:
            source = stripText(source[max(0, i - 100):])
            source = searchForPlus(source, "href=\"")
            i = string.find(source, "\"")
            url = stripText(source[0:i])
            http = HTTPConnection()
            http.resetReferer()
            http.blockForLoad()
            source = http.getContents(url)


    # Find optional fields, pricing info etc.
    # Work on a separate reference so that this look-up does not consume
    # ``source`` before the main extraction below.
    marketinfo = source


    # Find List Price
    i = string.find(marketinfo, ">Suggested Price:<")

    if i != -1:
        listprice = searchForPlus(marketinfo, ">Suggested Price:<")
        listprice = searchForPlus(listprice, "class=\"body\"")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])


    # Main extraction
    # Find Title
    source = searchForPlus(source, "class=\"sectiontitle\">")
    i = string.find(source, "<")
    title = stripText(source[0:i])


    # Find Series
    i = string.find(source, "clsSubTitleText")

    if i != -1:
        source = searchForPlus(source, "clsSubTitleText")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        series = stripText(source[0:i])


    # Find Image
    i = string.find(source, "/products/1")

    if i != -1:
        # Back up 50 characters before the product path so the search for
        # " src='" below starts ahead of it.  Clamp the start to 0: a
        # negative start would slice from the end of the page and throw
        # away the rest of the markup.
        source = stripText(source[max(0, i - 50):])
        source = searchForPlus(source, " src='")
        i = string.find(source, "'")
        image = stripText(source[0:i])

        # An image path containing "No_Photo" is discarded rather than
        # stored as the cover image.
        i = string.find(image, "No_Photo")

        if i != -1:
            image = ""

        # The scraped path is prefixed with the site root to form the URL.
        if image != "":
            image = "http://www.cokesbury.com" + image


    # Find Comments
    i = string.find(source, " width=\"293\"")

    if i != -1:
        source = searchForPlus(source, " width=\"293\"")
        source = searchForPlus(source, ">")
        i = string.find(source, "<TABLE")
        comments = stripText(source[0:i])

        # Drop raw newlines and inline markup; paragraph and line-break
        # tags become newlines.
        comments = string.replace(comments, "\n", "")
        comments = string.replace(comments, "<i>", "")
        comments = string.replace(comments, "</i>", "")
        comments = string.replace(comments, "<P>", "\n\n")
        comments = string.replace(comments, "</P>", "")
        comments = string.replace(comments, "<EM>", "")
        comments = string.replace(comments, "</EM>", "")
        comments = string.replace(comments, "<BR>", "\n")


    # Find Publisher
    i = string.find(source, ">Publisher:<")

    if i != -1:
        source = searchForPlus(source, ">Publisher:<")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        publisher = stripText(source[0:i])


    # Find Publication Date
    i = string.find(source, ">Publication Date:<")

    if i != -1:
        source = searchForPlus(source, ">Publication Date:<")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        date = stripText(source[0:i])

        # When full dates are not wanted, keep only the part after the
        # last "/" (presumably the year -- depends on the site's format).
        if fullDateFormat == "false":
            i = string.rfind(date, "/")

            if i != -1:
                date = stripText(date[i+1:])


    # Find Format
    i = string.find(source, ">Binding:<")

    if i != -1:
        source = searchForPlus(source, ">Binding:<")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        format = stripText(source[0:i])


    # Find ISBN
    i = string.find(source, "ISBN:<")

    if i != -1:
        source = searchForPlus(source, "ISBN:<")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        isbn = stripText(source[0:i])


    # Find Price: prefer the Cokesbury price, fall back to the suggested
    # price when no Cokesbury price is shown.
    i = string.find(source, ">Cokesbury Price:<")

    if i != -1:
        source = searchForPlus(source, ">Cokesbury Price:<")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        value = stripText(source[0:i])
    else:
        i = string.find(source, ">Suggested Price:<")

        if i != -1:
            source = searchForPlus(source, ">Suggested Price:<")
            source = searchForPlus(source, "\">")
            i = string.find(source, "<")
            value = stripText(source[0:i])


    # Find Author
    i = string.find(source, ">Author:<")

    if i != -1:
        source = searchForPlus(source, ">Author:<")
        source = searchForPlus(source, "<td>")
        i = string.find(source, "</td>")
        author = stripText(source[0:i])

        # The name may be wrapped in a link; step past the opening tag.
        i = string.find(author, "<A href='")

        if i != -1:
            author = searchForPlus(author, "'>")

        # Cut at the first remaining tag, if any.
        i = string.find(author, "<")

        if i != -1:
            author = stripText(author[0:i])

        # Reorder "First [Middle] Last" as "Last, First [Middle]" by
        # splitting on the last space.
        i = string.rfind(author, " ")

        if i != -1:
            authorFirst = stripText(author[0:i])
            authorLast = stripText(author[i+1:])
            author = authorLast + ", " + authorFirst
    else:
        author = "Not specified"





# Script body: the scrape runs as soon as this file is executed.
try:
    extract()
finally:
    # Run the optional user exit script whether or not extract() raised.
    # The path is relative, so it is resolved against the current working
    # directory.
    if os.path.exists("scrapers/userexit.py"):
        # NOTE(review): execfile() is called without explicit namespaces.
        # The trailing "in globals()" only forms a membership-test
        # expression on execfile's return value, and its result is
        # discarded.  Presumably modelled on the "exec code in globals()"
        # statement form -- confirm before changing.
        execfile("scrapers/userexit.py") in globals()
