# Christianbook.com scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


# Ordered (old, new) substitutions applied to the description text.
# Order matters: whitespace is collapsed first, and the longer "<p>   "
# variants must be tried before the bare "<p>".
_COMMENT_REPLACEMENTS = (
    ("  ", " "),
    ("\n ", "\n"),
    (" \n", "\n"),
    ("<I>", ""),
    ("<i>", ""),
    ("</I>", ""),
    ("</i>", ""),
    ("</b>", ""),
    ("<p>    ", "\n\n"),
    ("<p>   ", "\n\n"),
    ("<p>  ", "\n\n"),
    ("<p> ", "\n\n"),
    ("<p>", "\n\n"),
    ("<ul>", "\n"),
    ("<li>", "\n    "),
    ("</ul>", ""),
)


def _textBefore(text, marker):
    """Return the part of text that precedes marker.

    When marker does not occur, the whole of text is returned.  Slicing
    with the raw -1 from find() would silently drop the last character.
    """
    end = text.find(marker)

    if end == -1:
        return text

    return text[:end]


def extract():
    """Fill the module-level book fields from the page held in ``source``.

    The global ``source`` is consumed front to back: most steps replace it
    with the text that follows the marker they just matched, so the order
    of the sections below is significant.  Results are written to the
    globals declared at the top of the function; nothing is returned.

    Optional sections (list price, image, author, description, pages,
    publication date, dimensions, category) are skipped when their marker
    is absent.  The title, price, type, vendor and ISBN markers are not
    guarded.  NOTE(review): what happens when one of those is missing
    depends on searchForPlus, which is defined outside this file.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""

    # No place extraction default to US
    # place = "United States"


    # Find optional fields, pricing info etc.
    # The list price is looked up in an untouched copy of the page so that
    # it does not depend on how far ``source`` has been consumed.
    marketinfo = source


    # Find List price
    if marketinfo.find(">Retail Price:") != -1:
        listprice = searchForPlus(marketinfo, ">Retail Price:")
        listprice = searchFor(listprice, "$")
        listprice = stripText(_textBefore(listprice, "<"))


    # Main extraction
    # Find Image
    i = source.find("/g/product")

    if i != -1:
        # Back up at most 50 characters to pick up the attribute that
        # introduces the URL.  The start is clamped at 0 because a negative
        # start index would slice from the end of the page instead.
        source = stripText(source[max(i - 50, 0):])

        if source.find("background='") != -1:
            source = searchForPlus(source, "background='")
        else:
            source = searchForPlus(source, "src='")

        image = stripText(_textBefore(source, "'"))


    # Find Title
    source = searchForPlus(source, "class=\"mlarge\"")
    source = searchForPlus(source, "<b>")
    title = stripText(_textBefore(source, "<"))


    # Find Author
    if source.find("By:") != -1:
        source = searchForPlus(source, "By:")
        source = searchForPlus(source, ">")

        tempValue = stripText(_textBefore(source, "<"))
        i = tempValue.rfind(" ")

        if i == -1:
            # Single-word name: there is nothing to reorder.
            author = tempValue
        else:
            # Reorder "First Middle Last" as "Last, First Middle".
            authorfirst = stripText(tempValue[0:i])
            authorlast = stripText(tempValue[i:])
            author = stripText(authorlast) + ", " + stripText(authorfirst)
    else:
        author = "Not specified"


    # Find Price
    source = searchForPlus(source, ">CBD Price")
    source = searchForPlus(source, ":")
    value = stripText(_textBefore(source, "<"))


    # Find Comments
    if source.find(">Description:") != -1:
        source = searchForPlus(source, ">Description:")
        i = source.find("-->")

        # Only skip past a comment terminator that sits close to the
        # heading; one further away is not treated as part of it.
        if i != -1 and i < 200:
            source = searchForPlus(source, "-->")

        comments = stripText(_textBefore(source, "<br />"))

        i = comments.find("</td>")

        if i != -1:
            comments = stripText(comments[0:i])

        for old, new in _COMMENT_REPLACEMENTS:
            comments = comments.replace(old, new)


    # Find Format
    source = searchForPlus(source, ">Type:")
    source = searchForPlus(source, ">")
    format = stripText(_textBefore(source, "<"))


    # Find number of pages
    # The guard tests the same marker that is consumed below.
    if source.find(">Number of Pages:") != -1:
        source = searchForPlus(source, ">Number of Pages:")
        source = searchForPlus(source, ">")
        pages = stripText(_textBefore(source, "<"))


    # Find Publisher
    source = searchForPlus(source, ">Vendor:")
    source = searchForPlus(source, ">")
    publisher = stripText(_textBefore(source, "<"))

    # Find Publication Date
    if source.find(">Publication Date:") != -1:
        source = searchForPlus(source, ">Publication Date:")
        source = searchForPlus(source, ">")
        date = stripText(_textBefore(source, "<"))

        if fullDateFormat == "false":
            # Keep only the last space-separated token of the date.
            i = date.rfind(" ")

            if i != -1:
                date = stripText(date[i+1:])


    # Find ISBN
    source = searchForPlus(source, ">ISBN:")
    source = searchForPlus(source, ">")
    isbn = stripText(_textBefore(source, "<"))


    # Find Dimensions
    if source.find(">Dimensions:") != -1:
        source = searchForPlus(source, ">Dimensions:")
        source = searchForPlus(source, ">")
        dimensions = stripText(_textBefore(source, "<"))


    # Find Category
    if source.find("this category<") != -1:
        source = searchForPlus(source, "this category<")
        source = searchFor(source, "<a href=")
        tempCatg = stripText(_textBefore(source, "</li> <li>"))
        category = ""

        # Join the link texts of the first list entry with " : ",
        # leaving out the generic "Books" level.
        while searchFor(tempCatg, "<a href=") is not None:
            tempCatg = searchForPlus(tempCatg, "<a href=")
            tempCatg = searchForPlus(tempCatg, ">")
            rawCatg = _textBefore(tempCatg, "<")
            thisCatg = stripText(rawCatg)

            if thisCatg == "Books":
                continue

            if category != "":
                category = category + " : "

            category = category + thisCatg
            tempCatg = tempCatg[len(rawCatg):]




# Run the extraction, then give an optional user exit script the chance to
# post-process the extracted fields, even if extract() raised.
try:
    extract()
finally:
    if os.path.exists("scrapers/userexit.py"):
        # Pass the namespace as an argument.  The previous
        # "execfile(...) in globals()" form only evaluated a membership
        # test on execfile's None result; "in" is exec-statement syntax
        # and has no effect on execfile.
        execfile("scrapers/userexit.py", globals())
