# Powells scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML


def _text_before(text, terminator):
    """Return the part of text that precedes the first terminator.

    When terminator does not occur the whole of text is returned.  Slicing
    with the -1 that string.find reports in that case would silently drop
    the last character instead.
    """
    end = string.find(text, terminator)

    if end == -1:
        return text

    return text[0:end]


def _labelled_value(text, label, openers):
    """Return the stripped value that follows label in text, or None.

    Starting at the first occurrence of label, each pattern in openers is
    skipped in turn with searchForPlus; the value is whatever precedes the
    next "<".  None is returned when label does not occur in text.
    """
    start = string.find(text, label)

    if start == -1:
        return None

    rest = text[start:]

    for opener in openers:
        rest = searchForPlus(rest, opener)

    return stripText(_text_before(rest, "<"))


def _comment_section(text, marker):
    """Return the stripped body of the block tagged with marker, or None.

    The body runs from the end of the next class=...> tag after marker up
    to the following "</div>".  None is returned when marker is absent.
    """
    if string.find(text, marker) == -1:
        return None

    rest = searchForPlus(text, marker)
    rest = searchForPlus(rest, "class=")
    rest = searchForPlus(rest, ">")
    return stripText(_text_before(rest, "</div>"))


def _year_only(value):
    """Return the text after the last space in value, or value if it has none."""
    cut = string.rfind(value, " ")

    if cut == -1:
        return value

    return stripText(value[cut+1:])


def _strip_anchor_tags(text):
    """Remove every complete <a ...> opening tag and every </a> from text."""
    start = string.find(text, "<a ")

    while start != -1:
        end = string.find(text, ">", start)

        if end == -1:
            # Unterminated tag: nothing can be removed, and looping again
            # would find the same "<a " forever.
            break

        text = text[0:start] + text[end+1:]

        # Rescan from the beginning, as removing a tag can join the text
        # on either side of it into a new "<a ".
        start = string.find(text, "<a ")

    return string.replace(text, "</a>", "")


def extract():
    """Scrape book details out of the global source into the result globals.

    source is expected to hold the HTML of either a search summary page or
    a book detail page.  For a summary page the first result's detail page
    is downloaded and scraped instead; if the summary lists no result the
    function returns after setting only the defaults.

    Neither source nor fullDateFormat is assigned a starting value in this
    file - presumably the hosting application supplies them before the
    script runs (TODO confirm).  Fields whose label is missing from the
    page are left unassigned, except for the defaults set at the top and
    for isbn and comments, which are always set.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""

    # No default is set for place; the disabled United States default is
    # kept here for reference.
    # place = "United States"


    # Main extraction
    # Summary or detail page
    if string.find(source, ">Titles per page:<") != -1:
        # Summary page: give up unless it lists at least one result
        if string.find(source, "<!-- RESULT ITEM START -->") == -1:
            return

        # Follow the first result's link and scrape its detail page instead
        source = searchForPlus(source, "<!-- RESULT ITEM START -->")
        source = searchForPlus(source, "HREF=\"")
        url = "http://www.powells.com" + stripText(_text_before(source, "\""))
        http = HTTPConnection()
        http.resetReferer()
        http.blockForLoad()
        source = http.getContents(url)


    # Detail display
    # Find Price
    if string.find(source, "class=\"price\"") != -1:
        price = searchForPlus(source, "class=\"price\"")
        price = searchFor(price, "$")
        value = stripText(_text_before(price, "<"))


    # Find Image, if any
    if string.find(source, "<img id=\"cover\"") != -1:
        cover = searchForPlus(source, "<img id=\"cover\"")
        cover = searchForPlus(cover, "src=\"")
        cover = stripText(_text_before(cover, "\""))

        # URLs naming these files are reported as "no image"
        if string.find(cover, "nopicture") != -1:
            cover = ""

        if string.find(cover, "clearpixel") != -1:
            cover = ""

        image = cover


    # Find Title
    # From here on source is advanced past each item as it is consumed.
    source = searchForPlus(source, "font-size: 18px;")
    source = searchForPlus(source, ">")
    title = stripText(_text_before(source, "<"))

    # Find Author
    source = searchForPlus(source, "author=")
    author = stripText(_text_before(source, "\""))

    # Turn "First Last" into "Last, First" by splitting at the last space
    i = string.rfind(author, " ")

    if i != -1:
        author = stripText(author[i:]) + ", " + stripText(author[0:i])


    # Find ISBN
    isbn = ""
    found = _labelled_value(source, "ISBN10:", ("style=", ">"))

    if found is not None:
        isbn = found


    # Find Comments
    comments = ""

    # Publisher comments are only taken when the marker occurs at least
    # twice, and then from its last occurrence.
    # NOTE(review): presumably the first occurrence is not the text itself
    # (e.g. a link to it) - confirm against a live page.
    i = string.rfind(source, "class=\"ir publishercomments\"")
    j = string.find(source, "class=\"ir publishercomments\"")

    if i != -1 and i != j:
        # Starting one character in makes the search below skip the
        # marker's own "class=" and land on the next one.
        rawdata = stripText(source[i+1:])
        rawdata = searchForPlus(rawdata, "class=")
        rawdata = searchForPlus(rawdata, ">")
        comments = "Publisher Comments:\n" + stripText(_text_before(rawdata, "</div>"))

    sections = (
        ("Synopsis:\n", "class=\"ir synopsis\""),
        ("Review:\n",   "class=\"ir review\""),
    )

    for heading, marker in sections:
        body = _comment_section(source, marker)

        if body is not None:
            if comments != "":
                comments = comments + "\n\n"

            comments = comments + heading + body

    # Entity and tag clean-up; applied in this order, so the final
    # "\n " rule also tidies the newlines the earlier rules introduce.
    replacements = (
        ("&#8217;", "\'"),
        ("&#8219;", "\'"),
        ("&#8220;", "\""),
        ("&#8221;", "\""),
        ("&#8212;", "-"),
        ("<div>", ""),
        ("<cite>", ""),
        ("</cite>", ""),
        ("<i>", ""),
        ("</i>", ""),
        ("<p>", "\n\n"),
        ("</p>", ""),
        ("<br />", "\n"),
        ("\n ", "\n"),
    )

    for old, new in replacements:
        comments = string.replace(comments, old, new)

    comments = _strip_anchor_tags(comments)


    # Move on to product details if present.
    if string.find(source, "<div id=\"product_details\">") != -1:
        source = searchForPlus(source, "<div id=\"product_details\">")


    # Find Author (overrides the one taken from the page header)
    found = _labelled_value(source, ">Author:", ("\">",))

    if found is not None:
        author = found

    # Find ISBN, unless the ISBN10 entry already supplied one
    if isbn == "":
        found = _labelled_value(source, "ISBN:", ("style=", ">"))

        if found is not None:
            isbn = found


    # Find Publisher
    found = _labelled_value(source, "Publisher:", ("\">",))

    if found is not None:
        publisher = found


    # Find Format
    found = _labelled_value(source, "Binding:", ("\">",))

    if found is not None:
        format = found


    # Find Date; a "Year:" entry, when present, overrides "Publication Date:"
    for label in ("Publication Date:", "Year:"):
        found = _labelled_value(source, label, ("\">",))

        if found is not None:
            date = found

            if fullDateFormat == "false":
                date = _year_only(date)

            # A leading "c" marks the value as a copyright date as well
            if date[0:1] == "c":
                date = stripText(date[1:])
                copyDate = date


    # Find Copyright Date
    found = _labelled_value(source, "Copyright:", ("\">",))

    if found is not None:
        copyDate = found

        if fullDateFormat == "false":
            copyDate = _year_only(copyDate)


    # Find Pages
    found = _labelled_value(source, "Pages:", ("\">",))

    if found is not None:
        pages = found

        # Keep only the first word of the entry
        i = string.find(pages, " ")

        if i != -1:
            pages = stripText(pages[0:i])


    # Find Dimensions
    found = _labelled_value(source, "Dimensions:", ("\">",))

    if found is not None:
        dimensions = found


    # Find Condition
    if string.find(source, "Condition:") != -1:
        rawdata = searchForPlus(source, "Condition:")
        rawdata = searchForPlus(rawdata, "\">")
        condition = stripText(_text_before(rawdata, "<"))


    # Find Place
    if string.find(source, "Published in:") != -1:
        rawdata = searchForPlus(source, "Published in:")
        rawdata = searchForPlus(rawdata, "\">")
        place = stripText(_text_before(rawdata, "<"))


    # Find Category: the first subject, plus the second one when present
    if string.find(source, "Subject:") != -1:
        rawdata = searchForPlus(source, "Subject:")
        rawdata = searchForPlus(rawdata, "\">")
        category = stripText(_text_before(rawdata, "<"))

        if string.find(rawdata, "Subject:") != -1:
            rawdata = searchForPlus(rawdata, "Subject:")
            rawdata = searchForPlus(rawdata, "\">")
            category = category + " : " + stripText(_text_before(rawdata, "<"))

# Run the scraper, then give an optional user exit script the chance to
# post-process the results.  The user exit runs even when extract() raises.
try:
    extract()
finally:
    if os.path.exists("scrapers/userexit.py"):
        # Pass the namespace as an argument: "execfile(...) in globals()"
        # is only a discarded membership test on execfile's return value,
        # not the "exec ... in globals()" form it resembles.
        execfile("scrapers/userexit.py", globals())
