# amazon.com scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import convertAuthor
from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML
from    scrapers.scrapers import stripNewLines


# Ordered (old, new) substitutions applied to the editorial review text.
# They are applied one after another in exactly this order, so do not sort
# or de-duplicate the list.
_COMMENT_REPLACEMENTS = [
    ("  ", " "),
    ("</a>", ""),
    ("</A>", ""),
    ("<p>", "\n\n"),
    ("<p/>", "\n\n"),
    ("</p>", ""),
    ("<P>", "\n\n"),
    ("</P>", ""),
    ("<BR>", "\n"),
    ("<br>", "\n"),
    ("<br />", "\n"),
    ("<br/>", "\n"),
    ("<i>", ""),
    ("</i>", ""),
    ("<I>", ""),
    ("</I>", ""),
    ("<EM>", ""),
    ("</EM>", ""),
    ("<b>", ""),
    ("</b>", ""),
    ("</font>", ""),
    ("</span>", ""),
    ("<div>", ""),
    ("</div>", ""),
    ("<em>", ""),
    ("</em>", ""),
    ("&copy;", ""),
    ("&#130;", ""),
    ("&#146;", "'"),
    ("&#147;", "\""),
    ("&#148;", "\""),
    ("&#151;", "-"),
    ("&#150;", "-"),
    ("&#169;", ""),
    ("&#174;", ""),
    ("&#191;", ""),
    ("&#194;", ""),
    ("&#8212;", "-"),
    ("&#8217;", "'"),
    ("&#8220;", "\""),
    ("&#8221;", "\""),
    ("&#8230;", "..."),
    ("&#x92;", "'"),
    ("&#x97;", "-"),
    ("&quot;", "\""),
    ("&#8211;", "-"),
    ("&#xAE;", ""),
    ("&mdash;", "--"),
    ("<ul>", "\n"),
    ("<ol>", "\n"),
    ("</ul>", "\n\n"),
    ("</ol>", "\n\n"),
    ("<UL>", "\n"),
    ("</UL>", "\n\n"),
    ("<LI>", "\n    "),
    ("</LI>", ""),
    ("<li>", "\n    "),
    ("</li>", ""),
    ("<blockquote>", ""),
    ("</blockquote>", ""),
    ("<strong>", ""),
    ("</strong>", ""),
    ("<sup>", ""),
    ("</sup>", ""),
]

# Opening-tag prefixes whose whole tag (up to the next ">") is removed from
# the editorial review text, in the order they are processed.
_COMMENT_OPEN_TAGS = ["<a ", "<A ", "<p ", "<P ", "<img src", "<image ",
                      "<span", "<hr"]


def _setAuthors(authorList):
    """Copy up to six names from authorList into author, author2 .. author6.

    Globals that have no matching list entry are left untouched.
    """
    global author,author2,author3,author4,author5,author6

    if len(authorList) > 0:
        author = authorList[0]

    if len(authorList) > 1:
        author2 = authorList[1]

    if len(authorList) > 2:
        author3 = authorList[2]

    if len(authorList) > 3:
        author4 = authorList[3]

    if len(authorList) > 4:
        author5 = authorList[4]

    if len(authorList) > 5:
        author6 = authorList[5]


def _findOffer(marketinfo, urlTag, label):
    """Return (count, price) for one "offers/..." link, or None if absent.

    urlTag is the link fragment to look for (e.g. "offers/new") and label
    is the text that follows the count inside the link (e.g. "new<").
    The price is the text between the first ">$" after the label and the
    next "<".
    """
    if string.find(marketinfo, urlTag) == -1:
        return None

    info = searchForPlus(marketinfo, urlTag)
    info = searchForPlus(info, ">")
    i = string.find(info, "<br>")
    info = stripText(info[0:i])

    i = string.find(info, label)

    if i == -1:
        return None

    count = stripText(info[0:i])

    info = searchForPlus(info, label)
    info = searchFor(info, ">$")
    info = searchForPlus(info, ">")
    i = string.find(info, "<")

    return (count, stripText(info[0:i]))


def _removeOpenTags(text, tagStart):
    """Delete every tag in text that begins with tagStart, up to its ">".

    Stops early, leaving the remainder untouched, if a tag is never closed.
    """
    while searchFor(text, tagStart) is not None:
        i = string.find(text, tagStart)
        j = string.find(text[i:], ">")

        if j == -1:
            break

        text = text[0:i] + text[i+j+1:]

    return text


def _cleanComment(text):
    """Turn the editorial review HTML fragment into plain text.

    Newlines are flattened to spaces first, then the fixed substitution
    table is applied, runs of blank lines and leading spaces are collapsed,
    and finally the remaining attribute-carrying tags are removed.
    """
    while searchFor(text, "\n") is not None:
        text = string.replace(text, "\n", " ")

    for old, new in _COMMENT_REPLACEMENTS:
        text = string.replace(text, old, new)

    while searchFor(text, "\n\n\n") is not None:
        text = string.replace(text, "\n\n\n", "\n\n")

    while searchFor(text, "\n ") is not None:
        text = string.replace(text, "\n ", "\n")

    for tagStart in _COMMENT_OPEN_TAGS:
        text = _removeOpenTags(text, tagStart)

    return text


def extract():
    """Scrape book details out of the Amazon.com page held in ``source``.

    The function takes no arguments and returns nothing: it works entirely
    on module globals.  It reads ``source`` (the page HTML) and
    ``fullDateFormat`` (compared against the string "false"), and also
    reads the current values of ``keywords`` and ``comments``; these are
    presumably provided by the host application before the script runs.
    Results are stored in the globals declared below (title, author,
    isbn, publisher, prices, image, ...).

    ``source`` is consumed as parsing proceeds: several steps replace it
    with the text that follows the marker they just matched, so the order
    of the sections below matters.

    From the way they are used here, ``searchFor`` returns the text from a
    marker onward (or None when the marker is missing) and
    ``searchForPlus`` returns the text after the marker; both are project
    helpers imported from scrapers.scrapers.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""


    # No place extraction default to US
    # place = "United States"


    if string.find(source, "class=\"sans\"> Customers who viewed this item...") != -1:
        # Sometimes Amazon returns the multiple item referral page.
        # Get detail page.
        i = string.find(source, ">Read More<")

        # Only follow the link when it is really there; with i == -1 the
        # slice below would start near the end of the page and a junk URL
        # would be fetched.
        if i != -1:
            # The href is looked for in the 200 characters before the link
            # text.  Clamp so a short page cannot wrap the slice around.
            source = stripText(source[max(i-200, 0):])
            source = searchForPlus(source, "href=\"")
            i = string.find(source, "\"")
            source = stripText(source[0:i])
            http = HTTPConnection()
            http.resetReferer()
            http.blockForLoad()
            source = http.getContents(source)

            # Dump of the fetched detail page; close the file even if the
            # write fails.
            t = open("trace.html", "w")

            try:
                t.write(source)
            finally:
                t.close()

    # If Amazon lists other editions, remove them.
    # complicates price extraction
    for marker in (">Other Editions", "class=\"otherEditions\""):
        i = string.find(source, marker)

        if i != -1:
            j = string.find(source[i:], "</table")

            # Without a closing table there is nothing sensible to cut.
            if j != -1:
                source = source[0:i+1] + source[i+j:]


    # Find optional fields, pricing info etc.
    marketinfo = source


    # Find Ranking
    if string.find(marketinfo, ">Amazon.com Sales Rank:") != -1:
        usedinfo = searchForPlus(marketinfo, ">Amazon.com Sales Rank:")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<")
        salesrank = stripText(usedinfo[0:i])

        if salesrank.startswith("#"):
            salesrank = searchForPlus(salesrank, "#")

        # Keep only the number, drop whatever follows the first space.
        i = string.find(salesrank, " ")

        if i != -1:
            salesrank = stripText(salesrank[0:i])


    # Find List price
    if string.find(marketinfo, "List Price:") != -1:
        listprice = searchForPlus(marketinfo, "List Price:")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])


    # Find Reading level
    if string.find(marketinfo, ">Reading level:<") != -1:
        readinglevel = searchForPlus(marketinfo, ">Reading level:<")
        readinglevel = searchForPlus(readinglevel, ">")
        i = string.find(readinglevel, "<")
        readinglevel = stripText(readinglevel[0:i])


    # Find New/Used together (last offer-listing link on the page)
    i = string.rfind(marketinfo, "/offer-listing/")

    if i != -1:
        usedinfo = stripText(marketinfo[i:])
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "</span>")
        usedinfo = stripText(usedinfo[0:i+7])

        # The link text comes with the ampersand either escaped or raw.
        for marker in ("used &amp; new", "used & new"):
            if string.find(usedinfo, marker) != -1:
                i = string.find(usedinfo, " ")
                usedcount = stripText(usedinfo[0:i])
                newcount = stripText(usedinfo[0:i])
                usedprice = searchForPlus(usedinfo, marker)
                usedprice = searchFor(usedprice, ">$")
                usedprice = searchForPlus(usedprice, ">")
                i = string.find(usedprice, "<")
                usedprice = stripText(usedprice[0:i])
                newprice = stripText(usedprice[0:i])

        if string.find(usedinfo, "available offers<") != -1:
            i = string.find(usedinfo, " ")
            usedcount = stripText(usedinfo[0:i])
            newcount = stripText(usedinfo[0:i])
            usedprice = searchFor(usedinfo, ">$")
            usedprice = searchForPlus(usedprice, ">")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])
            newprice = stripText(usedprice[0:i])

        if string.find(usedinfo, "See ") != -1:
            # Note: this branch advances usedinfo itself, so the two
            # "See all ..." checks below look at the text after the price.
            usedinfo = searchForPlus(usedinfo, "See ")
            i = string.find(usedinfo, " ")
            usedcount = stripText(usedinfo[0:i])
            newcount = stripText(usedinfo[0:i])
            usedinfo = searchFor(usedinfo, ">$")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            usedprice = stripText(usedinfo[0:i])
            newprice = usedprice

        if string.find(usedinfo, ">See all new<") != -1:
            newprice = searchForPlus(usedinfo, ">See all new<")
            newprice = searchFor(newprice, ">$")
            newprice = searchForPlus(newprice, ">")
            i = string.find(newprice, "<")
            newprice = stripText(newprice[0:i])

        if string.find(usedinfo, ">See all collectible<") != -1:
            collectibleprice = searchForPlus(usedinfo, ">See all collectible<")
            collectibleprice = searchFor(collectibleprice, ">$")
            collectibleprice = searchForPlus(collectibleprice, ">")
            i = string.find(collectibleprice, "<")
            collectibleprice = stripText(collectibleprice[0:i])


    # Find New
    offer = _findOffer(marketinfo, "offers/new", "new<")

    if offer is not None:
        newcount, newprice = offer


    # Find Collectible
    offer = _findOffer(marketinfo, "offers/collectible", "collectible<")

    if offer is not None:
        collectiblecount, collectibleprice = offer


    # Find Used
    offer = _findOffer(marketinfo, "offers/used", "used<")

    if offer is not None:
        usedcount, usedprice = offer


    # Find Buyer Waiting
    if string.find(marketinfo, "buyer waiting!") != -1:
        buyerwaiting = "Y"


    # Main extraction
    # Keep the part of <title> after its last ":"; it is used further down
    # as a fallback source of author names.
    titleauthors = searchForPlus(source, "<title>")
    i = string.find(titleauthors, "</title>")
    titleauthors = stripText(titleauthors[0:i])
    i = string.rfind(titleauthors, ":")
    titleauthors = stripText(titleauthors[i+1:])


    # Find Image (first try)
    image = ""

    if string.find(source, "registerImage(\"original_image\"") != -1:
        tempdata = searchForPlus(source, "registerImage(\"original_image\"")
        tempdata = searchForPlus(tempdata, "\"")
        i = string.find(tempdata, "\"")
        image = stripText(tempdata[0:i])

        if string.find(image, "no-image") != -1:
            image = ""

        # Cut out everything between the first and the last comma.
        i = string.find(image, ",")

        if i != -1:
            j = string.rfind(image[i:], ",")
            image = stripText(image[0:i] + image[i+j+1:])

        # Rewrite "_AA" to "_SL"; done twice, so at most two occurrences
        # are rewritten.
        for attempt in (1, 2):
            i = string.find(image, "_AA")

            if i != -1:
                image = stripText(image[0:i] + "_SL" + image[i+3:])

        i = string.find(image, "_SL280")

        if i != -1:
            image = stripText(image[0:i] + "_SL240" + image[i+6:])


    # Find Title
    tag = "\"btAsinTitle\""
    source = searchForPlus(source, tag)
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripText(source[0:i])

    if title.startswith("*OP "):
        title = stripText(title[3:])

    # Drop a leading "(...)" group.
    if title.startswith("("):
        i = string.find(title, ")")

        if i != -1:
            title = stripText(title[i+1:])

    # Drop the last "(...)" group.
    i = string.rfind(title, "(")

    if i != -1:
        title = stripText(title[0:i])

    if title.endswith(" [BARGAIN PRICE]"):
        i = string.find(title, " [BARGAIN PRICE]")
        title = stripText(title[0:i])


    # Find Series (a further trailing "(...)" group in the title)
    i = string.rfind(title, "(")

    if i != -1:
        series = stripText(title[i+1:])
        title = stripText(title[0:i])

        i = string.find(series, ")")

        if i != -1:
            series = stripText(series[0:i])


    # Find Author
    authorTag = "author-exact="
    i = string.find(source, authorTag)

    if i == -1 or i > 5000:
        authorTag = "author="
        i = string.find(source, authorTag)

    authorList = []

    if i == -1 or i > 5000:
        # Author missing or too far away, Amazon sometimes does not list
        # the book author.
        haveAuthor = 0
    else:
        haveAuthor = 1

    if haveAuthor == 1:
        source = searchFor(source, authorTag)

        # All author links are expected on the first line.
        i = string.find(source, "\n")
        authors = stripText(source[0:i])

        while searchFor(authors, authorTag) is not None:
            authors = searchForPlus(authors, authorTag)
            i = string.find(authors, authorTag)

            # tempAuthor is the text up to the next author link, if any.
            if i != -1:
                tempAuthor = stripText(authors[0:i])
            else:
                tempAuthor = stripText(authors)

            tempAuthor = searchForPlus(tempAuthor, "\">")

            i = string.find(tempAuthor, "<")
            author = stripText(tempAuthor[0:i])
            author = convertAuthor(author)

            # Append a "(role)" that directly follows the link ...
            if string.find(tempAuthor, "</a>") != -1:
                tempValue = searchFor(tempAuthor, "</a>")

                if tempValue[0:6] == "</a> (":
                    tempValue = searchForPlus(tempAuthor, "</a> ")
                    i = string.find(tempValue, ")")
                    author = author + " " + stripText(tempValue[0:i+1])

            # ... unless the role is just "(Author)".
            i = string.find(author, "(Author)")

            if i != -1:
                author = stripText(author[0:i])

            authorList.append(author)

        _setAuthors(authorList)

    if title != "" and len(authorList) == 0:
        print("Getting author from <title>")
        titleauthors = titleauthors + ","

        while searchFor(titleauthors, ",") is not None:
            i = string.find(titleauthors, ",")
            author = stripText(titleauthors[0:i])
            author = convertAuthor(author)

            if author != "Books":
                authorList.append(author)

            titleauthors = searchForPlus(titleauthors, ",")

        _setAuthors(authorList)

    if title != "" and len(authorList) == 0:
        author = "No Author"


    # Find Image
    if image == "":
        tempdata = searchForPlus(source, "\n")
        i = string.find(tempdata, "/images.amazon.com/")

        if i > 0 and i <= 2500:
            # Back up a little to include the <img tag; clamp so the slice
            # cannot wrap around when the match is near the start.
            tempdata = stripText(tempdata[max(i-25, 0):])

        i = string.find(tempdata, "<img src=\"")

        # i == -1 means there is no image tag at all; it must not be
        # treated as "close enough".
        if i != -1 and i <= 300:
            tempdata = searchForPlus(tempdata, "<img src=\"")
            i = string.find(tempdata, "\"")
            image = stripText(tempdata[0:i])

            # Reject images that are not the cover.
            for marker in ("free-shipping", "/promotions",
                           "/customer-reviews", "no-image", "no-img-lg.gif"):
                if string.find(image, marker) != -1:
                    image = ""

            image = string.replace(image, "_SCLZZ", "_SCMZZ")

            for sized in ("_AA400_", "_AA240_", "_AA140_", "_AA180_", "_AA136_"):
                image = string.replace(image, sized, "_AA_")

            # Remove the part of the name between "_P" and "_SC".
            i = string.find(image, "_P")

            if i != -1:
                j = string.find(image[i:], "_SC")

                # No "_SC" after "_P": leave the URL alone.
                if j != -1:
                    image = image[0:i] + image[i+j:]


    # Find Price
    i = string.find(source, ">Price:<")

    if i == -1:
        i = string.find(source, "class=price")

    if i == -1:
        i = string.find(source, "List Price:")

    if i != -1:
        source = source[i:]

        source = searchFor(source, "$")
        i = string.find(source, "<")
        value = stripText(source[0:i])

        # Price can contain special charges, ignore
        i = string.find(value, "+")

        if i != -1:
            value = stripText(value[0:i])
    else:
        value = usedprice


    # Find keywords
    # NOTE(review): tags are only collected when the "Tags Customers
    # Associate with Similar Products" heading is NOT present - confirm
    # this is intended.
    if string.find(source, ">Tags Customers Associate with Similar Products<") == -1:
        if string.find(source, "class=\"tags-piles-feedback\"") != -1:
            tags = searchForPlus(source, "class=\"tags-piles-feedback\"")
            i = string.find(tags, "</table>")
            tags = stripText(tags[0:i])

            while searchFor(tags, "tag=\"") is not None:
                tags = searchForPlus(tags, "tag=\"")
                i = string.find(tags, "\"")
                tag = stripText(tags[0:i])

                if keywords == "":
                    keywords = tag
                else:
                    keywords = keywords + ", " + tag


    # Find attributes, first (short) attempt
    if string.find(source, "<br clear=\"left\">") != -1:
        attributes = searchForPlus(source, "<br clear=\"left\">")
        i = string.find(attributes, "class=")

        if i != -1:
            attributes = stripText(attributes[0:i])

        # Find Format
        if string.find(attributes, "<b>") != -1:
            attributes = searchForPlus(attributes, "<b>")
            i = string.find(attributes, "<")
            format = stripText(attributes[0:i])


        # Find Pages
        i = string.find(attributes, "pages")

        if i != -1:
            attributes = stripText(attributes[max(i-10, 0):])
            attributes = stripText(searchForPlus(attributes, "- "))

            # Only parse a page count when "pages" was actually found,
            # matching the second attempt further down.
            i = string.find(attributes, " ")

            if i != -1:
                pages = stripText(attributes[0:i])

                i = string.find(pages, " ")

                if i != -1:
                    pages = stripText(pages[0:i])

                # Discard anything that is not a plain integer.
                try:
                    string.atoi(pages)
                except ValueError:
                    pages = ""


        # Find Publication Date
        if string.find(attributes, "(") != -1:
            attributes = searchForPlus(attributes, "(")
            i = string.find(attributes, ")")
            date = stripText(attributes[0:i])

            # Short format keeps only the last word of the date.
            if fullDateFormat == "false":
                i = string.rfind(date, " ")

                if i != -1:
                    date = stripText(date[i+1:])


        # Find Publisher
        if string.find(attributes, "<font") != -1:
            attributes = searchForPlus(attributes, "<font")
            attributes = searchForPlus(attributes, ">")
            i = string.find(attributes, "<")
            # NOTE(review): the stripText result is immediately replaced by
            # the stripHTML result; kept as in the original.
            publisher = stripText(attributes[0:i])
            publisher = stripHTML(attributes[0:i])

            i = string.find(publisher, ";")

            if i != -1:
                publisher = stripText(publisher[0:i])


        # Find ISBN
        if string.find(attributes, "ISBN:") != -1:
            attributes = searchForPlus(attributes, "ISBN:")
            i = string.find(attributes, "\n")
            temp = stripText(attributes[0:i])

            if temp != "":
                isbn = temp


        # Find Dimensions
        if string.find(attributes, "; Dimensions") != -1:
            attributes = searchForPlus(attributes, "; Dimensions")
            attributes = searchForPlus(attributes, ":")
            i = string.find(attributes, "<")
            dimensions = stripText(attributes[0:i])


    # Find Comments
    if string.find(source, ">Editorial Reviews<") != -1:
        comments = ""
        tempcomment = searchForPlus(source, ">Editorial Reviews<")
        tempcomment = searchForPlus(tempcomment, "\">")
        i = string.find(tempcomment, "</div>")
        tempcomment = tempcomment[0:i]

        # Cut the review off at the first trailer marker of each kind.
        for marker in ("See all Editorial Reviews<", "<hr ", "<p clear="):
            i = string.find(tempcomment, marker)

            if i != -1:
                tempcomment = tempcomment[0:i]

        comments = _cleanComment(tempcomment)

    if comments.endswith("See all Editorial Reviews"):
        i = string.find(comments, "See all Editorial Reviews")
        comments = stripText(comments[0:i])


    # Find attributes, second (long) attempt
    i = string.find(source, ">Product Details<")

    if i != -1:
        attributes = stripText(source[i:])
        i = string.find(attributes, "</ul>")
        attributes = stripText(attributes[0:i])

        # Find Format
        if string.find(attributes, "<b>") != -1:
            format = searchForPlus(attributes, "<b>")
            i = string.find(format, "<")
            format = stripText(format[0:i])

            # Skip a leading "Reading level:" entry.
            if format == "Reading level:":
                attributes = searchForPlus(attributes, "<b>")
                format = searchForPlus(attributes, "<b>")
                i = string.find(format, "<")
                format = stripText(format[0:i])

            # An explicit "Format:" label carries the value after it.
            if format == "Format:":
                attributes = searchForPlus(attributes, "<b>")
                format = searchForPlus(attributes, ">")
                i = string.find(format, "<")
                format = stripText(format[0:i])

            i = string.find(format, ":")

            if i != -1:
                format = stripText(format[0:i])

            if format == "Publisher":
                format = ""


        # Find Kindle Dimensions
        if string.find(attributes, ">File Size:<") != -1:
            attributes = searchForPlus(attributes, ">File Size:<")
            attributes = searchForPlus(attributes, ">")
            i = string.find(attributes, "<")
            dimensions = stripText(attributes[0:i])


        # Find Pages
        i = string.find(attributes, "pages")

        if i != -1:
            attributes = stripText(attributes[max(i-10, 0):])
            attributes = stripText(searchForPlus(attributes, ">"))

            i = string.find(attributes, " ")

            if i != -1:
                pages = stripText(attributes[0:i])

                i = string.find(pages, " ")

                if i != -1:
                    pages = stripText(pages[0:i])

                # Discard anything that is not a plain integer.
                try:
                    string.atoi(pages)
                except ValueError:
                    pages = ""


        # Find Publisher
        if string.find(attributes, ">Publisher:") != -1:
            attributes = searchForPlus(attributes, ">Publisher:")
            attributes = searchForPlus(attributes, ">")
            i = string.find(attributes, "<")
            # NOTE(review): the stripText result is immediately replaced by
            # the stripHTML result; kept as in the original.
            publisher = stripText(attributes[0:i])
            publisher = stripHTML(attributes[0:i])

            # Text after ";" is the edition, unless it starts with "(".
            i = string.find(publisher, ";")

            if i != -1:
                editionNumber = stripText(publisher[i+1:])

                if editionNumber[0:1] != "(":
                    j = string.find(editionNumber, "(")

                    # Without a "(" keep the whole edition text instead of
                    # chopping off its last character.
                    if j != -1:
                        editionNumber = stripText(editionNumber[0:j])
                else:
                    editionNumber = ""

                publisher = stripText(publisher[0:i])

            i = string.find(publisher, "(")

            if i != -1:
                publisher = stripText(publisher[0:i])


        # Find Publication Date (last "(...)" before the next "</li>")
        i = string.find(attributes, "</li>")
        temp = stripText(attributes[0:i])

        i = string.rfind(temp, "(")

        if i != -1:
            date = stripText(temp[i+1:])
            i = string.find(date, ")")
            date = stripText(date[0:i])

            # Short format keeps only the last word of the date.
            if fullDateFormat == "false":
                i = string.rfind(date, " ")

                if i != -1:
                    date = stripText(date[i+1:])


        # Find ISBN (falling back to ISBN-10, then ASIN)
        tag = "ISBN:"
        i = string.find(attributes, tag)

        if i == -1:
            tag = "ISBN-10:"
            i = string.find(attributes, tag)

        if i == -1:
            tag = "ASIN:"
            i = string.find(attributes, tag)

        if i != -1:
            attributes = searchForPlus(attributes, tag)
            attributes = searchForPlus(attributes, ">")
            i = string.find(attributes, "<")
            temp = stripText(attributes[0:i])

            if temp != "":
                isbn = temp

                i = string.find(isbn, ";")

                if i != -1:
                    isbn = stripText(isbn[0:i])

                i = string.find(isbn, "<")

                if i != -1:
                    isbn = stripText(isbn[0:i])


        # Find Dimensions
        if string.find(attributes, "Product Dimensions:") != -1:
            attributes = searchForPlus(attributes, "Product Dimensions:")
            attributes = searchForPlus(attributes, ">")
            i = string.find(attributes, "<")
            dimensions = stripText(attributes[0:i])


        # Find Shipping weight
        if string.find(attributes, ">Shipping Weight:<") != -1:
            attributes = searchForPlus(attributes, ">Shipping Weight:<")
            attributes = searchForPlus(attributes, ">")
            i = string.find(attributes, "<")
            weight = stripText(attributes[0:i])

            i = string.find(weight, "(")

            if i != -1:
                weight = stripText(weight[0:i])

            # Drop a trailing full stop.
            if weight.endswith("."):
                i = string.rfind(weight, ".")

                if i != -1:
                    weight = stripText(weight[0:i])


    # Find Series
    if string.find(source, "series-numbers.gif") != -1:
        source = searchForPlus(source, "series-numbers.gif")
        source = searchForPlus(source, "<a href=")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        series = stripText(source[0:i])


    # Find user rating
    tag = "/>See all "

    if string.find(source, tag) != -1:
        temp = searchForPlus(source, tag)

        # Look for the stars image in the text that is actually parsed
        # below (temp), not in the whole page.
        if string.find(temp, "/customer-reviews/ratings/stars") != -1:
            temp = searchFor(temp, "/customer-reviews/ratings/stars")
            i = string.find(temp, ".")
            temp = stripText(temp[0:i])
            temp = searchForPlus(temp, "stars-")
            temp = string.replace(temp, "-", ".")

            if temp != "":
                rating = temp + " Stars"


    # Find category
    if string.find(source, ">Look for Similar Items by Subject<") != -1:
        source = searchForPlus(source, ">Look for Similar Items by Subject<")
        source = searchForPlus(source, "type=\"checkbox\"")
        source = searchForPlus(source, "value=\"")
        i = string.find(source, "\"")
        category = stripText(source[0:i])
    elif string.find(source, ">Look for Similar Items by Category<") != -1:
        source = searchForPlus(source, ">Look for Similar Items by Category<")
        source = searchForPlus(source, "<ul>")
        source = searchForPlus(source, "<li>")
        i = string.find(source, "</ul>")
        tempCatg = source[0:i]

        # Only the first list item is used.
        i = string.find(tempCatg, "<li>")

        if i != -1:
            tempCatg = stripText(tempCatg[0:i])

        category = ""

        # Join the link texts of that item with " : ".
        while searchFor(tempCatg, "href=") is not None:
            tempCatg = searchForPlus(tempCatg, "href=")
            tempCatg = searchForPlus(tempCatg, ">")
            i = string.find(tempCatg, "<")
            temp = stripText(tempCatg[0:i])

            if temp != "Subjects":
                if category != "":
                    category = category + " : "

                category = category + temp

            tempCatg = tempCatg[i:]
    elif string.find(source, ">Search for") != -1:
        source = searchForPlus(source, ">Search for")
        tempCatg = searchForPlus(source, "href=")
        tempCatg = searchForPlus(tempCatg, ">")
        i = string.find(tempCatg, "<")
        category = stripText(tempCatg[0:i])




# Run the scraper, then always give the optional user exit script a chance
# to post-process the extracted globals, even if extraction raised.
try:
    extract()
finally:
    if os.path.exists("scrapers/userexit.py"):
        # Pass the namespace as an argument.  The previous
        # "execfile(...) in globals()" ran the file and then evaluated a
        # meaningless "None in globals()" membership test.
        execfile("scrapers/userexit.py", globals())
