# amazon.co.uk scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    scrapers.scrapers import convertAuthor
from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML


def extract():
    """Scrape book details from an amazon.co.uk product page.

    Works on the page text held in the module-global ``source`` and stores
    every value it finds in module globals (``title``, ``author``, ``isbn``,
    the pricing fields, ``image``, ``comments`` and so on).  Nothing is
    returned.

    ``source`` is consumed as the scan advances: several sections re-assign
    it to the text following the marker they just matched, so the order of
    the sections below matters.

    Pages containing ``class="sans">`` are handed to ``am2()`` and nothing
    else is done here.  If no category is found on the page,
    ``extractCategory()`` is tried as a fallback.

    When the ``fullDateFormat`` global is the string ``"false"`` only the
    text after the last space of the publication date is kept.

    NOTE(review): the loops below rely on ``searchFor`` returning ``None``
    when the marker is absent; the exact contract of ``searchFor``,
    ``searchForPlus``, ``stripText`` and ``stripHTML`` is defined in
    ``scrapers.scrapers`` and should be confirmed there.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""

    # No place extraction default to UK
    # place = "United Kingdom"

    # If Amazon lists other editions, remove them.
    # complicates price extraction
    i = string.find(source, ">Other Editions")

    if i != -1:
        j = string.find(source[i:], "</table")

        # Only cut when the closing </table is present; splicing with
        # j == -1 would duplicate characters instead of removing anything.
        if j != -1:
            source = source[0:i+1] + source[i+j:]

    i = string.find(source, "td.otherEditions")

    if i != -1:
        j = string.find(source[i:], "</table")

        if j != -1:
            source = source[0:i+1] + source[i+j:]


    # Alternate format?
    i = string.find(source, "class=\"sans\">")

    if i != -1:
        am2()
        return


    # Find optional fields, pricing info etc.
    # marketinfo keeps the whole page; source itself is trimmed further down.
    marketinfo = source


    # Find Ranking
    salesrank = ""
    i = string.find(marketinfo, ">Amazon.co.uk Sales Rank:")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, ">Amazon.co.uk Sales Rank:")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<")
        salesrank = stripText(usedinfo[0:i])


    # Find Reading level
    i = string.find(marketinfo, ">Reading level:<")

    if (i != -1):
        readinglevel = searchForPlus(marketinfo, ">Reading level:<")
        readinglevel = searchForPlus(readinglevel, ">")
        i = string.find(readinglevel, "<")
        readinglevel = stripText(readinglevel[0:i])


    # Find List price
    i = string.find(marketinfo, "List Price:")

    if (i != -1):
        listprice = searchForPlus(marketinfo, "List Price:")
        # NOTE(review): the marker here is an empty string - possibly a
        # non-ASCII character (e.g. a currency sign) was lost; confirm.
        listprice = searchFor(listprice, "")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])


    # Find New
    i = string.find(marketinfo, "sdp_new")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_new")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, " New")

        if i != -1:
            # Text before " New" is the offer count, the price follows it
            newcount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, " New")
            usedinfo = searchFor(usedinfo, ">")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            newprice = stripText(usedinfo[0:i])


    # Find Collectible
    i = string.find(marketinfo, "sdp_coll")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, "sdp_coll")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "collectable<")

        if (i != -1):
            collectiblecount = stripText(usedinfo[0:i])

            usedinfo = searchForPlus(usedinfo, "collectable<")
            usedinfo = searchFor(usedinfo, ">")
            usedinfo = searchForPlus(usedinfo, ">")
            i = string.find(usedinfo, "<")
            collectibleprice = stripText(usedinfo[0:i])


    # Find Used
    i = string.find(marketinfo, "sdp_used")

    if i != -1:
        usedinfo = searchForPlus(marketinfo, "sdp_used")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br>")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "used<")

        if i != -1:
            usedcount = stripText(usedinfo[0:i])

            # Best effort: the used price is optional, so any failure while
            # walking the markup just leaves it blank.  The handler is kept
            # deliberately broad to preserve that behaviour.
            try:
                usedinfo = searchForPlus(usedinfo, "used<")
                usedinfo = searchFor(usedinfo, ">")
                usedinfo = searchForPlus(usedinfo, ">")
                i = string.find(usedinfo, "<")
                usedprice = stripText(usedinfo[0:i])
            except:
                usedprice = ""


    # Find Buyer Waiting
    i = string.find(marketinfo, "buyer waiting!")

    if (i != -1):
        buyerwaiting = "Y"


    # Find Title
    source = searchForPlus(source, "<td width=\"75%\" valign=\"top\">")
    source = searchForPlus(source, "<b>")
    i = string.find(source, "<")
    title = stripHTML(source[0:i])

    i = string.find(title, "<br>")

    if (i != -1):
        title = stripText(title[0:i])


    # Find Author
    i = string.find(source, "uthor=")
    haveAuthor = 1
    authorList = []

    if i == -1:
        haveAuthor = 0

    if (i > 5000):
        # Author too far away, Amazon sometimes does not list the book
        # author.
        haveAuthor = 0

    if haveAuthor == 1:
        source = searchFor(source, "uthor=")

        # All author links are expected on the first line after the marker
        i = string.find(source, "\n")
        authors = stripText(source[0:i])
        authorList = []

        while (searchFor(authors, "uthor=") is not None):
            authors = searchForPlus(authors, "uthor=")
            i = string.find(authors, ">")
            tempValue = stripText(authors[0:i])

            # The link value is URL-encoded "Last%2C%20First/..."; the last
            # name ends at the encoded comma or, failing that, at the "/".
            i = string.find(tempValue, "%2C")

            if (i == -1):
                i = string.find(tempValue, "/")

            authorlast = stripText(tempValue[0:i])
            i = string.find(tempValue, "%2C%20")

            if (i != -1):
                tempValue = searchForPlus(tempValue, "%2C%20")
                i = string.find(tempValue, "/")
                authorfirst = stripText(tempValue[0:i])
                author = stripText(authorlast) + ", " + stripText(authorfirst)
            else:
                author = authorlast

            authorList.append(author)


        # Spread the authors over the six author globals
        if len(authorList) > 0:
            author = authorList[0]

        if len(authorList) > 1:
            author2 = authorList[1]

        if len(authorList) > 2:
            author3 = authorList[2]

        if len(authorList) > 3:
            author4 = authorList[3]

        if len(authorList) > 4:
            author5 = authorList[4]

        if len(authorList) > 5:
            author6 = authorList[5]

    if title != "" and len(authorList) == 0:
        author = "No Author"


    # Find Image
    image = ""
    i = string.find(source, "uk-covers")

    if i != -1:
        source = searchForPlus(source, "uk-covers")
        source = searchForPlus(source, "<img src=\"")
        i = string.find(source, "\"")
        image = stripText(source[0:i])
    else:
        i = string.find(source, "images-eu")

        if i != -1 and i<1000:
            # Back up a little so an enclosing link is included.  Clamp at
            # zero: a negative start would slice from the end of the text.
            source = stripText(source[max(i-50, 0):])
            i = string.find(source, " href=\"")

            if i != -1 and i<50:
                temp = searchForPlus(source, " href=\"")
                i = string.find(temp, "\"")
                image = stripText(temp[0:i])
                image = string.replace(image, ".LZZZ", ".MZZZ")
                image = string.replace(image, ".TZZZ", ".MZZZ")
            else:
                temp = searchForPlus(source, "<img src=\"")
                i = string.find(temp, "\"")
                image = stripText(temp[0:i])

                # Drop the comma-separated section of the image URL
                i = string.find(image, ",")

                if i != -1:
                    j = string.rfind(image, ",")
                    k = string.find(image[j:], "_")
                    image = stripText(image[0:i] + image[j+k:])

                # Drop the "_PE..." segment of the image URL
                i = string.find(image, "_PE")

                if i != -1:
                    j = string.find(image[i+1:], "_")
                    image = stripText(image[0:i] + image[i+j+1:])

            # Only URLs containing "ZZZ" are accepted as cover images here
            i = string.find(image, "ZZZ")

            if i == -1:
                image = ""
        else:
            i = string.find(source, "images.amazon.com")

            if (i != -1):
                source = searchForPlus(source, "images.amazon.com")
                source = searchForPlus(source, "<img src=\"")
                i = string.find(source, "\"")
                image = stripText(source[0:i])

    i = string.find(image, "arrow.gif")

    if i != -1:
        image = ""


    # Find Price
    i = string.find(source, "Our Price: <")

    if i != -1:
        temp = searchForPlus(source, "Our Price: <")
    else:
        temp = searchForPlus(source, "List Price:<")

    if temp is not None:
        # NOTE(review): empty marker again - see the list price note above.
        temp = searchFor(temp, "")
        i = string.find(temp, "<")
        value = stripText(temp[0:i])


    # Find Categories
    category = ""
    i = string.find(source, "<b>Category(ies):")

    if i != -1:
        catgsrc = searchForPlus(source, "<b>Category(ies):")
        i = string.find(catgsrc, "<p>")
        categories = stripText(catgsrc[0:i])
        i = string.find(categories, "<font")

        if i != -1:
            categories = stripText(categories[0:i])

        # Join the text of every link with " : "
        while (searchFor(categories, "href=") is not None):
            categories = searchForPlus(categories, "href=")
            categories = searchForPlus(categories, ">")
            i = string.find(categories, "<")

            if (category != ""):
                category = category + " : "

            category = category + stripText(categories[0:i])
            categories = categories[i:]


    # Find user rating
    i = string.find(source, ">Avg. Customer Review:<")

    if i == -1:
        i = string.find(source, ">Average Customer Review:<")

    if i != -1:
        temp = stripText(source[i:])
        i = string.find(temp, "<p>")
        temp = stripText(temp[0:i])

        i = string.find(temp, "<img src=\"")

        if i != -1:
            # The rating is taken from the image file name: the text after
            # the last "/" and before the first ".", minus a leading
            # "stars-", with "-" turned into ".".
            temp = searchForPlus(temp, "<img src=\"")
            i = string.find(temp, "\"")
            temp = stripText(temp[0:i])
            i = string.rfind(temp, "/")

            if i != -1:
                temp = stripText(temp[i+1:])

            i = string.find(temp, ".")

            if i != -1:
                temp = stripText(temp[0:i])

            if temp.startswith("stars-"):
                temp = searchForPlus(temp, "stars-")

            temp = string.replace(temp, "-", ".")

            if temp != "":
                rating = temp + " Stars"


    # Find Attributes
    attrtag = ">Product Details:"
    i = string.find(source, attrtag)

    if i != -1:
        attributes = searchFor(source, attrtag)
        i = string.find(attributes, "<font face=")
        attributes = stripText(attributes[0:i])

        # Find Format
        format = searchForPlus(attributes, "<b>")
        i = string.find(format, "<")
        format = stripText(format[0:i])
        i = string.find(format, ":")

        if i != -1:
            format = stripText(format[0:i])


        # Find Pages
        attributes = searchForPlus(attributes, "<b>")
        i = string.find(attributes, "pages")

        if (i != -1):
            # Step back to the tag preceding the page count.  Clamp at zero:
            # a negative start would slice from the end of the text.
            attributes = stripText(attributes[max(i-10, 0):])
            attributes = stripText(searchForPlus(attributes, ">"))

            i = string.find(attributes, " ")

            if (i != -1):
                pages = stripText(attributes[0:i])

                i = string.find(pages, " ")

                if (i != -1):
                    pages = stripText(pages[0:i])


        # Find Publication Date
        date = searchForPlus(attributes, "(")
        i = string.find(date, ")")
        date = stripText(date[0:i])

        # [Haai]
        # sometimes there is no date supplied, and the scraper hits the
        # bracket of Category(ies), giving date ies
        if date[0:3] == "ies":
            date = ""

        if fullDateFormat == "false":
            # Keep only the text after the last space of the date
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])

        # Find Publisher
        i = string.find(attributes, ">Publisher:")

        if i != -1:
            publisher = searchForPlus(attributes, ">Publisher:")
            publisher = searchForPlus(publisher, ">")
            i = string.find(publisher, "<")
            publisher = stripHTML(publisher[0:i])
            publisher = string.replace(publisher, "&amp;", "&")

            i = string.find(publisher, ";")

            if i != -1:
                publisher = stripText(publisher[0:i])

        # Find ISBN
        i = string.find(attributes, "ISBN:")

        if (i != -1):
            isbn = searchForPlus(attributes, "ISBN:")
            isbn = searchForPlus(isbn, ">")
            i = string.find(isbn, "<")
            isbn = stripHTML(isbn[0:i])
            i = string.find(isbn, " ")

            if i != -1:
                isbn = stripText(isbn[0:i])


    # Find Comments
    i = string.find(source, ">Reviews<")

    if (i != -1):
        comments = ""
        source = searchForPlus(source, ">Reviews<")
        source = searchForPlus(source, "<br> ")
        i = string.find(source, "<p align=right>")
        tempcomments = source[0:i]

        # Trim trailing page furniture
        i = string.find(tempcomments, "</form>")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "<hr noshade")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        # Skip past a leading <font / <span wrapper
        i = string.find(tempcomments, "<font")

        if (i != -1):
            tempcomments = searchForPlus(tempcomments, "<font")

        i = string.find(tempcomments, "<span")

        if (i != -1):
            tempcomments = searchForPlus(tempcomments, "<span")

        # Only the first line after the wrapper tag is used as the comment
        tempcomments = searchForPlus(tempcomments, ">")
        i = string.find(tempcomments, "\n")
        tempcomment = stripText(tempcomments[0:i])
        i = string.find(tempcomment, "<span")

        if (i != -1):
            j = string.find(tempcomment[i:], ">")
            tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        i = string.find(tempcomment, "... <")

        if (i != -1):
            tempcomment = stripText(tempcomment[0:i+3])

        # Remove the single space following paragraph / line break tags
        while (searchFor(tempcomment, "<p> ") is not None):
            i = string.find(tempcomment, "<p> ")
            tempcomment = tempcomment[0:i+3] + tempcomment[i+4:]

        while (searchFor(tempcomment, "<P> ") is not None):
            i = string.find(tempcomment, "<P> ")
            tempcomment = tempcomment[0:i+3] + tempcomment[i+4:]

        while (searchFor(tempcomment, "<br> ") is not None):
            i = string.find(tempcomment, "<br> ")
            tempcomment = tempcomment[0:i+4] + tempcomment[i+5:]

        while (searchFor(tempcomment, "<BR> ") is not None):
            i = string.find(tempcomment, "<BR> ")
            tempcomment = tempcomment[0:i+4] + tempcomment[i+5:]

        # Convert or drop simple markup and character entities
        tempcomment = string.replace(tempcomment, "</a>", "")
        tempcomment = string.replace(tempcomment, "</A>", "")
        tempcomment = string.replace(tempcomment, "<p>", "\n\n")
        tempcomment = string.replace(tempcomment, "<P>", "\n\n")
        tempcomment = string.replace(tempcomment, "<BR>", "\n")
        tempcomment = string.replace(tempcomment, "<br>", "\n")
        tempcomment = string.replace(tempcomment, "<i>", "")
        tempcomment = string.replace(tempcomment, "</i>", "")
        tempcomment = string.replace(tempcomment, "<I>", "")
        tempcomment = string.replace(tempcomment, "</I>", "")
        tempcomment = string.replace(tempcomment, "<b>", "")
        tempcomment = string.replace(tempcomment, "</b>", "")
        tempcomment = string.replace(tempcomment, "</font>", "")
        tempcomment = string.replace(tempcomment, "</span>", "")
        tempcomment = string.replace(tempcomment, "&copy;", "")
        tempcomment = string.replace(tempcomment, "&#145;", "'")
        tempcomment = string.replace(tempcomment, "&#169;", "")
        tempcomment = string.replace(tempcomment, "&#8217;", "'")
        tempcomment = string.replace(tempcomment, "&quot;", "\"")
        tempcomment = string.replace(tempcomment, "&#8211;", "-")
        tempcomment = string.replace(tempcomment, "&#146;", "\'")
        tempcomment = string.replace(tempcomment, "<blockquote>", "")
        tempcomment = string.replace(tempcomment, "</blockquote>", "")

        # Strip remaining opening tags.  The comment was cut at a newline, so
        # a tag may have lost its closing ">"; in that case remove just the
        # tag opener so every pass shortens the text and the loop ends.
        while (searchFor(tempcomment, "<a ") is not None):
            i = string.find(tempcomment, "<a ")
            j = string.find(tempcomment[i:], ">")

            if j == -1:
                j = 2

            tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        while (searchFor(tempcomment, "<A ") is not None):
            i = string.find(tempcomment, "<A ")
            j = string.find(tempcomment[i:], ">")

            if j == -1:
                j = 2

            tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        while (searchFor(tempcomment, "<img src") is not None):
            i = string.find(tempcomment, "<img src")
            j = string.find(tempcomment[i:], ">")

            if j == -1:
                j = 7

            tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        while (searchFor(tempcomment, "<span") is not None):
            i = string.find(tempcomment, "<span")
            j = string.find(tempcomment[i:], ">")

            if j == -1:
                j = 4

            tempcomment = tempcomment[0:i] + tempcomment[i+j+1:]

        comments = tempcomment


    # Fall back to the "Browse for" links when no category was listed
    if category == "":
        extractCategory()


def extractCategory():
    """Build ``category`` from the links after the "Browse for" heading.

    Reads the module-global ``source``.  When the heading is present (with
    or without a space after ``<b>``), ``source`` is advanced past it.  If a
    ``<br>`` follows, the text of every link before it is joined with
    " : " and stored in the global ``category``.  Otherwise ``category`` is
    left untouched.
    """
    global category, source

    # Find category heading, trying both spellings
    heading = "<b>Browse for"

    if string.find(source, heading) == -1:
        heading = "<b> Browse for"

        if string.find(source, heading) == -1:
            return

    source = searchForPlus(source, heading)

    # The link list runs up to the next line break
    stop = string.find(source, "<br>")

    if stop == -1:
        return

    remaining = source[0:stop]
    category = ""

    while searchFor(remaining, "href=") != None:
        # Move to the link text: after "href=" and the tag's closing ">"
        remaining = searchForPlus(remaining, "href=")
        remaining = searchForPlus(remaining, ">")
        textEnd = string.find(remaining, "<")

        if category != "":
            category = category + " : "

        category = category + stripText(remaining[0:textEnd])
        remaining = remaining[textEnd:]




def am2():
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    print "Alternate Amazon format"


    # Find optional fields, pricing info etc.
    marketinfo = source


    # Find Ranking
    salesrank = ""
    i = string.find(marketinfo, ">Amazon.co.uk Sales Rank:")

    if (i != -1):
        usedinfo = searchForPlus(marketinfo, ">Amazon.co.uk Sales Rank:")
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<")
        salesrank = stripText(usedinfo[0:i])

        i = string.find(salesrank, " ")

        if i != -1:
            salesrank = stripText(salesrank[0:i])


    # Find Reading level
    i = string.find(marketinfo, ">Reading level:<")

    if (i != -1):
        readinglevel = searchForPlus(marketinfo, ">Reading level:<")
        readinglevel = searchForPlus(readinglevel, ">")
        i = string.find(readinglevel, "<")
        readinglevel = stripText(readinglevel[0:i])


    # Find New/Used together
    i = string.rfind(marketinfo, "/offer-listing/")

    if i != -1:
        usedinfo = stripText(marketinfo[i:])
        usedinfo = searchForPlus(usedinfo, ">")
        i = string.find(usedinfo, "<br />")
        usedinfo = stripText(usedinfo[0:i])

        i = string.find(usedinfo, "used &amp; new")

        if i != -1:
            i = string.find(usedinfo, " ")
            usedcount = stripText(usedinfo[0:i])
            newcount = stripText(usedinfo[0:i])
            usedprice = searchForPlus(usedinfo, "used &amp; new")
            usedprice = searchFor(usedprice, "")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])
            newprice = stripText(usedprice[0:i])

        i = string.find(usedinfo, "used & new")

        if i != -1:
            i = string.find(usedinfo, " ")
            usedcount = stripText(usedinfo[0:i])
            newcount = stripText(usedinfo[0:i])
            usedprice = searchForPlus(usedinfo, "used & new")
            usedprice = searchForPlus(usedprice, "class=\"price\"")
            usedprice = searchForPlus(usedprice, ">")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])
            newprice = stripText(usedprice[0:i])

        i = string.find(usedinfo, ">See all used<")

        if (i != -1):
            usedprice = searchForPlus(usedinfo, ">See all used<")
            usedprice = searchFor(usedprice, "")
            i = string.find(usedprice, "<")
            usedprice = stripText(usedprice[0:i])

        i = string.find(usedinfo, ">See all new<")

        if (i != -1):
            newprice = searchForPlus(usedinfo, ">See all new<")
            newprice = searchFor(newprice, "")
            i = string.find(newprice, "<")
            newprice = stripText(newprice[0:i])

        i = string.find(usedinfo, ">See all collectible<")

        if (i != -1):
            collectibleprice = searchForPlus(usedinfo, ">See all collectible<")
            collectibleprice = searchFor(collectibleprice, ">")
            collectibleprice = searchForPlus(collectibleprice, ">")
            i = string.find(collectibleprice, "<")
            collectibleprice = stripText(collectibleprice[0:i])


    # Find List price
    i = string.find(marketinfo, "List Price:")

    if (i != -1):
        listprice = searchForPlus(marketinfo, "List Price:")
        listprice = searchForPlus(listprice, "class=\"listprice\"")
        listprice = searchForPlus(listprice, ">")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])
    else:
        i = string.find(marketinfo, "RRP:")

        if i != -1:
            listprice = searchForPlus(marketinfo, "RRP:")
            listprice = searchForPlus(listprice, "class=\"listprice\"")
            listprice = searchForPlus(listprice, ">")
            i = string.find(listprice, "<")
            listprice = stripText(listprice[0:i])


    # Find Buyer Waiting
    i = string.find(marketinfo, "buyer waiting!")

    if (i != -1):
        buyerwaiting = "Y"


    # Main extraction
    # Find Image (first try)
    image = ""
    i = string.find(source, "registerImage(\"original_image\"")

    if i != -1:
        tempdata = searchForPlus(source, "registerImage(\"original_image\"")
        tempdata = searchForPlus(tempdata, "\"")
        i = string.find(tempdata, "\"")
        image = stripText(tempdata[0:i])

        i = string.find(image, "no-image")

        if i != -1:
            image = ""

        i = string.find(image, ",")

        if i != -1:
            j = string.rfind(image[i:], ",")
            image = stripText(image[0:i] + image[i+j+1:])

        i = string.find(image, "_AA")

        if i != -1:
            image = stripText(image[0:i] + "_SL" + image[i+3:])


    # Find Title
    source = searchForPlus(source, "\"btAsinTitle\"")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripHTML(source[0:i])

    i = string.find(title, "<br>")

    if (i != -1):
        title = stripText(title[0:i])

    i = string.rfind(title, "(")

    if i != -1:
        title = stripText(title[0:i])


    # Find Author
    authorList = []
    tag = "&field-keywords="
    i = string.find(source, tag)

    if i == -1 or i > 5000:
        tag = "&field-author="
        i = string.find(source, tag)

    if i != -1 and i <= 5000:
        source = searchFor(source, tag)

        i = string.find(source, "\n")
        authors = stripText(source[0:i])
        authorList = []

        while (searchFor(authors, tag) != None):
            authors = searchForPlus(authors, tag)
            i = string.find(authors, ">")
            tempValue = stripText(authors[0:i])
            i = string.find(tempValue, "%2C")

            if (i == -1):
                i = string.find(tempValue, "/")

            tempValue = stripText(tempValue[0:i])
            tempValue = string.replace(tempValue, "%20", " ")
            tempValue = string.replace(tempValue, "%20", " ")
            tempValue = string.replace(tempValue, "%20", " ")
            tempValue = convertAuthor(tempValue)
            authorList.append(tempValue)


        if len(authorList) > 0:
            author = authorList[0]

        if len(authorList) > 1:
            author2 = authorList[1]

        if len(authorList) > 2:
            author3 = authorList[2]

        if len(authorList) > 3:
            author4 = authorList[3]

        if len(authorList) > 4:
            author5 = authorList[4]

        if len(authorList) > 5:
            author6 = authorList[5]

    if title != "" and len(authorList) == 0:
        author = "No Author"


    # Find Image
    if image == "":
        tag = "/images.amazon.com"
        i = string.find(source, tag)

        if i == -1 or i > 3000:
            tag = "images-amazon.com/"
            i = string.find(source, tag)

        if i != -1 and i < 3000:
            source = stripText(source[i-50:])
            temp = searchForPlus(source, "<img src=\"")
            i = string.find(temp, "\"")
            image = stripText(temp[0:i])

            i = string.find(image, ",")

            if i != -1:
                j = string.rfind(image, ",")
                k = string.find(image[j:], "_")
                image = stripText(image[0:i] + image[j+k:])

            i = string.find(image, "_PE")

            if i != -1:
                j = string.find(image[i+1:], "_")
                image = stripText(image[0:i] + image[i+j+1:])

            i = string.find(image, "ZZZ")

            if i == -1:
                image = ""

            image = string.replace(image, "_SCLZZ", "_SCMZZ")
            image = string.replace(image, "_AA400_", "_AA_")
            image = string.replace(image, "_AA240_", "_AA_")
            image = string.replace(image, "_AA140_", "_AA_")
            image = string.replace(image, "_AA180_", "_AA_")
            image = string.replace(image, "_AA136_", "_AA_")
            i = string.find(image, "_P")

            if i != -1:
                j = string.find(image[i:], "_SC")
                image = image[0:i] + image[i+j:]


    # Find Price
    tag = ">Our Price:<"
    i = string.find(source, tag)

    if i == -1:
        tag = ">Price:<"
        i = string.find(source, tag)

    if i == -1:
        tag = "List Price:<"
        i = string.find(source, tag)

    if i != -1:
        temp = searchForPlus(source, tag)
        temp = searchForPlus(temp, "class=\"price\"")
        temp = searchForPlus(temp, ">")
        i = string.find(temp, "<")
        value = stripText(temp[0:i])
    else:
        value = usedprice


    # Find user rating
    i = string.find(source, ">Avg. Customer Review:<")

    if i == -1:
        i = string.find(source, ">Average Customer Review:<")

    if i != -1:
        temp = stripText(source[i:])
        i = string.find(temp, "</span>")
        temp = stripText(temp[0:i])

        i = string.find(temp, "<img src=\"")

        if i != -1:
            temp = searchForPlus(temp, "<img src=\"")
            i = string.find(temp, "\"")
            temp = stripText(temp[0:i])
            i = string.rfind(temp, "/")

            if i != -1:
                temp = stripText(temp[i+1:])

            i = string.find(temp, ".")

            if i != -1:
                temp = stripText(temp[0:i])

            if temp.startswith("stars-") == 1:
                temp = searchForPlus(temp, "stars-")

            temp = string.replace(temp, "-", ".")

            if temp != "":
                rating = temp + " Stars"


    # Find Attributes
    attrtag = ">Product details<"
    i = string.find(source, attrtag)

    if i != -1:
        attributes = searchFor(source, attrtag)
        i = string.find(attributes, "</ul>")
        attributes = stripText(attributes[0:i])

        # Find Format
        format = searchForPlus(attributes, "<b>")
        i = string.find(format, "<")
        format = stripText(format[0:i])
        i = string.find(format, ":")

        if i != -1:
            format = stripText(format[0:i])


        # Find Pages
        attributes = searchForPlus(attributes, "<b>")
        i = string.find(attributes, "pages")

        if i != -1:
            attributes = stripText(attributes[i-10:])
            attributes = stripText(searchForPlus(attributes, ">"))

            i = string.find(attributes, " ")

            if (i != -1):
                pages = stripText(attributes[0:i])

                i = string.find(pages, " ")

                if (i != -1):
                    pages = stripText(pages[0:i])


        # Find Publisher
        i = string.find(attributes, ">Publisher:")

        if i != -1:
            attributes = searchForPlus(attributes, ">Publisher:")
            publisher = searchForPlus(attributes, ">")
            i = string.find(publisher, "<")
            publisher = stripHTML(publisher[0:i])
            publisher = string.replace(publisher, "&amp;", "&")

            i = string.find(publisher, ";")

            if i != -1:
                publisher = stripText(publisher[0:i])

            i = string.rfind(publisher, "(")

            if i != -1:
                publisher = stripText(publisher[0:i])


            # Find Publication Date
            i = string.find(attributes, "</li>")
            temp = stripText(attributes[0:i])
            i = string.rfind(temp, "(")

            if i != -1:
                date = stripText(temp[i+1:])
                i = string.find(date, ")")
                date = stripText(date[0:i])

                # [Haai]
                # sometimes there is no date supplied, and the scraper hits the
                # bracket of Category(ies), giving date ies
                if date[0:3] == "ies":
                    date = ""

                if fullDateFormat == "false":
                    i = string.rfind(date, " ")

                    if i != -1:
                        date = stripText(date[i+1:])

        # Find ISBN
        i = string.find(attributes, "ISBN:")

        if i != -1:
            isbn = searchForPlus(attributes, "ISBN:")
            isbn = searchForPlus(isbn, ">")
            i = string.find(isbn, "<")
            isbn = stripHTML(isbn[0:i])
            i = string.find(isbn, " ")

            if i != -1:
                isbn = stripText(isbn[0:i])
        else :
            i = string.find(attributes, "ISBN-10:")

            if (i != -1):
                isbn = searchForPlus(attributes, "ISBN-10:")
                isbn = searchForPlus(isbn, ">")
                i = string.find(isbn, "<")
                isbn = stripHTML(isbn[0:i])
                i = string.find(isbn, " ")

                if i != -1:
                    isbn = stripText(isbn[0:i])

        # Find Dimensions
        i = string.find(attributes, "Product Dimensions:")

        if i != -1:
            dimensions = searchForPlus(attributes, "Product Dimensions:")
            dimensions = searchForPlus(dimensions, ">")
            i = string.find(dimensions, "<")
            dimensions = stripHTML(dimensions[0:i])


    # Find Comments
    tag = ">Product Description<"
    i = string.find(source, tag)

    if i == -1:
        tag = ">reviews<"
        i = string.find(source, tag)

    if i == -1:
        tag = ">Reviews<"
        i = string.find(source, tag)

    if i != -1:
        comments = ""
        source = searchForPlus(source, tag)
        source = searchForPlus(source, "<div")
        source = searchForPlus(source, ">")
        i = string.find(source, "</div>")
        tempcomments = source[0:i]

        i = string.find(tempcomments, "See all Product Description")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "</form>")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "<hr noshade")

        if (i != -1):
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "<font")

        if (i != -1):
            tempcomments = searchForPlus(tempcomments, "<font")

        i = string.find(tempcomments, "<span")

        if (i != -1):
            tempcomments = searchForPlus(tempcomments, "<span")

        tempcomments = searchForPlus(tempcomments, ">")
        i = string.find(tempcomments, "<span")

        if (i != -1):
            j = string.find(tempcomments[i:], ">")
            tempcomments = tempcomments[0:i] + tempcomments[i+j+1:]

        i = string.find(tempcomments, "... <")

        if (i != -1):
            tempcomments = stripText(tempcomments[0:i+3])

        tempcomments = string.replace(tempcomments, "\n", "")

        while (searchFor(tempcomments, "  ") != None):
            i = string.find(tempcomments, "  ")
            tempcomments = tempcomments[0:i+1] + tempcomments[i+2:]

        while (searchFor(tempcomments, "<p> ") != None):
            i = string.find(tempcomments, "<p> ")
            tempcomments = tempcomments[0:i+3] + tempcomments[i+4:]

        while (searchFor(tempcomments, "<P> ") != None):
            i = string.find(tempcomments, "<P> ")
            tempcomments = tempcomments[0:i+3] + tempcomments[i+4:]

        while (searchFor(tempcomments, "<br> ") != None):
            i = string.find(tempcomments, "<br> ")
            tempcomments = tempcomments[0:i+4] + tempcomments[i+5:]

        while (searchFor(tempcomments, "<BR> ") != None):
            i = string.find(tempcomments, "<BR> ")
            tempcomments = tempcomments[0:i+4] + tempcomments[i+5:]

        tempcomments = string.replace(tempcomments, "</a>", "")
        tempcomments = string.replace(tempcomments, "</A>", "")
        tempcomments = string.replace(tempcomments, "<p>", "\n\n")
        tempcomments = string.replace(tempcomments, "<P>", "\n\n")
        tempcomments = string.replace(tempcomments, "<BR>", "\n")
        tempcomments = string.replace(tempcomments, "<br>", "\n")
        tempcomments = string.replace(tempcomments, "<br />", "\n")
        tempcomments = string.replace(tempcomments, "<i>", "")
        tempcomments = string.replace(tempcomments, "</i>", "")
        tempcomments = string.replace(tempcomments, "<I>", "")
        tempcomments = string.replace(tempcomments, "</I>", "")
        tempcomments = string.replace(tempcomments, "<b>", "")
        tempcomments = string.replace(tempcomments, "</b>", "")
        tempcomments = string.replace(tempcomments, "<em>", "")
        tempcomments = string.replace(tempcomments, "</em>", "")
        tempcomments = string.replace(tempcomments, "</font>", "")
        tempcomments = string.replace(tempcomments, "</span>", "")
        tempcomments = string.replace(tempcomments, "&copy;", "")
        tempcomments = string.replace(tempcomments, "&#145;", "'")
        tempcomments = string.replace(tempcomments, "&#169;", "")
        tempcomments = string.replace(tempcomments, "&#8217;", "'")
        tempcomments = string.replace(tempcomments, "&quot;", "\"")
        tempcomments = string.replace(tempcomments, "&#8211;", "-")
        tempcomments = string.replace(tempcomments, "&#146;", "\'")
        tempcomments = string.replace(tempcomments, "<blockquote>", "")
        tempcomments = string.replace(tempcomments, "</blockquote>", "")

        while (searchFor(tempcomments, "\n ") != None):
            i = string.find(tempcomments, "\n ")
            tempcomments = tempcomments[0:i+1] + tempcomments[i+2:]

        while (searchFor(tempcomments, "<a ") != None):
            i = string.find(tempcomments, "<a ")
            j = string.find(tempcomments[i:], ">")

            if j == -1:
                j = 2

            tempcomments = tempcomments[0:i] + tempcomments[i+j+1:]

        while (searchFor(tempcomments, "<A ") != None):
            i = string.find(tempcomments, "<A ")
            j = string.find(tempcomments[i:], ">")
            tempcomments = stripText(tempcomments[0:i] + tempcomments[i+j+1:])

        while (searchFor(tempcomments, "<img src") != None):
            i = string.find(tempcomments, "<img src")
            j = string.find(tempcomments[i:], ">")
            tempcomments = tempcomments[0:i] + tempcomments[i+j+1:]

        while (searchFor(tempcomments, "<span") != None):
            i = string.find(tempcomments, "<span")
            j = string.find(tempcomments[i:], ">")
            tempcomments = tempcomments[0:i] + tempcomments[i+j+1:]

        tempcomments = stripText(tempcomments);
        if tempcomments.endswith("See all reviews") == 1:
            i = string.rfind(tempcomments, "See all reviews")
            tempcomments = stripText(tempcomments[0:i])

        comments = tempcomments

    if comments.endswith("See all Reviews") == 1:
        i = string.find(comments, "See all Reviews")
        comments = stripText(comments[0:i])


    # Find category
    i = string.find(source, ">Look for similar items by subject<")

    if i != -1:
        source = searchForPlus(source, ">Look for similar items by subject<")
        source = searchForPlus(source, "type=\"checkbox\"")
        source = searchForPlus(source, "value=\"")
        i = string.find(source, "\"")
        category = stripText(source[0:i])





# Script entry point: run the scraper, then run the optional user-exit
# script.  The user exit sits in the finally clause so it is executed even
# when extract() raises; the exception still propagates afterwards.
try:
    extract()
finally:
    if os.path.exists("scrapers/userexit.py"):
        # Pass the namespace explicitly so the user exit runs against this
        # module's globals.  The earlier spelling,
        # `execfile(...) in globals()`, only resembled the `exec ... in ...`
        # statement: it called execfile() with no namespace argument and
        # then evaluated the discarded membership test `None in globals()`.
        execfile("scrapers/userexit.py", globals())
