# chapters.ca scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML


def extract():
    """Scrape book details from a chapters.ca product page.

    Reads the page text from the global ``source`` and the global
    ``fullDateFormat`` setting, and stores what it finds in the module
    globals declared below (title, authors, prices, format, date,
    dimensions, ISBN, publisher, series, image URL, comments, ...).
    Fields whose marker is not present on the page are left untouched,
    apart from the defaults assigned at the top.

    ``source`` is consumed as extraction advances: after each field is
    found it is rebound to the text following that field, so the sections
    below depend on their order.

    Takes no arguments and returns None.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""

    # No place extraction default to US
    # place = "Canada"

    # The site reports a missing item either with a message or with an
    # "oops" header image.
    for marker in (">The item you requested could not be found",
                   "header_oops.gif"):
        if string.find(source, marker) != -1:
            # NOTE(review): presumably searchFor returns None when the text
            # is absent, so the second call fails and aborts the scrape with
            # an exception -- confirm against scrapers.scrapers.
            source = searchFor(source, "force exception")
            source = searchFor(source, "force exception")
            return


    # Find optional fields, pricing info etc.
    # These are looked up in a copy so that "source" is not advanced.
    marketinfo = source


    # Find List price
    i = string.find(marketinfo, ">List Price:")

    if i != -1:
        listprice = searchForPlus(marketinfo, ">List Price:")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])


    # Check for availability
    if string.find(source, ">Temporarily Unavailable<") != -1:
        available = "N"


    # Main extraction

    # Find Image
    i = string.find(source, "class=\"OTSProductImage\"")

    if i != -1:
        image = source[i:]
        image = searchForPlus(image, "src=\"")
        i = string.find(image, "\"")
        image = stripText(image[0:i])

        # Turn every HTML-escaped "&amp;" in the URL back into "&".
        i = string.find(image, "&amp;")

        while i != -1:
            image = image[0:i+1] + image[i+5:]
            i = string.find(image, "&amp;")

        # Force the "sale" parameter to 0.
        i = string.find(image, "&sale=")

        if i != -1:
            j = string.find(image, "&", i + 1)

            if j == -1:
                # "sale" is the last parameter: nothing follows its value.
                image = image[0:i+6] + "0"
            else:
                image = image[0:i+6] + "0" + image[j:]

        # Drop the "header" parameter altogether.
        i = string.find(image, "&header=")

        if i != -1:
            j = string.find(image, "&", i + 1)

            if j == -1:
                # "header" is the last parameter: cut the URL before it.
                image = image[0:i]
            else:
                image = image[0:i] + image[j:]

        # A "NotAvailable" URL is treated as no image at all.
        if string.find(image, "NotAvailable") != -1:
            image = ""


    # Find Title
    source = searchForPlus(source, "class=\"itemProduct\"")
    source = searchForPlus(source, "<h1>")
    i = string.find(source, "<")
    title = stripText(source[0:i])


    # Find Author, believe it or not Chapters often
    # does not include the author!
    tag = ">Author:"
    i = string.find(source, tag)

    if i == -1:
        tag = ">Translated by:"
        i = string.find(source, tag)

    if i != -1:
        source = searchForPlus(source, tag)
        i = string.find(source, "</p>")
        authors = stripText(source[0:i])
        authorList = []

        # Each author is the text of one link in the author paragraph.
        while searchFor(authors, "href=") is not None:
            authors = searchForPlus(authors, "href=")
            authors = searchForPlus(authors, "\">")
            i = string.find(authors, "<")
            author = stripText(authors[0:i])

            # Move the last word to the front: "First Last" -> "Last, First".
            i = string.rfind(author, " ")

            if i != -1:
                author = author[i+1:] + ", " + author[0:i]

            authorList.append(author)

        # Only the first six authors have a field to go into.
        if len(authorList) > 0:
            author = authorList[0]

        if len(authorList) > 1:
            author2 = authorList[1]

        if len(authorList) > 2:
            author3 = authorList[2]

        if len(authorList) > 3:
            author4 = authorList[3]

        if len(authorList) > 4:
            author5 = authorList[4]

        if len(authorList) > 5:
            author6 = authorList[5]


    # Find Price
    i = string.find(source, ">Our Price:")

    if i != -1:
        source = searchForPlus(source, ">Our Price:")
        source = searchFor(source, "$")
        i = string.find(source, "<")
        value = stripText(source[0:i])


    # Find Format
    i = string.find(source, ">Format:")

    if i != -1:
        source = searchForPlus(source, ">Format:")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        # The format used to be cut at an empty delimiter, which always
        # matches at offset 0 and so blanked the format on every page.
        # NOTE(review): the intended delimiter cannot be recovered from this
        # file, so the whole text is kept -- restore the cut if it is known.
        format = stripHTML(source[0:i])


    # Find Publication Date
    i = string.find(source, "Published:")

    if i != -1:
        source = searchForPlus(source, "Published:")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        date = stripHTML(source[0:i])

        if fullDateFormat == "false":
            # Keep only the last word of the date (presumably the year).
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])


    # Find Dimensions
    i = string.find(source, ">Dimensions:")

    if i != -1:
        source = searchForPlus(source, ">Dimensions:")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        dimensions = stripHTML(source[0:i])

        # The page count, when present, precedes the word "Pages" in the
        # same field; split it off.
        i = string.find(dimensions, "Pages")

        if i != -1:
            pages = stripText(dimensions[0:i])
            dimensions = stripText(dimensions[i+5:])

        # Drop a leading comma from what remains.
        if dimensions != "":
            if dimensions[0:1] == ",":
                dimensions = stripText(dimensions[1:])


    # Find ISBN
    i = string.find(source, "ISBN:")

    if i != -1:
        source = searchForPlus(source, "ISBN:")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        isbn = stripText(source[0:i])


    # Find Publisher
    i = string.find(source, ">Published By:")

    if i != -1:
        source = searchForPlus(source, ">Published By:")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        publisher = stripText(source[0:i])


    # Find Series
    i = string.find(source, ">Series Title:&nbsp;")

    if i != -1:
        source = searchForPlus(source, ">Series Title:&nbsp;")
        i = string.find(source, "<")
        series = stripText(source[0:i])


    # Find Comments
    comments = ""
    i = string.find(source, "<h3>From the Publisher</h3>")

    if i != -1:
        source = searchForPlus(source, "<h3>From the Publisher</h3>")
        source = searchForPlus(source, "<div")
        source = searchForPlus(source, ">")
        i = string.find(source, "</div>")
        comments = "From the Publisher\n\n" + stripText(source[0:i])

    i = string.find(source, "<h3>About the Author</h3>")

    if i != -1:
        source = searchForPlus(source, "<h3>About the Author</h3>")
        source = searchForPlus(source, "<div")
        source = searchForPlus(source, ">")
        i = string.find(source, "</div>")

        if comments != "":
            comments = comments + "\n\n" + "About the Author\n\n" + stripText(source[0:i])
        else:
            comments = "About the Author\n\n" + stripText(source[0:i])

    if comments != "":
        # Remove opening <div ...> and <a ...> tags, attributes included.
        for opener in ("<div ", "<a "):
            i = string.find(comments, opener)

            while i != -1:
                j = string.find(comments, ">", i)

                if j == -1:
                    # Unterminated tag: drop it and stop, rather than loop
                    # forever on text that never changes.
                    comments = comments[0:i]
                else:
                    comments = comments[0:i] + comments[j+1:]

                i = string.find(comments, opener)

        comments = string.replace(comments, "\r", "")

        # Entity and tag replacements, applied in this order.
        replacements = (
            ("&#8217;", "'"),
            ("&#8212;", "-"),
            ("&#8220;", "\""),
            ("&#8221;", "\""),
            ("<B>", ""),
            ("</B>", ""),
            ("<b>", ""),
            ("</b>", ""),
            ("<I>", ""),
            ("</I>", ""),
            ("<i>", ""),
            ("</i>", ""),
            ("<br>", "\n"),
            ("<BR>", "\n"),
            ("<Br>", "\n"),
            ("<ul>", "\n"),
            ("<UL>", "\n"),
            ("</ul>", "\n\n"),
            ("</UL>", "\n\n"),
            ("<li>", "\n"),
            ("<LI>", "\n"),
            ("</li>", ""),
            ("</LI>", ""),
            ("</P>", "\n"),
            ("<P>", "\n\n"),
            ("<p>", "\n\n"),
            ("</p>", "\n"),
            ("<EM>", ""),
            ("</EM>", ""),
            ("<h2>", ""),
            ("</h2>", "\n"),
            ("<h3>", ""),
            ("</h3>", "\n"),
            ("</div>", "\n"),
            ("</a>", "\n"),
            ("<blockquote>", ""),
            ("</blockquote>", ""),
        )

        for old, new in replacements:
            comments = string.replace(comments, old, new)

        comments = string.replace(comments, "\t", " ")

        # Tidy whitespace: no spaces next to a newline and at most one blank
        # line in a row.  Each rule is repeated until it no longer matches,
        # because one pass can expose a new match (e.g. "\n  ").
        for old, new in (("\n ", "\n"), (" \n", "\n"), ("\n\n\n", "\n\n")):
            i = string.find(comments, old)

            while i != -1:
                comments = comments[0:i] + new + comments[i+len(old):]
                i = string.find(comments, old)

# Run the extraction, then give an optional site-specific user exit the
# chance to post-process the extracted fields -- even if extraction failed.
try:
    extract()
finally:
    if os.path.exists("scrapers/userexit.py"):
        # execfile() is a function, not the "exec ... in ..." statement: the
        # namespace has to be passed as an argument.  The former trailing
        # "in globals()" was only a discarded membership test.
        execfile("scrapers/userexit.py", globals())
