# ar scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import calcISBN10CheckDigit
from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


def extract():
    """Scrape one book record out of the page text held in global ``source``.

    Reads the module globals ``source`` (page HTML) and ``fullDateFormat``
    and publishes the scraped values through the globals declared below.
    Apart from the defaults set at the top and ``author`` (which falls back
    to "Various"), a field is only assigned when its marker text is present
    in the page.

    If the page is a search-results listing, the first link after the
    "Search Results for" heading is fetched and scraped instead.

    Returns None in all cases. Returns early, with only the defaults set,
    when no title marker can be found.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source

    def textBefore(text, stop):
        # stripText() of everything in text ahead of the first stop marker.
        # A missing marker makes find() return -1, so the slice then drops
        # the final character instead of raising.
        return stripText(text[0:text.find(stop)])

    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "N" # [Haai - default is no, check for yes]
    buyerwaiting        = "N"
    weight              = ""

    # No place extraction default to Australia
    # place = "Australia"

    if source.find(">Sorry, your search returned no matches.") != -1:
        # NOTE(review): presumably searchFor raises when its target text is
        # missing, which is how a "no match" is reported to the caller; the
        # call is made twice exactly as before - confirm whether the second
        # one is needed.
        source = searchFor(source, "force exception")
        source = searchFor(source, "force exception")
        return

    # Get detail page, if necessary
    if source.find("Search Results for") != -1:
        source = searchForPlus(source, "Search Results for")
        source = searchForPlus(source, "<a href=\"")
        url = "http://www.bookworld.com.au/" + textBefore(source, "\">")
        http = HTTPConnection()
        http.resetReferer()
        http.blockForLoad()
        source = http.getContents(url)

    # Main extraction

    # Find Title - two page layouts are recognised.
    title_tag1 = "<font class=\"titlebig\">"
    title_tag2 = "<color=\"#FFFFFF\"><b>"
    if source.find(title_tag1) != -1:
        marketinfo = searchForPlus(source, title_tag1)
    elif source.find(title_tag2) != -1:
        # Search for the tag that was actually detected; this branch used to
        # look for title_tag1, which is known to be absent here.
        marketinfo = searchForPlus(source, title_tag2)
    else:
        # trouble - no title
        print("NO TITLE FOUND - CHECK SCRAPER")
        return
    title = textBefore(marketinfo, "<")

    # Find Image (before title in new format), so search the whole page.
    i = source.find("seekbooks")
    if i != -1:
        # Back up 50 characters, clamped at 0 so an early hit cannot turn
        # into a negative (from-the-end) index.
        imagesource = searchForPlus(source[max(i - 50, 0):], "src=\"")
        image = textBefore(imagesource, "\"")

    # [Haai 31/07/05]
    # Order should be:
    # Author:
    # Comments
    # Price
    # Category
    # additional Category
    # Date Published
    # ISBN
    # Format
    # Pages
    # Stock info

    # Everything below is looked up in the text following the title.
    source = marketinfo

    # Find Author
    if source.find(">Author:") != -1:
        source = searchForPlus(source, ">Author:")
        source = searchForPlus(source, ">")
        author = textBefore(source, "<")
    else:
        author = "Various"
        # or unknown.... [Haai]

    # Find comments: the line after the second "<br>\r" and the next newline.
    tempsource = searchForPlus(source, "<br>\r")
    tempsource = searchForPlus(tempsource, "<br>\r")
    tempsource = searchForPlus(tempsource, "\n")
    comments = textBefore(tempsource, "\n")

    comments = comments.replace("<br>", "\n")
    comments = comments.replace("<b>", "")
    comments = comments.replace("</b>", "")
    comments = comments.replace("\n ", "\n")

    # Find Price - prefer the online price, else the publisher price.
    i = source.find("Online Price")
    if i == -1:
        i = source.find("Publisher Price:")

    if i != -1:
        source = searchFor(source[i:], "$")
        value = textBefore(source, "<")

    # Find Category
    if source.find("Category:") != -1:
        source = searchForPlus(source, "Category:")
        source = searchForPlus(source, "\">")
        category = textBefore(source, "<")

    # Find ISBN (source itself is not advanced for ISBN, Format and Pages)
    if source.find("ISBN:") != -1:
        isbn = textBefore(searchForPlus(source, "ISBN:"), "<")

        if isbn.startswith("978"):
            # 13-digit form: drop the 978 prefix, keep the next nine
            # characters and let calcISBN10CheckDigit finish the number.
            isbn = stripText(isbn[3:])

            if len(isbn) > 9:
                isbn = stripText(isbn[0:9])

            isbn = calcISBN10CheckDigit(isbn)

    # Find Format
    if source.find("Format:") != -1:
        tempsource = searchForPlus(source, "Format:")
        tempsource = searchForPlus(tempsource, ">")
        format = textBefore(tempsource, "<")

    # Find Pages
    if source.find("pages:") != -1:
        pages = textBefore(searchForPlus(source, "pages:"), "<")

    # Find Publisher
    if source.find("Publisher:") != -1:
        source = searchForPlus(source, "Publisher:")
        publisher = textBefore(source, "<")

    # Find Date Published
    if source.find("Date Published:") != -1:
        source = searchForPlus(source, "Date Published:")
        date = textBefore(source, "<")

        if fullDateFormat == "false":
            # Keep only what follows the last "/" in the date.
            i = date.rfind("/")

            if i != -1:
                date = stripText(date[i+1:])

    # Available ?
    if source.find("This title is in stock") != -1:
        available = 'Y'

# Run the scraper. The optional user exit script, when the file exists, is
# executed whether or not extract() raised.
try:
    extract()
finally:
    if os.path.exists("scrapers/userexit.py"):
        # execfile() is a function, not the exec statement, so the former
        # trailing "in globals()" was only a discarded membership test on
        # its return value. Called without dictionaries, execfile() already
        # runs the script in the calling namespace.
        execfile("scrapers/userexit.py")
