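# Internet Book Store (UK) scraper
# NOTE: despite the "(UK)" label, the page markers matched in this file are
# Italian (">Titolo<", ">Autore<", ">Prezzo<") and prices are in EURO.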
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    scrapers.scrapers import calcISBN10CheckDigit
from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


def extract():
    """Scrape one book record out of the global ``source`` page text.

    Takes no arguments and returns nothing.  Every result is written to a
    module-level global: ``title``, ``author`` .. ``author6``, ``isbn``,
    ``publisher``, ``format``, ``date``, ``pages``, ``series``, ``value``,
    ``listprice``, ``image`` and ``comments``, plus the fixed defaults
    assigned at the top of the body.  ``source`` is consumed as the scan
    moves forward through the page, so the order of the sections matters.

    A field whose marker is absent from the page is left untouched, except
    ``author`` which is set to "No Author".

    NOTE(review): ``searchFor`` / ``searchForPlus`` are assumed to return
    ``None`` when the marker is missing (this function compares their
    results against ``None``).  With that behaviour a missing mandatory
    marker (image anchor class, title) surfaces as an exception raised by
    the string operation that follows -- confirm against scrapers.scrapers.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source

    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""

    # Place of publication is not extracted; the assignment stays disabled.
    # place = "Italia"

    # Find optional fields, pricing info etc.
    # Work on a separate reference so the main scan of `source` below
    # still starts from the top of the page.
    marketinfo = source

    # Find List Price
    if marketinfo.find(">EURO") != -1:
        marketinfo = searchForPlus(marketinfo, ">EURO")
        # NOTE(review): if this second marker is missing, the .find() below
        # presumably raises on None and aborts the extraction -- confirm
        # that is intended for an optional field.
        marketinfo = searchForPlus(marketinfo, "Prezzo di copertina EURO")
        end = marketinfo.find("<")
        listprice = " " + stripText(marketinfo[0:end])

    # Main extraction
    # Sometimes the ISBN is only present in a hidden form field near the
    # top of the page, so try that first.  The regular ISBN is read at the
    # very end of this function and overrides this value when found.
    if source.find("name=\"isbn\"") != -1:
        field = searchForPlus(source, "name=\"isbn\"")
        field = searchForPlus(field, "value=\"")
        isbn = stripText(field[0:field.find("\"")])

        if isbn.startswith("978"):
            # Drop the "978" prefix and the last character, then store
            # whatever calcISBN10CheckDigit returns for the remainder.
            isbn = isbn[3:]
            isbn = stripText(isbn[0:len(isbn) - 1])
            isbn = calcISBN10CheckDigit(isbn)

    # Find Image
    source = searchForPlus(source, "class=\"ttitolettobianco\"")
    pos = source.find("<img border")

    # Only accept an image tag that appears within the next 10000 characters.
    if pos != -1 and pos < 10000:
        source = searchForPlus(source, "<img border")
        source = searchForPlus(source, "src=\"")
        image = stripText(source[0:source.find("\"")])

    # Find Title
    source = searchFor(source, ">Titolo<")
    source = searchForPlus(source, "\">")
    title = stripText(source[0:source.find("<")])

    if title == "Libro non presente nel catalogo":
        # The site reports that the book is not in its catalogue.  The two
        # searches for a marker that cannot match are kept from the
        # original: presumably the first yields None and the second then
        # raises, signalling failure to the caller.  The return is only
        # reached if no exception is raised.
        title = ""
        source = searchFor(source, "force exception")
        source = searchFor(source, "force exception")
        return

    if title == "Segnala questo libro a un amico":
        # The first match was not the title; read it from the bold text
        # inside the coloured table cell instead.
        source = searchFor(source, "td bgcolor=\"#336633")
        source = searchForPlus(source, "><b>")
        title = stripText(source[0:source.find("</b>")])

    # Find Author
    if source.find(">Autore<") != -1:
        source = searchForPlus(source, ">Autore<")
        source = searchForPlus(source, "><b>")
        # Authors are separated by ";"; a trailing ";" is appended so that
        # the last name is terminated like the others.
        authors = stripText(source[0:source.find("</b>")]) + ";"
        authorList = []

        while searchFor(authors, ";") is not None:
            authorList.append(stripText(authors[0:authors.find(";")]))
            authors = searchForPlus(authors, ";")

        # Copy up to six names into the author globals, once, after the
        # list is complete.
        if len(authorList) > 0:
            author = authorList[0]

        if len(authorList) > 1:
            author2 = authorList[1]

        if len(authorList) > 2:
            author3 = authorList[2]

        if len(authorList) > 3:
            author4 = authorList[3]

        if len(authorList) > 4:
            author5 = authorList[4]

        if len(authorList) > 5:
            author6 = authorList[5]
    else:
        author = "No Author"

    # Find Price
    if source.find(">Prezzo<") != -1:
        source = searchForPlus(source, ">Prezzo<")
        source = searchForPlus(source, "&euro;")
        value = " " + stripText(source[0:source.find("<")])

    # Find Pages, Format
    if source.find(">Dati<") != -1:
        source = searchForPlus(source, ">Dati<")
        source = searchForPlus(source, "\">")
        # The text up to the first comma is stored as the date.
        date = stripText(source[0:source.find(",")])
        source = searchForPlus(source, ",")
        pos = source.find("p.")

        # Only slice when "p." was found.  With pos == -1 the original
        # slices stored nearly the whole page as `pages` and cut `source`
        # down to its last character, losing every later field.
        if pos != -1:
            pages = stripText(source[0:max(pos - 1, 0)])
            source = stripText(source[pos:])

    pos = source.find("ill.,")

    if pos == -1:
        pos = source.find("p.,")

    if pos != -1:
        # The format is the text after the comma, up to the next closing tag.
        source = stripText(source[pos:])
        source = searchForPlus(source, ",")
        format = stripText(source[0:source.find("</")])

    # Find Publisher
    if source.find("ExactPublisher") != -1:
        source = searchFor(source, "ExactPublisher")
        source = searchForPlus(source, "><b>")
        publisher = stripText(source[0:source.find("</b>")])

    # Find Collection
    if source.find("EXACTSERIES") != -1:
        source = searchFor(source, "EXACTSERIES")
        source = searchForPlus(source, "><b>")
        series = stripText(source[0:source.find("</b>")])

    # Find Comments
    # Both positions are computed before `source` is advanced, as in the
    # original, so the two headings are looked for in this fixed order.
    synopsisAt = source.find("<b>In sintesi</b>")
    descriptionAt = source.find("<b>Descrizione</b>")

    if synopsisAt != -1:
        source = searchForPlus(source, "<b>In sintesi</b>")

    if descriptionAt != -1:
        source = searchForPlus(source, "<b>Descrizione</b>")

    if descriptionAt != -1 or synopsisAt != -1:
        source = searchForPlus(source, "class=\"tcorpotesto\">")
        comments = stripText(source[0:source.find("</span>")])

        cut = comments.find("</font><p>")

        if cut != -1:
            comments = stripText(comments[0:cut])

        # Decode the apostrophe entity and drop italic tags.
        comments = comments.replace("&#39;", "'")

        for tag in ("<I>", "<i>", "</I>", "</i>"):
            comments = comments.replace(tag, "")

    # Find ISBN
    # Regular ISBN location; when present it replaces the first-try value.
    source = searchForPlus(source, "ISBN=")

    if source is None:
        return

    isbn = stripText(source[0:source.find("&")])


# Run the scraper.  Whether or not extract() raises, an optional user exit
# script is executed afterwards if it exists on disk.
try:
    extract()
finally:
    if os.path.exists("scrapers/userexit.py"):
        # execfile() without explicit namespaces runs the file in the
        # namespaces of this call site.  The original line ended with
        # "in globals()", which was only a membership test on execfile()'s
        # return value and had no effect, so it has been dropped.
        execfile("scrapers/userexit.py")
