# half.com scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML
from    scrapers.scrapers import stripNewLines


def _fetchLinkedPage(text):
    """Fetch the page whose quoted URL starts at the beginning of text.

    text must begin with the URL's opening quote character.  The URL
    delimiter is sometimes a single quote, sometimes a double quote, so
    whatever character is actually there is used to find the end.
    Returns the contents returned by HTTPConnection.getContents().
    """
    quote = text[0:1]
    text = text[1:]
    end = string.find(text, quote)
    url = stripText(text[0:end])

    http = HTTPConnection()
    http.resetReferer()
    http.blockForLoad()
    return http.getContents(url)


def _findConditionPrice(page, marker):
    """Return the first price in the listing table that follows marker.

    This is a best-effort lookup: any failure while walking the markup
    yields an empty string instead of aborting the whole extraction.
    """
    try:
        price = searchForPlus(page, marker)
        price = searchForPlus(price, "<table width=\"100%\" border=\"0\" cellpadding=\"3\" cellspacing=\"0\">")
        i = string.find(price, "</table>")
        price = stripText(price[0:i])
        price = searchFor(price, "$")

        i = string.find(price, "<")

        if i != -1:
            price = stripText(price[0:i])

        return price
    except:
        # Deliberately broad: the optional pricing info must never make
        # the main extraction fail.
        return ""


def _lastNameFirst(name):
    """Return name rearranged as "Last, First".

    The name is split at its last space.  A name without any space is
    returned unchanged, there is nothing to rearrange.
    """
    i = string.rfind(name, " ")

    if i == -1:
        return name

    firstNames = stripText(name[0:i])
    lastName = stripText(name[i:])
    return stripText(lastName) + ", " + stripText(firstNames)


def _stripMarkup(text):
    """Remove bold/italic tags from text and turn <br/> tags into newlines."""
    for tag in ("<b>", "</b>", "<i>", "</i>"):
        text = string.replace(text, tag, "")

    for lineBreak in ("<br/>", "<br />"):
        text = string.replace(text, lineBreak, "\n")

    return text


def extract():
    """Extract book details from a half.com page into the module globals.

    Reads the globals source (the page text) and fullDateFormat, which
    are not defined in this file and are presumably supplied by the
    caller that runs this script.  If the page is a list of matches
    rather than a product page, the first linked product page is
    downloaded and parsed instead.

    The results are stored in the globals declared below (title, author,
    isbn, publisher, ...).  Optional fields that are not present on the
    page are left untouched.  source is consumed as parsing proceeds.
    """
    global title,author,isbn,publisher,format,first,signed,date,place
    global copies,rating,condition,category,read,pflag,eflag,value
    global comments,dateEntered,dataSource,cart,ordered
    global lccn,dewey,userNumber,copyDate,valueDate,location
    global series,pages,keywords,dimensions
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global author2,author3,author4,author5,author6
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,readinglevel,salesrank,available
    global buyerwaiting,editionNumber,weight,image
    global fullDateFormat,source


    # Defaults
    first               = "N"
    signed              = "N"
    read                = "N"
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    readinglevel        = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"
    weight              = ""


    # No place extraction default to US
    # place = "United States"


    # Check for not found
    if string.find(source, "We were unable to find any matches for ") != -1:
        # Searching twice for text that is not on the page is meant to
        # raise an exception and so abort the extraction.
        # NOTE(review): relies on how searchFor handles a missing
        # marker - confirm against scrapers.scrapers.
        source = searchFor(source, "force exception")
        source = searchFor(source, "force exception")


    # Get detail page, if necessary
    # half.com sometimes has multiple matches for the same ISBN
    # ("IBSN" is the spelling that is searched for on the page)
    if string.find(source, "items related to the IBSN") != -1:
        source = searchForPlus(source, "items related to the IBSN")
        source = searchForPlus(source, "<A HREF=")
        source = searchForPlus(source, "<A HREF=")
        source = _fetchLinkedPage(source)


    # Get detail page, if necessary
    # half.com sometimes has multiple matches for the same ISBN
    if string.find(source, " products found for") != -1:
        source = searchForPlus(source, " products found for")
        source = searchForPlus(source, "align=\"center\"")
        source = searchForPlus(source, "<a href=")
        source = _fetchLinkedPage(source)


    # Get detail page, if necessary
    if string.find(source, "Search Results<") != -1:
        source = searchForPlus(source, "Search Results<")
        source = searchForPlus(source, "<A HREF=")
        source = _fetchLinkedPage(source)


    # Find optional fields, pricing info etc.
    # These are looked up in a copy of the whole page, because source
    # itself is cut down step by step by the main extraction below.
    marketinfo = source

    # Find List price
    if string.find(marketinfo, "List Price:") != -1:
        listprice = searchForPlus(marketinfo, "List Price:")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])

        # Drop anything in parentheses after the price
        i = string.find(listprice, "(")

        if i != -1:
            listprice = stripText(listprice[0:i])


    # Find New price
    if string.find(marketinfo, "header_items_brandNew.gif") != -1:
        newprice = _findConditionPrice(marketinfo, "header_items_brandNew.gif")


    # Find Used price
    if string.find(marketinfo, "header_items_likeNew.gif") != -1:
        usedprice = _findConditionPrice(marketinfo, "header_items_likeNew.gif")


    # Check for availability
    if string.find(source, ">Sorry, this product is currently out of stock.<") != -1:
        available = "N"


    # Main extraction
    # Find Image
    source = searchForPlus(source, "&gt; Books<")
    i = string.find(source, ".ebayimg.")

    # Only accept an image that appears within the first 1500 characters
    if i != -1 and i < 1500:
        # Back up to just before the image URL to pick up the <img> tag.
        # Clamp at 0: a negative start would wrap around and slice from
        # the end of the page instead.
        source = stripText(source[max(i - 50, 0):])
        source = searchForPlus(source, "<img src=\"")
        i = string.find(source, "\"")
        image = stripText(source[0:i])

        # Discard image URLs containing this id
        # NOTE(review): presumably a placeholder image - confirm
        if string.find(image, "593221") != -1:
            image = ""


    # Get product attributes
    source = searchForPlus(source, "/branding/borderedbox/corner_grey_round_top_right.gif")
    source = searchForPlus(source, "><b>")
    i = string.find(source, "</td>")
    attributes = stripText(source[0:i])


    # Find ISBN
    if string.find(attributes, "ISBN-10:") != -1:
        isbn = searchForPlus(attributes, "ISBN-10:")
        isbn = searchForPlus(isbn, "\">")
        i = string.find(isbn, "<")
        isbn = stripText(isbn[0:i])
        isbn = string.replace(isbn, "-", "")


    # Find Pages
    i = string.find(attributes, " pages")

    if i != -1:
        # Back up to the tag in front of the page count.  Clamp at 0 so
        # a match near the start cannot wrap around to the end.
        pages = attributes[max(i - 15, 0):]
        pages = searchForPlus(pages, ">")
        i = string.find(pages, "<")
        pages = stripText(pages[0:i])

        # Just the numbers
        i = string.find(pages, "pages")

        if i != -1:
            pages = stripText(pages[0:i])


    # Find Format
    if string.find(attributes, "Format:") != -1:
        format = searchForPlus(attributes, "Format:")
        format = searchForPlus(format, ">")
        i = string.find(format, "<")
        format = stripText(format[0:i])


    # Find Publication Date
    # The date is located relative to the "ISBN:" entry
    if string.find(attributes, "ISBN:") != -1:
        date = searchForPlus(attributes, "ISBN:")
        date = searchForPlus(date, "\">")
        date = searchForPlus(date, "\"><br>")
        i = string.find(date, "<")
        date = stripText(date[0:i])

        if fullDateFormat == "false":
            # Keep only the last word of the date
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])


    # Find Publisher
    if string.find(attributes, ">Publisher:") != -1:
        publisher = searchForPlus(attributes, ">Publisher:")
        publisher = searchForPlus(publisher, ">")
        i = string.find(publisher, "<")
        publisher = stripText(publisher[0:i])


    # Find Title
    source = searchForPlus(source, "class=\"pagetitle\"")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripText(source[0:i])
    source = stripText(source[i:])


    # Find Author
    authorList = []

    if string.find(source, ">Author:") != -1:
        source = searchForPlus(source, ">Author:")
        i = string.find(source, "\n")
        authors = stripText(source[0:i])
        i = string.find(authors, "<br><b")

        if i != -1:
            authors = stripText(authors[0:i])

        # Each name is the text of a link, i.e. it follows a "> and
        # runs up to the next tag
        while searchFor(authors, "\">") is not None:
            authors = searchForPlus(authors, "\">")
            i = string.find(authors, "<")
            name = stripText(authors[0:i])

            if name == "":
                continue

            authorList.append(_lastNameFirst(name))

    if string.find(source, ">Illustrated by:") != -1:
        source = searchForPlus(source, ">Illustrated by:")
        i = string.find(source, "\n")
        authors = stripText(source[0:i])

        while searchFor(authors, "\">") is not None:
            authors = searchForPlus(authors, "\">")
            i = string.find(authors, "<")
            name = stripText(authors[0:i])

            # Same handling as for authors: skip empty entries and
            # leave single-word names alone
            if name == "":
                continue

            authorList.append(_lastNameFirst(name))


    if len(authorList) > 0:
        author = authorList[0]

    if len(authorList) > 1:
        author2 = authorList[1]

    if len(authorList) > 2:
        author3 = authorList[2]

    if len(authorList) > 3:
        author4 = authorList[3]

    if len(authorList) > 4:
        author5 = authorList[4]

    if len(authorList) > 5:
        author6 = authorList[5]

    if title != "" and len(authorList) == 0:
        author = "No Author"


    # Find Price
    # Only get price if available, half.com does not have
    # a price if unavailable, don't want to pick up the
    # price of a suggested alternative
    if available == "Y":
        source = searchForPlus(source, " class=\"red\">")
        source = searchFor(source, "$")
        i = string.find(source, "<")
        value = stripText(source[0:i])
        # This replaces any used price found in the pricing info above
        usedprice = stripText(source[0:i])


    # Find Comments
    comments = ""

    if string.find(source, "<b>Synopsis</b>") != -1:
        source = searchFor(source, "<b>Synopsis</b>")
        source = searchForPlus(source, "<b>")
        i = string.find(source, "<br/><br/>")
        comments = _stripMarkup(stripText(source[0:i]))


    # Find Dimensions
    height = ""
    width = ""
    thickness = ""

    if string.find(source, ">Height:<") != -1:
        height = searchForPlus(source, ">Height:<")
        height = searchForPlus(height, "\">")
        i = string.find(height, "<")
        height = stripText(height[0:i])

    if string.find(source, ">Width:<") != -1:
        width = searchForPlus(source, ">Width:<")
        width = searchForPlus(width, "\">")
        i = string.find(width, "<")
        width = stripText(width[0:i])

    if string.find(source, ">Thickness:<") != -1:
        thickness = searchForPlus(source, ">Thickness:<")
        thickness = searchForPlus(thickness, "\">")
        i = string.find(thickness, "<")
        thickness = stripText(thickness[0:i])

    if height != "" and width != "":
        dimensions = height + " x " + width

        # Thickness is optional, do not leave a dangling " x "
        if thickness != "":
            dimensions = dimensions + " x " + thickness


    # Find Weight
    if string.find(source, ">Weight:<") != -1:
        weight = searchForPlus(source, ">Weight:<")
        weight = searchForPlus(weight, "\">")
        i = string.find(weight, "<")
        weight = stripText(weight[0:i])


    # Find Comments - Part II
    # Industry reviews are appended to the synopsis, if there is one
    if string.find(source, "<b>Industry Reviews<") != -1:
        source = searchFor(source, "<b>Industry Reviews<")
        source = searchForPlus(source, "<b>")
        i = string.find(source, "<br/><br/>")
        comment = _stripMarkup(stripText(source[0:i]))

        if comments != "":
            comments = comments + "\n\n" + comment
        else:
            comments = comment


# Run the extraction.  Whether or not it succeeds, run the optional
# user exit script if one is installed.
try:
    extract()
finally:
    if os.path.exists("scrapers/userexit.py"):
        # Execute the user exit in this script's global namespace, so it
        # works on the same globals that extract() populates.
        execfile("scrapers/userexit.py", globals())
