# half.com audio scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripNewLines
from    scrapers.scrapers import convertArtist
from    jarray import zeros, array
from    java.lang import String


def _marketPrice(marketinfo, marker):
    """Return the price text found in the offer table that follows marker.

    marketinfo is the page HTML; marker is the header image name that
    precedes one condition's offer table.  Returns "" when the marker is not
    present or when the table cannot be parsed.
    """
    if string.find(marketinfo, marker) == -1:
        return ""

    try:
        price = searchForPlus(marketinfo, marker)
        price = searchForPlus(price, "<table width=\"100%\" border=\"0\" cellpadding=\"3\" cellspacing=\"0\">")
        i = string.find(price, "</table>")
        price = stripText(price[0:i])
        price = searchFor(price, "$")

        i = string.find(price, "<")

        if i != -1:
            price = stripText(price[0:i])
    except:
        # Deliberately broad: the price is an optional, best-effort field and
        # any parsing failure simply leaves it blank.
        price = ""

    return price


def _labelledValue(attributes, labelText):
    """Return the text between the first ">" after labelText and the next "<".

    The caller is expected to have checked that labelText occurs in
    attributes.
    """
    text = searchForPlus(attributes, labelText)
    text = searchForPlus(text, ">")
    i = string.find(text, "<")
    return stripText(text[0:i])


def extract():
    """Scrape album details from the half.com page held in global `source`.

    Reads the module globals `source` (page HTML) and `fullDateFormat`, and
    stores the results in the module-level field globals declared below
    (title, artist, label, date, upc, format, image, value, price fields,
    track arrays, comments, ...).  Fields whose markers are not found on the
    page are left untouched, apart from the defaults assigned at the top.

    `source` is consumed progressively: most steps replace it with the text
    that follows the marker just located, so the order of the steps matters.
    If the page is a search-results page, the first linked detail page is
    downloaded and parsed instead.
    """
    global title,artist,label,composer,date,copyDate,running,artistLF
    global value,valueDate,copies,upc,isbn,lccn,dewey,userNumber
    global format,series,sound,rating,condition,category
    global location,keywords,played,pflag,eflag,comments
    global dateEntered,dataSource,cart,ordered
    global diskCount,artistURL,conductor
    global trackTitles,trackArtists,trackWriters,trackTimes
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    ordered             = "N"
    sound               = "Stereo"
    format              = "CD"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    artistLF            = ""

    available           = "Y"
    buyerwaiting        = "N"


    # Get detail page, if necessary
    if string.find(source, "Search Results<") != -1:
        source = searchForPlus(source, "Search Results<")
        source = searchForPlus(source, "<A HREF=")

        # URL delimiter is sometimes single quote, sometimes
        # double quote, so use what is there
        delimiter = source[0:1]
        source = source[1:]
        i = string.find(source, delimiter)
        url = stripText(source[0:i])
        http = HTTPConnection()
        http.resetReferer()
        http.blockForLoad()
        source = http.getContents(url)


    # Find optional fields, pricing info etc.
    # These searches run against the whole page, before `source` starts
    # being consumed by the main extraction below.

    # Find List price
    if string.find(source, "List Price:") != -1:
        listprice = searchForPlus(source, "List Price:")
        listprice = searchFor(listprice, "$")
        i = string.find(listprice, "<")
        listprice = stripText(listprice[0:i])

        # Drop any parenthesised text that follows the amount
        i = string.find(listprice, "(")

        if i != -1:
            listprice = stripText(listprice[0:i])


    # Find New price
    newprice = _marketPrice(source, "header_items_brandNew.gif")


    # Find Used price
    # NOTE(review): this value is overwritten by the "Find Price" step
    # further down - confirm which of the two is the intended used price.
    usedprice = _marketPrice(source, "header_items_likeNew.gif")


    # Main extraction
    # Find Image
    source = searchForPlus(source, "&gt; Music<")
    i = string.find(source, ".ebayimg.")

    if i != -1:
        # Back up far enough to include the <img src=" that precedes the
        # image host name.  Clamp at 0: a negative start would slice from
        # the end of the string instead.
        source = stripText(source[max(i - 50, 0):])
        source = searchForPlus(source, "<img src=\"")
        i = string.find(source, "\"")
        image = stripText(source[0:i])

        # Image URLs containing this id are discarded
        # (presumably a "no image available" placeholder - TODO confirm)
        if string.find(image, "593226") != -1:
            image = ""


    # Get product attributes
    source = searchForPlus(source, "/branding/borderedbox/corner_grey_round_top_right.gif")
    source = searchForPlus(source, "><b>")
    i = string.find(source, "</td>")
    attributes = stripText(source[0:i+5])


    if string.find(attributes, "Format:") != -1:
        # Find Format
        format = _labelledValue(attributes, "Format:")

        # Find Publication Date
        # The date is located relative to the "Format:" entry rather than
        # by a label of its own.
        date = searchForPlus(attributes, "Format:")
        date = searchForPlus(date, "\"><br>")
        i = string.find(date, "<")
        date = stripText(date[0:i])

        if fullDateFormat == "false":
            # Keep only the last space-separated word of the date
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])


    # Find UPC
    if string.find(attributes, "UPC:") != -1:
        upc = string.replace(_labelledValue(attributes, "UPC:"), "-", "")


    # Find Label
    if string.find(attributes, " Label:") != -1:
        label = _labelledValue(attributes, " Label:")


    # Find Title
    source = searchForPlus(source, "class=\"pagetitle\"")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripText(source[0:i])


    # Find Artist
    if string.find(source, ">Primary Artist:") != -1:
        source = searchForPlus(source, ">Primary Artist:")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        artist = stripText(source[0:i])
        artist = convertArtist(artist)


    # Find Price
    source = searchForPlus(source, " class=\"red\">")
    source = searchFor(source, "$")
    i = string.find(source, "<")
    value = stripText(source[0:i])
    # The same price is also recorded as the used price, replacing whatever
    # the offer-table lookup above produced.
    usedprice = value


    # Find Tracks
    if string.find(source, ">Track Listing<") != -1:
        source = searchForPlus(source, ">Track Listing<")
        i = string.find(source, "<table ")
        disks = stripText(source[0:i])

        # Single-disc listings carry no disc header; add one so the loop
        # below can treat every listing the same way.
        if string.find(disks, ">DISC 1:<") == -1:
            disks = "<br class=\"br\"/>DISC 1:" + disks

        trackList = []
        artistList = []
        diskCount = 0

        while searchFor(disks, ">DISC ") is not None:
            disks = searchForPlus(disks, ">DISC ")
            diskCount = diskCount + 1
            diskid = "%02d" % diskCount
            tracks = searchFor(disks, ":")

            # Limit this disc's tracks to the text before the next header
            i = string.find(tracks, ">DISC ")

            if i != -1:
                tracks = stripText(tracks[0:i])

            trackIndex = 1

            while searchFor(tracks, "<br class=\"br\"/>") is not None:
                tracks = searchForPlus(tracks, "<br class=\"br\"/>")
                trackid = "%02d" % trackIndex

                # Each track line has the form "<number>. <title>"
                if string.find(tracks, ". ") == -1:
                    break

                track = searchForPlus(tracks, ". ")
                i = string.find(track, "<")
                track = stripText(track[0:i])

                # "<title> - <artist>" names a per-track artist, unless the
                # text after the dash starts with "("
                i = string.find(track, " - ")
                trackArtist = ""

                if i != -1:
                    if track[i+3:i+4] != "(":
                        trackArtist = stripText(track[i+3:])
                        track = stripText(track[0:i])

                trackIndex = trackIndex + 1
                trackList.append(diskid + "-" + trackid + "-" + track)

                if trackArtist != "":
                    trackArtist = convertArtist(trackArtist)
                    artistList.append(diskid + "-" + trackid + "-" + trackArtist)
                else:
                    artistList.append(diskid + "-" + trackid + "-")

        # Entries are keyed "<disc>-<track>-<text>", so sorting puts them in
        # disc/track order.  trackWriters is never populated by this scraper.
        if len(trackList) > 0:
            trackList.sort()
            trackTitles = array(trackList, String)

        if len(artistList) > 0:
            artistList.sort()
            trackArtists = array(artistList, String)


    # Find Comments
    if string.find(source, ">Album Notes<") != -1:
        source = searchForPlus(source, ">Album Notes<")
        source = searchForPlus(source, "class=\"br\"")
        source = searchForPlus(source, ">")
        i = string.find(source, "</td>")
        comments = stripText(source[0:i])

        # Turn line/paragraph breaks into newlines and drop bold/italic
        # tags; the replacements are applied in this order.
        replacements = (
            ("<br class=\"br\"/>", "\n"),
            ("<p>", "\n\n"),
            ("<P>", "\n\n"),
            ("<b>", ""),
            ("<B>", ""),
            ("</b>", ""),
            ("</B>", ""),
            ("<br />", "\n"),
            ("<i>", ""),
            ("</i>", ""),
            ("<I>", ""),
            ("</I>", ""),
        )

        for tag, replacement in replacements:
            comments = string.replace(comments, tag, replacement)


# Run the scraper.  Whether or not it succeeds, run the optional user exit
# script if one is installed.
try:
    extract()
finally:
    if os.path.exists("scrapers/awuserexit.py"):
        # Execute the user exit in this module's global namespace, where
        # extract() stores its results.
        execfile("scrapers/awuserexit.py", globals())
