# Sam Goody scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML
from    jarray import zeros, array
from    java.lang import String


# AMG rating image file name -> star rating text.  A file name that is not
# listed here is stored as the rating unchanged.
# NOTE(review): there are no entries for "1_amg_rating.gif" or
# "10_amg_rating.gif" -- confirm whether the site ever serves them.
_AMG_RATINGS = {
    "2_amg_rating.gif": "1 Stars",
    "3_amg_rating.gif": "1 1/2 Stars",
    "4_amg_rating.gif": "2 Stars",
    "5_amg_rating.gif": "2 1/2 Stars",
    "6_amg_rating.gif": "3 Stars",
    "7_amg_rating.gif": "3 1/2 Stars",
    "8_amg_rating.gif": "4 Stars",
    "9_amg_rating.gif": "4 1/2 Stars",
}

# An image URL containing one of these file names is discarded
# (presumably generic "no cover art" artwork -- verify against the site).
_PLACEHOLDER_IMAGES = (
    "music_79x79.gif",
    "movies_79x79.gif",
    "music_200x200.gif",
    "movies_200x200.gif",
)

# Paragraph and italic tags handled in both comment sections, as
# (text to find, replacement) pairs.
_PARAGRAPH_AND_ITALIC = (
    ("<p>", "\n\n"),
    ("<P>", "\n\n"),
    ("<i>", ""),
    ("</i>", ""),
    ("<I>", ""),
    ("</I>", ""),
)


def _invertName(name):
    """Rearrange "First Middle Last" as "Last, First Middle".

    The split is made at the last space; a name without any space is
    returned unchanged.
    """
    i = string.rfind(name, " ")

    if i == -1:
        return name

    return stripText(name[i+1:]) + ", " + stripText(name[0:i])


def _labelledValue(text, label):
    """Return the value shown for label in text, or None if label is absent.

    The value is whatever lies between the first '">' that follows the
    label and the next "<", passed through stripHTML.
    """
    if string.find(text, label) == -1:
        return None

    found = searchForPlus(text, label)
    found = searchForPlus(found, "\">")
    i = string.find(found, "<")
    return stripHTML(found[0:i])


def _creditAfter(text, label):
    """Return the name that directly follows label, as "Last, First".

    Returns None if label does not occur in text.  The name runs from the
    end of the label up to the next "<".
    """
    if string.find(text, label) == -1:
        return None

    found = searchForPlus(text, label)
    i = string.find(found, "<")
    return _invertName(stripHTML(found[0:i]))


def _shortenDate(dateText, fullDateFormat):
    """Keep only the last space-separated token of dateText when
    fullDateFormat is the string "false"; otherwise return it unchanged."""
    if fullDateFormat == "false":
        i = string.rfind(dateText, " ")

        if i != -1:
            return stripText(dateText[i+1:])

    return dateText


def _replaceAll(text, replacements):
    """Apply each (old, new) pair of replacements to text, in order."""
    for old, new in replacements:
        text = string.replace(text, old, new)

    return text


def _dropOpeningTags(text, tagStart):
    """Remove every opening tag that starts with tagStart and ends with '">'.

    The match is case sensitive, so "<A " and "<a " need separate calls.
    """
    i = string.find(text, tagStart)

    while i != -1:
        j = string.find(text[i:], "\">")
        # When no closing '">' follows, j is -1 and this removes just the
        # "<", which is still enough for the loop to move on.
        text = text[0:i] + text[i+j+2:]
        i = string.find(text, tagStart)

    return text


def extract():
    """Scrape one product page and store the fields in module globals.

    The page HTML is read from the global ``source`` and the date style
    from the global ``fullDateFormat``; neither is defined in this file
    (presumably the host application sets them before running the script
    -- TODO confirm).  Results are written to the globals declared below.
    A field whose label is not found on the page is left untouched.

    ``source`` is consumed as the scrape proceeds: several sections replace
    it with the text that follows the section just read, so later sections
    only see the remainder of the page.  The order of the sections below
    therefore matters.

    An exception raised by the helper functions (for example when an
    expected marker is missing) is not caught here.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # No place extraction default to US
    # place = "United States"


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    color               = "Color"
    sound               = "Mono"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # Give up on "still searching" and error pages.
    # NOTE(review): searching twice for text that is not on the page is
    # presumably how this script raises -- the first call is expected to
    # return None and the second to fail on it.  Confirm against searchFor.
    for marker in ("Please wait while we search for the requested item",
                   ">General Error<"):
        if string.find(source, marker) != -1:
            source = searchFor(source, "force exception")
            source = searchFor(source, "force exception")


    # Find Title
    source = searchForPlus(source, "\"header03\"")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripText(source[0:i])
    i = string.find(title, "&nbsp;")

    # Anything from the first "&nbsp;" onwards is not part of the title
    if i != -1:
        title = stripText(title[0:i])

    title = stripHTML(title)


    # Find Format.  The three characters after "&nbsp;" (6 characters long)
    # are the format name; VHS is checked last and so wins if both appear.
    for marker in ("&nbsp;DVD", "&nbsp;VHS"):
        i = string.find(source, marker)

        if i != -1:
            format = stripHTML(source[i+6:i+9])


    # Find Actors
    if string.find(source, ">Starring:<") != -1:
        source = searchForPlus(source, ">Starring:<")
        source = searchForPlus(source, "\">")
        i = string.find(source, "</")
        actorList = []

        # Comma separated list.  Every name, including the last one, is
        # stored as "Last, First"; blank entries are skipped.
        for name in string.split(stripText(source[0:i]), ","):
            name = stripText(name)

            if name != "":
                actorList.append(_invertName(name))

        # Fill actor1 .. actor10; any further names are dropped
        for n in range(min(len(actorList), 10)):
            globals()["actor%d" % (n + 1)] = actorList[n]


    # Find Director
    if string.find(source, ">Directed By") != -1:
        source = searchForPlus(source, ">Directed By")
        source = searchForPlus(source, "\">")
        i = string.find(source, "<")
        director = _invertName(stripHTML(source[0:i]))


    # Find Image.  Only used when "/coverart" occurs within the next 400
    # characters of the remaining page.
    i = string.find(source, "/coverart")

    if i != -1 and i < 400:
        source = searchForPlus(source, "src=\"")
        i = string.find(source, "\"")
        image = stripHTML(source[0:i])

        # Prefer the larger image when the page links to one
        if string.find(source, ">Larger Image<") != -1:
            source = searchForPlus(source, "windowPop('")
            i = string.find(source, "'")
            image = stripHTML(source[0:i])

        for placeholder in _PLACEHOLDER_IMAGES:
            if string.find(image, placeholder) != -1:
                image = ""


    # Find AMG Rating.  The rating is encoded in the image file name that
    # follows "/ratings/".
    if string.find(source, ">AMG Rating:") != -1:
        found = searchForPlus(source, ">AMG Rating:")
        found = searchForPlus(found, "/ratings/")
        i = string.find(found, "\"")
        found = stripHTML(found[0:i])
        rating = _AMG_RATINGS.get(found, found)


    # Find Studio
    found = _labelledValue(source, ">Studio:")

    if found is not None:
        studio = found


    # Find Category
    found = _labelledValue(source, ">Genre:")

    if found is not None:
        category = found


    # Find Rating
    found = _labelledValue(source, ">MPAA Rating:")

    if found is not None:
        mpaa = found


    # Find Release date
    found = _labelledValue(source, ">Release Date:")

    if found is not None:
        date = _shortenDate(found, fullDateFormat)


    # Find Price.  The stored value runs from the "$" located by searchFor
    # up to the next "<".
    if string.find(source, ">Price:") != -1:
        found = searchForPlus(source, ">Price:")
        found = searchForPlus(found, "\">")
        found = searchFor(found, "$")
        i = string.find(found, "<")
        value = stripHTML(found[0:i])


    # Find Comments
    comments = ""

    if string.find(source, ">Synopsis:") != -1:
        source = searchForPlus(source, ">Synopsis:")
        source = searchForPlus(source, "\"content02\">")
        i = string.find(source, "</td>")
        comments = stripText(source[0:i])
        # Flatten the HTML line breaks first, then turn markup into text
        comments = string.replace(comments, "\n", " ")
        comments = _replaceAll(comments, _PARAGRAPH_AND_ITALIC)
        comments = string.replace(comments, "<BR>", "\n")
        comments = _dropOpeningTags(comments, "<A ")
        comments = string.replace(comments, "</A>", "")


    # Find Running Time
    found = _labelledValue(source, ">Running Time:")

    if found is not None:
        running = found


    # Find Sound
    found = _labelledValue(source, ">Sound:")

    if found is not None:
        sound = found


    # Find Copyright date
    found = _labelledValue(source, ">Theatrical Release Date:")

    if found is not None:
        copyDate = _shortenDate(found, fullDateFormat)


    # Find Editor
    found = _creditAfter(source, ";Editor:")

    if found is not None:
        editor = found


    # Find Composer
    found = _creditAfter(source, ";Composer (Music Score):")

    if found is not None:
        composer = found


    # Find Photographer
    found = _creditAfter(source, ";Cinematographer:")

    if found is not None:
        photographer = found


    # Find Screenwriter, falling back to the teleplay credit
    found = _creditAfter(source, ";Screenwriter:")

    if found is None:
        found = _creditAfter(source, ";Teleplay By:")

    if found is not None:
        screenwriter = found


    # Find Comments (Part 2).  The reviews are appended to the synopsis and
    # the clean-up below runs over the combined text.
    if string.find(source, ">Movie Reviews:") != -1:
        if comments != "":
            comments = comments + "\n\n"

        source = searchForPlus(source, ">Movie Reviews:")
        source = searchForPlus(source, "\"content02\">")
        i = string.find(source, "</td>")
        comments = comments + stripText(source[0:i])
        # A run of 14 spaces is treated as a paragraph break
        comments = string.replace(comments, "              ", "\n\n")
        comments = _replaceAll(comments, _PARAGRAPH_AND_ITALIC)
        comments = _dropOpeningTags(comments, "<a ")
        comments = string.replace(comments, "</a>", "")

# Run the scrape.  The optional user exit script runs afterwards even when
# extract() raised; the exception, if any, still propagates once it is done.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # Pass globals() as the namespace argument.  Writing
        # "execfile(...) in globals()" borrows the syntax of the exec
        # statement: it runs the file and then evaluates a meaningless
        # "None in globals()" membership test.
        execfile("scrapers/vwuserexit.py", globals())
