# imdb scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


def _textBefore(text, marker):
    """Return the part of text that precedes the first marker.

    When marker does not occur, all of text is returned.  Slicing with the
    -1 that string.find reports for "not found" would instead silently
    drop the last character.
    """
    i = string.find(text, marker)

    if i == -1:
        return text

    return text[0:i]


def _fetchPage(url):
    """Fetch url through a fresh HTTPConnection and return its contents."""
    http = HTTPConnection()
    http.resetReferer()
    http.blockForLoad()
    return http.getContents(url)


def _writeTrace(contents):
    """Write contents to trace2.html in the current directory.

    Every call overwrites the previous trace (looks like a debugging aid).
    The file is closed even when the write fails.
    """
    t2 = open("trace2.html", "w")

    try:
        t2.write(contents)
    finally:
        t2.close()


def _lastNameFirst(name):
    """Rewrite "First Last" as "Last, First", splitting at the last space.

    A name that contains no space is returned unchanged.
    """
    i = string.rfind(name, " ")

    if i == -1:
        return name

    return stripText(name[i:]) + ", " + stripText(name[0:i])


def _dropParenthetical(name):
    """Cut name at its first "(", if any, e.g. to drop a trailing "(novel)"."""
    i = string.find(name, "(")

    if i == -1:
        return name

    return stripText(name[0:i])


def _creditName(page, anchor):
    """Return the first linked name that follows anchor in page.

    The name is returned in "Last, First" form; None is returned when
    anchor does not occur in page.
    """
    if string.find(page, anchor) == -1:
        return None

    rest = searchForPlus(page, anchor)
    rest = searchForPlus(rest, " href=")
    rest = searchForPlus(rest, "\">")
    return _lastNameFirst(stripText(_textBefore(rest, "<")))


def extract():
    """Scrape movie details out of the HTML held in the global ``source``.

    ``source`` is read before anything here assigns it, so it is presumably
    supplied by the host application before this script runs (TODO confirm).
    Results are stored in module globals (title, director, actor1..actor10,
    category, ...).  Apart from the defaults set first, a global is only
    assigned when its marker is found in the page.

    Parsing is sequential: most sections advance ``source`` past the text
    they consumed, so the order of the sections below matters.

    Side effects: may fetch further pages through HTTPConnection (an Amazon
    redirect, the keywords page, the full credits page) and writes the
    fetched pages to trace2.html.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,imageref,fullDateFormat,source


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    color               = "Color"
    sound               = "Mono"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # A page containing "sai-identify" is not the IMDb page itself: follow
    # its first /ASIN/ link, then the amazon.imdb.com link on that page.
    if string.find(source, "sai-identify") != -1:
        source = searchForPlus(source, "sai-identify")
        i = string.find(source, "/ASIN/")

        # Only follow the link when it exists; a -1 here used to turn into
        # a slice of the last few characters of the page.
        if i != -1:
            # Back up to pick up the start of the enclosing <a> tag,
            # without running off the front of the page.
            source = source[max(i - 50, 0):]
            source = searchForPlus(source, "<a href=")
            url = stripText(_textBefore(source, ">"))
            source = _fetchPage(url)

            i = string.find(source, "amazon.imdb.com")

            if i != -1:
                source = source[max(i - 100, 0):]
                source = searchForPlus(source, "<a href=\"")
                url = "http://www.amazon.com" + stripText(_textBefore(source, "\">"))
                source = _fetchPage(url)
                imageref = url
                _writeTrace(source)


    # Find Image
    if string.find(source, "/ImageView") != -1:
        source = searchForPlus(source, "/ImageView")
        source = searchForPlus(source, "<IMG SRC=\"")
        image = stripText(_textBefore(source, "\""))
    elif string.find(source, "name=\"poster\"") != -1:
        source = searchForPlus(source, "name=\"poster\"")
        source = searchForPlus(source, "src=\"")
        image = stripText(_textBefore(source, "\""))


    # Find Title
    source = searchForPlus(source, "id=\"tn15title\"")
    source = searchForPlus(source, "<h1>")
    title = stripText(_textBefore(source, "<"))
    title = string.replace(title, "&#34;", "\"")
    title = string.replace(title, "&#38;", "&")


    # Find keywords (listed on a separate page)
    i = string.find(source, "/keywords\"")

    if i != -1:
        keywords = ""
        # Back up far enough to include the href=" that precedes the match.
        url = stripText(source[max(i - 50, 0):])
        url = searchForPlus(url, "href=\"")
        url = "http://www.imdb.com" + stripText(_textBefore(url, "\""))
        page2 = _fetchPage(url)
        _writeTrace(page2)

        if string.find(page2, "class=\"keyword\"") != -1:
            page2 = searchForPlus(page2, "class=\"keyword\"")
            page2 = stripText(_textBefore(page2, "</ul>"))

            # Each keyword is the text of one link inside the list.
            while string.find(page2, "href=\"") != -1:
                page2 = searchForPlus(page2, "href=\"")
                page2 = searchForPlus(page2, "\">")
                keyword = stripText(_textBefore(page2, "<"))

                if keywords == "":
                    keywords = keyword
                else:
                    keywords = keywords + ", " + keyword


    # Find credits (listed on a separate page)
    comments = ""
    i = string.find(source, "/fullcredits\"")

    if i != -1:
        url = stripText(source[max(i - 50, 0):])
        url = searchForPlus(url, "href=\"")
        url = "http://www.imdb.com" + stripText(_textBefore(url, "\""))
        page2 = _fetchPage(url)
        _writeTrace(page2)

        # Assign each global only once its final value is known.
        credit = _creditName(page2, "/E#editor\"")

        if credit is not None:
            editor = credit

        credit = _creditName(page2, "/C#composer\"")

        if credit is not None:
            composer = credit

        credit = _creditName(page2, "/C#cinematographer\"")

        if credit is not None:
            photographer = credit


    # Find Copyright date: the link text after the first "(" past the title
    source = searchForPlus(source, "(")
    source = searchForPlus(source, ">")
    copyDate = stripText(_textBefore(source, "<"))


    # Find user rating (does not advance source)
    if string.find(source, ">User Rating:") != -1:
        temp = searchForPlus(source, ">User Rating:")
        temp = searchForPlus(temp, "<b>")
        rating = stripText(_textBefore(temp, "<")) + " (IMDB)"


    # Find Director
    if string.find(source, ">Director") != -1:
        source = searchForPlus(source, ">Director")
        source = searchForPlus(source, "href")
        source = searchForPlus(source, ">")
        director = _lastNameFirst(stripText(_textBefore(source, "<")))


    # Find writers
    if string.find(source, ">Writer") != -1:
        source = searchForPlus(source, ">Writer")
        writers = stripText(_textBefore(source, "<br/>\n"))

        screenwriter = ""
        author = ""
        writer = ""

        while searchFor(writers, "href") is not None:
            writers = searchForPlus(writers, "href")
            writers = searchForPlus(writers, ">")
            # The last entry has no trailing <br/>; keep all of it so its
            # "(novel)" / "(story)" marker is still seen.
            tempWriter = stripText(_textBefore(writers, "<br/>"))

            # 0 = screenwriter, 1 = author of the novel, 2 = story writer
            writerType = 0

            if string.find(tempWriter, "(novel)") != -1:
                writerType = 1

            if string.find(tempWriter, "(story)") != -1:
                writerType = 2

            if string.find(tempWriter, "(short story)") != -1:
                writerType = 2

            tempWriter = stripText(_textBefore(tempWriter, "<"))
            tempWriter = _dropParenthetical(tempWriter)
            tempWriter = _lastNameFirst(tempWriter)

            # Skip links that are not names.
            if tempWriter != "(more)" and tempWriter != "WGA" and tempWriter != "":
                if writerType == 0:
                    if screenwriter != "":
                        screenwriter = screenwriter + " / "

                    screenwriter = screenwriter + tempWriter
                elif writerType == 1:
                    if author != "":
                        author = author + " / "

                    author = author + tempWriter
                    book = title
                elif writerType == 2:
                    if writer != "":
                        writer = writer + " / "

                    writer = writer + tempWriter


    # Find Category
    if string.find(source, ">Genre") != -1:
        source = searchForPlus(source, ">Genre")
        categories = stripText(_textBefore(source, "</div>"))
        category = ""

        while searchFor(categories, "href") is not None:
            categories = searchForPlus(categories, "href")
            categories = searchForPlus(categories, ">")
            i = string.find(categories, "<")

            if i == -1:
                genre = stripText(categories)
            else:
                genre = stripText(categories[0:i])

            if genre != "more":
                if category != "":
                    category = category + " : "

                category = category + genre

            if i == -1:
                # Nothing follows the last link text.
                break

            categories = categories[i:]


    # Find Comments; a plot outline, when present, replaces the summary
    for label in (">Plot Summary:", ">Plot Outline:"):
        if string.find(source, label) != -1:
            source = searchForPlus(source, label)
            source = searchForPlus(source, ">")
            comments = stripText(_textBefore(source, "<"))


    # Find Actors
    castTag = "class=\"cast\""

    if string.find(source, castTag) != -1:
        source = searchForPlus(source, castTag)
        actors = stripText(_textBefore(source, "</div>"))
        actorList = []

        # One table row per cast member.
        while searchFor(actors, "<tr") is not None:
            actors = searchForPlus(actors, "<tr")
            i = string.find(actors, "</tr>")

            if i == -1:
                actor = actors
            else:
                actor = stripText(actors[0:i])

            actors = searchForPlus(actors, "</tr>")

            # The last /name link in the row holds the actor's name.
            i = string.rfind(actor, "href=\"/name")

            if i != -1:
                actor = stripText(actor[i:])
                actor = searchForPlus(actor, "\">")
                actor = stripText(_textBefore(actor, "<"))
                actor = _dropParenthetical(actor)
                actorList.append(_lastNameFirst(actor))

        # Only the first ten cast members have a slot.
        if len(actorList) > 0:
            actor1 = actorList[0]

        if len(actorList) > 1:
            actor2 = actorList[1]

        if len(actorList) > 2:
            actor3 = actorList[2]

        if len(actorList) > 3:
            actor4 = actorList[3]

        if len(actorList) > 4:
            actor5 = actorList[4]

        if len(actorList) > 5:
            actor6 = actorList[5]

        if len(actorList) > 6:
            actor7 = actorList[6]

        if len(actorList) > 7:
            actor8 = actorList[7]

        if len(actorList) > 8:
            actor9 = actorList[8]

        if len(actorList) > 9:
            actor10 = actorList[9]


    # Find Running time (does not advance source)
    if string.find(source, ">Runtime") != -1:
        temp = searchForPlus(source, ">Runtime")
        i = string.find(temp, "</div>")

        if i != -1:
            # Keep the "<" of </div> so the value below has a terminator.
            temp = stripText(temp[0:i+1])

        if string.find(temp, "USA:") != -1:
            temp = searchForPlus(temp, "USA:")
            running = stripText(_textBefore(temp, "<"))
        else:
            temp = searchForPlus(temp, ">")
            running = stripText(_textBefore(temp, "<"))
            i = string.find(running, ":")

            # Drop a leading "Country:" style prefix.
            if i != -1:
                running = stripText(running[i+1:])


    # Find Place
    if string.find(source, ">Country") != -1:
        source = searchForPlus(source, ">Country")
        source = searchForPlus(source, "href")
        source = searchForPlus(source, ">")
        place = stripText(_textBefore(source, "<"))


    # Find Color
    if string.find(source, ">Color") != -1:
        source = searchForPlus(source, ">Color")
        source = searchForPlus(source, "href")
        source = searchForPlus(source, "\">")
        color = stripText(_textBefore(source, "<"))


    # Find Sound
    if string.find(source, ">Sound") != -1:
        source = searchForPlus(source, ">Sound")
        source = searchForPlus(source, "href")
        source = searchForPlus(source, ">")
        sound = stripText(_textBefore(source, "<"))


    # Find Rating; prefer the USA certification, else take the first one
    if string.find(source, ">Certification") != -1:
        source = searchForPlus(source, ">Certification")

        if string.find(source, ">USA:") != -1:
            source = searchForPlus(source, ">USA:")
            mpaa = stripText(_textBefore(source, "<")) + " (MPAA)"
        else:
            source = searchForPlus(source, "href")
            source = searchForPlus(source, ">")
            mpaa = stripText(_textBefore(source, "<"))



# Run the scraper, then always run the optional user exit script, even
# when extract() raises.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # Execute the user exit in this module's namespace.  The namespace
        # is passed as an argument: a trailing "in globals()" after the
        # call would only be a membership test on execfile's result.
        execfile("scrapers/vwuserexit.py", globals())
