# CD Universe video scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


# Markup stripped from (or translated in) the description text, applied in
# this order.  Each pair is (text to find, replacement).
_COMMENT_REPLACEMENTS = (
    ("</a>", ""),
    ("</A>", ""),
    ("<p>", "\n\n"),
    ("<P>", "\n\n"),
    ("</p>", ""),
    ("</P>", ""),
    ("<BR>", "\n"),
    ("<br>", "\n"),
    ("<br />", "\n"),
    ("<i>", ""),
    ("</i>", ""),
    ("<I>", ""),
    ("</I>", ""),
    ("<b>", ""),
    ("</b>", ""),
    ("</font>", ""),
    ("</span>", ""),
    ("&copy;", ""),
    ("&quot;", "\""),
    ("&ccedil;", ""),
    ("<strong>", ""),
    ("</strong>", ""),
)


def _textBefore(text, marker):
    """Return the part of text that precedes the first occurrence of marker.

    When marker does not occur, text is returned whole.  Slicing with the
    -1 that string.find reports for "not found" would instead silently
    drop the last character.
    """
    end = string.find(text, marker)

    if end == -1:
        return text

    return text[0:end]


def _extractAfter(text, label, opener, closer="<"):
    """Return the stripped text found after label, then opener, up to closer.

    label and opener are skipped with searchForPlus, in that order; the
    remainder is cut at closer and passed through stripText.

    NOTE(review): a missing label or opener is not handled here; what
    searchForPlus returns in that case is not visible from this file.
    Callers in this module test for label before calling.
    """
    rest = searchForPlus(text, label)
    rest = searchForPlus(rest, opener)
    return stripText(_textBefore(rest, closer))


def _lastNameFirst(name):
    """Reorder a name around its last space: 'First Last' -> 'Last, First'.

    A name that contains no space is returned unchanged.
    """
    split = string.rfind(name, " ")

    if split == -1:
        return name

    return stripText(name[split:]) + ", " + stripText(name[0:split])


def _findCredit(page, labels):
    """Return the reordered name linked under the first label found in page.

    labels are tried in order; the name is the text that follows the first
    '">' after the label, up to the next '<', reordered by _lastNameFirst.
    Returns None when none of the labels occurs in page.
    """
    for label in labels:
        if string.find(page, label) != -1:
            return _lastNameFirst(_extractAfter(page, label, "\">"))

    return None


def extract():
    """Fill the module-level video fields from the page text in source.

    Reads the global source (presumably the HTML of a CD Universe product
    page supplied by the host application -- confirm against the caller)
    and assigns the fields it can locate to the globals declared below.
    Fields whose label is not present keep whatever value they had, apart
    from the defaults that are always reset at the top.

    The title, price and description steps each replace source with the
    text that follows the match, so every later lookup only sees the
    remainder of the page.  The order of the sections below is therefore
    significant.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # place is not extracted from the page; it is left as the host set it.

    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    format              = "DVD"
    color               = "Color"
    sound               = "Stereo"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # Find List price (searched in the full, untrimmed page)
    if string.find(source, ">List Price<") != -1:
        listprice = searchForPlus(source, ">List Price<")
        listprice = searchFor(listprice, "$")
        listprice = stripText(_textBefore(listprice, "<"))


    # Find Ranking
    if string.find(source, ">All Time Sales Rank<") != -1:
        salesrank = _extractAfter(source, ">All Time Sales Rank<",
                                  ">&nbsp;&nbsp;", "&")


    # Find Image
    # The cover art folders are tried in order.  A hit only counts when it
    # lies within the first 6000 characters of the page, so source must not
    # be trimmed before this point.
    i = -1

    for tag in ("/CDUCoverArt/", "/CDUCoverart/", "/MuzeVideoArt/"):
        i = string.find(source, tag)

        if i != -1 and i <= 6000:
            break

    if i != -1 and i < 6000:
        # Back up 100 characters to reach the start of the <img> tag, but
        # never before the start of the page: a negative start index would
        # wrap around and slice from the end instead.
        start = i - 100

        if start < 0:
            start = 0

        image = stripText(source[start:])
        image = searchForPlus(image, "src=\"")
        image = stripText(_textBefore(image, "\""))

        # The site's placeholder picture is not a real cover
        if string.find(image, "no-image-avail") != -1:
            image = ""


    # Find Title (trims source to the text after the title marker)
    if string.find(source, "size=3><strong>") != -1:
        source = searchForPlus(source, "size=3><strong>")
        title = stripText(_textBefore(source, "<"))

        if title.endswith(" DVD"):
            title = stripText(title[0:len(title) - 4])
            format = "DVD"


    # Find Price (trims source to the text starting at the price)
    i = string.find(source, ">Sale Price:")

    if i == -1:
        i = string.find(source, ">Regular Price:")

    if i != -1:
        source = searchFor(source[i:], "$")
        value = stripText(_textBefore(source, "<"))


    # Find keywords
    # Every link in the Category row becomes a keyword; the first one is
    # also used as the category.
    keywords = ""

    if string.find(source, ">Category<") != -1:
        temp = searchForPlus(source, ">Category<")
        temp = stripText(_textBefore(temp, "</tr>"))

        while searchFor(temp, "<a href") is not None:
            temp = searchForPlus(temp, "<a href")
            temp = searchForPlus(temp, "\">")
            keyword = stripText(_textBefore(temp, "<"))

            if keywords == "":
                keywords = keyword
                category = keywords
            else:
                keywords = keywords + ", " + keyword


    # Find Studio
    if string.find(source, ">Studio<") != -1:
        studio = _extractAfter(source, ">Studio<", "\">")


    # Find Copyright date
    if string.find(source, ">Orig Year<") != -1:
        copyDate = _extractAfter(source, ">Orig Year<", "<td>")


    # Find Release date
    if string.find(source, ">Street Date<") != -1:
        date = _extractAfter(source, ">Street Date<", "<td>")

        # Without the full date format only the last word is kept
        if fullDateFormat == "false":
            i = string.rfind(date, " ")

            if i != -1:
                date = stripText(date[i+1:])


    # Find Rating
    if string.find(source, ">Rating<") != -1:
        mpaa = _extractAfter(source, ">Rating<", "<td>", "</td>")
        mpaa = string.replace(mpaa, "<span class=sm>", "")
        mpaa = string.replace(mpaa, "</span>", "")


    # Find Running time
    if string.find(source, ">Running Time<") != -1:
        running = _extractAfter(source, ">Running Time<", "<td>")


    # Find Attributes
    # The free-text cells of both rows are concatenated and scanned below.
    attrs = ""

    for label in (">Additional Info<", ">Movie Details<"):
        if string.find(source, label) != -1:
            attrs = attrs + " " + _extractAfter(source, label, "<td>")


    # Find Widescreen
    if string.find(attrs, "Widescreen") != -1:
        wide = "Y"
    else:
        wide = "N"


    # Find Closed Captions
    if string.find(attrs, "Closed Captioned") != -1:
        closedCap = "Y"
    else:
        closedCap = "N"


    # Find Color
    if string.find(attrs, "Color") != -1:
        color = "Color"


    # Find Sound
    if string.find(source, ">Audio:<") != -1:
        sound = _extractAfter(source, ">Audio:<", ">")


    # Find Actors
    # Each link in the Starring row is one actor, stored last name first.
    if string.find(source, ">Starring<") != -1:
        tempData = searchForPlus(source, ">Starring<")
        actors = stripText(_textBefore(tempData, "</tr>"))
        actorList = []

        while searchFor(actors, "href") is not None:
            actors = searchForPlus(actors, "href")
            actors = searchForPlus(actors, ">")
            actor = stripText(_textBefore(actors, "<"))
            actorList.append(_lastNameFirst(actor))

        # Only the first ten actors have a field; the rest are dropped
        found = len(actorList)

        if found > 0:
            actor1 = actorList[0]

        if found > 1:
            actor2 = actorList[1]

        if found > 2:
            actor3 = actorList[2]

        if found > 3:
            actor4 = actorList[3]

        if found > 4:
            actor5 = actorList[4]

        if found > 5:
            actor6 = actorList[5]

        if found > 6:
            actor7 = actorList[6]

        if found > 7:
            actor8 = actorList[7]

        if found > 8:
            actor9 = actorList[8]

        if found > 9:
            actor10 = actorList[9]


    # Find Director (the name is taken from the first link after the label)
    if string.find(source, ">Director<") != -1:
        tempData = searchForPlus(source, ">Director<")
        director = _lastNameFirst(_extractAfter(tempData, "<a href", "\">"))


    # Find Composer
    name = _findCredit(source, (">Composer<", ">Music<"))

    if name is not None:
        composer = name


    # Find Photographer
    name = _findCredit(source, (">Director of Photography<",))

    if name is not None:
        photographer = name


    # Find Screenwriter
    name = _findCredit(source, (">Screenwriter<", ">Screenplay<"))

    if name is not None:
        screenwriter = name


    # Find Editor
    name = _findCredit(source, (">Editor<",))

    if name is not None:
        editor = name


    # Find Author
    name = _findCredit(source, (">Story<",))

    if name is not None:
        author = name


    # Find Writer
    name = _findCredit(source, (">Source Writer<",))

    if name is not None:
        writer = name


    # Find Comments (Review)
    # Trims source to the text that follows the description heading.
    if string.find(source, "> Description") != -1:
        source = searchForPlus(source, "> Description")
        source = searchForPlus(source, "<hr ")
        source = searchForPlus(source, ">")
        tempcomment = _textBefore(source, "<span")

        # Stop at whichever of these ends the description block
        for stop in ("</div>", "<form "):
            tempcomment = _textBefore(tempcomment, stop)

        # Line breaks in the markup are not meaningful; the paragraph and
        # <br> tags below put the real ones back.
        tempcomment = string.replace(tempcomment, "\n", "")

        for markup, replacement in _COMMENT_REPLACEMENTS:
            tempcomment = string.replace(tempcomment, markup, replacement)

        comments = comments + "\n\n" + tempcomment


    # Find user rating (the text in front of "out", as in "N out of M")
    if string.find(source, ">Average Rating:<") != -1:
        rating = _extractAfter(source, ">Average Rating:<", ">", "out")






# Run the extraction, then hand over to the optional user exit script.
# The finally clause lets the user exit run even when extract() raises.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # execfile() is a function and takes the target namespace as an
        # argument; the "in <namespace>" form belongs to the exec
        # statement only.
        execfile("scrapers/vwuserexit.py", globals())
