# Chapters [CA] video scraper
#
# Copyright  1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText
from    scrapers.scrapers import stripHTML


def _before(text, marker):
    """Return the part of text that precedes the first occurrence of marker.

    When marker does not occur the whole of text is returned, rather than
    slicing with the -1 reported by string.find() (which would silently
    drop the last character).
    """
    pos = string.find(text, marker)
    if pos == -1:
        return text
    return text[0:pos]


def _replaceUntilGone(text, old, new):
    """Replace the first occurrence of old with new until old is gone.

    Rescanning after every replacement (instead of one string.replace pass)
    also collapses occurrences created by an earlier replacement, e.g.
    "&amp;amp;" -> "&amp;" -> "&".  Callers must pass a `new` that does not
    contain `old`, otherwise the loop cannot terminate.
    """
    pos = string.find(text, old)
    while pos != -1:
        text = text[0:pos] + new + text[pos + len(old):]
        pos = string.find(text, old)
    return text


def _resetQueryParam(url, key):
    """Set the value that follows key (e.g. "&sale=") in url to "0".

    The value runs up to the next "&".  If there is no further "&" the
    parameter is the last one and its value runs to the end of the URL.
    url is returned unchanged when key does not occur.
    """
    start = string.find(url, key)
    if start == -1:
        return url
    valueStart = start + len(key)
    valueEnd = string.find(url, "&", valueStart)
    if valueEnd == -1:
        return url[0:valueStart] + "0"
    return url[0:valueStart] + "0" + url[valueEnd:]


def _stripOpenTags(text, tagStart):
    """Remove every tag beginning with tagStart (e.g. "<div ") from text.

    Each tag is removed through its closing ">".  A tag with no closing ">"
    is treated as truncated markup and everything from its start is dropped,
    which also guarantees the loop terminates.
    """
    pos = string.find(text, tagStart)
    while pos != -1:
        close = string.find(text, ">", pos)
        if close == -1:
            text = text[0:pos]
        else:
            text = text[0:pos] + text[close + 1:]
        pos = string.find(text, tagStart)
    return text


def _lastNameFirst(name):
    """Turn "First Last" into "Last, First", splitting at the last space.

    A name without a space is returned unchanged.
    """
    pos = string.rfind(name, " ")
    if pos == -1:
        return name
    return stripText(name[pos:]) + ", " + stripText(name[0:pos])


def _cutAtNbsp(text):
    """Truncate text at the first non-breaking space, if there is one.

    NOTE(review): the original code searched for an empty string literal
    here, which string.find() always reports at index 0, so the field was
    always blanked.  The literal was most likely a non-breaking space that
    was lost when the file was transcoded; both the entity and the raw
    character are accepted.  Confirm against the live page markup.
    """
    cut = -1
    for marker in ("&nbsp;", "\xa0"):
        pos = string.find(text, marker)
        if pos != -1 and (cut == -1 or pos < cut):
            cut = pos
    if cut == -1:
        return text
    return stripHTML(text[0:cut])


def _trimDate(text, lastWordOnly):
    """Apply the date clean-up shared by the release and copyright dates.

    When lastWordOnly is true only the text after the last space is kept
    (presumably the year -- depends on the page's date layout).  Afterwards
    anything from the last "." onwards is dropped.
    """
    if lastWordOnly:
        pos = string.rfind(text, " ")
        if pos != -1:
            text = stripText(text[pos + 1:])
    pos = string.rfind(text, ".")
    if pos != -1:
        text = stripText(text[0:pos])
    return text


def _takeField(label):
    """Advance the global source past label and return the value after it.

    source is moved past label and then past the next ">"; the text up to
    the following "<" is returned after stripHTML().  The caller must have
    checked that label occurs in source.
    """
    global source

    source = searchForPlus(source, label)
    source = searchForPlus(source, ">")
    return stripHTML(_before(source, "<"))


def extract():
    """Scrape the video details found in the global source into field globals.

    Reads the globals `source` (the text being scraped) and `fullDateFormat`
    (compared against the string "false"), and assigns those of the declared
    field globals whose markers are found.  Fields whose markers are missing
    are not assigned here, apart from the defaults set at the top.

    `source` is consumed while scanning: after each section is located it is
    rebound to the text following that section, so sections are only found
    in the order they are searched for below and `source` is left truncated
    on return.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global copies,location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # No place extraction default to US
    # place = "United States"


    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    color               = "Color"
    sound               = "Stereo"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"


    # Find marketplace pricing.  This works on a copy so that the main
    # scan below still starts from the untouched source.
    marketinfo = source

    if string.find(marketinfo, ">From Our Marketplace Sellers:<") != -1:
        marketinfo = searchForPlus(marketinfo, ">From Our Marketplace Sellers:<")


    # Find List price
    if string.find(marketinfo, "List Price:") != -1:
        priced = searchFor(searchForPlus(marketinfo, "List Price:"), "$")

        # searchFor() yields None when there is no "$"; keep the "" default
        # in that case instead of failing on the None.
        if priced is not None:
            listprice = stripText(_before(priced, "<"))


    # Main extraction


    # Find Image
    if string.find(source, "\"OTSProductImage\"") != -1:
        image = searchForPlus(source, "\"OTSProductImage\"")
        image = searchForPlus(image, "src=\"")
        image = stripText(_before(image, "\""))

        image = _replaceUntilGone(image, "&amp;", "&")

        # Zero the sale/header parameters of the image URL
        image = _resetQueryParam(image, "?sale=")
        image = _resetQueryParam(image, "&sale=")
        image = _resetQueryParam(image, "&header=")

        if string.find(image, "NotAvailable") != -1:
            image = ""


    # Find Title
    if string.find(source, "\"OTSProductHeading\"") != -1:
        source = searchForPlus(source, "\"OTSProductHeading\"")
        source = searchForPlus(source, "<h1>")
        title = stripText(_before(source, "<"))

        # Drop a trailing parenthesised suffix
        i = string.rfind(title, "(")

        if i != -1:
            title = stripText(title[0:i])


    # Find Actors
    actorTag = ">Starring:"

    if string.find(source, actorTag) != -1:
        source = searchForPlus(source, actorTag)
        actors = stripText(_before(source, "</p>"))

        # The actor links end at the first line break or span
        for stopTag in ("<br/>", "<span>"):
            i = string.find(actors, stopTag)

            if i != -1:
                actors = stripText(actors[0:i])

        actorList = []

        while searchFor(actors, "<a href") is not None:
            actors = searchForPlus(actors, "<a href")
            actors = searchForPlus(actors, "\">")
            actorList.append(_lastNameFirst(stripText(_before(actors, "<"))))

        if len(actorList) > 0:
            actor1 = actorList[0]

        if len(actorList) > 1:
            actor2 = actorList[1]

        if len(actorList) > 2:
            actor3 = actorList[2]

        if len(actorList) > 3:
            actor4 = actorList[3]

        if len(actorList) > 4:
            actor5 = actorList[4]

        if len(actorList) > 5:
            actor6 = actorList[5]

        if len(actorList) > 6:
            actor7 = actorList[6]

        if len(actorList) > 7:
            actor8 = actorList[7]

        if len(actorList) > 8:
            actor9 = actorList[8]

        if len(actorList) > 9:
            actor10 = actorList[9]


    # Find Director
    if string.find(source, ">Director:") != -1:
        source = searchForPlus(source, ">Director:")
        source = searchForPlus(source, "<a href")
        source = searchForPlus(source, "\">")
        director = _lastNameFirst(stripText(_before(source, "<")))


    # Find Price
    if string.find(source, ">Our Price:") != -1:
        source = searchForPlus(source, ">Our Price:")
        priced = searchFor(source, "$")

        # Only advance when a "$" exists; rebinding source to None would
        # break every lookup that follows.
        if priced is not None:
            source = priced
            value = stripText(_before(source, "<"))


    # Find Release date
    if string.find(source, ">Video Release:") != -1:
        date = _trimDate(_takeField(">Video Release:"), fullDateFormat == "false")


    # Find Copyright date
    if string.find(source, "Theatrical Release:") != -1:
        copyDate = _cutAtNbsp(_takeField("Theatrical Release:"))
        copyDate = _trimDate(copyDate, fullDateFormat == "false")


    # Find Runtime
    if string.find(source, "Runtime:") != -1:
        running = _cutAtNbsp(_takeField("Runtime:"))


    # Find Rating
    if string.find(source, "Rating:") != -1:
        mpaa = _cutAtNbsp(_takeField("Rating:"))


    # Find Studio
    if string.find(source, "Studio:") != -1:
        studio = _takeField("Studio:")


    # Find UPC
    if string.find(source, "UPC:") != -1:
        upc = _takeField("UPC:")


    # Find Comments: every "OTSItemNotes" section is appended, separated
    # by a blank line.
    comments = ""

    while searchFor(source, "\"OTSItemNotes\"") is not None:
        source = searchForPlus(source, "\"OTSItemNotes\"")
        source = searchForPlus(source, "</a>")
        tempcomments = stripText(_before(source, "</div>"))

        # Line breaks in the markup are not meaningful; the tag
        # replacements below reintroduce the real ones.
        tempcomments = string.replace(tempcomments, "\n", "")
        tempcomments = string.replace(tempcomments, "\r", "")
        tempcomments = _stripOpenTags(tempcomments, "<div ")

        if comments == "":
            comments = stripText(tempcomments)
        else:
            comments = comments + "\n\n" + stripText(tempcomments)

    if comments != "":
        # Applied in this order: entities, inline formatting, then block
        # tags mapped to line breaks.
        replacements = (
            ("&#8217;", "'"),
            ("&#8212;", "-"),
            ("&#8220;", "\""),
            ("&#8221;", "\""),
            ("<B>", ""),
            ("</B>", ""),
            ("<b>", ""),
            ("</b>", ""),
            ("<I>", ""),
            ("</I>", ""),
            ("<i>", ""),
            ("</i>", ""),
            ("<br>", "\n"),
            ("<BR>", "\n"),
            ("<Br>", "\n"),
            ("</br>", "\n"),
            ("<br/>", "\n"),
            ("<ul>", "\n"),
            ("<UL>", "\n"),
            ("</ul>", "\n\n"),
            ("</UL>", "\n\n"),
            ("<li>", "\n"),
            ("<LI>", "\n"),
            ("</li>", ""),
            ("</LI>", ""),
            ("</P>", "\n"),
            ("<P>", "\n\n"),
            ("<p>", "\n\n"),
            ("</p>", "\n"),
            ("<EM>", ""),
            ("</EM>", ""),
            ("<h3>", ""),
            ("</h3>", "\n"),
            ("<blockquote>", ""),
            ("</blockquote>", ""),
        )

        for old, new in replacements:
            comments = string.replace(comments, old, new)

        comments = _stripOpenTags(comments, "<p ")

        # Whitespace tidy-up: tabs become spaces, spaces next to line
        # breaks are removed, and runs of blank lines collapse to one.
        comments = string.replace(comments, "\t", " ")
        comments = _replaceUntilGone(comments, " \n", "\n")
        comments = _replaceUntilGone(comments, "\n ", "\n")
        comments = _replaceUntilGone(comments, "\r ", "\r")
        comments = _replaceUntilGone(comments, "\n\n\n", "\n\n")





# Run the scraper, then give the optional user exit script a chance to run
# even when extraction raised.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # execfile() without namespace arguments runs the script in the
        # current (top-level) scope.  A trailing "in globals()" on this call
        # would only be a membership test on execfile()'s None result, not a
        # namespace argument, so it is not used.
        execfile("scrapers/vwuserexit.py")
