# amazon.fr scraper
#
# Copyright (c) 1999-2007 Readerware Corporation.  All Rights Reserved.

import  os
import  string

from    com.readerware.http import HTTPConnection

from    scrapers.scrapers import searchFor
from    scrapers.scrapers import searchForPlus
from    scrapers.scrapers import stripText


# Literal substitutions applied, in this exact order, to the product
# description before it is stored in the global ``comments``: simple markup
# is removed or turned into newlines, a few entities become plain characters,
# and hexadecimal character references are rewritten in decimal form.
_COMMENT_REPLACEMENTS = (
    ("</a>", ""),
    ("</A>", ""),
    ("<p>", "\n\n"),
    ("<P>", "\n\n"),
    ("<BR>", "\n"),
    ("<br>", "\n"),
    ("<br />", "\n"),
    ("<i>", ""),
    ("</i>", ""),
    ("<I>", ""),
    ("</I>", ""),
    ("<b>", ""),
    ("</b>", ""),
    ("<em>", ""),
    ("</em>", ""),
    ("</font>", ""),
    ("</span>", ""),
    ("&copy;", ""),
    ("&#145;", "'"),
    ("&#169;", ""),
    ("&#8217;", "'"),
    ("&quot;", "\""),
    ("&#8211;", "-"),
    ("&#146;", "\'"),
    ("&#x9C;", "&#156;"),
    ("&#xC9;", "&#201;"),
    ("&#xE0;", "&#224;"),
    ("&#xEE;", "&#238;"),
    ("&#xEF;", "&#239;"),
    ("&#xE7;", "&#231;"),
    ("&#xE8;", "&#232;"),
    ("&#xE9;", "&#233;"),
    ("&#xEA;", "&#234;"),
    ("&#xF4;", "&#244;"),
    ("&#xF9;", "&#249;"),
    ("&#xFB;", "&#251;"),
    ("&#xE2;", "&#226;"),
    ("<blockquote>", ""),
    ("</blockquote>", ""),
    ("<li>", ""),
    ("</li>", ""),
)


def _lastNameFirst(name):
    """Return name with its last space-separated word moved to the front.

    "First Middle Last" becomes "Last, First Middle"; a name that contains
    no space is returned unchanged.
    """
    i = string.rfind(name, " ")
    if i != -1:
        name = stripText(name[i:]) + ", " + stripText(name[0:i])
    return name


def _dropSpaceAfter(text, prefix):
    """Delete the single space following prefix until none is left.

    Each pass removes one space from the first occurrence of prefix + " ",
    so a run of spaces after prefix is removed completely.
    """
    pattern = prefix + " "
    keep = len(prefix)
    while searchFor(text, pattern) is not None:
        i = string.find(text, pattern)
        text = text[0:i+keep] + text[i+keep+1:]
    return text


def _dropTags(text, opener):
    """Remove every tag in text that begins with opener, through its ">".

    When a tag has no closing ">" (for example because the text was cut
    off in the middle of the tag) only the opener itself is removed, so the
    loop always makes progress and cannot spin forever.
    """
    while searchFor(text, opener) is not None:
        i = string.find(text, opener)
        j = string.find(text[i:], ">")
        if j == -1:
            j = len(opener) - 1
        text = text[0:i] + text[i+j+1:]
    return text


def extract():
    """Scrape the amazon.fr product page held in the global ``source``.

    The page text is consumed front to back: most steps reassign ``source``
    to whatever ``searchForPlus`` returns for the marker just matched
    (presumably the text following that marker -- confirm in
    scrapers.scrapers), so the order of the sections below is significant.

    Nothing is returned; results are stored in module-level globals.  This
    function always assigns the default flags and price fields, then (unless
    it returns early) ``image`` and ``title``.  ``format``, ``value``,
    ``actor1`` .. ``actor10``, ``director``, ``studio``, ``date`` and
    ``comments`` are assigned only when their marker is found in the page.
    The remaining names in the ``global`` statements are declared but never
    assigned here.

    The function returns early, right after setting the defaults, when the
    page contains the "cannot be sold on Amazon Marketplace" notice.
    """
    global title,actor1,actor2,actor3,actor4,actor5,actor6
    global actor7,actor8,actor9,actor10,director,writer
    global screenwriter,photographer,composer,editor,series
    global upc,isbn,lccn,dewey,userNumber,format,studio,place
    global date,copyDate,mpaa,wide,closedCap,sound,copies
    global rating,condition,category,viewed,pflag,eflag,value
    global valueDate,comments,dateEntered,dataSource,cart,ordered
    global location,keywords,book,author,running,color
    global track1,track2,track3,track4,track5
    global track6,track7,track8,track9,track10
    global track11,track12,track13,track14,track15
    global track16,track17,track18,track19,track20
    global user1,user2,user3,user4,user5,user6,user7,user8,user9,user10
    global usedprice,usedcount,collectibleprice,collectiblecount
    global newprice,newcount,listprice,salesrank,available
    global buyerwaiting,editionNumber,image,fullDateFormat,source

    # Defaults
    pflag               = "Y"
    eflag               = "Y"
    wide                = "N"
    closedCap           = "N"
    usedprice           = ""
    usedcount           = ""
    collectibleprice    = ""
    collectiblecount    = ""
    newprice            = ""
    newcount            = ""
    listprice           = ""
    salesrank           = ""
    available           = "Y"
    buyerwaiting        = "N"

    # When the page carries the "sai-identify" marker (presumably an
    # intermediate/search-results page -- TODO confirm), follow the link
    # found near the first "/ASIN/" and scrape the page it points to instead.
    if string.find(source, "sai-identify") != -1:
        source = searchForPlus(source, "sai-identify")
        i = string.find(source, "/ASIN/")
        # Back up at most 50 characters to reach the enclosing <a href=...>.
        # The start is clamped at 0: a negative start index (no match, or a
        # match in the first 50 characters) would slice from the END of the
        # page and lose the link.
        source = source[max(i - 50, 0):]
        source = searchForPlus(source, "<a href=")
        i = string.find(source, ">")
        url = stripText(source[0:i])
        http = HTTPConnection()
        http.resetReferer()
        http.blockForLoad()
        source = http.getContents(url)

    # Not found: the page says the item cannot be sold on Marketplace.
    # NOTE(review): "tre" looks like the French word "etre" with its accented
    # first letter lost, possibly to an encoding problem in this file --
    # confirm the literal against the text of a live page.
    if string.find(source, "il ne peut pas tre vendu sur Amazon Marketplace") != -1:
        return

    # Find Format (DVD, VHS): VHS unless the description meta tag says DVD.
    if string.find(source, "<meta name=\"description\"") != -1:
        format = "VHS"
        source = searchForPlus(source, "<meta name=\"description\"")
        i = string.find(source, "/>")
        if string.find(stripText(source[0:i]), "DVD") != -1:
            format = "DVD"

    # Find Image (first try)
    image = ""
    if string.find(source, "registerImage(\"original_image\"") != -1:
        source = searchForPlus(source, "registerImage(\"original_image\"")
        source = searchForPlus(source, "\"")
        i = string.find(source, "\"")
        image = stripText(source[0:i])

        # A placeholder picture counts as no image at all.
        if string.find(image, "no-image") != -1:
            image = ""

        # Cut out everything from the first comma through the last comma.
        i = string.find(image, ",")
        if i != -1:
            j = string.rfind(image[i:], ",")
            image = stripText(image[0:i] + image[i+j+1:])

        # Rewrite the first "_AA" in the URL as "_SL".
        i = string.find(image, "_AA")
        if i != -1:
            image = stripText(image[0:i] + "_SL" + image[i+3:])

    # Find Title
    source = searchForPlus(source, "<b class=\"sans\">")
    source = searchForPlus(source, "\"btAsinTitle\"")
    source = searchForPlus(source, ">")
    i = string.find(source, "<")
    title = stripText(source[0:i])

    # Find Price: the text between "EUR " and the closing </b>.
    if string.find(source, "<b class=\"price\"") != -1:
        source = searchForPlus(source, "<b class=\"price\">")
        source = searchForPlus(source, "EUR ")
        i = string.find(source, "</b>")
        value = stripText(source[0:i])

    # Skip ahead to the product details section when there is one.
    if string.find(source, "tails sur le produit") != -1:
        source = searchForPlus(source, "tails sur le produit")

    # Find Actors
    tag = '>Acteurs'
    if string.find(source, tag) != -1:
        source = searchForPlus(source, tag)
        i = string.find(source, "</li>")
        # The appended ">, <" guarantees a ", " after the last link, so the
        # loop below also processes the final actor before it runs dry.
        actors = stripText(source[0:i] + ">, <")
        actorList = []

        while searchFor(actors, ", ") is not None:
            if string.find(actors, "<a href") != -1:
                actors = searchForPlus(actors, "<a href")
                actors = searchForPlus(actors, ">")
                i = string.find(actors, "<")
                actorList.append(_lastNameFirst(stripText(actors[0:i])))

            actors = searchForPlus(actors, ", ")

        # Publish at most ten names as the globals actor1 .. actor10; slots
        # beyond the number of actors found are left untouched.
        slot = 0
        for name in actorList[:10]:
            slot = slot + 1
            globals()["actor%d" % slot] = name

        source = searchForPlus(source, "</li>")

    # Find Director (only the first linked name is used)
    # NOTE(review): "Ralisateurs" looks like "Realisateurs" with its accented
    # second letter lost, possibly to an encoding problem in this file --
    # confirm the literal against the text of a live page.
    tag = ">Ralisateurs"
    if string.find(source, tag) != -1:
        source = searchForPlus(source, tag)
        source = searchForPlus(source, "<a href")
        source = searchForPlus(source, ">")
        i = string.find(source, "<")
        director = _lastNameFirst(stripText(source[0:i]))

    # Find Studio
    tag = ">Studio"
    if string.find(source, tag) != -1:
        source = searchForPlus(source, tag)
        source = searchForPlus(source, "</b>")
        i = string.find(source, "</li>")
        studio = stripText(source[0:i])

    # Find Date
    tag = ">Date de sortie"
    if string.find(source, tag) != -1:
        source = searchForPlus(source, tag)
        source = searchForPlus(source, "</b>")
        i = string.find(source, "</li>")
        date = stripText(source[0:i])

    # Find Comments
    tag = ">Descriptions du produit<"
    if string.find(source, tag) != -1:
        comments = ""
        source = searchForPlus(source, tag)
        source = searchForPlus(source, "<div")
        source = searchForPlus(source, ">")
        i = string.find(source, "</div>")
        tempcomments = source[0:i]

        # Cut the description off at the first form or horizontal rule.
        i = string.find(tempcomments, "</form>")
        if i != -1:
            tempcomments = tempcomments[0:i]

        i = string.find(tempcomments, "<hr noshade")
        if i != -1:
            tempcomments = tempcomments[0:i]

        # Step past a leading <font ...> and/or <span ...> wrapper; the
        # search for ">" that follows is unconditional, exactly as before.
        if string.find(tempcomments, "<font") != -1:
            tempcomments = searchForPlus(tempcomments, "<font")

        if string.find(tempcomments, "<span") != -1:
            tempcomments = searchForPlus(tempcomments, "<span")

        tempcomments = searchForPlus(tempcomments, ">")

        # Remove the next <span ...> opening tag, if there is one.
        i = string.find(tempcomments, "<span")
        if i != -1:
            j = string.find(tempcomments[i:], ">")
            tempcomments = tempcomments[0:i] + tempcomments[i+j+1:]

        tempcomments = string.replace(tempcomments, "\n", "")

        # Collapse runs of spaces, then drop spaces that directly follow a
        # paragraph or line-break tag.
        for prefix in (" ", "<p>", "<P>", "<br>", "<BR>"):
            tempcomments = _dropSpaceAfter(tempcomments, prefix)

        for old, new in _COMMENT_REPLACEMENTS:
            tempcomments = string.replace(tempcomments, old, new)

        # Drop spaces at the start of the lines created just above.
        tempcomments = _dropSpaceAfter(tempcomments, "\n")

        tempcomments = _dropTags(tempcomments, "<a ")

        # Upper-case anchors are additionally tidied with stripText after
        # every removal, as before.  The j == -1 guard keeps an unterminated
        # tag from looping forever.
        while searchFor(tempcomments, "<A ") is not None:
            i = string.find(tempcomments, "<A ")
            j = string.find(tempcomments[i:], ">")
            if j == -1:
                j = 2
            tempcomments = stripText(tempcomments[0:i] + tempcomments[i+j+1:])

        tempcomments = _dropTags(tempcomments, "<img src")
        tempcomments = _dropTags(tempcomments, "<span")

        tempcomments = stripText(tempcomments)
        if tempcomments.endswith("See all reviews"):
            i = string.rfind(tempcomments, "See all reviews")
            tempcomments = stripText(tempcomments[0:i])

        comments = tempcomments


# Run the scraper; whether or not extract() raises, give the optional
# scrapers/vwuserexit.py script (presumably a user-supplied hook -- confirm)
# a chance to run afterwards.
try:
    extract()
finally:
    if os.path.exists("scrapers/vwuserexit.py"):
        # Pass this module's namespace explicitly.  The previous spelling,
        # `execfile(path) in globals()`, ran execfile(path) with its default
        # namespaces and then evaluated a meaningless membership test on the
        # result; at module level the explicit form runs the script in the
        # same namespace.
        execfile("scrapers/vwuserexit.py", globals())
