#!/usr/bin/python

#
# Bid Monkey Engine
#
# pedram amini <pedram@redhive.com> <http://pedram.redhive.com>
#

import pycurl
import random
import re
import socket
import string
import StringIO
import sys
import urllib

# import exception handling class.
from monkeyx import *


class bid_monkey_engine:
    ############################################################################
    ### constructor
    ###
    ### initializes internal variables, attempts to initialize databases and
    ### randomly selects a user agent to use from the user agents database.
    ###
    ### args:    path to user agent database, path to patterns database,
    ###          path to cookies database.
    ### raises:  monkeyx.LOAD on db load failure.
    ### returns: bid monkey engine object.
    ###
    def __init__(self, u, p, c):
        """
        Build a bid monkey engine.

        Sets every member variable to its default, loads the user agents and
        patterns databases, picks a user agent and prepares the PyCURL handle
        and response buffer used by the scrape_*() methods.

        args:
            u: path to the user agents database.
            p: path to the patterns database.
            c: path to the cookies database (handed to curl as both the
               cookie file and the cookie jar).

        raises:
            monkeyx(monkeyx.LOAD, <path>) if either database fails to load.
        """
        # declared variables and defaults.
        self.currency       = ""      # currency of item number.
        self.db_cookies     = c       # cookies database.
        self.db_patterns    = p       # patterns database.
        self.db_user_agents = u       # user agents database.
        self.ebay_key       = 0       # ebay key, set by scrape_review_bid().
        self.headers        = ""      # headers for currently chosen user agent.
        self.item_number    = 0       # item number to bid on.
        self.max_bid        = 0       # maximum bid to place on item number.
        self.password       = ""      # ebay password.
        self.password_enc   = ""      # ebay encoded password.
        self.patterns       = {}      # patterns loaded from patterns db.
        self.patterns_ver   = ""      # patterns database version.
        self.price          = 0       # current price of item number.
        self.quantity       = 1       # quantity of item number to bid on.
        self.safety         = 30      # safety time in seconds.
        self.signed_in      = 0       # flag whether or not we are signed in.
        self.title          = ""      # auction title for item number.
        self.tl_days        = 0       # days left.
        self.tl_hours       = 0       # hours left.
        self.tl_mins        = 0       # minutes left.
        self.tl_secs        = 0       # seconds left.
        self.ttl_secs       = 0       # total time left in seconds.
        self.user_agent     = ""      # currently chosen user agent.
        self.user_agents    = []      # list of user agents to choose from.
        self.username       = ""      # ebay username.

        # constants.
        self.author_name  = "pedram amini"
        self.author_email = "pedram@redhive.com"
        self.author_url   = "http://pedram.redhive.com"
        self.version      = "2.5.1"

        # load user agents and patterns databases. any loader failure is
        # reported as monkeyx.LOAD carrying the offending path. we catch
        # Exception rather than using a bare except so that ctrl-c and
        # sys.exit() are not converted into load errors.
        try:
            self.load_db_user_agents()
        except Exception:
            raise monkeyx(monkeyx.LOAD, self.db_user_agents)

        try:
            self.load_db_patterns()
        except Exception:
            raise monkeyx(monkeyx.LOAD, self.db_patterns)

        # select a user agent and set the appropriate headers.
        # NOTE(review): self.headers is only stored; nothing in this
        # constructor passes it to curl (no HTTPHEADER / USERAGENT option).
        self.set_user_agent()

        # create and initialize a PyCURL object. every response body is
        # appended to self.data through the write callback.
        self.curl = pycurl.Curl()
        self.data = StringIO.StringIO()

        self.curl.setopt(pycurl.FOLLOWLOCATION, 1)
        # NOTE(review): peer certificate verification is switched off, so
        # https responses are not authenticated.
        self.curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        self.curl.setopt(pycurl.COOKIEFILE,     self.db_cookies)
        self.curl.setopt(pycurl.COOKIEJAR,      self.db_cookies)
        self.curl.setopt(pycurl.WRITEFUNCTION,  self.data.write)


    ############################################################################
    ### getters - for accessing internal variables.
    ###
    def get_author_name(self):
        """Return the author name as a string."""
        return str(self.author_name)

    def get_author_email(self):
        """Return the author e-mail address as a string."""
        return str(self.author_email)

    def get_author_url(self):
        """Return the author url as a string."""
        return str(self.author_url)

    def get_currency(self):
        """Return the currency of the item as a string."""
        return str(self.currency)

    def get_db_user_agents(self):
        """Return the user agents database path as a string."""
        return str(self.db_user_agents)

    def get_db_patterns(self):
        """Return the patterns database path as a string."""
        return str(self.db_patterns)

    def get_ebay_key(self):
        """Return the ebay key as a string."""
        return str(self.ebay_key)

    def get_headers(self):
        """Return the headers of the chosen user agent as a string."""
        return str(self.headers)

    def get_item_number(self):
        """Return the item number as a string."""
        return str(self.item_number)

    def get_max_bid(self):
        """Return the maximum bid converted to a float."""
        return float(self.max_bid)

    def get_password(self):
        """Return the ebay password as a string."""
        return str(self.password)

    def get_password_enc(self):
        """Return the ebay encoded password as a string."""
        return str(self.password_enc)

    def get_patterns_ver(self):
        """Return the patterns database version as a string."""
        return str(self.patterns_ver)

    def get_price(self):
        """Return the current price converted to a float."""
        return float(self.price)

    def get_quantity(self):
        """Return the quantity converted to an int."""
        return int(self.quantity)

    def get_safety(self):
        """Return the safety time in seconds converted to an int."""
        return int(self.safety)

    def get_signed_in(self):
        """Return the signed-in flag converted to an int."""
        return int(self.signed_in)

    def get_title(self):
        """Return the auction title as a string."""
        return str(self.title)

    def get_tl_days(self):
        """Return the days left converted to an int."""
        return int(self.tl_days)

    def get_tl_hours(self):
        """Return the hours left converted to an int."""
        return int(self.tl_hours)

    def get_tl_mins(self):
        """Return the minutes left converted to an int."""
        return int(self.tl_mins)

    def get_tl_secs(self):
        """Return the seconds left converted to an int."""
        return int(self.tl_secs)

    def get_ttl_secs(self):
        """Return the total time left in seconds converted to an int."""
        return int(self.ttl_secs)

    def get_user_agent(self):
        """Return the chosen user agent description as a string."""
        return str(self.user_agent)

    def get_username(self):
        """Return the ebay username as a string."""
        return str(self.username)

    def get_version(self):
        """Return the engine version as a string."""
        return str(self.version)


    ############################################################################
    ### load_db_patterns()
    ###
    ### loads the patterns database into the patterns dictionary. these patterns
    ### are abstracted into an easy to update file.
    ###
    ### raises: generic exception on failure.
    def load_db_patterns(self):
        # open the database as read-only.
        try:    db = file(self.db_patterns, 'r')
        except: raise Exception

        # initialize the patterns data structure thus enabling this function
        # to be re-entrant.
        self.patterns = {}

        for line in db.readlines():
            # extract the pattern database version.
            matches = re.search("Patterns Database (?P<version>.*)$", line)
            if matches:
                self.patterns_ver = matches.groupdict()['version']

            # ignore comments and blank lines.
            if len(line) == 0 or re.search("^[#|\n|\r]", line):
                continue

            # seperate the pattern name from the pattern.
            (name, pattern) = line.split(":", 1)

            # strip new lines (\n) and carriage returns (\r) from the pattern.
            pattern = pattern.replace("\r", "")
            pattern = pattern.replace("\n", "")

            # store the pattern name and actual pattern in a dictionary.
            self.patterns[name] = pattern

        db.close


    ############################################################################
    ### load_db_user_agents()
    ###
    ### loads the user agent database into a list of dictionaries that can be
    ### referenced by 'weight', 'description' or 'headers'.
    ###
    ### raises: generic exception on failure.
    ###
    def load_db_user_agents(self):
        # open the database as read-only.
        try:    db = file(self.db_user_agents, 'r')
        except: raise Exception

        # initialize the user agents data structure thus enabling this function
        # to be re-entrant
        self.user_agents = []

        for line in db.readlines():
            # ignore comments and blank lines.
            if len(line) == 0 or re.search("^[#|\n|\r]", line):
                continue

            # extract the weight, description, and headers. we store these in a
            # tuple (array) of dictionaries.
            # seperate the weight, description, and headers.
            (w, d, h) = line.split(":", 2)

            # string new lines (\n) and carriage returns (\r) from the header.
            h = h.replace("\r", "")
            h = h.replace("\n", "")

            # store the 3 fields in a dictionary.
            self.user_agents.append(dict([('weight',      int(w)),
                                          ('description', d),
                                          ('headers',     h)]))

        db.close


    ############################################################################
    ### load_url()
    ###
    ### args:    host, url.
    ### raises:  general exception on socket failure.
    ### returns: list of returned data.
    ###
    def load_url(self, host, url):
        """
        Fetch a url with a plain HTTP/1.0 GET over a raw socket on port 80.

        args:
            host: host name to connect to (also sent as the Host header).
            url:  request path to GET.

        raises:
            Exception on any socket failure (create, connect, send or read).

        returns:
            list of the raw response lines, HTTP headers included.
        """
        # generate the get request.
        get_request  = "GET %s HTTP/1.0\r\n" % url
        get_request += "Host: %s\r\n"        % host
        get_request += "\r\n"

        # evaluate carriage returns and new lines that were supplied as the
        # literal two character sequences backslash-r / backslash-n.
        get_request = get_request.replace("\\r", "\r").replace("\\n", "\n")

        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        except socket.error:
            raise Exception("load_url(): unable to create socket")

        # connect, send the request and read the whole response. the finally
        # clause closes the socket on every path so a failed connect or read
        # no longer leaks the descriptor.
        try:
            s.connect((host, 80))

            # sendall() keeps writing until the full request is out; send()
            # may transmit only part of the buffer.
            s.sendall(get_request)

            sf = s.makefile()
            try:
                return sf.readlines()
            finally:
                sf.close()
        except socket.error:
            raise Exception("load_url(): socket failure talking to %s" % host)
        finally:
            s.close()


    ############################################################################
    ### parse_max_bid()
    ###
    ### parses a maximum bid value into a string with two decimal places, then
    ### applies the parse_max_bid patterns to it. useful for parsing value
    ### provided by user.
    ###
    ### args:    max bid as a human readable string.
    ### raises:  monkeyx.PARSING if unable to parse max bid.
    ### returns: max bid as a string.
    ###
    def parse_max_bid(self, max_bid):
        """
        Normalize a user supplied maximum bid.

        The value is converted to a float and formatted with two decimal
        places, then rewritten with the 'parse_max_bid_search' /
        'parse_max_bid_replace' patterns, and finally stripped of commas.

        args:
            max_bid: maximum bid as a number or numeric string.

        raises:
            monkeyx(monkeyx.PARSING, ...) if max_bid is not numeric.
            KeyError if the parse_max_bid patterns are not loaded.

        returns:
            the normalized maximum bid as a string.
        """
        # only conversion failures mean "not numeric"; anything else should
        # surface unchanged instead of being reported as a parsing error.
        try:
            max_bid = "%.2f" % float(max_bid)
        except (TypeError, ValueError):
            raise monkeyx(monkeyx.PARSING, "max bid must be in numeric form")

        # load the patterns into temporary variables.
        search  = self.patterns['parse_max_bid_search']
        replace = self.patterns['parse_max_bid_replace']

        max_bid = re.sub(search, replace, max_bid)

        # a literal comma needs no regular expression.
        return max_bid.replace(",", "")


    ############################################################################
    ### renew()
    ###
    ### renews the patterns database file with a new copy from the web.
    ###
    ### args:   none (uses the patterns database path given to the constructor).
    ### raises: monkeyx.SOCKET if load_url() fails.
    ###         monkeyx.LOAD if pattern database reload fails.
    ###         generic exception on file i/o error.
    ###
    def renew(self):
        """
        Replace the patterns database file with a fresh copy from the web
        and reload it into memory.

        The response is written starting at its first line that begins with
        '#', which drops the HTTP headers.

        raises:
            monkeyx(monkeyx.SOCKET, ...) if the download fails or the
            response holds no pattern data.
            monkeyx(monkeyx.LOAD, <path>) if reloading the database fails.
            Exception if the database file can not be opened for writing.
        """
        # retrieve a fresh patterns database from the web.
        try:
            data = self.load_url("pedram.redhive.com", "/patterns.db")
        except Exception:
            raise monkeyx(monkeyx.SOCKET, "renew()")

        fresh      = []       # lines that make up the new database.
        version    = None     # version announced by the response, if any.
        write_flag = False    # becomes true at the first comment line.

        for line in data:
            # we don't want to return http headers.
            # so we look for the first comment.
            if line.startswith("#"):
                write_flag = True

            # extract the pattern database version.
            matches = re.search("Patterns Database (?P<version>.*)$", line)
            if matches:
                version = matches.group('version')

            if write_flag:
                fresh.append(line)

        # never truncate the existing database for a response without any
        # pattern data (error page, empty reply): that would leave the
        # engine without patterns.
        if not fresh:
            raise monkeyx(monkeyx.SOCKET, "renew(): no pattern data received")

        if version is not None:
            self.patterns_ver = version

        # truncate and open the current patterns database for writing.
        try:
            db = open(self.db_patterns, 'w')
        except (IOError, OSError):
            raise Exception("unable to open patterns database for writing: %s"
                            % self.db_patterns)

        # write the new patterns database; always release the handle.
        try:
            db.writelines(fresh)
        finally:
            db.close()

        # reload the patterns database into memory.
        try:
            self.load_db_patterns()
        except Exception:
            raise monkeyx(monkeyx.LOAD, self.db_patterns)


    ############################################################################
    ### scrape_auction_end()
    ###
    ### determines how much time is left on an auction, how much the auction is
    ### currently at and what currency it is in. time left is stored by days,
    ### hours, minutes and seconds in appropriate member variables as well as
    ### total time in seconds in ttl_secs.
    ###
    ### requires: item_number.
    ### raises:   monkeyx.EXPECTED if required member var's are uninitialized.
    ###           monkeyx.SOCKET if curl fails.
    ###           monkeyx.BIDDING if ebay returns an error.
    ###           monkeyx.PARSING if unable to parse data returned by ebay.
    ###
    def scrape_auction_end(self):
        """
        Scrape the auction page for the time left, current price and
        currency of self.item_number.

        Stores the title (if not already known), currency and price, the
        time left split into tl_days / tl_hours / tl_mins / tl_secs, and the
        total time left in seconds in ttl_secs.

        requires: item_number.

        raises:
            monkeyx(monkeyx.EXPECTED, ...) if item_number is not set.
            monkeyx(monkeyx.SOCKET, ...) if the curl transfer fails.
            monkeyx(monkeyx.BIDDING, ...) if the auction is over, the item
            is not found, or max_bid is not above the current price.
            monkeyx(monkeyx.PARSING, ...) if no price could be extracted.
        """
        flag          = ""
        time_left     = ""
        self.price    = 0
        self.tl_days  = 0
        self.tl_hours = 0
        self.tl_mins  = 0
        self.tl_secs  = 0

        # check for required variables.
        if not self.item_number: raise monkeyx(monkeyx.EXPECTED, "item_number")

        url  = self.patterns['url_auction_end']
        url += "&item=" + str(self.item_number)

        # curl appends every response to self.data, so empty the buffer
        # first; otherwise pages from earlier requests are parsed again.
        self.data.seek(0)
        self.data.truncate(0)

        # a previous POST (sign in / place bid) leaves the handle in POST
        # mode with its old form data; force this request back to GET.
        self.curl.setopt(pycurl.HTTPGET, 1)
        self.curl.setopt(pycurl.URL, url)

        try:
            self.curl.perform()
        except pycurl.error:
            raise monkeyx(monkeyx.SOCKET, "scrape_auction_end()")

        # step through and parse the returned data line by line.
        for line in self.data.getvalue().split("\n"):
            # auction is already over.
            if re.search(self.patterns['auction_over'], line, re.I):
                raise monkeyx(monkeyx.BIDDING, "auction has ended")

            # item number could not be found.
            if re.search(self.patterns['item_not_found'], line, re.I):
                raise monkeyx(monkeyx.BIDDING, "item number not found")

            # extract and store the auction title if we don't already have it.
            if self.title == "":
                matches = re.search(self.patterns['auction_title'], line, re.I)
                if matches:
                    self.title = matches.group('title')

            # recording triggers.
            if re.search(self.patterns['time_left_flag'], line, re.I):
                flag = "time left"

            if re.search(self.patterns['price/currency_flag'], line, re.I):
                flag = "price/currency"

            # extract the "time left" data.
            if flag == "time left":
                if re.search(self.patterns['time_left'], line, re.I):
                    time_left = line
                    flag = ""

                    # extract the days, hours, minutes, and seconds left on
                    # the auction. this is done right here, before the
                    # stop_recording check, so the break below can no
                    # longer skip it.
                    for unit in ("days", "hours", "mins", "secs"):
                        matches = re.search(self.patterns[unit], time_left, re.I)
                        if matches:
                            setattr(self, "tl_" + unit, int(matches.group(unit)))

                    # we have all the information we need so stop recording.
                    if re.search(self.patterns['stop_recording'], line, re.I):
                        break

            # extract the current price and currency.
            if flag == "price/currency" and not self.price:
                # commas get in the way when prices are > $999.
                line = line.replace(",", "")

                matches = re.search(self.patterns['price/currency'], line, re.I)
                if matches:
                    self.currency = matches.group('currency')
                    self.price    = matches.group('price')
                    flag = ""

        # calculate the total time left on the auction in seconds.
        self.ttl_secs = (self.tl_days  * 86400 +
                         self.tl_hours * 3600  +
                         self.tl_mins  * 60    +
                         self.tl_secs)

        # if we haven't successfully extracted the price at this point then
        # we have a problem.
        if self.price == 0:
            msg = "could not extract price information"
            raise monkeyx(monkeyx.PARSING, msg)

        # make sure that the user provided maximum bid is greater than the
        # current price of the item.
        if float(self.max_bid) <= float(self.price):
            xmsg  = "maximum bid too low. bid more then "
            xmsg += self.currency
            xmsg += " "
            xmsg += self.price
            raise monkeyx(monkeyx.BIDDING, xmsg)


    ############################################################################
    ### scrape_place_bid()
    ###
    ### step 2 in bid placing process. places a bid of max_bid on item_number.
    ###
    ### requires: item_number, max_bid, ebay_key, username, password.
    ### raises:   monkeyx.EXPECTED if required member var's are uninitialized.
    ###           monkeyx.SOCKET if curl fails.
    ###           monkeyx.BIDDING if ebay returns an error.
    ###
    def scrape_place_bid(self):
        """
        Step 2 of the bid placing process: post a bid of max_bid on
        item_number and scan the response for a known outcome.

        Returns normally when the "current high bidder" pattern is found,
        and also when none of the known patterns appear in the response.

        requires: item_number, max_bid, ebay_key, username, password.

        raises:
            monkeyx(monkeyx.EXPECTED, ...) if a required member is not set.
            monkeyx(monkeyx.SOCKET, ...) if the curl transfer fails.
            monkeyx(monkeyx.BIDDING, ...) if ebay reports being outbid or a
            bid that is too low.
        """
        # check for required variables.
        if not self.item_number: raise monkeyx(monkeyx.EXPECTED, "item_number")
        if not self.max_bid:     raise monkeyx(monkeyx.EXPECTED, "max_bid")
        if not self.ebay_key:    raise monkeyx(monkeyx.EXPECTED, "ebay_key")
        if not self.username:    raise monkeyx(monkeyx.EXPECTED, "username")
        if not self.password:    raise monkeyx(monkeyx.EXPECTED, "password")

        # prepare the post data for the bid form.
        # NOTE(review): ebay_key is required above but is not part of the
        # form that is posted - confirm whether that is intended.
        bid_form_seq = [
            ('MfcISAPICommand', 'MakeBid'),
            ('item',            self.item_number),
            ('maxbid',          self.max_bid),
            ('quant',           self.quantity),
            ('user',            self.username),
            ('pass',            self.password_enc),
            ('mode',            '1')]

        bid_form_data = urllib.urlencode(bid_form_seq)

        # curl appends every response to self.data, so empty the buffer
        # first; otherwise pages from earlier requests are parsed again.
        self.data.seek(0)
        self.data.truncate(0)

        # place the bid.
        self.curl.setopt(pycurl.POSTFIELDS, bid_form_data)
        self.curl.setopt(pycurl.URL, self.patterns['url_place_bid'])

        try:
            self.curl.perform()
        except pycurl.error:
            raise monkeyx(monkeyx.SOCKET, "scrape_place_bid()")

        # step through and parse the returned data line by line.
        for line in self.data.getvalue().split("\n"):
            # failures:
            if re.search(self.patterns['outbid'], line, re.I):
                raise monkeyx(monkeyx.BIDDING, "you have been outbid")

            matches = re.search(self.patterns['low_bid_2'], line, re.I)
            if matches:
                xmsg  = "maximum bid too low. bid more then "
                xmsg += matches.group('price')
                raise monkeyx(monkeyx.BIDDING, xmsg)

            # success:
            if re.search(self.patterns['current_high_bidder'], line, re.I):
                break


    ############################################################################
    ### scrape_review_bid()
    ###
    ### step 1 in bid placing process. puts up an offer and checks for errors.
    ###
    ### requires: item_number, max_bid.
    ### raises:   monkeyx.EXPECTED if required member var's are uninitialized.
    ###           monkeyx.SOCKET if curl fails.
    ###           monkeyx.BIDDING if ebay returns an error.
    ###           monkeyx.PARSING if ebay key could not be found
    ###
    def scrape_review_bid(self):
        """
        Step 1 of the bid placing process: request the bid review page for
        item_number / max_bid and extract the ebay key and the encoded
        password from it (stored in ebay_key and password_enc).

        requires: item_number, max_bid.

        raises:
            monkeyx(monkeyx.EXPECTED, ...) if a required member is not set.
            monkeyx(monkeyx.SOCKET, ...) if the curl transfer fails.
            monkeyx(monkeyx.BIDDING, ...) if ebay reports the bid as too low.
            monkeyx(monkeyx.PARSING, ...) if the ebay key or the encoded
            password is still unset after parsing.
        """
        # check for required variables.
        if not self.item_number: raise monkeyx(monkeyx.EXPECTED, "item_number")
        if not self.max_bid:     raise monkeyx(monkeyx.EXPECTED, "max_bid")

        # construct url and retrieve the page. max_bid goes through str()
        # because set_max_bid() stores whatever it is given, and a numeric
        # value can not be concatenated to a string.
        url  = self.patterns['url_review_bid']
        url += "&item="   + str(self.item_number)
        url += "&maxbid=" + str(self.max_bid)

        # curl appends every response to self.data, so empty the buffer
        # first; otherwise pages from earlier requests are parsed again.
        self.data.seek(0)
        self.data.truncate(0)

        # a previous POST (sign in / place bid) leaves the handle in POST
        # mode with its old form data; force this request back to GET.
        self.curl.setopt(pycurl.HTTPGET, 1)
        self.curl.setopt(pycurl.URL, url)

        try:
            self.curl.perform()
        except pycurl.error:
            raise monkeyx(monkeyx.SOCKET, "scrape_review_bid()")

        # step through and parse the returned data line by line.
        for line in self.data.getvalue().split("\n"):
            matches = re.search(self.patterns['low_bid_1'], line, re.I)
            if matches:
                xmsg  = "maximum bid too low. bid more then "
                xmsg += matches.group('price')
                raise monkeyx(monkeyx.BIDDING, xmsg)

            # extract the ebay key.
            matches = re.search(self.patterns['ebay_key'], line, re.I)
            if matches:
                self.ebay_key = matches.group('key')

            # extract the encoded password.
            matches = re.search(self.patterns['pass_enc'], line, re.I)
            if matches:
                self.password_enc = matches.group('pass_enc')

        # if we haven't successfully extracted the ebay key or encoded password
        # then we can't continue.
        if not self.ebay_key:
            raise monkeyx(monkeyx.PARSING, "ebay key not found")

        if not self.password_enc:
            raise monkeyx(monkeyx.PARSING, "ebay encoded password not found")


    ############################################################################
    ### scrape_sign_in()
    ###
    ### attempt to sign into "my ebay". this routine is mainly used to determine
    ### if the supplied credentials are correct.
    ###
    ### requires: username, password.
    ### raises:   monkeyx.EXPECTED if required member var's are uninitialized.
    ###           monkeyx.SOCKET if curl fails.
    ###           monkeyx.BIDDING if ebay returns an error.
    ###
    def scrape_sign_in(self):
        """
        Attempt to sign into "my ebay" with username / password. Mainly
        used to find out whether the supplied credentials are correct.

        Returns immediately when signed_in is already set. On success
        signed_in is set to 1.

        requires: username, password.

        raises:
            monkeyx(monkeyx.EXPECTED, ...) if a required member is not set.
            monkeyx(monkeyx.SOCKET, ...) if a curl transfer fails.
            monkeyx(monkeyx.BIDDING, ...) if the good sign in pattern is not
            found in the responses.
        """
        # check for required variables.
        if not self.username: raise monkeyx(monkeyx.EXPECTED, "username")
        if not self.password: raise monkeyx(monkeyx.EXPECTED, "password")

        # if we are already signed in, then return.
        if self.signed_in:
            return

        # curl appends every response to self.data, so empty the buffer
        # once up front; otherwise a page from an earlier request could
        # satisfy the good sign in check below. both responses of this call
        # are still parsed together.
        self.data.seek(0)
        self.data.truncate(0)

        # go to the first login screen. a previous POST leaves the handle
        # in POST mode with its old form data, so force a GET here.
        self.curl.setopt(pycurl.HTTPGET, 1)
        self.curl.setopt(pycurl.URL, self.patterns['url_sign_in_1'])

        try:
            self.curl.perform()
        except pycurl.error:
            raise monkeyx(monkeyx.SOCKET, "scrape_sign_in()")

        # prepare the post data for the login form.
        login_form_seq = [
            ('MfcISAPICommand',    'SignInWelcome'),
            ('siteid',             '0'),
            ('co_partnerId',       '2'),
            ('UsingSSL',           '1'),
            ('userid',             self.username),
            ('pass',               self.password),
            ('keepMeSignInOption', 'on')]

        login_form_data = urllib.urlencode(login_form_seq)

        # login.
        self.curl.setopt(pycurl.POSTFIELDS, login_form_data)
        self.curl.setopt(pycurl.URL, self.patterns['url_sign_in_2'])

        try:
            self.curl.perform()
        except pycurl.error:
            raise monkeyx(monkeyx.SOCKET, "scrape_sign_in()")

        # step through and parse the returned data line by line.
        for line in self.data.getvalue().split("\n"):
            # success:
            if re.search(self.patterns['good_sign_in'], line, re.I):
                self.signed_in = 1
                return

        raise monkeyx(monkeyx.BIDDING, "invalid username or password")


    ############################################################################
    ### setters - for accessing internal variables.
    ###
    def set_currency(self, value):
        """Store the currency of the item."""
        self.currency = value

    def set_headers(self):
        """Re-select a user agent, which also refreshes the headers."""
        self.set_user_agent()

    def set_item_number(self, value):
        """Store the item number to bid on."""
        self.item_number = value

    def set_max_bid(self, value):
        """Store the maximum bid."""
        self.max_bid = value

    def set_password(self, value):
        """Store the ebay password."""
        self.password = value

    def set_price(self, value):
        """Store the current price."""
        self.price = value

    def set_quantity(self, value):
        """Store the quantity to bid on."""
        self.quantity = value

    def set_safety(self, value):
        """Store the safety time in seconds."""
        self.safety = value

    def set_title(self, value):
        """Store the auction title."""
        self.title = value

    def set_tl_days(self, value):
        """Store the days left."""
        self.tl_days = value

    def set_tl_hours(self, value):
        """Store the hours left."""
        self.tl_hours = value

    def set_tl_mins(self, value):
        """Store the minutes left."""
        self.tl_mins = value

    def set_tl_secs(self, value):
        """Store the seconds left."""
        self.tl_secs = value

    def set_username(self, value):
        """Store the ebay username."""
        self.username = value

    # randomly choose a user agent and update the internal variables.
    def set_user_agent(self):
        """
        Randomly choose a user agent, honoring the 'weight' of each entry,
        and store its headers / description in self.headers and
        self.user_agent.

        The roll is taken over the sum of all weights, so the weights do
        not have to add up to 100. With an empty user agents list the
        current headers and user agent are left untouched. If no entry has
        a positive weight the choice is uniform.
        """
        # nothing to choose from: keep whatever is currently set instead of
        # failing with an IndexError.
        if not self.user_agents:
            return

        total = sum(agent['weight'] for agent in self.user_agents)

        if total <= 0:
            chosen = random.choice(self.user_agents)
        else:
            # roll within 1..total and walk the cumulative weights; an
            # entry with weight w owns exactly w of the possible rolls.
            rand    = random.randint(1, total)
            running = 0
            chosen  = self.user_agents[-1]

            for agent in self.user_agents:
                running += agent['weight']
                if rand <= running:
                    chosen = agent
                    break

        self.headers    = chosen['headers']
        self.user_agent = chosen['description']