-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathScraperClass.py
More file actions
executable file
·96 lines (79 loc) · 3.64 KB
/
ScraperClass.py
File metadata and controls
executable file
·96 lines (79 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
class Scraper:
    """Pull Mt.Gox BTC/USD trade history into the ``mtgoxUSD`` MySQL table.

    Settings (MySQL credentials, Amazon SES keys and addresses) are read from
    ``config.cfg`` in the current working directory on every scrape.
    """

    # Seconds to wait between failed fetch attempts.
    RETRY_DELAY_SECONDS = 10
    # Failed attempts tolerated before an alert e-mail is sent and the scrape aborts.
    MAX_RETRIES = 10

    # Trade fields, in the order the insert statement supplies them.
    _COLUMNS = ('date', 'price', 'amount', 'price_int', 'amount_int', 'tid',
                'price_currency', 'item', 'trade_type', 'primary', 'properties')

    def __init__(self):
        # Mtgox client of the most recent scrape; created in Scrape_gox_trades
        # because MtgoxClass is only imported at call time.
        self.gox = None

    @staticmethod
    def _trade_row(item):
        """Return one trade dict's values as a tuple ordered like ``_COLUMNS``."""
        return tuple(item[name] for name in Scraper._COLUMNS)

    @staticmethod
    def _send_alert(config, ses_key, response):
        """E-mail the failing response's status and body through Amazon SES."""
        import boto.ses

        conn = boto.ses.connect_to_region('us-east-1',
                                          aws_access_key_id=ses_key[0],
                                          aws_secret_access_key=ses_key[1])
        message = 'Server status code - %s\nServer Response Content - %s' % (
            response.status_code, response.text)
        sender = config.get('ses', 'sender')
        recip = config.get('ses', 'recipient')
        conn.send_email(sender, 'MtGox Scraper ERROR', message, [recip])
        print('sent error email to %s' % (recip,))

    def _fetch_trades(self, gox, since_tid, config, ses_key):
        """Request trades after ``since_tid``, retrying until a usable reply arrives.

        Sleeps ``RETRY_DELAY_SECONDS`` between attempts. Once more than
        ``MAX_RETRIES`` attempts have failed, an alert e-mail is sent and
        ``requests.exceptions.HTTPError`` is raised.
        """
        import time

        import requests

        failures = 0
        while True:
            # The TID is sent as a string built from an int, as the original
            # author notes is required for the URL encoding.
            response = gox.auth('BTCUSD/money/trades/fetch',
                                {'since': str(since_tid)})
            if response.status_code == requests.codes.ok:
                payload = response.json()
                if payload is not None:
                    return payload
            if failures > self.MAX_RETRIES:
                self._send_alert(config, ses_key, response)
                response.raise_for_status()
                # raise_for_status() is silent for non-error statuses (e.g. 204
                # or a JSON ``null`` body); raise explicitly so the loop cannot
                # spin forever re-sending the alert.
                raise requests.exceptions.HTTPError(
                    'MtGox gave no usable response (status %s) after %s retries'
                    % (response.status_code, failures))
            time.sleep(self.RETRY_DELAY_SECONDS)
            failures += 1

    def Scrape_gox_trades(self, tidoverride=None):
        """Fetch trades newer than a TID and insert the unseen ones into ``mtgoxUSD``.

        Per the original author's note, one call pulls at most 1000 rows, so
        callers repeat the call to catch up on a longer history.

        Args:
            tidoverride: TID to fetch from instead of ``max(tid)`` in the table.
                When given (including ``0``), every returned trade is looked up
                in the table before insertion; otherwise a trade is skipped
                when its TID is not greater than the starting TID.

        Returns:
            The number of rows inserted.

        Raises:
            requests.exceptions.HTTPError: the server never produced a usable
                response within the retry budget (an alert e-mail is sent first).
        """
        # Everything is imported up front, as before, so a missing dependency
        # (including the alerting one) fails before any work is done.
        import socket

        import boto.ses  # noqa: F401 - used by _send_alert
        import MySQLdb
        import requests  # noqa: F401 - used by _fetch_trades
        from MtgoxClass import Mtgox
        try:
            import ConfigParser as configparser  # Python 2
        except ImportError:
            import configparser  # Python 3

        config = configparser.ConfigParser()
        config.read('config.cfg')
        self.gox = gox = Mtgox()

        # Read the SES credentials first so a broken config fails early.
        ses_key = (config.get('ses', 'key'), config.get('ses', 'secretkey'))

        # On the database server itself use localhost, otherwise the remote host.
        if config.get('mysql', 'remotehostname') in socket.gethostname():
            host = 'localhost'
        else:
            host = config.get('mysql', 'host')
        db = MySQLdb.connect(host=host,
                             user=config.get('mysql', 'dbuser'),
                             passwd=config.get('mysql', 'dbpwd'),
                             db=config.get('mysql', 'dbname'))

        # ``is not None`` (not truthiness) so that an override of 0 is honoured.
        use_override = tidoverride is not None
        inserted = 0
        try:
            cursor = db.cursor()
            if use_override:
                since_tid = int(tidoverride)
            else:
                cursor.execute('select max(tid) from mtgoxUSD')
                newest = cursor.fetchone()[0]
                # Empty table: max(tid) is NULL. Start from 0 - presumably the
                # API treats that as "from the beginning"; TODO confirm.
                since_tid = int(newest) if newest is not None else 0

            payload = self._fetch_trades(gox, since_tid, config, ses_key)

            for item in payload['data']:
                row = self._trade_row(item)
                tid = row[5]
                if use_override:
                    # An arbitrary starting TID may overlap stored rows, so ask
                    # the table directly.
                    cursor.execute('select tid from mtgoxUSD where tid = %s',
                                   (tid,))
                    duplicate = cursor.fetchone() is not None
                else:
                    # Cheap check without an extra query. Compare as ints: the
                    # raw JSON value may be a string, which would not order
                    # correctly against the numeric starting TID.
                    duplicate = int(tid) <= since_tid

                if duplicate:
                    print('Duplicate TID Found - insert aborted  %s' % (tid,))
                    continue

                # The table's final column is always stored as SQL NULL.
                cursor.execute(
                    'insert into mtgoxUSD values'
                    '(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,NULL)',
                    row)
                # Commit per row so rows already stored survive a later failure.
                db.commit()
                inserted += 1
                print('data inserted! row: %s TID: %s' % (inserted, tid))
        finally:
            # Closing also discards any uncommitted statement after an error.
            db.close()
        return inserted