From 90131c21245b7d2d2453a63f6f98a2b9b74c2f1f Mon Sep 17 00:00:00 2001 From: Saul Date: Mon, 21 Jul 2014 16:48:28 -0400 Subject: [PATCH 1/6] pep8 formatting corrections for python version --- pyforget/distribution.py | 40 +++++++++++++++++++++------------------- pyforget/forget_table.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/pyforget/distribution.py b/pyforget/distribution.py index 78e2819..5240ed1 100644 --- a/pyforget/distribution.py +++ b/pyforget/distribution.py @@ -1,14 +1,15 @@ import numpy as np -import logging import time import redis + r = redis.StrictRedis( 'localhost', port=6379, db=2 ) + def interleave_izip(*iterables): # interleave_izip('ABCD', 'xy') --> A x B y iterators = map(iter, iterables) @@ -16,8 +17,10 @@ def interleave_izip(*iterables): for i in iterators: yield i.next() + class Distribution(object): - def __init__(self,k): + + def __init__(self, k): self.k = k def decay(self, rate=0.02): @@ -25,36 +28,36 @@ def decay(self, rate=0.02): returns the amount to decay each bin by """ t = int(time.time()) - tau = t-self.last_updated + tau = t - self.last_updated rates = [v * rate * tau for v in self.values] y = np.random.poisson(rates) - return y,t + return y, t - def incr(self,bin): + def incr(self, bin): """ on an event, update the sorted set and the normalizing constant """ r.zincrby(self.k, bin) - a = r.incr(self.k+"_z") + a = r.incr(self.k + "_z") if a == 1: - # this catches the situtation where we've never seen the + # this catches the situtation where we've never seen the # the key before, setting t to the time of the initial write - r.set(self.k+'_t', int(time.time())) + r.set(self.k + '_t', int(time.time())) def __str__(self): - return str(dict(zip(self.keys,self.values))) + return str(dict(zip(self.keys, self.values))) def decrement(self): # check this distribution exists to decrement if not r.exists(self.k): raise KeyError('Cannot find distribution in Redis') # get the 
currently stored data - self.keys, self.values = zip(*r.zrevrange(self.k,0,-1,withscores=True)) - self.z = r.get(self.k+"_z") + self.keys, self.values = zip(*r.zrevrange(self.k, 0, -1, withscores=True)) + self.z = r.get(self.k + "_z") self.n = len(self.values) - self.last_updated = int(r.get(self.k+"_t")) + self.last_updated = int(r.get(self.k + "_t")) # get the amount to decay by - y,t = self.decay() + y, t = self.decay() # decay values by y self.values -= y self.values[self.values <= 0] = 1 @@ -62,11 +65,11 @@ def decrement(self): self.z = int(self.values.sum()) # build multi call pipeline = r.pipeline() - pipeline.watch(self.k, self.k+'_t', self.k+'_z') + pipeline.watch(self.k, self.k + '_t', self.k + '_z') pipeline.multi() - pipeline.zadd(self.k, *interleave_izip(self.values, self.keys)) - pipeline.set(self.k+'_t', t) - pipeline.set(self.k+'_z', self.z) + pipeline.zadd(self.k, *interleave_izip(self.values, self.keys)) + pipeline.set(self.k + '_t', t) + pipeline.set(self.k + '_z', self.z) try: # try to excute pipeline.execute() @@ -75,7 +78,7 @@ def decrement(self): def get_dist(self): self.decrement() - normalised = dict([(k, v/self.z) for k,v in zip(self.keys, self.values)]) + normalised = dict([(k, v / self.z) for k, v in zip(self.keys, self.values)]) return normalised def get_bin(self, bin): @@ -85,4 +88,3 @@ def get_bin(self, bin): except ValueError: raise ValueError('bin not in distribution') return out - diff --git a/pyforget/forget_table.py b/pyforget/forget_table.py index 147520c..f0369ab 100644 --- a/pyforget/forget_table.py +++ b/pyforget/forget_table.py @@ -2,14 +2,17 @@ import tornado.web import tornado.httpserver import tornado.ioloop + from distribution import Distribution + class Application(tornado.web.Application): + def __init__(self): app_settings = { 'debug': True, - "autoescape" : None, + "autoescape": None, } handlers = [ @@ -20,60 +23,69 @@ def __init__(self): ] tornado.web.Application.__init__(self, handlers, **app_settings) + class 
PingHandler(tornado.web.RequestHandler): + def get(self): self.finish('OK') + def head(self): self.finish('OK') + class IncrHandler(tornado.web.RequestHandler): + def get(self): key = self.get_argument('key') bin = self.get_argument('bin') Distribution(key).incr(bin) + class GetHandler(tornado.web.RequestHandler): + def get(self): key = self.get_argument('key') bin = self.get_argument('bin') try: self.finish({ - "status_code":200, - "data":[{ + "status_code": 200, + "data": [{ "bin": bin, "probability": Distribution(key).get_bin(bin) }] }) except ValueError: self.finish({ - "status_code":404, - "data":[], + "status_code": 404, + "data": [], "error_message": "Could not find bin in distribution" }) except KeyError: self.finish({ - "status_code":404, - "data":[], + "status_code": 404, + "data": [], "error_message": "Could not find distribution in Forget Table" }) + class DistHandler(tornado.web.RequestHandler): + def get(self): key = self.get_argument('key') try: dist = Distribution(key).get_dist() except KeyError: return self.finish({ - "status_code":404, - "data":[], + "status_code": 404, + "data": [], "error_message": "Could not find distribution in Forget Table" }) return self.finish({ - "status_code":200, - "data":[{ - "bin":key, - "probability":value - } for key,value in dist.iteritems()] + "status_code": 200, + "data": [{ + "bin": key, + "probability": value + } for key, value in dist.iteritems()] }) if __name__ == "__main__": From 4543c4711782fab0f60bc165d55b3ab0589d03cf Mon Sep 17 00:00:00 2001 From: Saul Date: Mon, 21 Jul 2014 17:00:09 -0400 Subject: [PATCH 2/6] allow using custom redis backend for python version --- pyforget/distribution.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pyforget/distribution.py b/pyforget/distribution.py index 5240ed1..04ec02c 100644 --- a/pyforget/distribution.py +++ b/pyforget/distribution.py @@ -20,9 +20,12 @@ def interleave_izip(*iterables): class Distribution(object): - def 
__init__(self, k): + def __init__(self, k, redis=None): self.k = k + if not redis: + self.redis = r + def decay(self, rate=0.02): """ returns the amount to decay each bin by @@ -37,25 +40,25 @@ def incr(self, bin): """ on an event, update the sorted set and the normalizing constant """ - r.zincrby(self.k, bin) - a = r.incr(self.k + "_z") + self.redis.zincrby(self.k, bin) + a = self.redis.incr(self.k + "_z") if a == 1: # this catches the situtation where we've never seen the # the key before, setting t to the time of the initial write - r.set(self.k + '_t', int(time.time())) + self.redis.set(self.k + '_t', int(time.time())) def __str__(self): return str(dict(zip(self.keys, self.values))) def decrement(self): # check this distribution exists to decrement - if not r.exists(self.k): + if not self.redis.exists(self.k): raise KeyError('Cannot find distribution in Redis') # get the currently stored data - self.keys, self.values = zip(*r.zrevrange(self.k, 0, -1, withscores=True)) - self.z = r.get(self.k + "_z") + self.keys, self.values = zip(*self.redis.zrevrange(self.k, 0, -1, withscores=True)) + self.z = self.redis.get(self.k + "_z") self.n = len(self.values) - self.last_updated = int(r.get(self.k + "_t")) + self.last_updated = int(self.redis.get(self.k + "_t")) # get the amount to decay by y, t = self.decay() # decay values by y @@ -64,7 +67,7 @@ def decrement(self): # normalizing constant self.z = int(self.values.sum()) # build multi call - pipeline = r.pipeline() + pipeline = self.redis.pipeline() pipeline.watch(self.k, self.k + '_t', self.k + '_z') pipeline.multi() pipeline.zadd(self.k, *interleave_izip(self.values, self.keys)) From b4b4b18c1968001495573d39827f1cdf04117093 Mon Sep 17 00:00:00 2001 From: Saul Date: Mon, 21 Jul 2014 17:03:40 -0400 Subject: [PATCH 3/6] allow custom rate in python backend --- pyforget/distribution.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyforget/distribution.py b/pyforget/distribution.py index 
04ec02c..78efe86 100644 --- a/pyforget/distribution.py +++ b/pyforget/distribution.py @@ -20,19 +20,19 @@ def interleave_izip(*iterables): class Distribution(object): - def __init__(self, k, redis=None): + def __init__(self, k, redis=None, rate=0.02): self.k = k - + self.rate = rate if not redis: self.redis = r - def decay(self, rate=0.02): + def decay(self): """ returns the amount to decay each bin by """ t = int(time.time()) tau = t - self.last_updated - rates = [v * rate * tau for v in self.values] + rates = [v * self.rate * tau for v in self.values] y = np.random.poisson(rates) return y, t From 986249fb8331a0c26bbf1c2ee7171ae9d603bdac Mon Sep 17 00:00:00 2001 From: Saul Date: Tue, 22 Jul 2014 10:20:03 -0400 Subject: [PATCH 4/6] Make pyforget a python package --- pyforget/MANIFEST.in | 1 + pyforget/forgettable/__init__.py | 0 pyforget/{ => forgettable}/distribution.py | 19 ++++++++-------- .../server.py} | 9 ++++++-- pyforget/readme.md | 5 ++++- pyforget/setup.py | 22 +++++++++++++++++++ 6 files changed, 43 insertions(+), 13 deletions(-) create mode 100644 pyforget/MANIFEST.in create mode 100644 pyforget/forgettable/__init__.py rename pyforget/{ => forgettable}/distribution.py (89%) rename pyforget/{forget_table.py => forgettable/server.py} (97%) create mode 100644 pyforget/setup.py diff --git a/pyforget/MANIFEST.in b/pyforget/MANIFEST.in new file mode 100644 index 0000000..1a69f4c --- /dev/null +++ b/pyforget/MANIFEST.in @@ -0,0 +1 @@ +include readme.md diff --git a/pyforget/forgettable/__init__.py b/pyforget/forgettable/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyforget/distribution.py b/pyforget/forgettable/distribution.py similarity index 89% rename from pyforget/distribution.py rename to pyforget/forgettable/distribution.py index 78efe86..e13d9a3 100644 --- a/pyforget/distribution.py +++ b/pyforget/forgettable/distribution.py @@ -1,13 +1,7 @@ import numpy as np import time import redis - - -r = redis.StrictRedis( - 'localhost', - 
port=6379, - db=2 -) +import os def interleave_izip(*iterables): @@ -20,11 +14,16 @@ def interleave_izip(*iterables): class Distribution(object): - def __init__(self, k, redis=None, rate=0.02): + def __init__(self, k, redis_client=None, rate=0.02): self.k = k self.rate = rate - if not redis: - self.redis = r + if not redis_client: + redis_client = redis.StrictRedis( + os.environ['REDIS_1_PORT_6379_TCP_ADDR'], + port=6379, + db=1 + ) + self.redis = redis_client def decay(self): """ diff --git a/pyforget/forget_table.py b/pyforget/forgettable/server.py similarity index 97% rename from pyforget/forget_table.py rename to pyforget/forgettable/server.py index f0369ab..d640acb 100644 --- a/pyforget/forget_table.py +++ b/pyforget/forgettable/server.py @@ -3,7 +3,7 @@ import tornado.httpserver import tornado.ioloop -from distribution import Distribution +from .distribution import Distribution class Application(tornado.web.Application): @@ -88,9 +88,14 @@ def get(self): } for key, value in dist.iteritems()] }) -if __name__ == "__main__": + +def main(): tornado.options.define("port", default=8000, help="Listen on port", type=int) tornado.options.parse_command_line() http_server = tornado.httpserver.HTTPServer(request_callback=Application()) http_server.listen(tornado.options.options.port, address="0.0.0.0") tornado.ioloop.IOLoop.instance().start() + + +if __name__ == "__main__": + main() diff --git a/pyforget/readme.md b/pyforget/readme.md index c1235c1..bf2f316 100644 --- a/pyforget/readme.md +++ b/pyforget/readme.md @@ -2,7 +2,10 @@ Written by [Mike Dewar](http://twitter.com/mikedewar) and [Micha Gorelick](http://micha.gd/). -To start the service run `python forget-table.py --port=8080` which will start the wrapper. Note that you will need a Redis database running locally on port 6379. Forget Table will write into db 2 by default. +To install run `pip install forgettable`. 
+For the development version run `pip install -e git+https://github.com/bitly/forgettable.git#egg=forgettable&subdirectory=pyforget` + +To start the service run `forgettable --port=8080` which will start the wrapper. Note that you will need a Redis database running locally on port 6379. Forget Table will write into db 2 by default. Upon recieving an event, to increment a bin in a distribution call diff --git a/pyforget/setup.py b/pyforget/setup.py new file mode 100644 index 0000000..d5a6a22 --- /dev/null +++ b/pyforget/setup.py @@ -0,0 +1,22 @@ +from setuptools import setup, find_packages + + +setup( + name="forgettable", + version="0.0.0", + packages=find_packages(), + entry_points={ + 'console_scripts': [ + 'forgettable=forgettable.server:main' + ] + }, + install_requires=[ + 'tornado', + 'numpy', + 'redis', + ], + + long_description=open('readme.md').read(), + author="bitly", + url="https://github.com/bitly/forgettable/tree/master/pyforget", +) From 85aaa0583985b832d870cd44f248b056123d0dec Mon Sep 17 00:00:00 2001 From: Saul Date: Tue, 22 Jul 2014 10:47:55 -0400 Subject: [PATCH 5/6] Don't include the readme in setup.py; it sometimes breaks installation. When I try to pip install this, the readme is sometimes not included, and then the package won't install. I am not sure exactly why.
--- pyforget/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyforget/setup.py b/pyforget/setup.py index d5a6a22..10186d2 100644 --- a/pyforget/setup.py +++ b/pyforget/setup.py @@ -16,7 +16,7 @@ 'redis', ], - long_description=open('readme.md').read(), + #long_description=open('readme.md').read(), author="bitly", url="https://github.com/bitly/forgettable/tree/master/pyforget", ) From 2df1222f51c7fd65a3a2c8c85fd89b81d9d2d051 Mon Sep 17 00:00:00 2001 From: Saul Date: Wed, 23 Jul 2014 01:36:35 -0400 Subject: [PATCH 6/6] Change to published package --- .gitignore | 3 +++ pyforget/readme.md | 1 - pyforget/setup.py | 7 ++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 3cff359..36b4119 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ *.sw[op] *.rdb +pyforget/build/ +pyforget/dist/ +pyforget/*.egg-info/ diff --git a/pyforget/readme.md b/pyforget/readme.md index bf2f316..3035858 100644 --- a/pyforget/readme.md +++ b/pyforget/readme.md @@ -3,7 +3,6 @@ Written by [Mike Dewar](http://twitter.com/mikedewar) and [Micha Gorelick](http://micha.gd/). To install run `pip install forgettable`. -For the development version run `pip install -e git+https://github.com/bitly/forgettable.git#egg=forgettable&subdirectory=pyforget` To start the service run `forgettable --port=8080` which will start the wrapper. Note that you will need a Redis database running locally on port 6379. Forget Table will write into db 2 by default. 
diff --git a/pyforget/setup.py b/pyforget/setup.py index 10186d2..9de8766 100644 --- a/pyforget/setup.py +++ b/pyforget/setup.py @@ -3,7 +3,7 @@ setup( name="forgettable", - version="0.0.0", + version="0.1.0", packages=find_packages(), entry_points={ 'console_scripts': [ @@ -16,7 +16,8 @@ 'redis', ], - #long_description=open('readme.md').read(), - author="bitly", + long_description=open('readme.md').read(), url="https://github.com/bitly/forgettable/tree/master/pyforget", + maintainer="Saul Shanabrook", + maintainer_email="s.shanabrook@gmail.com", )