From d47d556bd7a479c434fe1fa06751003fdc84578b Mon Sep 17 00:00:00 2001
From: Mick Thompson
Date: Sat, 28 Sep 2019 14:53:58 -0700
Subject: [PATCH 1/7] add heroku app for bundling events from api, filtering and posting on S3

---
 events/fetch.py  | 28 ++++++++++++++++++++++++++++
 events/filter.py | 37 +++++++++++++++++++++++++++++++++++++
 fetch-events.sh  | 10 ++++++++++
 requirements.txt |  2 ++
 runtime.txt      |  1 +
 5 files changed, 78 insertions(+)
 create mode 100644 events/fetch.py
 create mode 100644 events/filter.py
 create mode 100755 fetch-events.sh
 create mode 100644 requirements.txt
 create mode 100644 runtime.txt

diff --git a/events/fetch.py b/events/fetch.py
new file mode 100644
index 0000000..1fab3fb
--- /dev/null
+++ b/events/fetch.py
@@ -0,0 +1,28 @@
+import requests
+import time
+import json
+import sys
+
+if len(sys.argv) < 2:
+    print('usage: fetch.py ')
+    exit()
+
+mobilize_api_events_url = 'https://api.mobilize.us/v1/organizations/1316/events?timeslot_start=gte_now&is_virtual=false&per_page=100'
+
+next = mobilize_api_events_url
+
+events = []
+
+while next is not None:
+
+    r = requests.get(next)
+    resp = r.json()
+    events += resp['data']
+    next = resp['next']
+    print('requested {} next {}'.format(r.url, next))
+    # be nice to the api
+    time.sleep(5)
+
+
+with open(sys.argv[1], 'w') as f:
+    f.write(json.dumps(events))
\ No newline at end of file
diff --git a/events/filter.py b/events/filter.py
new file mode 100644
index 0000000..e2f3aba
--- /dev/null
+++ b/events/filter.py
@@ -0,0 +1,37 @@
+import json
+import sys
+
+events = []
+
+if len(sys.argv) < 3:
+    print("usage: filter.py ")
+    exit()
+
+with open(sys.argv[1]) as f:
+    events = json.loads(f.read())
+
+slim_events = []
+
+for e in events:
+
+    # if the event doesnt have a location then we can skip it.
+    if 'location' not in e['location'] or 'latitude' not in e['location']['location'] or e['location']['location']['latitude'] is None:
+        continue
+
+    # remove attributes we arent going to display.
+    del e['location']['congressional_district']
+    del e['location']['state_leg_district']
+    del e['location']['state_senate_district']
+
+    slim_events.append({
+        'browser_url': e['browser_url'],
+        'location': e['location'],
+        'title': e['title'],
+        'timeslots': e['timeslots'][0:3],
+        'timeslot_count': len(e['timeslots']),
+        'featured_image_url': e['featured_image_url'],
+        'event_type': e['event_type']
+    })
+
+with open(sys.argv[2], 'w') as f:
+    f.write(json.dumps(slim_events))
\ No newline at end of file
diff --git a/fetch-events.sh b/fetch-events.sh
new file mode 100755
index 0000000..ce4d7e8
--- /dev/null
+++ b/fetch-events.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+
+set -ex
+
+python3 events/fetch.py all-events.json
+
+python3 events/filter.py all-events.json events.json
+
+aws s3 cp events.json $S3_PATH/events.json
+
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f710b3a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests==2.22.0
+awscli==1.16.248
\ No newline at end of file
diff --git a/runtime.txt b/runtime.txt
new file mode 100644
index 0000000..2af1a62
--- /dev/null
+++ b/runtime.txt
@@ -0,0 +1 @@
+python-3.7.3
\ No newline at end of file

From 31fd42ce0d7ca7b76fc17d31ad833e7bcc91650c Mon Sep 17 00:00:00 2001
From: Mick Thompson
Date: Sat, 28 Sep 2019 17:58:05 -0700
Subject: [PATCH 2/7] add app.json

---
 app.json | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 app.json

diff --git a/app.json b/app.json
new file mode 100644
index 0000000..912e17b
--- /dev/null
+++ b/app.json
@@ -0,0 +1,5 @@
+{
+    "name": "warren-events",
+    "description": "Bundles warren events to download in bulk",
+    "image": "heroku/python"
+  }
\ No newline at end of file

From 7335d86ac04ba8b7920f688d888df1cc0aee22a3 Mon Sep 17 00:00:00 2001
From: Mick Thompson
Date: Sat, 28 Sep 2019 18:14:55 -0700
Subject: [PATCH 3/7] ignore some files that are confusing heroku

---
 .slugignore | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 .slugignore

diff --git a/.slugignore b/.slugignore
new file mode 100644
index 0000000..fbd429e
--- /dev/null
+++ b/.slugignore
@@ -0,0 +1,3 @@
+package.json
+package-lock.json
+yarn.lock
\ No newline at end of file

From f29d0c355a397bf6011686c7fb99360a6c4ede9c Mon Sep 17 00:00:00 2001
From: Mick Thompson
Date: Sat, 28 Sep 2019 18:17:19 -0700
Subject: [PATCH 4/7] app.json didnt help

---
 app.json | 5 -----
 1 file changed, 5 deletions(-)
 delete mode 100644 app.json

diff --git a/app.json b/app.json
deleted file mode 100644
index 912e17b..0000000
--- a/app.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-    "name": "warren-events",
-    "description": "Bundles warren events to download in bulk",
-    "image": "heroku/python"
-  }
\ No newline at end of file

From 2ef32aa0dd027ac287c4c1d2920601131e2f9743 Mon Sep 17 00:00:00 2001
From: Mick Thompson
Date: Sat, 28 Sep 2019 18:55:25 -0700
Subject: [PATCH 5/7] gzip json events file

---
 fetch-events.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fetch-events.sh b/fetch-events.sh
index ce4d7e8..ec48eac 100755
--- a/fetch-events.sh
+++ b/fetch-events.sh
@@ -6,5 +6,7 @@ python3 events/fetch.py all-events.json
 
 python3 events/filter.py all-events.json events.json
 
-aws s3 cp events.json $S3_PATH/events.json
+gzip events.json
+
+aws s3 cp events.json.gz $S3_PATH/events.json --acl public-read --metadata "Content-Encoding=gzip,Content-Type=application/json"
 

From 4ffa5686587408d71176fab7919eeb0791bcb8e9 Mon Sep 17 00:00:00 2001
From: Mick Thompson
Date: Mon, 30 Sep 2019 12:15:43 -0700
Subject: [PATCH 6/7] properly set content-encoding and content-type

---
 fetch-events.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fetch-events.sh b/fetch-events.sh
index ec48eac..01b3115 100755
--- a/fetch-events.sh
+++ b/fetch-events.sh
@@ -8,5 +8,5 @@ python3 events/filter.py all-events.json events.json
 
 gzip events.json
 
-aws s3 cp events.json.gz $S3_PATH/events.json --acl public-read --metadata "Content-Encoding=gzip,Content-Type=application/json"
+aws s3 cp events.json.gz $S3_PATH/events.json --acl public-read --content-encoding gzip --content-type "application/json"
 

From 6caffdc077373419a80c78ca1ed10239e0cde837 Mon Sep 17 00:00:00 2001
From: Mick Thompson
Date: Fri, 4 Oct 2019 14:56:49 -0700
Subject: [PATCH 7/7] keep event ids

---
 events/filter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/events/filter.py b/events/filter.py
index e2f3aba..dd13f70 100644
--- a/events/filter.py
+++ b/events/filter.py
@@ -24,6 +24,7 @@
     del e['location']['state_senate_district']
 
     slim_events.append({
+        'id': e['id'],
         'browser_url': e['browser_url'],
         'location': e['location'],
         'title': e['title'],