From ae6773fd9a0d952ecac2d338c90489bb4bfb0d4c Mon Sep 17 00:00:00 2001 From: Peter Wagenaar Date: Tue, 22 May 2012 13:10:15 +0200 Subject: [PATCH 1/6] adds some improvements to the original - use commandline arguments to search - pretty print a table of results - provide way for user to get magnet links --- pytpb/pytpb.py | 264 ++++++++++++++++++++++++++----------------------- 1 file changed, 140 insertions(+), 124 deletions(-) mode change 100644 => 100755 pytpb/pytpb.py diff --git a/pytpb/pytpb.py b/pytpb/pytpb.py old mode 100644 new mode 100755 index 276c8d4..53a676d --- a/pytpb/pytpb.py +++ b/pytpb/pytpb.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # encoding: utf-8 # This program is free software; you can redistribute it and/or modify @@ -16,6 +17,7 @@ # MA 02110-1301, USA. import datetime +import sys from urllib import quote_plus from urlparse import urljoin import urllib2 @@ -23,133 +25,147 @@ import lxml.html class SearchResultParser: - def __init__(self, html): - self.doc = lxml.html.parse(html).getroot() - - def parse(self): - row_data = [] - try: - table = self.doc.xpath('//*[@id="searchResult"]')[0] - rows = [row for row in table.iterchildren() if row.tag == 'tr'] - for row in rows: - columns = row.getchildren()[1:] - row_data.append(self.parse_row_columns(columns)) - except: - pass - return row_data - - def parse_row_columns(self, columns): - """Parse the columns of a table row. - - *Returns* - a dictionary with parsed data. - """ - data = {} - data["user_type"] = "standard" - for ele in columns[0].iterchildren(): - if ele.tag == 'div' and ele.get('class') == 'detName': - a = ele.find('a') - data["torrent_info_url"] = urljoin(ele.base, a.get('href')) - data["name"] = a.text_content() - elif ele.tag == 'a': - if ele.get('title') == "Download this torrent": - data["torrent_url"] = ele.get("href") - elif ele.get('title') == "Download this torrent using magnet": - data["magnet_url"] = ele.get("href") - elif ele[0].tag == 'img': - if ele[0].get('title') == "VIP": - data["user_type"] = "VIP" - elif ele[0].get('title') == "Trusted": - data["user_type"] = "trusted" - - elif ele.tag == 'font': - a = ele.find('a') - if a is None: - data['user'] = "Anonymous" - else: - data['user'] = urljoin(ele.base, a.get('href')) - data["uploaded_at"], data["size_of"] = self.process_datetime_string(ele.text_content()) - data['seeders'] = int(columns[1].text_content().strip()) - data['leechers'] = int(columns[2].text_content().strip()) - return data + def __init__(self, html): + self.doc = lxml.html.parse(html).getroot() + + def parse(self): + row_data = [] + try: + table = self.doc.xpath('//*[@id="searchResult"]')[0] + rows = [row for row in table.iterchildren() if row.tag == 'tr'] + for row in rows: + columns = row.getchildren()[1:] + row_data.append(self.parse_row_columns(columns)) + except: + pass + return row_data + + def parse_row_columns(self, columns): + """Parse the columns of a table row. + + *Returns* + a dictionary with parsed data. + """ + data = {} + data["user_type"] = "standard" + for ele in columns[0].iterchildren(): + if ele.tag == 'div' and ele.get('class') == 'detName': + a = ele.find('a') + data["torrent_info_url"] = urljoin(ele.base, a.get('href')) + data["name"] = a.text_content() + elif ele.tag == 'a': + if ele.get('title') == "Download this torrent": + data["torrent_url"] = ele.get("href") + elif ele.get('title') == "Download this torrent using magnet": + data["magnet_url"] = ele.get("href") + elif ele[0].tag == 'img': + if ele[0].get('title') == "VIP": + data["user_type"] = "VIP" + elif ele[0].get('title') == "Trusted": + data["user_type"] = "trusted" + + elif ele.tag == 'font': + a = ele.find('a') + if a is None: + data['user'] = "Anonymous" + else: + data['user'] = urljoin(ele.base, a.get('href')) + data["uploaded_at"], data["size_of"] = self.process_datetime_string(ele.text_content()) + data['seeders'] = int(columns[1].text_content().strip()) + data['leechers'] = int(columns[2].text_content().strip()) + return data - def process_datetime_string(self, string): - """Process the datetime string from a torrent upload. - - *Returns* - Tuple with (datetime, (size, unit)) - """ - def process_datetime(part): - if part.startswith("Today"): - h, m = part.split()[1].split(':') - return datetime.datetime.now().replace( - hour=int(h), minute=int(m)) - elif part.startswith("Y-day"): - h, m = part.split()[1].split(':') - d = datetime.datetime.now() - return d.replace( - hour=int(h), minute=int(m), - day=d.day-1 - ) - elif part.endswith("ago"): - amount, unit = part.split()[:2] - d = datetime.datetime.now() - if unit == "mins": - d = d.replace(minute=d.minute - int(amount)) - return d - else: - d = datetime.datetime.now() - if ':' in part: - current_date, current_time = part.split() - h, m = current_time.split(':') - month, day = current_date.split('-') - d = d.replace(hour=int(h), minute=int(m), month=int(month), day=int(day)) - else: - current_date, year = part.split() - month, day = current_date.split('-') - d = d.replace(year=int(year), month=int(month), day=int(day)) - return d - def process_size(part): - units = {'MiB':1048576, 'GiB': 1073741824} - size, unit = part.split()[1:] - size = float(size) * units[unit] - return int(size) - string = string.replace(u"\xa0", " ") - results = [x.strip() for x in string.split(',')] - date = process_datetime(' '.join(results[0].split()[1:])) - size = process_size(results[1]) - return (date, size) - + def process_datetime_string(self, string): + """Process the datetime string from a torrent upload. + + *Returns* + Tuple with (datetime, (size, unit)) + """ + def process_datetime(part): + if part.startswith("Today"): + h, m = part.split()[1].split(':') + return datetime.datetime.now().replace( + hour=int(h), minute=int(m)) + elif part.startswith("Y-day"): + h, m = part.split()[1].split(':') + d = datetime.datetime.now() + return d.replace( + hour=int(h), minute=int(m), + day=d.day-1 + ) + elif part.endswith("ago"): + amount, unit = part.split()[:2] + d = datetime.datetime.now() + if unit == "mins": + d = d.replace(minute=d.minute - int(amount)) + return d + else: + d = datetime.datetime.now() + if ':' in part: + current_date, current_time = part.split() + h, m = current_time.split(':') + month, day = current_date.split('-') + d = d.replace(hour=int(h), minute=int(m), month=int(month), day=int(day)) + else: + current_date, year = part.split() + month, day = current_date.split('-') + d = d.replace(year=int(year), month=int(month), day=int(day)) + return d + def process_size(part): + units = {'MiB':1048576, 'GiB': 1073741824} + size, unit = part.split()[1:] + size = float(size) * units[unit] + return int(size) + string = string.replace(u"\xa0", " ") + results = [x.strip() for x in string.split(',')] + date = process_datetime(' '.join(results[0].split()[1:])) + size = process_size(results[1]) + return (date, size) + class ThePirateBay: - """Api for the Pirate Bay""" + """Api for the Pirate Bay""" - name = 'The Pirate Bay' - - searchUrl = 'https://thepiratebay.org/search/%s/0/7/%d' - - def __init__(self): - pass - - def search(self, term, cat=None): - if not cat: - cat = 0 - url = self.searchUrl % (quote_plus(term), cat) - - req = urllib2.Request(url) - html = urllib2.urlopen(req) - parser = SearchResultParser(html) - return parser.parse() + name = 'The Pirate Bay' + + searchUrl = 'https://thepiratebay.org/search/%s/0/7/%d' + + def __init__(self): + pass + + def search(self, term, cat=None): + if not cat: + cat = 0 + url = self.searchUrl % (quote_plus(term), cat) + + req = urllib2.Request(url) + html = urllib2.urlopen(req) + parser = SearchResultParser(html) + return parser.parse() if __name__ == '__main__': - def prettySize(size): - suffixes = [("B",2**10), ("K",2**20), ("M",2**30), ("G",2**40), ("T",2**50)] - for suf, lim in suffixes: - if size > lim: - continue - else: - return round(size/float(lim/2**10),2).__str__()+suf - t = ThePirateBay() - for t in t.search('the walking dead'): - print t['name'] + ' ' +str(t['size_of'])+ '////' + str(prettySize(t['size_of'])) - + def prettySize(size): + suffixes = [("B",2**10), ("K",2**20), ("M",2**30), ("G",2**40), ("T",2**50)] + for suf, lim in suffixes: + if size > lim: + continue + else: + return round(size/float(lim/2**10),2).__str__()+suf + t = ThePirateBay() + if sys.argv[1:]: + term = ' '.join(sys.argv[1:]) + else: + term = 'the walking dead' + print 'Searching for "{0}"'.format(term) + + torrents = t.search(term) + maxlen = max(len(x['name']) for x in torrents) + 3 + torrents = sorted(torrents, key=lambda x : int(x['seeders'])) + for i, t in enumerate(t.search(term)): + print '{i:2d}. {name:>{maxlen}} {size:10} : {seeders}'.format(i=i, maxlen=maxlen, name=t['name'], size='(' + prettySize(t['size_of']) + ')', seeders=t['seeders']) + + for num in raw_input("Please provide a comma separated list of magnet links you want to print: ").split(','): + try: + print int(num), '\n', torrents[int(num)]['magnet_url'] + except: + continue From 06ed6b60e908b7fed86403c82611334c491c9884 Mon Sep 17 00:00:00 2001 From: Peter Wagenaar Date: Tue, 22 May 2012 13:46:42 +0200 Subject: [PATCH 2/6] adds slightly nicer printing --- pytpb/pytpb.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pytpb/pytpb.py b/pytpb/pytpb.py index 53a676d..cf44974 100755 --- a/pytpb/pytpb.py +++ b/pytpb/pytpb.py @@ -164,8 +164,10 @@ def prettySize(size): for i, t in enumerate(t.search(term)): print '{i:2d}. {name:>{maxlen}} {size:10} : {seeders}'.format(i=i, maxlen=maxlen, name=t['name'], size='(' + prettySize(t['size_of']) + ')', seeders=t['seeders']) - for num in raw_input("Please provide a comma separated list of magnet links you want to print: ").split(','): + for num in raw_input("Please provide a comma separated list of torrents you want to get: ").split(','): try: - print int(num), '\n', torrents[int(num)]['magnet_url'] - except: - continue + t = torrents[int(num)] + print num, ':', t['magnet_url'] + except Exception, e: + print 'Something went wrong:', e + sys.exit() From 4e9218ce603dc10adbf8ee28d9221eec1b0cdb29 Mon Sep 17 00:00:00 2001 From: Peter Wagenaar Date: Tue, 22 May 2012 14:29:46 +0200 Subject: [PATCH 3/6] adds torrent file creation capability (tested with rtorrent only) --- pytpb/pytpb.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pytpb/pytpb.py b/pytpb/pytpb.py index cf44974..0bc50cf 100755 --- a/pytpb/pytpb.py +++ b/pytpb/pytpb.py @@ -151,6 +151,10 @@ def prettySize(size): continue else: return round(size/float(lim/2**10),2).__str__()+suf + + def magnet_to_torrent(uri): + return "d10:magnet-uri" + str(len(uri)) + ':' + uri + 'e' + t = ThePirateBay() if sys.argv[1:]: term = ' '.join(sys.argv[1:]) @@ -160,14 +164,18 @@ def prettySize(size): torrents = t.search(term) maxlen = max(len(x['name']) for x in torrents) + 3 - torrents = sorted(torrents, key=lambda x : int(x['seeders'])) - for i, t in enumerate(t.search(term)): + + for i, t in enumerate(torrents): print '{i:2d}. {name:>{maxlen}} {size:10} : {seeders}'.format(i=i, maxlen=maxlen, name=t['name'], size='(' + prettySize(t['size_of']) + ')', seeders=t['seeders']) for num in raw_input("Please provide a comma separated list of torrents you want to get: ").split(','): try: t = torrents[int(num)] print num, ':', t['magnet_url'] + outname = t['torrent_info_url'][t['torrent_info_url'].rfind('/') + 1:] + '.torrent' + with open(outname, 'w') as out: + out.write(magnet_to_torrent(t['magnet_url'])) + print 'Written magnet URI to file "{name}"'.format(name=outname) except Exception, e: print 'Something went wrong:', e sys.exit() From d731c9ee555cd7a487e31148f4b21908da6953b0 Mon Sep 17 00:00:00 2001 From: Peter Wagenaar Date: Tue, 10 Jul 2012 01:03:48 +0200 Subject: [PATCH 4/6] fixed some PEP8 violations and added some error checking --- pytpb/pytpb.py | 82 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 32 deletions(-) diff --git a/pytpb/pytpb.py b/pytpb/pytpb.py index 0bc50cf..92c05e3 100755 --- a/pytpb/pytpb.py +++ b/pytpb/pytpb.py @@ -5,12 +5,12 @@ # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, @@ -24,10 +24,11 @@ import lxml.html + class SearchResultParser: def __init__(self, html): self.doc = lxml.html.parse(html).getroot() - + def parse(self): row_data = [] try: @@ -39,10 +40,10 @@ def parse(self): except: pass return row_data - + def parse_row_columns(self, columns): """Parse the columns of a table row. - + *Returns* a dictionary with parsed data. """ @@ -63,21 +64,22 @@ def parse_row_columns(self, columns): data["user_type"] = "VIP" elif ele[0].get('title') == "Trusted": data["user_type"] = "trusted" - + elif ele.tag == 'font': a = ele.find('a') if a is None: data['user'] = "Anonymous" else: data['user'] = urljoin(ele.base, a.get('href')) - data["uploaded_at"], data["size_of"] = self.process_datetime_string(ele.text_content()) + data["uploaded_at"], data["size_of"] = \ + self.process_datetime_string(ele.text_content()) data['seeders'] = int(columns[1].text_content().strip()) data['leechers'] = int(columns[2].text_content().strip()) return data def process_datetime_string(self, string): """Process the datetime string from a torrent upload. - + *Returns* Tuple with (datetime, (size, unit)) """ @@ -91,7 +93,7 @@ def process_datetime(part): d = datetime.datetime.now() return d.replace( hour=int(h), minute=int(m), - day=d.day-1 + day=d.day - 1 ) elif part.endswith("ago"): amount, unit = part.split()[:2] @@ -105,14 +107,17 @@ def process_datetime(part): current_date, current_time = part.split() h, m = current_time.split(':') month, day = current_date.split('-') - d = d.replace(hour=int(h), minute=int(m), month=int(month), day=int(day)) + d = d.replace(hour=int(h), minute=int(m), + month=int(month), day=int(day)) else: current_date, year = part.split() month, day = current_date.split('-') - d = d.replace(year=int(year), month=int(month), day=int(day)) + d = d.replace(year=int(year), month=int(month), + day=int(day)) return d + def process_size(part): - units = {'MiB':1048576, 'GiB': 1073741824} + units = {'MiB': 1048576, 'GiB': 1073741824} size, unit = part.split()[1:] size = float(size) * units[unit] return int(size) @@ -121,23 +126,23 @@ def process_size(part): date = process_datetime(' '.join(results[0].split()[1:])) size = process_size(results[1]) return (date, size) - + class ThePirateBay: """Api for the Pirate Bay""" name = 'The Pirate Bay' - + searchUrl = 'https://thepiratebay.org/search/%s/0/7/%d' - + def __init__(self): pass - + def search(self, term, cat=None): if not cat: cat = 0 url = self.searchUrl % (quote_plus(term), cat) - + req = urllib2.Request(url) html = urllib2.urlopen(req) parser = SearchResultParser(html) @@ -145,12 +150,13 @@ def search(self, term, cat=None): if __name__ == '__main__': def prettySize(size): - suffixes = [("B",2**10), ("K",2**20), ("M",2**30), ("G",2**40), ("T",2**50)] + suffixes = [("B", 2 ** 10), ("K", 2 ** 20), ("M", 2 ** 30), + ("G", 2 ** 40), ("T", 2 ** 50)] for suf, lim in suffixes: if size > lim: continue else: - return round(size/float(lim/2**10),2).__str__()+suf + return round(size / float(lim / 2 ** 10), 2).__str__() + suf def magnet_to_torrent(uri): return "d10:magnet-uri" + str(len(uri)) + ':' + uri + 'e' @@ -161,21 +167,33 @@ def magnet_to_torrent(uri): else: term = 'the walking dead' print 'Searching for "{0}"'.format(term) - + torrents = t.search(term) + if not torrents: + print u'No torrents found!' + sys.exit(1) maxlen = max(len(x['name']) for x in torrents) + 3 for i, t in enumerate(torrents): - print '{i:2d}. {name:>{maxlen}} {size:10} : {seeders}'.format(i=i, maxlen=maxlen, name=t['name'], size='(' + prettySize(t['size_of']) + ')', seeders=t['seeders']) + print u'{i:2d}. {name:>{maxlen}} {size:10} : {seeders}'.format(i=i, + maxlen=maxlen, name=t['name'], size='(' + + prettySize(t['size_of']) + ')', seeders=t['seeders']) - for num in raw_input("Please provide a comma separated list of torrents you want to get: ").split(','): - try: - t = torrents[int(num)] - print num, ':', t['magnet_url'] - outname = t['torrent_info_url'][t['torrent_info_url'].rfind('/') + 1:] + '.torrent' - with open(outname, 'w') as out: - out.write(magnet_to_torrent(t['magnet_url'])) - print 'Written magnet URI to file "{name}"'.format(name=outname) - except Exception, e: - print 'Something went wrong:', e - sys.exit() + try: + for num in raw_input(("Please provide a comma separated list of " + "torrents you want to get: ")).split(','): + try: + t = torrents[int(num)] + print num, ':', t['magnet_url'] + outname = t['torrent_info_url'][t['torrent_info_url'].rfind( + '/') + 1:] + '.torrent' + with open(outname, 'w') as out: + out.write(magnet_to_torrent(t['magnet_url'])) + print 'Written magnet URI to file "{name}"'.format( + name=outname) + except Exception, e: + print 'Something went wrong:', e + sys.exit() + except KeyboardInterrupt: + print u'\nExitting' + sys.exit(-1) From 16a995c246e9a0f0bff880568895358da15f6953 Mon Sep 17 00:00:00 2001 From: Peter Wagenaar Date: Sat, 8 Dec 2012 15:03:39 +0100 Subject: [PATCH 5/6] Add 'Uploaded at' line, and date --- pytpb/pytpb.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pytpb/pytpb.py b/pytpb/pytpb.py index 92c05e3..f1afa1d 100755 --- a/pytpb/pytpb.py +++ b/pytpb/pytpb.py @@ -64,7 +64,6 @@ def parse_row_columns(self, columns): data["user_type"] = "VIP" elif ele[0].get('title') == "Trusted": data["user_type"] = "trusted" - elif ele.tag == 'font': a = ele.find('a') if a is None: @@ -174,9 +173,12 @@ def magnet_to_torrent(uri): sys.exit(1) maxlen = max(len(x['name']) for x in torrents) + 3 + print u'{i:>2s}. {uploaded_at:17s} {name:>{maxlen}} {size:10} : {seeders}'.format( + i='#', uploaded_at='Uploaded at', maxlen=maxlen, name='Name', size='Size', + seeders='Seeders') for i, t in enumerate(torrents): - print u'{i:2d}. {name:>{maxlen}} {size:10} : {seeders}'.format(i=i, - maxlen=maxlen, name=t['name'], size='(' + + print u'{i:2d}. {uploaded_at} {name:>{maxlen}} {size:10} : {seeders}'.format(i=i, + uploaded_at=t['uploaded_at'].strftime('%x %X'), maxlen=maxlen, name=t['name'], size='(' + prettySize(t['size_of']) + ')', seeders=t['seeders']) try: From 526f24934dd37b77f14874346f6adde1b7af73f8 Mon Sep 17 00:00:00 2001 From: Peter Wagenaar Date: Sat, 8 Dec 2012 15:03:49 +0100 Subject: [PATCH 6/6] Save to .magnet instead of .torrent file --- pytpb/pytpb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytpb/pytpb.py b/pytpb/pytpb.py index f1afa1d..d50653c 100755 --- a/pytpb/pytpb.py +++ b/pytpb/pytpb.py @@ -188,7 +188,7 @@ def magnet_to_torrent(uri): t = torrents[int(num)] print num, ':', t['magnet_url'] outname = t['torrent_info_url'][t['torrent_info_url'].rfind( - '/') + 1:] + '.torrent' + '/') + 1:] + '.magnet' with open(outname, 'w') as out: out.write(magnet_to_torrent(t['magnet_url'])) print 'Written magnet URI to file "{name}"'.format(