From 0df1e93939b100e13ce87c23b6b40c6d80aec395 Mon Sep 17 00:00:00 2001 From: Willian Rocha Date: Mon, 10 Jul 2017 13:50:22 -0300 Subject: [PATCH 1/3] atualizado requirements, modificado sintaxe de imports e adaptado codigo para python3 --- packtrack/__init__.py | 6 +++--- packtrack/correios.py | 6 +++--- packtrack/scraping.py | 30 ++++++++++++++---------------- requirements.txt | 9 +++------ tests/correios_api_test.py | 6 +++--- tests/correios_test.py | 2 +- tests/scraping_test.py | 3 ++- 7 files changed, 29 insertions(+), 33 deletions(-) diff --git a/packtrack/__init__.py b/packtrack/__init__.py index 80d4852..a635c9e 100755 --- a/packtrack/__init__.py +++ b/packtrack/__init__.py @@ -1,6 +1,6 @@ -from correios import EncomendaRepository -from royal import RoyalMail -from dhl_gm import DhlGmTracker +from packtrack.correios import EncomendaRepository +from packtrack.royal import RoyalMail +from packtrack.dhl_gm import DhlGmTracker class Correios(object): diff --git a/packtrack/correios.py b/packtrack/correios.py index 4514751..5fcf0ba 100644 --- a/packtrack/correios.py +++ b/packtrack/correios.py @@ -15,7 +15,7 @@ def get(self, numero, auth=None): return func(numero, **kwargs) def _init_scraper(self, backend): - from scraping import CorreiosWebsiteScraper, CorreiosRastroService + from packtrack.scraping import CorreiosWebsiteScraper, CorreiosRastroService if backend is None: backend = 'www2' @@ -36,8 +36,8 @@ def __init__(self, numero): def adicionar_status(self, status): d = datetime self.status.append(status) - t_format = self.validar_data(status.data) - self.status.sort(lambda x, y: 1 if d.strptime(x.data, t_format) > d.strptime(y.data, t_format) else -1) + self.status.sort( + key=lambda x: d.strptime(x.data, self.validar_data(x.data))) def validar_data(self, data): if re.match('^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$', data): diff --git a/packtrack/scraping.py b/packtrack/scraping.py index cf1952d..1576f43 100644 --- a/packtrack/scraping.py +++ b/packtrack/scraping.py @@ -1,15 +1,14 @@ import os import re -from HTMLParser import HTMLParser -from BeautifulSoup import BeautifulSoup +from bs4 import BeautifulSoup import requests from requests.exceptions import RequestException from zeep import Client as Zeep from zeep.cache import InMemoryCache from zeep.transports import Transport -from correios import Encomenda, Status +from packtrack.correios import Encomenda, Status class CorreiosWebsiteScraper(object): @@ -50,7 +49,7 @@ def get_encomenda_info(self, numero): if html: try: - html = html.decode('latin-1') + html = html.encode('latin-1') except UnicodeDecodeError: pass encomenda = Encomenda(numero) @@ -59,20 +58,19 @@ def get_encomenda_info(self, numero): return encomenda def _text(self, value): - value = BeautifulSoup(value.strip()).text - return value.replace(' ', ' ') + value = BeautifulSoup(value.strip(), 'lxml').text + return value.replace(' ', ' ').replace('\xa0',' ') def _get_all_status_from_html(self, html): status = [] - html_parser = HTMLParser() - if ").*', html, re.S) + html_info = re.search(b'.*().*', html, re.S) if not html_info: return status table = html_info.group(1) - soup = BeautifulSoup(table) + soup = BeautifulSoup(table, 'lxml') for tr in soup.table: try: @@ -80,15 +78,15 @@ def _get_all_status_from_html(self, html): except AttributeError: continue for td in tds: - content = td.renderContents().replace('\r', ' ') \ - .split('
') - class_ = td['class'] + content = td.renderContents().replace(b'\r', b' ') \ + .split(b'
') + class_ = td['class'][0] if class_ == 'sroDtEvent': - data = '%s %s' % (content[0].strip(), content[1].strip()) + data = '%s %s' % (content[0].strip().decode(), content[1].strip().decode()) local = '/'.join(self._text(content[2]).rsplit(' / ', 1)).upper() elif class_ == 'sroLbEvent': - situacao = html_parser.unescape(self._text(content[0])) - detalhes = html_parser.unescape(self._text(content[1])) + situacao = self._text(content[0].decode()) + detalhes = self._text(content[1].decode()) if detalhes: detalhes = u'%s %s' % (situacao, detalhes) status.append(Status(data=data, local=local, diff --git a/requirements.txt b/requirements.txt index 5e0d12c..8eb349e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,5 @@ -BeautifulSoup==3.2.1 -argparse==1.2.1 -beautifulsoup4==4.3.2 -lxml==2.3.5 -mockito==0.5.1 +beautifulsoup4==4.6.0 +lxml==3.8.0 +mockito==1.0.12 requests==2.18.1 -wsgiref==0.1.2 zeep==1.6.0 diff --git a/tests/correios_api_test.py b/tests/correios_api_test.py index 9f68a14..5e0dd32 100644 --- a/tests/correios_api_test.py +++ b/tests/correios_api_test.py @@ -1,6 +1,6 @@ import unittest -from mockito import when, Mock +from mockito import when, mock from packtrack import Correios @@ -8,7 +8,7 @@ class CorreiosTest(unittest.TestCase): def test_should_use_repository_to_get_encomenda(self): - encomenda_repository_mock = Mock() + encomenda_repository_mock = mock() when(encomenda_repository_mock).get('123', auth=None) \ .thenReturn('encomenda123') @@ -18,7 +18,7 @@ def test_should_use_repository_to_get_encomenda(self): def test_service_should_receive_auth(self): auth = ('mi', 'mimi') - encomenda_repository_mock = Mock() + encomenda_repository_mock = mock() when(encomenda_repository_mock).get('123', auth=auth) \ .thenReturn('encomenda123') diff --git a/tests/correios_test.py b/tests/correios_test.py index ed27249..2127679 100644 --- a/tests/correios_test.py +++ b/tests/correios_test.py @@ -9,7 +9,7 @@ class EncomendaRepositoryTest(unittest.TestCase): def test_should_get_encomenda_by_numero(self): encomenda_123 = Status(data='2009-01-28 17:49:00') - correios_website_scraper_mock = Mock() + correios_website_scraper_mock = mock() when(correios_website_scraper_mock).get_encomenda_info('123', auth=None).thenReturn(encomenda_123) repository = EncomendaRepository() diff --git a/tests/scraping_test.py b/tests/scraping_test.py index 547c605..af37032 100644 --- a/tests/scraping_test.py +++ b/tests/scraping_test.py @@ -17,7 +17,8 @@ def _assert_status(self, status, data, local, situacao, detalhes): self.assertEqual(detalhes, status.detalhes) def test_should_get_data_from_correios_website(self): - example_file = open('%s/tests/correios_website/exemplo_rastreamento_correios1.html' % os.getcwd()) + example_file = open('%s/tests/correios_website/exemplo_rastreamento_correios1.html' % os.getcwd(), + encoding='iso-8859-1') sample_html = example_file.read() example_file.close() From 366a927340e9e4be0ab51d849c18e5c93083fdfc Mon Sep 17 00:00:00 2001 From: Willian Rocha Date: Mon, 10 Jul 2017 20:00:28 -0300 Subject: [PATCH 2/3] modificado travis ci para o python3 --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8e1e41c..d55f6ce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,8 @@ language: python +python: + - "3.6" + before_install: pip install -r requirements.txt -script: nosetests \ No newline at end of file +script: nosetests From a7fa1b307b9d92ba53cbcd0fc5dee26b52aebe07 Mon Sep 17 00:00:00 2001 From: Willian Rocha Date: Wed, 12 Jul 2017 00:37:17 -0300 Subject: [PATCH 3/3] modificados imports para v2 e v3 removido encode latin-1 desnecessario adicionado ao travis python 2.7 --- .travis.yml | 1 + packtrack/__init__.py | 8 +++++--- packtrack/correios.py | 4 +++- packtrack/scraping.py | 18 ++++++++---------- tests/scraping_test.py | 4 ++-- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/.travis.yml b/.travis.yml index d55f6ce..4de4a34 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: python python: + - "2.7" - "3.6" before_install: pip install -r requirements.txt diff --git a/packtrack/__init__.py b/packtrack/__init__.py index a635c9e..1178f57 100755 --- a/packtrack/__init__.py +++ b/packtrack/__init__.py @@ -1,6 +1,8 @@ -from packtrack.correios import EncomendaRepository -from packtrack.royal import RoyalMail -from packtrack.dhl_gm import DhlGmTracker +from __future__ import absolute_import + +from .correios import EncomendaRepository +from .royal import RoyalMail +from .dhl_gm import DhlGmTracker class Correios(object): diff --git a/packtrack/correios.py b/packtrack/correios.py index 5fcf0ba..733d71e 100644 --- a/packtrack/correios.py +++ b/packtrack/correios.py @@ -1,4 +1,6 @@ # coding: utf-8 +from __future__ import absolute_import + from datetime import datetime import re @@ -15,7 +17,7 @@ def get(self, numero, auth=None): return func(numero, **kwargs) def _init_scraper(self, backend): - from packtrack.scraping import CorreiosWebsiteScraper, CorreiosRastroService + from .scraping import CorreiosWebsiteScraper, CorreiosRastroService if backend is None: backend = 'www2' diff --git a/packtrack/scraping.py b/packtrack/scraping.py index 1576f43..445ef70 100644 --- a/packtrack/scraping.py +++ b/packtrack/scraping.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import + import os import re @@ -8,7 +10,7 @@ from zeep.cache import InMemoryCache from zeep.transports import Transport -from packtrack.correios import Encomenda, Status +from .correios import Encomenda, Status class CorreiosWebsiteScraper(object): @@ -48,10 +50,6 @@ def get_encomenda_info(self, numero): html = response.content if html: - try: - html = html.encode('latin-1') - except UnicodeDecodeError: - pass encomenda = Encomenda(numero) for status in self._get_all_status_from_html(html): encomenda.adicionar_status(status) @@ -59,13 +57,13 @@ def get_encomenda_info(self, numero): def _text(self, value): value = BeautifulSoup(value.strip(), 'lxml').text - return value.replace(' ', ' ').replace('\xa0',' ') + return value.replace(' ', ' ').replace(u'\xa0',' ') def _get_all_status_from_html(self, html): status = [] - if b').*', html, re.S) + html_info = re.search('.*().*', html, re.S) if not html_info: return status @@ -85,8 +83,8 @@ def _get_all_status_from_html(self, html): data = '%s %s' % (content[0].strip().decode(), content[1].strip().decode()) local = '/'.join(self._text(content[2]).rsplit(' / ', 1)).upper() elif class_ == 'sroLbEvent': - situacao = self._text(content[0].decode()) - detalhes = self._text(content[1].decode()) + situacao = self._text(content[0].decode('utf-8')) + detalhes = self._text(content[1].decode('utf-8')) if detalhes: detalhes = u'%s %s' % (situacao, detalhes) status.append(Status(data=data, local=local, diff --git a/tests/scraping_test.py b/tests/scraping_test.py index af37032..5d4faf9 100644 --- a/tests/scraping_test.py +++ b/tests/scraping_test.py @@ -1,7 +1,7 @@ # encoding: UTF-8 import os import unittest - +import io import mock from packtrack.scraping import CorreiosWebsiteScraper @@ -17,7 +17,7 @@ def _assert_status(self, status, data, local, situacao, detalhes): self.assertEqual(detalhes, status.detalhes) def test_should_get_data_from_correios_website(self): - example_file = open('%s/tests/correios_website/exemplo_rastreamento_correios1.html' % os.getcwd(), + example_file = io.open('%s/tests/correios_website/exemplo_rastreamento_correios1.html' % os.getcwd(), encoding='iso-8859-1') sample_html = example_file.read() example_file.close()