forked from pari08tosh/Inshorts-API
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinshorts.py
More file actions
89 lines (72 loc) · 2.38 KB
/
inshorts.py
File metadata and controls
89 lines (72 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# pylint: disable=C0103, C0111, R0914
'''
Make the request to the inshorts url according to category with requests module.
Parse using beautiful soup and lxml to form the newsDictionary.
'''
import requests
from bs4 import BeautifulSoup
def _extractOrNone(extract):
    '''
    Call the zero-argument callable extract and return its result.
    Return None instead when the markup it expects is not present.
    '''
    try:
        return extract()
    except (AttributeError, TypeError, KeyError, IndexError):
        # AttributeError: a find() returned None and was dereferenced.
        # TypeError: None was subscripted, or added to the base url string.
        # KeyError: the tag exists but lacks the requested attribute.
        # IndexError: the style value held no quoted url to split out.
        return None


def _parseCard(card):
    '''
    Build the news object for one news-card element.
    Any field that cannot be located in the card is set to None.
    '''
    return {
        'title': _extractOrNone(
            lambda: card.find(class_='news-card-title').find('a').text),
        # The image url is the quoted part of the element's inline style.
        'imageUrl': _extractOrNone(
            lambda: card.find(
                class_='news-card-image')['style'].split("'")[1]),
        'url': _extractOrNone(
            lambda: ('https://www.inshorts.com' +
                     card.find(class_='news-card-title')
                     .find('a').get('href'))),
        'content': _extractOrNone(
            lambda: card.find(class_='news-card-content').find('div').text),
        'author': _extractOrNone(
            lambda: card.find(class_='author').text),
        # NOTE(review): `clas` (not `class_`) is kept from the original. It
        # filters on an attribute literally named "clas"; presumably this
        # mirrors a misspelling in the site's markup - confirm against the
        # live page before "correcting" it.
        'date': _extractOrNone(
            lambda: card.find(clas='date').text),
        'time': _extractOrNone(
            lambda: card.find(class_='time').text),
        'readMoreUrl': _extractOrNone(
            lambda: card.find(class_='read-more').find('a').get('href'))
    }


def getNews(category, timeout=10):
    '''
    Fetch the inshorts page for category and return its news cards.

    category is appended to 'https://www.inshorts.com/en/read/'.
    timeout is the number of seconds handed to requests.get, so a stalled
    connection cannot block the caller forever.

    The returned dictionary always has the keys 'success', 'category' and
    'data'. When the request raises a RequestException, or the page has no
    element with class 'news-card', 'success' is False, 'data' is empty and
    'errorMessage' describes the failure. Otherwise 'data' holds one
    dictionary per card with the keys title, imageUrl, url, content,
    author, date, time and readMoreUrl (each None when not found).
    '''
    newsDictionary = {
        'success': True,
        'category': category,
        'data': []
    }

    try:
        htmlBody = requests.get(
            'https://www.inshorts.com/en/read/' + category, timeout=timeout)
    except requests.exceptions.RequestException as e:
        newsDictionary['success'] = False
        # str(e) instead of e.message: Python 3 exceptions have no
        # `message` attribute, so the old form crashed inside the handler.
        newsDictionary['errorMessage'] = str(e)
        return newsDictionary

    soup = BeautifulSoup(htmlBody.text, 'lxml')
    newsCards = soup.find_all(class_='news-card')
    if not newsCards:
        # A page without any news cards is reported as an unknown category.
        newsDictionary['success'] = False
        newsDictionary['errorMessage'] = 'Invalid Category'
        return newsDictionary

    newsDictionary['data'] = [_parseCard(card) for card in newsCards]
    return newsDictionary