-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwikipediascraper.py
More file actions
37 lines (29 loc) · 984 Bytes
/
wikipediascraper.py
File metadata and controls
37 lines (29 loc) · 984 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
"""Scrape the Wikipedia 'Women computer scientists' category page.

Collects (name, article link, letter heading) for every entry listed on
the category page and writes the result to a CSV file.
"""

# Importing packages
import csv

import requests
from bs4 import BeautifulSoup

# Accumulates one dict per scraped entry
rows = []

# Fetch the category page
url = "https://en.wikipedia.org/wiki/Category:Women_computer_scientists"
page = requests.get(url)
# Fixed: original had `paghe_content = page.count` — typo'd name and a
# nonexistent attribute; the raw HTML bytes live in `page.content`.
page_content = page.content

# Parse the page with the Beautiful Soup library
soup = BeautifulSoup(page_content, "html.parser")
content = soup.find("div", class_="mw-category")
all_groupings = content.find_all("div", class_="mw-category-group")

for grouping in all_groupings:
    names_list = grouping.find("ul")
    # The <h3> heading is the alphabetical letter for this group (e.g. "A").
    # Fixed: original `grouping.find("h3"),get_text()` used a comma, which
    # built a tuple and referenced an undefined `get_text`.
    category = grouping.find("h3").get_text()
    alphabetical_names = names_list.find_all("li")
    for alphabetical_name in alphabetical_names:
        # The anchor tag holds both the display name and the article URL.
        anchor = alphabetical_name.find("a", href=True)
        name = anchor.get_text()
        link = anchor["href"]
        # Make a data dictionary that will be written into the CSV
        row = {"name": name,
               "link": link,
               "letter_name": category}
        # Fixed: original `row.append(row)` called .append on the dict itself.
        rows.append(row)

# Write the collected rows to CSV — the original imported csv and announced
# this step in a comment but never performed it.
with open("women_computer_scientists.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["name", "link", "letter_name"])
    writer.writeheader()
    writer.writerows(rows)