-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwikipediascraper.py
More file actions
37 lines (29 loc) · 984 Bytes
/
wikipediascraper.py
File metadata and controls
37 lines (29 loc) · 984 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
"""Scrape the Wikipedia 'Women computer scientists' category page.

Collects (name, article link, letter heading) for every entry listed on
the category page and writes the result to a CSV file.
"""

# Importing packages
import csv

import requests
from bs4 import BeautifulSoup

# Accumulates one dict per scraped entry
rows = []

# Fetch the category page
url = "https://en.wikipedia.org/wiki/Category:Women_computer_scientists"
page = requests.get(url)
# Fixed: original had `paghe_content = page.count` — typo'd name and a
# nonexistent attribute; the raw HTML bytes live in `page.content`.
page_content = page.content

# Parse the page with the Beautiful Soup library
soup = BeautifulSoup(page_content, "html.parser")
content = soup.find("div", class_="mw-category")
all_groupings = content.find_all("div", class_="mw-category-group")

for grouping in all_groupings:
    names_list = grouping.find("ul")
    # The <h3> heading is the alphabetical letter for this group (e.g. "A").
    # Fixed: original `grouping.find("h3"),get_text()` used a comma, which
    # built a tuple and referenced an undefined `get_text`.
    category = grouping.find("h3").get_text()
    alphabetical_names = names_list.find_all("li")
    for alphabetical_name in alphabetical_names:
        # The anchor tag holds both the display name and the article URL.
        anchor = alphabetical_name.find("a", href=True)
        name = anchor.get_text()
        link = anchor["href"]
        # Make a data dictionary that will be written into the CSV
        row = {"name": name,
               "link": link,
               "letter_name": category}
        # Fixed: original `row.append(row)` called .append on the dict itself.
        rows.append(row)

# Write the collected rows to CSV — the original imported csv and announced
# this step in a comment but never performed it.
with open("women_computer_scientists.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["name", "link", "letter_name"])
    writer.writeheader()
    writer.writerows(rows)