This repository was archived by the owner on Feb 15, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbooks.py
More file actions
58 lines (45 loc) · 2.05 KB
/
books.py
File metadata and controls
58 lines (45 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Root URL of the site being scraped: the category index is fetched from this
# page, and the category links found on it are resolved against this address.
BASE_URL = "https://books.toscrape.com/"
def fetch_all_category_urls(timeout=30):
    """Return ``(name, url)`` pairs for the categories in the home-page sidebar.

    Downloads ``BASE_URL``, parses it, and reads the link inside each
    ``.side_categories ul li`` element.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of ``(category_name, absolute_url)`` tuples, in page order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout a stalled connection blocks forever; without the
    # status check an error page would be parsed as if it were the index.
    response = requests.get(BASE_URL, timeout=timeout)
    response.raise_for_status()

    # Raw bytes are handed over so the parser can pick the document encoding.
    soup = BeautifulSoup(response.content, 'html.parser')

    # [1:] drops the first match, the umbrella "Books" entry.
    category_items = soup.select('.side_categories ul li')[1:]

    # urljoin resolves each href against BASE_URL the way a browser would,
    # instead of relying on plain string concatenation.
    return [
        (item.a.text.strip(), urljoin(BASE_URL, item.a['href']))
        for item in category_items
    ]
def fetch_books_from_category(category_url, timeout=30):
    """Scrape every book listed under a category, following pagination.

    Args:
        category_url: URL of the category's first listing page.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list with one ``[title, price, availability, rating]`` row per
        book, in the order the books appear across the pages. ``rating`` is
        the second CSS class of the book's star-rating paragraph.

    Raises:
        requests.RequestException: If any page request fails, times out, or
            returns an HTTP error status.
    """
    books = []
    while category_url:
        # Fail loudly on a stalled connection or an error response rather
        # than hanging or scraping an error page.
        response = requests.get(category_url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        for book in soup.select('article.product_pod'):
            title = book.select_one('h3 a')['title']
            price = book.select_one('.price_color').text
            availability = book.select_one('.availability').text.strip()
            # Select the rating paragraph by its class instead of "first <p>"
            # so a markup reorder cannot silently pick the wrong element.
            rating = book.select_one('p.star-rating')['class'][1]
            books.append([title, price, availability, rating])

        # Handle pagination: the "next" href is relative to the current page.
        # urljoin resolves it correctly even if the link is absolute,
        # root-relative, or the current URL carries a query string.
        next_link = soup.select_one('.next a')
        category_url = urljoin(category_url, next_link['href']) if next_link else None
    return books
def save_to_csv(data, filename, encoding="utf-8"):
    """Write scraped book rows to a CSV file with a fixed header row.

    Args:
        data: Iterable of rows, each an iterable of four values matching the
            header ``Title, Price, Availability, Rating``.
        filename: Path of the file to create; an existing file is overwritten.
        encoding: Text encoding for the output file. Defaults to UTF-8 so the
            result does not depend on the platform's locale encoding and
            non-ASCII titles or currency symbols cannot raise
            ``UnicodeEncodeError``.
    """
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding=encoding) as file:
        writer = csv.writer(file)
        writer.writerow(["Title", "Price", "Availability", "Rating"])
        writer.writerows(data)
def main():
    """Interactively scrape one category, or all of them, into ``books.csv``.

    Lists the available categories, asks the user for a 1-based category
    number (an empty answer means "all categories"), scrapes the selection,
    writes the rows to ``books.csv`` and prints a summary line.

    Invalid answers (non-numeric text, or a number outside the list) are
    rejected with a message and the prompt is shown again.
    """
    categories = fetch_all_category_urls()

    print("Available Categories:")
    for idx, (name, _) in enumerate(categories, 1):
        print(f"{idx}. {name}")

    while True:
        choice = input("\nEnter the number of the category you want to scrape (or press Enter to scrape all): ").strip()
        if not choice:
            selected = categories
            break
        # Validate before indexing: a bare categories[int(choice) - 1] would
        # crash on non-numeric or too-large input, and "0" or a negative
        # number would silently pick a category from the end of the list.
        if choice.isdecimal() and 1 <= int(choice) <= len(categories):
            selected = [categories[int(choice) - 1]]
            break
        print(f"Invalid choice. Enter a number from 1 to {len(categories)}, or press Enter to scrape all.")

    all_books = []
    for _, cat_url in selected:
        all_books.extend(fetch_books_from_category(cat_url))

    save_to_csv(all_books, "books.csv")
    print(f"\nScraped {len(all_books)} books and saved to books.csv")
# Start the interactive scraper only when this file is run as a script,
# so importing the module does not trigger any prompts or network requests.
if __name__ == "__main__":
    main()