-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_pdf.py
More file actions
28 lines (24 loc) · 788 Bytes
/
extract_pdf.py
File metadata and controls
28 lines (24 loc) · 788 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pypdf
import json
def extract_pdf_text(pdf_path) -> str:
    """Return the text of every page in a PDF, or an error string on failure.

    Opens ``pdf_path`` with ``pypdf.PdfReader`` and concatenates the result
    of ``extract_text()`` for each page, appending a newline after each one.

    Args:
        pdf_path: Value handed to ``pypdf.PdfReader`` to locate the PDF.

    Returns:
        The concatenated page text. If anything raises while opening or
        reading the document, the string ``"Error: <message>"`` is returned
        instead of propagating the exception.
    """
    try:
        reader = pypdf.PdfReader(pdf_path)
        # Build the result in a single join instead of repeated ``+=``.
        # ``or ""`` keeps one page that yields no text from turning the
        # whole document into an error string.
        return "".join(
            f"{page.extract_text() or ''}\n" for page in reader.pages
        )
    except Exception as e:
        # Deliberately broad: this function reports every failure as a
        # string so callers can print the result unconditionally.
        return f"Error: {e}"
# Dump the extracted text of both assignment PDFs to stdout, each one
# preceded by a banner naming the file.
pdf1_path = "Assignment for Associate Product Manager/6814842c327d0_Assessment_for_APM_Role.pdf"
pdf2_path = "Assignment for Associate Product Manager/assignment task UnStop.pdf"

# 80-character horizontal rule used above and below each banner title.
_RULE = "=" * 80

# First document: banner, then its text.
print(_RULE, "PDF 1: 6814842c327d0_Assessment_for_APM_Role.pdf", _RULE, sep="\n")
pdf1_text = extract_pdf_text(pdf1_path)
print(pdf1_text)

# Second document: a blank line separates it from the first one's output.
print("\n" + _RULE, "PDF 2: assignment task UnStop.pdf", _RULE, sep="\n")
pdf2_text = extract_pdf_text(pdf2_path)
print(pdf2_text)