-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoc.py
More file actions
23 lines (17 loc) · 679 Bytes
/
oc.py
File metadata and controls
23 lines (17 loc) · 679 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import cv2
import pytesseract
from PIL import Image
# Script: load an image, binarize it with a fixed threshold, run Tesseract OCR
# on the result and print the recognized text.

# Path to the Tesseract executable in Google Colab
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

# Load the image from the specified path
image_path = 'math.jpg'
image = cv2.imread(image_path)

# cv2.imread does not raise when the file is missing or cannot be decoded; it
# returns None. Fail here with a message that names the path instead of letting
# cvtColor die later with an opaque OpenCV assertion error.
if image is None:
    raise FileNotFoundError(
        f"Could not read image {image_path!r}: "
        "the file is missing or is not a supported image format"
    )

# Convert the image (loaded by OpenCV in BGR channel order) to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Apply thresholding to preprocess the image: pixels brighter than 150 become
# 255 (white), everything else becomes 0 (black)
_, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)

# Perform OCR on the preprocessed image
custom_config = r'--oem 3'  # Customize configuration as needed
text = pytesseract.image_to_string(thresh, config=custom_config)

# Print the extracted text
print("Extracted text:", text)