-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcli.py
More file actions
71 lines (53 loc) · 2.39 KB
/
cli.py
File metadata and controls
71 lines (53 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python3
"""
cli.py — batch-process a folder of documents from the command line.
Usage:
python cli.py --input ./samples --output ./output
python cli.py --input ./samples --output ./output --lang german
"""
from __future__ import annotations
import argparse
import json
import sys
import time
from pathlib import Path
from app.detector import StampDetector
# File suffixes (lower-case, including the leading dot) that the batch run
# picks up; anything else in the input folder is ignored.
SUPPORTED = {
    ".pdf",
    ".jpg",
    ".jpeg",
    ".png",
    ".tiff",
    ".tif",
    ".bmp",
}
def _find_input_files(input_dir: Path) -> list[Path]:
    """Return the supported regular files directly inside *input_dir*, sorted."""
    return [
        entry
        for entry in sorted(input_dir.iterdir())
        # is_file() skips sub-folders whose name happens to end in a
        # supported suffix; read_bytes() on those would crash the run.
        if entry.is_file() and entry.suffix.lower() in SUPPORTED
    ]


def _result_file_name(path: Path, taken: set[str]) -> str:
    """Choose the JSON file name for *path* and record it in *taken*.

    The name is ``<stem>.json``. If an earlier input already claimed that
    name (e.g. ``a.pdf`` and ``a.png``), fall back to ``<full name>.json``
    so the earlier result is not silently overwritten.
    """
    name = path.stem + ".json"
    if name in taken:
        name = path.name + ".json"
    taken.add(name)
    return name


def main() -> None:
    """Run stamp detection over every supported file in a folder.

    Command-line options:
        --input / -i   folder containing PDFs / images (required)
        --output / -o  folder for the JSON results (default ``./output``)
        --lang / -l    OCR language passed to ``StampDetector`` (default ``german``)

    For each supported file one JSON result is written to the output folder,
    plus a ``_summary.json`` listing file name, stamp count and elapsed time.

    Exits with status 1 (message on stderr) when the input folder does not
    exist or contains no supported files.
    """
    parser = argparse.ArgumentParser(description="Batch stamp detection")
    parser.add_argument("--input", "-i", required=True, help="Input folder with PDFs / images")
    parser.add_argument("--output", "-o", default="./output", help="Output folder for JSON results")
    parser.add_argument("--lang", "-l", default="german", help="OCR language (default: german)")
    args = parser.parse_args()

    # Validate the input folder before touching the filesystem, so a typo in
    # --input gives a readable error instead of a traceback from iterdir().
    input_dir = Path(args.input)
    if not input_dir.is_dir():
        print(f"Input folder not found or not a directory: {input_dir}", file=sys.stderr)
        sys.exit(1)

    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    files = _find_input_files(input_dir)
    if not files:
        print(f"No supported files found in {input_dir}", file=sys.stderr)
        sys.exit(1)

    print(f"Found {len(files)} file(s). Loading model …")
    detector = StampDetector(lang=args.lang)
    print("Model ready. Processing …\n")

    summary_file = output_dir / "_summary.json"
    # Reserve the summary name so a per-file result can never be clobbered by it.
    taken_names = {summary_file.name}
    summary = []
    for path in files:
        t0 = time.perf_counter()
        data = path.read_bytes()
        if path.suffix.lower() == ".pdf":
            result = detector.process_pdf_bytes(data, path.name)
        else:
            result = detector.process_image_bytes(data, path.name)
        elapsed = time.perf_counter() - t0

        # The count is read from "total_stamps", falling back to "stamp_count";
        # presumably PDF and image results use different keys (confirm in detector).
        stamp_count = result.get("total_stamps", result.get("stamp_count", 0))

        out_file = output_dir / _result_file_name(path, taken_names)
        # ensure_ascii=False emits raw non-ASCII characters, so the encoding
        # must be explicit; the platform default may be unable to encode them.
        out_file.write_text(
            json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8"
        )

        line = f" {path.name:<40} stamps: {stamp_count:>3} ({elapsed:.2f}s) → {out_file.name}"
        print(line)
        summary.append({"file": path.name, "stamps": stamp_count, "elapsed_s": round(elapsed, 2)})

    summary_file.write_text(
        json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    total = sum(s["stamps"] for s in summary)
    print(f"\nDone. {len(files)} file(s), {total} stamp(s) total. Summary → {summary_file}")
# Run the batch job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()