-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathclassify_cli.py
More file actions
72 lines (63 loc) · 2.03 KB
/
classify_cli.py
File metadata and controls
72 lines (63 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
import argparse
import json
import sys
from binary_classifiers.predict_class import PredictClass
def _collect_sequences(args):
    """Return the ``(id, sequence)`` pairs selected by the parsed arguments.

    Sources are tried in priority order:

    1. ``--sequence``: a single pair with id ``"input"``.
    2. ``--file``: one pair per non-blank line, with id ``"line_<n>"`` where
       ``n`` is the 1-based line number in the file (blank lines still count
       towards the numbering).
    3. stdin: everything readable from stdin, stripped, as a single pair with
       id ``"stdin"``.

    An empty list means no usable sequence was found.

    Raises:
        OSError: If the ``--file`` path cannot be opened or read.
    """
    if args.sequence:
        return [("input", args.sequence)]
    if args.file:
        with open(args.file, "r") as handle:
            stripped_lines = (line.strip() for line in handle)
            return [
                (f"line_{number}", seq)
                for number, seq in enumerate(stripped_lines, start=1)
                if seq
            ]
    piped = sys.stdin.read().strip()
    return [("stdin", piped)] if piped else []


def _preview(seq, limit=50):
    """Return *seq* unchanged, or its first *limit* characters plus ``...``."""
    return seq[:limit] + "..." if len(seq) > limit else seq


def main():
    """Run the Virus/Host DNA sequence classifier command-line interface.

    Parses the command line, gathers sequences from ``--sequence``, ``--file``
    or stdin, classifies each one with ``PredictClass`` and prints the results
    either as a JSON array (``--json``) or as a plain-text report.

    Exits with status 1 (message on stderr) when the input cannot be read or
    when no sequence was supplied.
    """
    parser = argparse.ArgumentParser(
        description="Classify DNA sequences as Virus or Host"
    )
    parser.add_argument(
        "--model",
        choices=["RandomForest", "SVM"],
        default="RandomForest",
        help="Model to use",
    )
    parser.add_argument("--sequence", "-s", help="DNA sequence to classify")
    parser.add_argument("--file", "-f", help="File with sequences (one per line)")
    parser.add_argument("--json", "-j", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    # Report unreadable input as a normal CLI error instead of a traceback.
    try:
        sequences = _collect_sequences(args)
    except (OSError, UnicodeDecodeError) as err:
        print(f"Error: could not read input: {err}", file=sys.stderr)
        sys.exit(1)

    if not sequences:
        print(
            "Error: Provide --sequence, --file, or pipe sequence via stdin",
            file=sys.stderr,
        )
        sys.exit(1)

    # Build the predictor only once the input is known to be usable, so that
    # usage errors are reported without first constructing the model.
    predictor = PredictClass(model_name=args.model)

    results = []
    for seq_id, seq in sequences:
        label, confidence = predictor.predict_with_confidence(seq)
        results.append(
            {
                "id": seq_id,
                # Long sequences are shortened for display only; "length"
                # always reports the full sequence length.
                "sequence": _preview(seq),
                "prediction": label,
                "confidence": round(confidence, 4),
                "length": len(seq),
            }
        )

    if args.json:
        print(json.dumps(results, indent=2))
        return

    for result in results:
        print(f"ID: {result['id']}")
        print(f"Sequence: {result['sequence']}")
        print(f"Prediction: {result['prediction']}")
        print(f"Confidence: {result['confidence']}")
        print("-" * 40)
# Run the CLI only when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()