-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpredict.py
More file actions
71 lines (56 loc) · 2.39 KB
/
predict.py
File metadata and controls
71 lines (56 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import numpy as np
import json
import os
from headbytes import HeadBytes
from extpredict import FileReader, SystemReader
from randbytes import RandBytes
from randhead import RandHead
def predict_single_file(filename, trained_classifier, feature,
                        head_bytes=512, rand_bytes=512):
    """Predicts the type of file.

    filename (str): Name of file to predict the type of.
    trained_classifier: (sklearn model): Trained model; only its
        ``predict`` method is used.
    feature (str): Type of feature that trained_classifier was trained on.
        One of "head", "randhead" or "rand".
    head_bytes (int): Passed as ``head_size`` to HeadBytes / RandHead.
    rand_bytes (int): Passed as ``rand_size`` to RandHead or as
        ``number_bytes`` to RandBytes.

    Returns the key of CLASS_TABLE.json whose value equals the predicted
    class index.

    Raises ValueError if ``feature`` is not a recognised feature set, or if
    the predicted class index has no entry in CLASS_TABLE.json.

    CLASS_TABLE.json is opened with a relative path, so it is looked up in
    the current working directory.
    """
    # Validate the feature name first so a bad argument fails before any
    # file I/O is attempted.
    if feature == "head":
        features = HeadBytes(head_size=head_bytes)
    elif feature == "randhead":
        features = RandHead(head_size=head_bytes, rand_size=rand_bytes)
    elif feature == "rand":
        features = RandBytes(number_bytes=rand_bytes)
    else:
        raise ValueError("Not a valid feature set. ")

    # The context manager closes the file; no explicit close() is needed.
    with open('CLASS_TABLE.json', 'r', encoding='utf-8') as f:
        label_map = json.load(f)

    reader = FileReader(feature_maker=features, filename=filename)
    reader.run()

    # The third item of reader.data holds the per-byte features
    # (presumably after the file's path and name -- confirm against
    # FileReader).
    data = list(reader.data)[2]
    x = np.array([int.from_bytes(c, byteorder="big") for c in data])

    # predict() is given a batch containing exactly one sample.
    prediction = trained_classifier.predict([x])
    class_index = int(prediction[0])

    # Reverse lookup: first key (in file order) whose value equals the
    # predicted class index.
    for label, index in label_map.items():
        if index == class_index:
            return label
    raise ValueError(
        "Predicted class index {} is not present in CLASS_TABLE.json".format(
            class_index))
def predict_directory(dir_name, trained_classifier, feature,
                      head_bytes=512, rand_bytes=512):
    """Predicts the type of every file yielded for a directory.

    dir_name (str): Passed as ``top_dir`` to SystemReader.
    trained_classifier: (sklearn model): Trained model; only its
        ``predict`` method is used.
    feature (str): Type of feature that trained_classifier was trained on.
        One of "head", "randhead" or "rand".
    head_bytes (int): Passed as ``head_size`` to HeadBytes / RandHead.
    rand_bytes (int): Passed as ``rand_size`` to RandHead or as
        ``number_bytes`` to RandBytes.

    Returns a dict mapping ``os.path.join(entry[0], entry[1])`` for each
    entry in the reader's data to the CLASS_TABLE.json key whose value
    equals the predicted class index for that entry.

    Raises ValueError if ``feature`` is not a recognised feature set, or if
    a predicted class index has no entry in CLASS_TABLE.json.

    CLASS_TABLE.json is opened with a relative path, so it is looked up in
    the current working directory.
    """
    # Validate the feature name first so a bad argument fails before any
    # file I/O or directory processing is attempted.
    if feature == "head":
        features = HeadBytes(head_size=head_bytes)
    elif feature == "randhead":
        features = RandHead(head_size=head_bytes, rand_size=rand_bytes)
    elif feature == "rand":
        features = RandBytes(number_bytes=rand_bytes)
    else:
        raise ValueError("Not a valid feature set. ")

    # The context manager closes the file; no explicit close() is needed.
    with open('CLASS_TABLE.json', 'r', encoding='utf-8') as f:
        label_map = json.load(f)

    # Build the class-index -> label table once instead of rebuilding key
    # and value lists for every file. setdefault keeps the first key when
    # several labels share an index.
    index_to_label = {}
    for name, index in label_map.items():
        index_to_label.setdefault(index, name)

    reader = SystemReader(feature_maker=features, top_dir=dir_name)
    reader.run()

    file_predictions = {}
    for file_data in reader.data:
        # Item 2 of each entry holds the per-byte features; items 0 and 1
        # are joined below to form the result key.
        data = list(file_data)[2]
        x = np.array([int.from_bytes(c, byteorder="big") for c in data])

        # predict() is given a batch containing exactly one sample.
        prediction = trained_classifier.predict([x])
        class_index = int(prediction[0])
        try:
            label = index_to_label[class_index]
        except KeyError:
            raise ValueError(
                "Predicted class index {} is not present in "
                "CLASS_TABLE.json".format(class_index)) from None

        file_predictions[os.path.join(file_data[0], file_data[1])] = label
    return file_predictions