-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrandbytes.py
More file actions
67 lines (50 loc) · 1.98 KB
/
randbytes.py
File metadata and controls
67 lines (50 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from os.path import getsize
from feature import FeatureMaker
from random import randint
import numpy as np
class RandBytes(FeatureMaker):
    """Feature maker that samples single bytes at random offsets of a file."""

    def __init__(self, number_bytes=512):
        """Set up the sampler.

        Parameters:
            number_bytes (int): How many random bytes each sample contains.
        """
        self.name = "rand"
        self.nfeatures = number_bytes
        # Maps each label seen by translate() to the integer assigned to it.
        self.class_table = {}

    def get_feature(self, open_file):
        """Read self.nfeatures bytes from random positions of open_file.

        The file size is looked up on disk through open_file.name, and the
        file is read one byte at a time after seeking to each position.

        Parameter:
            open_file (file): An opened file to sample from; it must expose
                a name attribute that getsize() can resolve.
        Return:
            (list): self.nfeatures items, each the result of
                open_file.read(1), ordered by ascending file offset. For an
                empty file every item is b''.
        """
        total = getsize(open_file.name)
        if total == 0:
            # Nothing to read from: fill the sample with empty bytes.
            return [b''] * self.nfeatures

        # Offsets are drawn independently, so the same position can be
        # picked more than once; a file shorter than nfeatures is therefore
        # always oversampled. This may be something to look out for.
        offsets = sorted(randint(0, total - 1) for _ in range(self.nfeatures))

        picked = []
        for offset in offsets:
            open_file.seek(offset)
            picked.append(open_file.read(1))
        return picked

    def translate(self, entry):
        """Convert a sampled entry into numeric form.

        Parameter:
            entry (list): A list of a file path, file name, list of bytes,
                and a label. Only entry[2] (the bytes) and entry[-1] (the
                label) are read here.
        Return:
            (tuple): 2-tuple of a numpy array holding the big-endian integer
                value of each item of entry[2], and the integer assigned to
                the label in self.class_table. A label not seen before is
                registered with the next integer, counting from 1.
        """
        values = [int.from_bytes(raw, byteorder="big") for raw in entry[2]]
        label = entry[-1]
        if label not in self.class_table:
            # First occurrence of this label: give it the next free index.
            self.class_table[label] = len(self.class_table) + 1
        return np.array(values), self.class_table[label]