-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_handler.py
More file actions
151 lines (125 loc) · 3.94 KB
/
data_handler.py
File metadata and controls
151 lines (125 loc) · 3.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import sys
import os
import pandas as pd
# Naming scheme of each feature set: the sub-folder it lives in, the suffix
# used in its file names, and the column separator used inside its files.
LABELS = dict(folder_name="labels", type_name="labeled_sleep", separator=" ")
HEART_RATES = dict(folder_name="heart_rate", type_name="heartrate", separator=",")
MOTIONS = dict(folder_name="motion", type_name="acceleration", separator=" ")
STEPS = dict(folder_name="steps", type_name="steps", separator=",")

# Raw and prepared data roots, both located under sys.path[0].
DATA_FOLDER = "/".join((sys.path[0], "data"))
PREPARED_DATA_FOLDER = "/".join((sys.path[0], "prepared_data"))

# Template of a feature-set folder:
#   INNER_FOLDER_STRUCTURE.format(DATA_FOLDER, NAMINGS['folder_name'])
INNER_FOLDER_STRUCTURE = "{}/{}"
# Template of a single subject file:
#   PATH_STRUCTURE.format(DATA_FOLDER, NAMINGS['folder_name'], idx, NAMINGS['type_name'])
PATH_STRUCTURE = "{}/{}/{}_{}.txt"
def get_data_type(type):
    """Return the constant dict that describes the requested feature set.

    Parameters
    ----------
    type : str
        feature set type (motions/steps/labels/heart_rates)

    Returns
    -------
    dict
        one of the module constants MOTIONS, STEPS, LABELS or HEART_RATES

    Raises
    ------
    KeyError
        If the given type is not one of the four known names
    """
    known_types = ("motions", "steps", "labels", "heart_rates")
    if type not in known_types:
        raise KeyError("{} feature set type not found in [motions, steps, labels, heart_rates]".format(type))

    # The lookup table is built only after validation, so an unknown name is
    # rejected without touching any of the constants.
    feature_sets = {
        "motions": MOTIONS,
        "steps": STEPS,
        "labels": LABELS,
        "heart_rates": HEART_RATES,
    }
    return feature_sets[type]
def check_dir(path):
    """Make sure a directory exists, creating it (and its parents) if needed.

    Parameters
    ----------
    path : str
        path to directory

    Returns
    -------
    bool
        True if the directory already existed before the call, False if it
        had to be created
    """
    already_there = os.path.isdir(path)
    if not already_there:
        # exist_ok avoids a FileExistsError when something else creates the
        # directory between the isdir() check above and this call.
        os.makedirs(path, exist_ok=True)
    return already_there
def save_data_frame(type_const, df, id):
    """Write a subject's data into the prepared-data folder of a feature set.

    The target file is built from PATH_STRUCTURE under PREPARED_DATA_FOLDER;
    missing folders are created first. The file is written without a header
    row, using the separator of the feature set.

    Parameters
    ----------
    type_const : str
        feature set type (motions/steps/labels/heart_rates); an unknown name
        is printed and nothing is written
    df : object with a ``to_csv`` method
        data to store (presumably a pandas DataFrame - confirm with callers)
    id : int
        subject index used in the file name

    Returns
    -------
    None
    """
    try:
        data_type = get_data_type(type_const)
    except KeyError as err:
        print(err)
        return None

    folder_name = data_type["folder_name"]
    check_dir(PREPARED_DATA_FOLDER)
    target_file = PATH_STRUCTURE.format(PREPARED_DATA_FOLDER, folder_name, id, data_type["type_name"])
    check_dir(INNER_FOLDER_STRUCTURE.format(PREPARED_DATA_FOLDER, folder_name))
    df.to_csv(target_file, sep=data_type["separator"], header=False)
def get_subject_ids():
    """Collect the subject ids found in the raw labels folder.

    Every regular file in ``DATA_FOLDER/<labels folder>`` whose name starts
    with an integer followed by ``_`` contributes that integer. Files whose
    prefix is not an integer (hidden or stray files) are skipped.

    Returns
    -------
    list of ints
        subject indices, in the order os.listdir reports the files
    """
    directory = DATA_FOLDER + "/" + LABELS["folder_name"]
    ids = []
    for filename in os.listdir(directory):
        # Sub-directories and other non-file entries carry no subject id.
        if not os.path.isfile(os.path.join(directory, filename)):
            continue
        # Parse the bare file name: splitting the joined path on "/" fails
        # wherever os.path.join uses a different separator.
        prefix = filename.split("_")[0]
        try:
            ids.append(int(prefix))
        except ValueError:
            # Not a "<id>_<type>.txt" file (e.g. .DS_Store); ignore it.
            continue
    return ids
def read_single_file(type_const, idx, use_prepared):
    """Load one subject's file of a feature set as a float data frame.

    Parameters
    ----------
    type_const : dict
        feature set constants providing "folder_name", "type_name" and
        "separator"
    idx : int
        subject index used in the file name
    use_prepared : bool
        read from PREPARED_DATA_FOLDER when truthy, otherwise from DATA_FOLDER

    Returns
    -------
    pandas.DataFrame
        header-less file content with every column cast to float
    """
    root_folder = PREPARED_DATA_FOLDER if use_prepared else DATA_FOLDER
    source_file = PATH_STRUCTURE.format(root_folder, type_const["folder_name"], idx, type_const["type_name"])
    frame = pd.read_csv(source_file, sep=type_const["separator"], header=None)
    return frame.astype("float")
def read_files(type, idx=None, use_prepared=False):
    """Read the files of the given feature set.

    Parameters
    ----------
    type : str
        feature set type (motions/steps/labels/heart_rates)
    idx : int, optional
        subject index; when given only that subject's file is read, when None
        the files of all subjects are loaded, by default None
    use_prepared : bool, optional
        use True if you want to use the already prepared data, by default False

    Returns
    -------
    dict, pandas.DataFrame or None
        With ``idx`` None: loaded data with its subject ids as keys.
        With ``idx`` given: the single result of read_single_file.
        None if the feature set type is unknown (the error is printed, not
        raised).
    """
    try:
        data_type = get_data_type(type)
    except KeyError as e:
        print(e)
        return None

    if idx is not None:
        return read_single_file(data_type, idx, use_prepared)

    # The labels folder is scanned only when all subjects are requested, so a
    # single-subject read or an unknown type never depends on that folder.
    return {
        subject_id: read_single_file(data_type, subject_id, use_prepared)
        for subject_id in get_subject_ids()
    }