-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCombineScript.py
More file actions
136 lines (102 loc) · 5.72 KB
/
CombineScript.py
File metadata and controls
136 lines (102 loc) · 5.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#
# ES335- Machine Learning- Assignment 1
#
# This script combines the data from the UCI HAR Dataset into a more usable format.
# The data is combined into a single csv file for each subject and activity.
# The data is then stored in the Combined folder.
#
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Library imports
import pandas as pd
import numpy as np
import os
# Locations of the train and test splits of the UCI HAR Dataset.
# Both are relative paths, so the script must be run from the directory
# that contains the "UCI HAR Dataset" folder.
train_path = "./UCI HAR Dataset/train"
test_path = "./UCI HAR Dataset/test"

# Lookup from the integer activity code to the activity name
# (provided by the dataset). Codes start at 1 and follow the tuple order.
ACTIVITIES = dict(
    enumerate(
        (
            'WALKING',
            'WALKING_UPSTAIRS',
            'WALKING_DOWNSTAIRS',
            'SITTING',
            'STANDING',
            'LAYING',
        ),
        start=1,
    )
)
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Combining Training Data
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Load the total-acceleration signals of the training split, one frame per axis.
# Each row is treated below as one window of consecutive samples.
# `sep=r"\s+"` is the non-deprecated equivalent of `delim_whitespace=True`.
total_acc_x = pd.read_csv(os.path.join(train_path, "Inertial Signals", "total_acc_x_train.txt"), sep=r"\s+", header=None)
total_acc_y = pd.read_csv(os.path.join(train_path, "Inertial Signals", "total_acc_y_train.txt"), sep=r"\s+", header=None)
total_acc_z = pd.read_csv(os.path.join(train_path, "Inertial Signals", "total_acc_z_train.txt"), sep=r"\s+", header=None)

# Read the subject IDs (one per window row)
subject_train = pd.read_csv(os.path.join(train_path, "subject_train.txt"), sep=r"\s+", header=None)

# Read the activity labels (one per window row)
y = pd.read_csv(os.path.join(train_path, "y_train.txt"), sep=r"\s+", header=None)

# Number of leading samples of every window after the first that are skipped
# when stitching windows together.
# NOTE(review): presumably this is the 50% overlap between consecutive
# windows described in the dataset README -- confirm.
overlap = 64

# Iterate over all the subjects.
for subject in np.unique(subject_train.values):
    # Row positions belonging to this subject, and their labels.
    sub_idxs = np.where(subject_train.iloc[:, 0] == subject)[0]
    labels = y.loc[sub_idxs]

    # Iterate over all the activities this subject performed.
    for label in np.unique(labels.values):
        # Make the output folder if it does not exist yet.
        out_dir = os.path.join("Combined", "Train", ACTIVITIES[label])
        os.makedirs(out_dir, exist_ok=True)

        # Row labels of this subject's windows for the current activity.
        # Never empty, because `label` was drawn from `labels` itself.
        label_idxs = labels[labels.iloc[:, 0] == label].index

        # Stitch the windows of each axis into one continuous signal:
        # the first window is kept whole and every later window contributes
        # only its samples from `overlap` onwards. (Previously the first
        # window was truncated as well, because the accumulators started as
        # `[]` and the `is not None` check could never take the else branch.)
        columns = {}
        for name, frame in (("accx", total_acc_x), ("accy", total_acc_y), ("accz", total_acc_z)):
            windows = frame.loc[label_idxs].to_numpy()
            columns[name] = np.concatenate((windows[0], windows[1:, overlap:].ravel()))

        # Save the combined signal as one csv file per subject and activity.
        data = pd.DataFrame(columns)
        save_path = os.path.join(out_dir, f"Subject_{subject}.csv")
        data.to_csv(save_path, index=False)

print("Done Combining the training data")
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Combining Test Data
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Load the total-acceleration signals of the test split, one frame per axis.
# Each row is treated below as one window of consecutive samples.
# `sep=r"\s+"` is the non-deprecated equivalent of `delim_whitespace=True`.
total_acc_x = pd.read_csv(os.path.join(test_path, "Inertial Signals", "total_acc_x_test.txt"), sep=r"\s+", header=None)
total_acc_y = pd.read_csv(os.path.join(test_path, "Inertial Signals", "total_acc_y_test.txt"), sep=r"\s+", header=None)
total_acc_z = pd.read_csv(os.path.join(test_path, "Inertial Signals", "total_acc_z_test.txt"), sep=r"\s+", header=None)

# Read the subject IDs (one per window row)
subject_test = pd.read_csv(os.path.join(test_path, "subject_test.txt"), sep=r"\s+", header=None)

# Read the activity labels (one per window row)
y = pd.read_csv(os.path.join(test_path, "y_test.txt"), sep=r"\s+", header=None)

# Number of leading samples of every window after the first that are skipped
# when stitching windows together.
# NOTE(review): presumably this is the 50% overlap between consecutive
# windows described in the dataset README -- confirm.
overlap = 64

# Iterate over all the subjects.
for subject in np.unique(subject_test.values):
    # Row positions belonging to this subject, and their labels.
    sub_idxs = np.where(subject_test.iloc[:, 0] == subject)[0]
    labels = y.loc[sub_idxs]

    # Iterate over all the activities this subject performed.
    for label in np.unique(labels.values):
        # Make the output folder if it does not exist yet.
        out_dir = os.path.join("Combined", "Test", ACTIVITIES[label])
        os.makedirs(out_dir, exist_ok=True)

        # Row labels of this subject's windows for the current activity.
        # Never empty, because `label` was drawn from `labels` itself.
        label_idxs = labels[labels.iloc[:, 0] == label].index

        # Stitch the windows of each axis into one continuous signal:
        # the first window is kept whole and every later window contributes
        # only its samples from `overlap` onwards. (Previously the first
        # window was truncated as well, because the accumulators started as
        # `[]` and the `is not None` check could never take the else branch.)
        columns = {}
        for name, frame in (("accx", total_acc_x), ("accy", total_acc_y), ("accz", total_acc_z)):
            windows = frame.loc[label_idxs].to_numpy()
            columns[name] = np.concatenate((windows[0], windows[1:, overlap:].ravel()))

        # Save the combined signal as one csv file per subject and activity.
        data = pd.DataFrame(columns)
        save_path = os.path.join(out_dir, f"Subject_{subject}.csv")
        data.to_csv(save_path, index=False)

print("Done Combining the testing data")
print("Done Combining the data")
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=