-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
98 lines (68 loc) · 2.57 KB
/
run_analysis.R
File metadata and controls
98 lines (68 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Coursera Getting and Cleaning Data final project
# Step 1: Merges the training and the test sets to create one data set.
# Step 2: Extracts only the measurements on the mean and standard deviation for each measurement.
# Step 3: Uses descriptive activity names to name the activities in the data set
# Step 4: Appropriately labels the data set with descriptive variable names.
# Step 5: From the data set in step 4, creates a second, independent tidy data
# set with the average of each variable for each activity and each subject
library(dplyr)
# Load the feature table; column V2 is used below as the measurement names.
# stringsAsFactors = FALSE keeps the names as plain character on any R version.
features <- read.table(
  file.path("Dataset", "features.txt"),
  header = FALSE,
  stringsAsFactors = FALSE
)

# Load the test split: measurements, activity codes and subject ids
test_dir <- file.path("Dataset", "test")
X_test <- read.table(file.path(test_dir, "X_test.txt"), header = FALSE)
y_test <- read.table(file.path(test_dir, "y_test.txt"), header = FALSE)
subject_test <- read.table(
  file.path(test_dir, "subject_test.txt"),
  header = FALSE
)

# Fail early on a mismatched dataset: assigning a shorter name vector would
# otherwise leave the trailing measurement columns without a usable name.
if (nrow(features) != ncol(X_test)) {
  stop("features.txt does not match the columns of X_test.txt", call. = FALSE)
}

# Header names
names(X_test) <- features$V2
names(y_test) <- "activity"
names(subject_test) <- "subject"

# Keep only the mean()/std() columns; drop = FALSE guarantees a data frame
# even if the pattern matches a single column.
keep_test <- grep("mean\\(\\)|std\\(\\)", names(X_test))
X_test <- X_test[, keep_test, drop = FALSE]

# Stitch the test data together: subject, activity, then the measurements
testdata <- cbind(subject_test, y_test, X_test)

# Release intermediates from memory
rm(subject_test, y_test, X_test, test_dir, keep_test)
# Load the training split: measurements, activity codes and subject ids
train_dir <- file.path("Dataset", "train")
X_train <- read.table(file.path(train_dir, "X_train.txt"), header = FALSE)
y_train <- read.table(file.path(train_dir, "y_train.txt"), header = FALSE)
subject_train <- read.table(
  file.path(train_dir, "subject_train.txt"),
  header = FALSE
)

# Fail early on a mismatched dataset: assigning a shorter name vector would
# otherwise leave the trailing measurement columns without a usable name.
if (nrow(features) != ncol(X_train)) {
  stop("features.txt does not match the columns of X_train.txt", call. = FALSE)
}

# Label the columns
names(X_train) <- features$V2
names(y_train) <- "activity"
names(subject_train) <- "subject"

# Keep only the mean()/std() columns; drop = FALSE guarantees a data frame
# even if the pattern matches a single column.
keep_train <- grep("mean\\(\\)|std\\(\\)", names(X_train))
X_train <- X_train[, keep_train, drop = FALSE]

# Stitch the training data together: subject, activity, then the measurements
traindata <- cbind(subject_train, y_train, X_train)

# Release intermediates (and the feature table, no longer needed) from memory
rm(subject_train, y_train, X_train, features, train_dir, keep_train)
# Stack the training rows on top of the test rows
dirtydata <- rbind(traindata, testdata)

# Read the lookup table pairing each activity code with its descriptive label
activity_label <- setNames(
  read.table(
    "Dataset/activity_labels.txt",
    header = FALSE,
    stringsAsFactors = FALSE
  ),
  c("activity", "label")
)

# Attach the descriptive label to every observation via the activity code
dirtydata <- merge(x = activity_label, y = dirtydata, by = "activity")

# The numeric activity code is redundant once the label is attached
dirtydata[["activity"]] <- NULL
# Step 5: average every measurement for each subject/activity-label pair
library(reshape2)

# Long format: one row per subject, label and measurement variable.
# id.vars is spelled out in full rather than relying on partial matching.
tidyData <- melt(dirtydata, id.vars = c("subject", "label"))

# Back to wide format, collapsing repeated observations into their mean.
# value.var is given explicitly so dcast does not have to guess the column.
tidyData <- dcast(
  tidyData,
  ... ~ variable,
  fun.aggregate = mean,
  value.var = "value"
)

# Clean up column names: strip the parentheses and lower-case everything
names(tidyData) <- tolower(gsub("\\(|\\)", "", names(tidyData)))

write.table(tidyData, file = "tidy.txt", row.names = FALSE)