-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCode_Winnow.py
More file actions
162 lines (152 loc) · 8.2 KB
/
Code_Winnow.py
File metadata and controls
162 lines (152 loc) · 8.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#
# Code by Rajiv Mantena & Devi Ayyagari for CS6350 Machine Learning Fall 2016
# Final Project
# University of Utah
# 7th of November
# mantenarajiv@gmail.com
# akdevi.92@gmail.com
#
import numpy
import csv
import sys
# Function to read the CSV into Numpy.Array
def read_data_csv(matrix):
new_array = numpy.zeros((len(matrix),149))
for count in range(len(matrix)):
for a in range(len(matrix[count])):
new_array[count][a] = matrix[count][a]
# new_array[count][a] = int(''.join(matrix[count][a]))
return new_array
# Winnow Algorithm Code
def winnow(data):
w = numpy.ones(146) # Initializing the Weights vector
theta = 147 # Initializing theta
corrections = 0
for count in range(len(data)): # For each record
data_here = data[count] # Copy 2D array into a 1D array
y_here = data_here[148] # Separate label,
x_here = data_here[1:147] # Separate features,
weekday = data_here[0] # Separate the Weekday boolean. (Not using this as a feature yet)
dot_prod = numpy.dot(w,x_here)
if weekday: # Ignore Weekends
if dot_prod - theta > 0: # Winnow update code
y_new = 1
else:
y_new = 0
if y_here == 1 and y_new ==0: # If mistake is made
corrections += 1
for mycount in range(len(w)): # Loop over all features to find the True features
if x_here[mycount] == 1: # If feature is 1
w[mycount] = 2*w[mycount] # Promotive update
elif y_here == 0 and y_new == 1: # If mistake is made
corrections += 1
for mycount in range(len(w)): # Loop over features
if x_here[mycount] == 1: # If feature is 1
w[mycount] = w[mycount]/2 # Demotive update
return w,corrections
# Balanced Winnow Algorithm Code
def balanced_winnow(data):
w_pos = numpy.ones(146) # Initializing the Weights vector
w_neg = numpy.ones(146) # Initializing the Weights vector
theta = 147 # Initializing theta
corrections = 0
for count in range(len(data)): # For each record
data_here = data[count] # Copy 2D array into a 1D array
y_here = data_here[148] # Separate label,
x_here = data_here[1:147] # Separate features,
weekday = data_here[0] # Separate the Weekday boolean. (Not using this as a feature yet)
w = w_pos-w_neg
dot_prod = numpy.dot(w,x_here)
if weekday: # Ignore Weekends
if dot_prod - theta > 0: # Winnow update code
y_new = 1
else:
y_new = 0
if y_here == 1 and y_new ==0: # If mistake is made
corrections += 1
for mycount in range(len(w_pos)): # Loop over all features to find the True features
if x_here[mycount] == 1: # If feature is 1
w_pos[mycount] = 2*w_pos[mycount] # Promotive update
w_neg[mycount] = w_neg[mycount]/2
elif y_here == 0 and y_new == 1: # If mistake is made
corrections += 1
for mycount in range(len(w_pos)): # Loop over features
if x_here[mycount] == 1: # If feature is 1
w_pos[mycount] = w_pos[mycount]/2 # Demotive update
w_neg[mycount] = 2*w_neg[mycount]
w = w_pos - w_neg
return w,corrections
# Function to find the accuracy of the final_vector
def validate_winnow(final_vector, test_array):
theta = 147
correct_predictions = 0 # Variable to save the number of corrections
total_predictions = len(test_array) # Number of records ?
total_predictions_weekdays = 0
for current_prediction in range(total_predictions): # loop over all the records
y_here = test_array[current_prediction][148]
x_here = test_array[current_prediction][1:147] # Ignore the 0th element of the array. (Label)
weekday = test_array[current_prediction][0]
if weekday:
predicted = numpy.dot(final_vector,x_here) # What is the prediction of W (final vector) ?
if predicted >= theta:
y = 1
else:
y = 0
print "Predicted : ", y, " Actual : ", y_here
if predicted >= theta and y_here == 1: # Does it match the label ?
print "1"
correct_predictions += 1 # Increment correct_predictions
elif predicted < theta and y_here == 0: # Same as above
print "2"
correct_predictions += 1 # Increment correct_predictions
total_predictions_weekdays += 1
print "correct_predictions : ", correct_predictions, "total_predictions_weekdays : ", total_predictions_weekdays
correct_predictions = (correct_predictions/float(total_predictions_weekdays))*100 # Convert to average
return correct_predictions # Return predictions.
# Main function
def main():
learning_rate = 1 # Initializing learning rate
# Variable to define the algorithm to run
# Select 1 for Winnow
# Select 2 for Balanced Winnow
Algorithm_Select = 2
print "Running the Preceptron/Winnow code : "
my_data_file = open('Data_Training.csv') # Fetching and saving it as a list
with my_data_file as f:
data = csv.reader(f)
my_data_matrix = list(data)
array = read_data_csv(my_data_matrix) # Converting the list data into a Numpy Array
my_test_file = open('Data_Test.csv') # Fetching and saving it as a list
with my_test_file as f:
data = csv.reader(f)
my_test_matrix = list(data)
test_array = read_data_csv(my_test_matrix) # Converting the list data into a Numpy Array
num_of_tests = 10 # Defining the number of test to be performed
corrections = [] # Initializing arrays to compute accuracy and mistakes
accuracies = []
for count in range(num_of_tests): # Loop over the number of tests
if (Algorithm_Select == 1):
(final_vec, correction) = winnow(array)
curr_acc = validate_winnow(final_vec,test_array)
elif (Algorithm_Select == 2):
(final_vec, correction) = balanced_winnow(array)
curr_acc = validate_winnow(final_vec,test_array)
else:
curr_acc = 0
correction = 0
accuracies.append(curr_acc) # Saving the obtained accuracy
corrections.append(correction) # and correction
sys.stdout.write("\r Working on record : %d " % count ) # Code to just display the progress
sys.stdout.write("out of %d tests" % num_of_tests )
sys.stdout.flush()
sys.stdout.write("\r")
sys.stdout.flush()
# Code to print out the experimental statistics at the end of the run
print " Number of tests performed are :", num_of_tests, " "
print " Learning rate is set at :", learning_rate
print " Average number of mistakes done are :", numpy.average(corrections)
print " Average of accuracies obtained is :", numpy.average(accuracies), " and the Standard Deviation is ", \
numpy.std(accuracies)
return 0
if __name__ == '__main__':
main()