-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMovie
More file actions
120 lines (102 loc) · 2.62 KB
/
Movie
File metadata and controls
120 lines (102 loc) · 2.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#! /usr/bin/env python
#coding=utf-8
import os
import timeit
import random
# NOTE(review): this handle is opened at import time, is never closed, and is
# not referenced again anywhere in this file (input() opens its own local
# handle under the same name) -- confirm whether it can be dropped.
predict = open("data-v/predict.txt")
# user id -> {movie id: rating}; filled by preprocess().
t_user = {}
# movie id -> {user id: rating}; filled by preprocess().
t_movie= {}
# user id -> sorted list of (distance, other user id) tuples as returned by
# closestUser(); filled by distances().
user_dis = {}
def preprocess(path="data-v/training_set.txt"):
    """Load the training ratings into the module-level rating maps.

    Every non-blank line of *path* must hold at least three tab-separated
    fields: user id, movie id, rating.  Each rating is stored twice, as
    ``t_user[user][movie]`` and as ``t_movie[movie][user]``.  Once the file
    has been read, the number of distinct users is printed.

    :param path: training file to read; defaults to the location the
        script has always used.
    :raises IndexError: if a non-blank line has fewer than three fields.
    :raises ValueError: if the rating field is not a number.
    """
    with open(path) as train:
        for line in train:
            # Strip only the line terminator.  Slicing off a fixed two
            # characters corrupts LF-terminated files and a final line that
            # has no terminator at all.
            record = line.rstrip('\r\n')
            if not record:
                continue
            terms = record.split('\t')
            user, movie = terms[0], terms[1]
            x = float(terms[2])
            t_user.setdefault(user, {})[movie] = x
            # The per-movie map has to be keyed on the movie id; testing the
            # user id here used to overwrite a movie's earlier raters.
            t_movie.setdefault(movie, {})[user] = x
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(len(t_user))
    return
def manhattn(rating1, rating2):
    """Return the mean absolute rating difference over commonly rated items.

    :param rating1: mapping of item id -> numeric rating.
    :param rating2: mapping of item id -> numeric rating.
    :returns: the average of ``abs(rating1[k] - rating2[k])`` over the keys
        present in both mappings, as a float, or ``-1`` when the two
        mappings share no key.
    """
    diffs = [abs(rating1[item] - rating2[item])
             for item in rating1
             if item in rating2]
    if not diffs:
        # Sentinel kept for callers: no shared item means no distance.
        return -1
    # float() forces true division on Python 2 even for integer ratings.
    return sum(diffs) / float(len(diffs))
def closestUser(uId):
    """Rank every other user in ``t_user`` by rating distance from *uId*.

    :param uId: key of the reference user in ``t_user``.
    :returns: list of ``(distance, user)`` tuples for all users other than
        *uId*, ordered by ascending distance with ties broken by user id.
        Users that share no rated item with *uId* (``manhattn`` returns
        ``-1`` for them) are kept in the list but placed after every user
        with a real distance.
    :raises KeyError: if *uId* is not in ``t_user`` while other users are.
    """
    dis = [(manhattn(t_user[uId], t_user[user]), user)
           for user in t_user
           if user != uId]
    # A plain tuple sort would put the -1 "no overlap" sentinel ahead of
    # every real distance, ranking unrelated users as the nearest ones.
    # Sorting on (is_sentinel, distance, user) pushes them to the end.
    dis.sort(key=lambda pair: (pair[0] < 0, pair[0], pair[1]))
    return dis
def distances():
    """Fill ``user_dis`` with the ranked neighbour list of every user.

    For each key of ``t_user`` the result of ``closestUser(key)`` is stored
    in ``user_dis`` under that same key.  Returns ``None``.
    """
    for user_id in t_user:
        user_dis[user_id] = closestUser(user_id)
def input(path="data-v/predict.txt"):
    """Read the rows to predict, one per line, without line terminators.

    NOTE: the function name shadows the ``input`` builtin; it is kept
    because the script entry point calls it by this name.

    :param path: file to read; defaults to the location the script has
        always used.
    :returns: list of the file's non-blank lines with trailing CR/LF
        characters removed, in file order.
    """
    pre = []
    with open(path) as predict:
        for line in predict:
            # Strip only the terminator: a fixed two-character slice eats
            # data on LF-only files and on a last line with no newline.
            row = line.rstrip('\r\n')
            if row:
                pre.append(row)
    return pre
def predict1(pre, k=4):
    """Append a predicted rating to every ``user<TAB>movie`` row of *pre*.

    For each row, the first *k* entries of ``user_dis[user]`` are looked up
    in ``t_user``.  The prediction is the mean of the ratings those users
    gave the movie, or the literal ``4`` when none of them rated it or the
    user has no entry in ``user_dis``.  A tab, the prediction and a CRLF
    terminator are appended to the row.  The running row count is printed
    after every 100th row.

    :param pre: list of ``user<TAB>movie`` strings; modified in place.
    :param k: number of leading neighbours consulted (the original
        hard-coded value was 4).
    :returns: the same list object *pre*.
    :raises IndexError: if a row has no tab-separated movie field.
    """
    for row, record in enumerate(pre):
        if (row + 1) % 100 == 0:
            print(row + 1)
        terms = record.split('\t')
        user, movie = terms[0], terms[1]
        # Users absent from the training data get no neighbours and fall
        # through to the default rating instead of raising KeyError.
        neighbours = user_dis.get(user, [])[:k]
        ratings = [t_user[other][movie]
                   for _, other in neighbours
                   if movie in t_user[other]]
        if ratings:
            guess = str(sum(ratings) / float(len(ratings)))
        else:
            guess = '4'
        # Write back by the row's own index; the old inner loop reused the
        # outer index variable and so updated the wrong row.
        pre[row] = record + '\t' + guess + '\r\n'
    return pre
def output(ans_predict, path="output/predict.txt"):
    """Write the prediction rows to *path*, replacing any existing file.

    :param ans_predict: iterable of strings; each is written verbatim, so
        rows must already carry their own line terminator.
    :param path: destination file; defaults to the location the script has
        always used.  The containing directory must already exist.
    """
    # The with-block closes the handle even if a write fails part-way.
    with open(path, "w") as out:
        out.writelines(ans_predict)
if __name__ == "__main__":
    # Pipeline: load ratings, rank neighbours for every user, then predict
    # and write the results.  After each stage the wall-clock time elapsed
    # since start-up is printed.  Single-argument print(...) produces the
    # same output on Python 2 and Python 3.
    starttime = timeit.default_timer()
    print('begin')

    preprocess()
    print(str(timeit.default_timer() - starttime) + " seconds")

    distances()
    print(str(timeit.default_timer() - starttime) + " seconds")

    print('step1')
    pre = input()
    ans_predict = predict1(pre)
    output(ans_predict)
    print(str(timeit.default_timer() - starttime) + " seconds")