1 #定义几种距离计算函数
2 #更高效的方式为把得分向量化之后使用scipy中定义的distance方法
3
4 from math import sqrt
def euclidean_dis(rating1, rating2):  # Euclidean distance
    """Return the Euclidean distance between two rating dicts.

    Only items that appear in both dicts contribute to the distance.

    Args:
        rating1: Mapping of item name to numeric score, e.g.
            {'Tiny Times 4': 1.0, 'Zootopia': 5.0}.
        rating2: Mapping with the same layout as ``rating1``.

    Returns:
        The square root of the summed squared score differences over the
        shared items, or -1 when the two dicts share no items.
    """
    squared_diffs = [
        # ``**`` is exponentiation; ``^`` is bitwise XOR and fails on floats.
        (rating1[key] - rating2[key]) ** 2
        for key in rating1
        if key in rating2
    ]
    # A distance of 0 is a valid result (identical scores), so the
    # "nothing in common" sentinel is decided by emptiness, not by the sum.
    if not squared_diffs:
        return -1
    return sqrt(sum(squared_diffs))
20
21
def manhattan_dis(rating1, rating2):  # Manhattan distance
    """Return the Manhattan (L1) distance between two rating dicts.

    Both arguments map an item name to a numeric score, e.g.
    {'Tiny Times 4': 1.0, 'Zootopia': 5.0}. Only items present in both
    dicts are compared.

    Returns the sum of absolute score differences over the shared items,
    or -1 when the two dicts have no item in common.
    """
    # Collect the shared items first, in rating1's iteration order.
    shared_items = [item for item in rating1 if item in rating2]

    # No overlap: report the sentinel instead of a distance.
    if not shared_items:
        return -1

    total = 0
    for item in shared_items:
        total += abs(rating1[item] - rating2[item])
    return total
37
def cos_dis(rating1, rating2):  # cosine distance
    """Return the cosine distance (1 - cosine similarity) of two rating dicts.

    The dot product is taken over the items present in both dicts, while
    each vector norm is taken over all scores of its own dict.

    Args:
        rating1: Mapping of item name to numeric score, e.g.
            {'Tiny Times 4': 1.0, 'Zootopia': 5.0}.
        rating2: Mapping with the same layout as ``rating1``.

    Returns:
        ``1 - dot / sqrt(|rating1|^2 * |rating2|^2)``, or -1 when the dicts
        share no items or when either dict has only zero scores (the cosine
        is undefined for a zero-length vector).
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        return -1

    # Squared vector lengths. ``**`` is exponentiation; ``^`` is bitwise XOR
    # and raises TypeError on float scores.
    sq_norm_1 = sum(score ** 2 for score in rating1.values())
    sq_norm_2 = sum(score ** 2 for score in rating2.values())

    denominator = sqrt(sq_norm_1 * sq_norm_2)
    # Guard the division: an all-zero rating vector has no direction.
    if denominator == 0:
        return -1

    dot = sum(rating1[key] * rating2[key] for key in shared)
    return 1 - dot / denominator
61
def pearson_dis(rating1, rating2):  # Pearson correlation
    """Return the Pearson correlation of two rating dicts.

    Despite the ``_dis`` suffix, the value is a similarity in [-1, 1]
    (up to rounding), computed over the items present in both dicts with
    the single-pass sums formula.

    Args:
        rating1: Mapping of item name to numeric score, e.g.
            {'Tiny Times 4': 1.0, 'Zootopia': 5.0}.
        rating2: Mapping with the same layout as ``rating1``.

    Returns:
        The correlation coefficient, or 0 when the dicts share no items or
        when either side has zero variance over the shared items.
    """
    sum_xy = 0
    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_y2 = 0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += x ** 2
            sum_y2 += y ** 2

    # Without shared items every term below would divide by zero; report
    # "no correlation", the same value used for a zero denominator.
    if n == 0:
        return 0

    # Rounding can push a zero variance slightly below 0, which would make
    # sqrt() raise ValueError, so clamp both terms at 0.
    var_x = max(sum_x2 - sum_x ** 2 / n, 0)
    var_y = max(sum_y2 - sum_y ** 2 / n, 0)
    denominator = sqrt(var_x) * sqrt(var_y)
    if denominator == 0:
        return 0
    return (sum_xy - (sum_x * sum_y) / n) / denominator