# 读书笔记：集体智慧编程（1）

• 收集用户偏好
• 找到相似的用户或者物品
• 计算推荐

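数学公式：

欧几里得距离相似度（对应 sim_distance，只统计两人都评过分的物品）：

$$\mathrm{sim}(x, y) = \frac{1}{1 + \sqrt{\sum_{i}(x_i - y_i)^2}}$$

皮尔逊相关系数（对应 sim_pearson，$n$ 为两人共同评分的物品数）：

$$r = \frac{\sum_i x_i y_i - \frac{\sum_i x_i \sum_i y_i}{n}}{\sqrt{\left(\sum_i x_i^2 - \frac{(\sum_i x_i)^2}{n}\right)\left(\sum_i y_i^2 - \frac{(\sum_i y_i)^2}{n}\right)}}$$

加权推荐得分（对应 getRecommendations，只累加相似度 $\mathrm{sim}_u > 0$ 的用户 $u$）：

$$\mathrm{score}(item) = \frac{\sum_u \mathrm{sim}_u \cdot r_{u,item}}{\sum_u \mathrm{sim}_u}$$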

# -*- coding: utf-8 -*-

from math import sqrt

# A dictionary of movie critics and their ratings of a small
# set of movies. Shape: {critic name: {movie title: rating}}.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}

def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for two people.

    The score is ``1 / (1 + d)``, where ``d`` is the Euclidean distance
    between the two people's ratings, measured only over the items that
    both of them have rated.

    Args:
        prefs: Mapping of person -> {item: rating}.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        0 if the two people have no rated item in common; otherwise a
        float in (0, 1], where 1.0 means identical ratings on every
        shared item.

    Raises:
        KeyError: If either person is not a key of ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]

    # Only items rated by both people contribute to the distance.
    shared = [item for item in ratings1 if item in ratings2]
    if not shared:
        return 0

    sum_of_squares = sum(
        (ratings1[item] - ratings2[item]) ** 2 for item in shared)
    return 1 / (1 + sqrt(sum_of_squares))


def sim_pearson(prefs, person1, person2):
    """Return the Pearson correlation coefficient for two people.

    The coefficient is computed over the items that both people have
    rated.

    Args:
        prefs: Mapping of person -> {item: rating}.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        A value in [-1, 1] (up to floating-point rounding). 0 is returned
        when the two people share no rated items, or when either person's
        shared ratings have no variance, because the correlation is
        undefined in those cases.

    Raises:
        KeyError: If either person is not a key of ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]
    shared = [item for item in ratings1 if item in ratings2]

    # No overlap gives no evidence of similarity, so report "uncorrelated"
    # rather than a perfect match.
    if not shared:
        return 0

    # Use a float count so integer ratings are not floor-divided on Python 2.
    n = float(len(shared))

    sum1 = sum(ratings1[it] for it in shared)
    sum2 = sum(ratings2[it] for it in shared)

    sum1_sq = sum(ratings1[it] ** 2 for it in shared)
    sum2_sq = sum(ratings2[it] ** 2 for it in shared)

    p_sum = sum(ratings1[it] * ratings2[it] for it in shared)

    num = p_sum - sum1 * sum2 / n
    variance_product = (sum1_sq - sum1 ** 2 / n) * (sum2_sq - sum2 ** 2 / n)

    # Each factor is mathematically >= 0; rounding error can push the product
    # slightly below zero, which would make sqrt() raise ValueError.
    if variance_product <= 0:
        return 0

    return num / sqrt(variance_product)


def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the best matches for ``person`` from the ``prefs`` dictionary.

    Args:
        prefs: Mapping of person -> {item: rating}.
        person: Key of the person to find matches for.
        n: Maximum number of results to return (optional).
        similarity: Function ``(prefs, person, other) -> score`` used to
            compare two people (optional).

    Returns:
        A list of at most ``n`` ``(score, other_person)`` tuples, sorted so
        the highest scores appear first. ``person`` itself is excluded.
    """
    scores = [(similarity(prefs, person, other), other)
              for other in prefs if other != person]
    # Sort the list so the highest scores appear at the top.
    scores.sort(reverse=True)
    return scores[:n]


def getRecommendations(prefs, person, similarity=sim_pearson):
    """Get recommendations for ``person`` from everyone else's ratings.

    Each candidate item is scored as the similarity-weighted average of
    the ratings given by the other people.

    Args:
        prefs: Mapping of person -> {item: rating}.
        person: Key of the person to produce recommendations for.
        similarity: Function ``(prefs, person, other) -> score`` used to
            weight each other person's ratings (optional).

    Returns:
        A list of ``(predicted_rating, item)`` tuples sorted from highest
        to lowest predicted rating. Only items that ``person`` has not
        rated, or has rated 0, are included.
    """
    totals = {}    # item -> sum of (similarity * rating)
    sim_sums = {}  # item -> sum of the similarities that contributed

    for other in prefs:
        # Don't compare me to myself.
        if other == person:
            continue

        sim = similarity(prefs, person, other)
        # Ignore scores of zero or lower.
        if sim <= 0:
            continue

        for item, rating in prefs[other].items():
            # Only score items I haven't seen yet; a rating of 0 is
            # treated the same as a missing rating.
            if prefs[person].get(item, 0) == 0:
                totals[item] = totals.get(item, 0) + rating * sim
                sim_sums[item] = sim_sums.get(item, 0) + sim

    # Normalise by total similarity so that items rated by many people
    # are not favoured simply for having more ratings.
    rankings = [(total / sim_sums[item], item)
                for item, total in totals.items()]
    rankings.sort(reverse=True)
    return rankings

if __name__ == '__main__':
    # Demo: recommend movies for Toby. The parenthesised single-argument
    # form of print produces the same output on Python 2 and Python 3.
    print(getRecommendations(critics, 'Toby', similarity=sim_pearson))
View Code

posted @ 2014-11-11 21:43  cxy486  阅读(279)  评论(0)  编辑  收藏  举报