# coding=gbk
'''
选择用户反馈的物品
将评分>3定义为喜欢
'''
import pandas as pd
import numpy as np
import copy
#获取区分度
def getDiff(userRates, movie):
    """Split users by their feedback on ``movie`` and score the split.

    Users are partitioned into three groups:

    * like    -- rated ``movie`` strictly above 3
    * dislike -- rated ``movie`` 3 or below
    * unknown -- have no rating for ``movie``

    For each group, the ratings its users gave to every *other* movie are
    pooled, and the population variance (``np.var``) of each non-empty pool
    is added up to form the score.

    Args:
        userRates: dict mapping user -> {movie: rating}.
        movie: the candidate movie to split on.

    Returns:
        Tuple ``(diff, like, dislike, unknown)``. ``diff`` is the summed
        variance (the int 0 when every pool is empty). The other three are
        dicts mapping user -> that user's ratings dict; the ratings dicts
        are the same objects held by ``userRates``, not copies.
    """
    like, dislike, unknown = {}, {}, {}
    likeArray, dislikeArray, unknownArray = [], [], []

    for user, ratings in userRates.items():
        # Pick the group (and its rating pool) this user belongs to.
        if movie not in ratings:
            group, pool = unknown, unknownArray
        elif ratings[movie] > 3:
            group, pool = like, likeArray
        else:
            group, pool = dislike, dislikeArray
        group[user] = ratings
        # The rating given to the split movie itself is left out of the pool.
        # For the unknown group the filter never excludes anything, because
        # the movie is absent from the user's ratings.
        pool.extend(r for mv, r in ratings.items() if mv != movie)

    diff = 0
    # np.var of an empty sequence is NaN, so empty pools are skipped.
    for pool in (likeArray, dislikeArray, unknownArray):
        if pool:
            diff += np.var(pool)
    return (diff, like, dislike, unknown)
def select(mvs, userRates, node, exceptMvs, lv, maxDepth=3):
    """Recursively build a like/dislike/unknown tree of movies to ask about.

    At each node the movie with the highest ``getDiff`` score over
    ``userRates`` is chosen (ties keep the first one met while iterating
    ``mvs``). The users are then split by their feedback on that movie and
    each subset is handled by a child node, until ``maxDepth`` levels exist.

    Args:
        mvs: iterable of candidate movies; it is iterated once per call.
        userRates: dict mapping user -> {movie: rating} for the users that
            reached this node.
        node: dict for the current tree node. It must already hold a 'tag'
            key; 'movie' and, below the depth limit, the children 'like',
            'dislike' and 'unknown' are written into it in place.
        exceptMvs: list of movies already used on the path to this node.
            The chosen movie is appended to it in place.
        lv: 1-based level of this node.
        maxDepth: deepest level that still gets a node (default 3).

    Returns:
        None. The result is the mutated ``node``. ``lv`` and ``node['tag']``
        are printed as progress output.
    """
    like, dislike, unknown = {}, {}, {}
    maxDiff = -100
    # Sentinel that stays in place when every movie in mvs is excluded.
    bestmv = -1000
    for mv in mvs:
        if mv in exceptMvs:
            continue
        diff, tmpa, tmpb, tmpc = getDiff(userRates, mv)
        if diff > maxDiff:
            bestmv, maxDiff = mv, diff
            like, dislike, unknown = tmpa, tmpb, tmpc

    exceptMvs.append(bestmv)
    node['movie'] = bestmv
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(lv)
    print(node['tag'])

    if lv + 1 <= maxDepth:
        branches = (('like', like), ('dislike', dislike), ('unknown', unknown))
        # Create all three children first, then descend into them in order.
        for tag, _ in branches:
            node[tag] = {'tag': tag}
        for tag, subset in branches:
            # Each child gets its own copy of the exclusion list, so a pick
            # made in one subtree does not exclude that movie in a sibling.
            select(mvs, subset, node[tag], copy.deepcopy(exceptMvs),
                   lv + 1, maxDepth)
# Load the first 80000 rows of the ratings file. A multi-character separator
# is only supported by the python parser, so it is requested explicitly
# instead of relying on the fallback (which emits a ParserWarning).
data = pd.read_csv('data/ratings.dat', sep='::', nrows=80000, header=None,
                   engine='python')
# Keep columns 0, 1 and 2. They are used below as user, item and rating;
# presumably the file is MovieLens-style "user::movie::rating::timestamp"
# (TODO confirm). .loc slices by label and includes both ends, which is what
# the removed .ix accessor did for these integer column labels.
data = data.loc[:, 0:2]
# Group by the scalar key 0 (not the list [0]) so that each group key is a
# plain user id on every pandas version rather than a 1-tuple.
groups = data.groupby(0)
# Organise the data as rates[user][item] = rating.
rates = dict()
for user, group in groups:
    rates[user] = dict(zip(group[1].tolist(), group[2].tolist()))
# Collect the set of all items that appear in the loaded rows.
movies = set(data[1].tolist())
root = {'tag': 'root'}
select(movies, rates, root, [], 1)
print(root)