mahout推荐12-相似度方法汇总
这里汇总了 Mahout 中各种计算用户相似度的方法,可供参考。下面给出可以实际运行的代码。
数据文件 intro.csv 的内容如下(直接复制并保存为 data/intro.csv 即可):
1,101,5.0
1,102,3.0
1,103,2.5
2,101,2.0
2,102,2.5
2,103,5.0
2,104,2.0
3,101,2.5
3,104,4.0
3,105,4.5
3,107,5.0
4,101,5.0
4,103,3.0
4,104,4.5
4,106,4.0
5,101,4.0
5,102,3.0
5,103,2.0
5,104,4.0
5,105,3.5
5,106,4.0
代码如下(均带有注释),运行结果这里就不贴出了。
package mahout;
import java.io.File;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.eval.DataModelBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.CachingUserSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.common.RandomUtils;
/**
*
* @author Administrator
*
*/
public class TestRecommenderEvaluator2 {
public static void main(String[] args) throws Exception {
//强制每次生成相同的随机值,生成可重复的结果
RandomUtils.useTestSeed();
//数据装填,无偏好值的处理
//DataModel dataModel = new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(new FileDataModel(new File("data/ua.base"))));
DataModel dataModel = new FileDataModel(new File("data/intro.csv"));
//推荐评估,使用平均值
RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
//推荐评估,使用均方差
//RecommenderEvaluator evaluator = new RMSRecommenderEvaluator();
//用于生成推荐引擎的构建器,与上一例子实现相同
RecommenderBuilder builder = new RecommenderBuilder() {
public Recommender buildRecommender(DataModel model) throws TasteException {
// TODO Auto-generated method stub
//用户相似度,多种方法
//皮尔逊相关系数,未引入权重,同余弦相似度
//UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
// 皮尔逊相关系数,引入了权重
//UserSimilarity similarity = new PearsonCorrelationSimilarity(model,Weighting.WEIGHTED);
// 欧式距离定义相似度
//UserSimilarity similarity = new EuclideanDistanceSimilarity(model);
// 斯皮尔曼相关系数
//UserSimilarity similarity = new SpearmanCorrelationSimilarity(model);
// 斯皮尔曼相关系数 缓存级别的
//UserSimilarity similarity = new CachingUserSimilarity(new SpearmanCorrelationSimilarity(model), model);
// 谷本系数(忽略偏好值的)
// UserSimilarity similarity = new TanimotoCoefficientSimilarity(model);
// 对数似然法
UserSimilarity similarity = new LogLikelihoodSimilarity(model);
//用户邻居
UserNeighborhood neighborhood = new NearestNUserNeighborhood(2, similarity, model);
//一个推荐器
return new GenericUserBasedRecommender(model, neighborhood, similarity);
}
};
/*DataModelBuilder modelBuilder = new DataModelBuilder() {
public DataModel buildDataModel(FastByIDMap<PreferenceArray> arg0) {
// TODO Auto-generated method stub
return new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(arg0));
}
};*/
//推荐程序评估值(平均差值)训练90%的数据,测试数据10%,《mahout in Action》使用的是0.7,但是出现结果为NaN
double score = evaluator.evaluate(builder, null, dataModel, 0.9, 1.0);
System.out.println(score);
}
}
浙公网安备 33010602011771号