mahout推荐12-相似度方法汇总

这里汇总了 Mahout 中各种计算用户相似度的方法,可供参考。下面给出可以实际运行的代码。

数据文件 intro.csv 的内容如下(每行依次为:用户ID,物品ID,偏好值),直接复制并保存为 data/intro.csv 即可:

1,101,5.0
1,102,3.0
1,103,2.5

2,101,2.0
2,102,2.5
2,103,5.0
2,104,2.0

3,101,2.5
3,104,4.0
3,105,4.5
3,107,5.0

4,101,5.0
4,103,3.0
4,104,4.5
4,106,4.0

5,101,4.0
5,102,3.0
5,103,2.0
5,104,4.0
5,105,3.5
5,106,4.0

 

代码如下,各处均带有注释;运行结果这里就不贴出了。

package mahout;

import java.io.File;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.eval.DataModelBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.CachingUserSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.common.RandomUtils;
/**
 * 
 * @author Administrator
 *
 */
public class TestRecommenderEvaluator2 {

	public static void main(String[] args) throws Exception {
		//强制每次生成相同的随机值,生成可重复的结果
		RandomUtils.useTestSeed();
		//数据装填,无偏好值的处理
		//DataModel dataModel = new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(new FileDataModel(new File("data/ua.base"))));
		DataModel dataModel = new FileDataModel(new File("data/intro.csv"));
		
		//推荐评估,使用平均值
		RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();
		//推荐评估,使用均方差
		//RecommenderEvaluator evaluator = new RMSRecommenderEvaluator();
		//用于生成推荐引擎的构建器,与上一例子实现相同
		RecommenderBuilder builder = new RecommenderBuilder() {
			
			public Recommender buildRecommender(DataModel model) throws TasteException {
				// TODO Auto-generated method stub
				//用户相似度,多种方法
				//皮尔逊相关系数,未引入权重,同余弦相似度
				//UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
				// 皮尔逊相关系数,引入了权重
				//UserSimilarity similarity = new PearsonCorrelationSimilarity(model,Weighting.WEIGHTED);
				// 欧式距离定义相似度
				//UserSimilarity similarity = new EuclideanDistanceSimilarity(model);
				// 斯皮尔曼相关系数
				//UserSimilarity similarity = new SpearmanCorrelationSimilarity(model);
				// 斯皮尔曼相关系数 缓存级别的
				//UserSimilarity similarity = new CachingUserSimilarity(new SpearmanCorrelationSimilarity(model), model);
				// 谷本系数(忽略偏好值的)
				// UserSimilarity similarity = new TanimotoCoefficientSimilarity(model);
				// 对数似然法
				UserSimilarity similarity = new LogLikelihoodSimilarity(model);
				//用户邻居
				UserNeighborhood neighborhood = new NearestNUserNeighborhood(2, similarity, model);
				//一个推荐器
				return new GenericUserBasedRecommender(model, neighborhood, similarity);
			}
		};
		/*DataModelBuilder modelBuilder = new DataModelBuilder() {
			
			public DataModel buildDataModel(FastByIDMap<PreferenceArray> arg0) {
				// TODO Auto-generated method stub
				return new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(arg0));
			}
		};*/
		//推荐程序评估值(平均差值)训练90%的数据,测试数据10%,《mahout in Action》使用的是0.7,但是出现结果为NaN
		double score = evaluator.evaluate(builder, null, dataModel, 0.9, 1.0);
		System.out.println(score);
	}
}

 

posted @ 2014-08-05 11:50  jseven  阅读(892)  评论(0编辑  收藏  举报