1 package ML.collaborativeFilltering;
2
3 import org.apache.spark.SparkConf;
4 import org.apache.spark.api.java.JavaDoubleRDD;
5 import org.apache.spark.api.java.JavaPairRDD;
6 import org.apache.spark.api.java.JavaRDD;
7 import org.apache.spark.api.java.JavaSparkContext;
8 import org.apache.spark.api.java.function.Function;
9 import org.apache.spark.mllib.recommendation.ALS;
10 import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
11 import org.apache.spark.mllib.recommendation.Rating;
12 import scala.Tuple2;
13
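/**
 * Collaborative filtering example: trains a Spark MLlib ALS model on
 * user/product ratings read from a text file and prints the mean squared
 * error of the model's predictions on those same ratings.
 *
 * @ClassName: example
 * @author: DingH
 * @since: 2019/4/10 16:03
 */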
21 public class example {
22 public static void main(String[] args) {
23 SparkConf conf = new SparkConf().setAppName("Java Collaborative Filtering Example");
24 JavaSparkContext jsc = new JavaSparkContext(conf);
25
26 // Load and parse the data
27 String path = "D:\\IdeaProjects\\SimpleApp\\src\\main\\resources\\data\\mllib\\als\\test.data";
28 JavaRDD<String> data = jsc.textFile(path);
29 JavaRDD<Rating> ratings = data.map(new Function<String, Rating>() {
30 public Rating call(String s) {
31 String[] sarray = s.split(",");
32 return new Rating(Integer.parseInt(sarray[0]), Integer.parseInt(sarray[1]), Double.parseDouble(sarray[2]));
33 }
34 }
35 );
36 int ranks = 10;
37 int numIterations = 10;
38 MatrixFactorizationModel model = ALS.train(ratings.rdd(), ranks, numIterations);
39
40 JavaRDD<Tuple2<Object, Object>> userProducts = ratings.map(new Function<Rating, Tuple2<Object, Object>>() {
41 public Tuple2<Object, Object> call(Rating r) {
42 return new Tuple2<Object, Object>(r.user(), r.product());
43 }
44 }
45 );
46 JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD(model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD().map(
47 new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
48 public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
49 return new Tuple2<Tuple2<Integer, Integer>, Double>(
50 new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
51 }
52 }
53 ));
54
55 JavaRDD<Tuple2<Double, Double>> ratesAndPreds = JavaPairRDD.fromJavaRDD(ratings.map(
56 new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
57 public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
58 return new Tuple2<Tuple2<Integer, Integer>, Double>(
59 new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
60 }
61 }
62 )).join(predictions).values();
63
64 double MSE = JavaDoubleRDD.fromRDD(ratesAndPreds.map(
65 new Function<Tuple2<Double, Double>, Object>() {
66 public Object call(Tuple2<Double, Double> pair) {
67 Double err = pair._1() - pair._2();
68 return err * err;
69 }
70 }
71 ).rdd()).mean();
72
73 System.out.println("Mean Squared Error = " + MSE);
74
75
76
77
78 }
79 }