向MapReduce转换:计算共现关系
分两部分:
<strong><span style="font-size:18px;">/***
* @author YangXin
* @info 计算共现关系的Mapper
*/
package unitSix;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.VarLongWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
public class UserVectorToCooccurrenceMapper extends Mapper<VarLongWritable, VectorWritable, IntWritable, IntWritable>{
/**
 * Emits an (index, index) pair for every ordered combination of non-zero
 * entries in one input vector.
 *
 * <p>Both iterations run over the same vector, so each index is also paired
 * with itself and every unordered pair is emitted in both orders.
 *
 * @param userID     key of the input record; not read by this method
 * @param userVector wrapper around the vector whose non-zero indices are paired
 * @param context    task context that receives each emitted pair
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
public void map(VarLongWritable userID, VectorWritable userVector, Context context) throws IOException, InterruptedException{
    // Unwrap once; the original re-evaluated userVector.get() for every outer element.
    Vector vector = userVector.get();

    // Reused output holders: avoids allocating two objects per emitted pair.
    // NOTE(review): relies on context.write() serializing the pair before returning,
    // as Hadoop's own collectors do; a test double that keeps references would need copies.
    IntWritable rowIndex = new IntWritable();
    IntWritable columnIndex = new IntWritable();

    // Nested pass over the non-zero elements.
    Iterator<Vector.Element> outer = vector.nonZeroes().iterator();
    while (outer.hasNext()) {
        // Read the index right away rather than holding on to the Element.
        rowIndex.set(outer.next().index());
        Iterator<Vector.Element> inner = vector.nonZeroes().iterator();
        while (inner.hasNext()) {
            columnIndex.set(inner.next().index());
            // Emit the pair of item indices.
            context.write(rowIndex, columnIndex);
        }
    }
}
}</span></strong>
<strong><span style="font-size:18px;">/***
* @author YangXin
* @info Mahout实现计算共现关系的Reducer
*/
package unitSix;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
public class UserVectorToCooccurrenceReducer extends Reducer<IntWritable, IntWritable, IntWritable, VectorWritable>{
public void reduce(IntWritable itemIndex1, Iterable<IntWritable> itemIndex2s, Context context) throws IOException, InterruptedException{
Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
for(IntWritable intWritable : itemIndex2s){
int itemIndex2 = intWritable.get();
cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0);
}
context.write(itemIndex1, new VectorWritable(cooccurrenceRow));
}
}
</span></strong>
浙公网安备 33010602011771号