hadoop-矩阵相乘法(MapReduce学习)

列行相乘法:参见高度可伸缩的稀疏矩阵乘法_吴志川.pdf

package
org.bigdata508.util; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.bigdata.util.HadoopCfg; import org.apache.hadoop.mapreduce.lib.input.FileSplit; /* * 2016-5-8 * @author:lixin * 求矩阵乘积 * */ public class Matrix { public static int tFlag = 0; private static class MatrixMapper extends Mapper<LongWritable, Text, IntWritable, Text>{ private static int columnN = 0; private static int rowM = 0; @Override protected void setup(Mapper<LongWritable, Text, IntWritable, Text>.Context context) throws IOException, InterruptedException { Configuration conf = HadoopCfg.getInstance(); columnN = conf.getInt("columnN",0); rowM = conf.getInt("rowM",0); } @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, IntWritable, Text>.Context context) throws IOException, InterruptedException { FileSplit fileSplit = (FileSplit) context.getInputSplit(); String fileName = fileSplit.getPath().getName(); String str = value.toString(); String[] strs = str.split(","); int i = Integer.parseInt(strs[0]); String[] strs2 = strs[1].split("\t"); int j = Integer.parseInt(strs2[0]); int val = Integer.parseInt(strs2[1]); if (fileName.startsWith("M")) { context.write(new IntWritable(j), new Text("M," + i + "," + val + "")); } else {// fileName == N context.write(new IntWritable(i), new Text("N," + j + "," + val + "")); } } } private static class MatrixReducer extends Reducer<IntWritable, Text, Text, IntWritable> { private static int columnN = 0; private static int rowM = 0; @Override protected void setup( Reducer<IntWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); columnN = conf.getInt("columnN",0); rowM = conf.getInt("rowM",0); } @Override protected void reduce(IntWritable key, Iterable<Text> values, Reducer<IntWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { int[] mArray = new int[rowM + 1]; int[] nArray = new int[columnN + 1]; for (Text value : values) { String str = value.toString(); String[] strs = str.split(","); if (strs[0].equals("M")) { mArray[Integer.parseInt(strs[1])] = Integer .parseInt(strs[2]); } else { // N nArray[Integer.parseInt(strs[1])] = Integer .parseInt(strs[2]); } } for (int i = 1; i <= rowM; i++) { for(int j = 1;j <= columnN;j ++){ // System.out.println("mArray[i]: "+mArray[i]+"nArray[j]: "+nArray[j]); context.write(new Text(i+","+j), new IntWritable(mArray[i]*nArray[j])); } } } } private static class MatrixMapper2 extends Mapper<LongWritable, Text, Text, IntWritable>{ @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { String str = value.toString(); String keyOut[] = str.split("\t"); context.write(new Text(keyOut[0]), new IntWritable(Integer.parseInt(keyOut[1]))); } } private static class MatrixReducer2 extends Reducer<Text, IntWritable, Text, Text> { @Override protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, Text>.Context context) throws IOException, InterruptedException { int sum = 0; for(IntWritable value : values){ sum += value.get(); } context.write(key, new Text(""+sum)); } } public static void runFirstReduce() throws Exception{ Configuration config = HadoopCfg.getInstance(); config.setInt("rowM",2); config.setInt("columnM",2); config.setInt("columnN",3); Job job = Job.getInstance(config,"矩阵运算第一次reduce"); job.setJarByClass(Matrix.class); job.setMapperClass(MatrixMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); job.setReducerClass(MatrixReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path("/MatrixInput")); FileOutputFormat.setOutputPath(job,new Path("/output/reduce1")); job.waitForCompletion(true); } public static void runSecondReduce() throws Exception{ Configuration config = HadoopCfg.getInstance(); Job job = Job.getInstance(config,"矩阵运算第二次reduce"); job.setJarByClass(Matrix.class); job.setMapperClass(MatrixMapper2.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(MatrixReducer2.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path("/output/reduce1")); FileOutputFormat.setOutputPath(job,new Path("/output/reduce2")); job.waitForCompletion(true); } public static void main(String[] args) throws Exception { runFirstReduce(); runSecondReduce(); } }

 

 

 

 

 

 

 

普通方法:

package org.bigdata508.util;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.bigdata.util.HadoopCfg;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/*
 * 2016-5-8
 * @author:lixin
 * 求矩阵乘积
 * */
public class Matrix {
    
    public static int tFlag = 0;
    private static class MatrixMapper extends Mapper<LongWritable, Text, Text, Text>{

        private static int columnN = 0;
        private static int rowM = 0;
        
        @Override
        protected void setup(Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
//            Configuration conf = HadoopCfg.getInstance();
            columnN = 3;//conf.getInt("columnN",0);
            rowM = 2;//conf.getInt("rowM",0);
        }
        
        
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            FileSplit fileSplit = (FileSplit) context.getInputSplit();
            String fileName = fileSplit.getPath().getName();
            String str = value.toString();
            String[] strs = str.split(",");
            int i = Integer.parseInt(strs[0]);
            String[] strs2 = strs[1].split("\t");
            int j = Integer.parseInt(strs2[0]);
            int val = Integer.parseInt(strs2[1]);
            if (fileName.startsWith("M")) {
                for (int count = 1; count <= columnN; count++) {
                    context.write(new Text(i + "," + count), new Text("M," + j
                            + "," + val + ""));
                }
            } else {// fileName == N
                for (int count = 1; count <= rowM; count++) {
                    context.write(new Text(count + "," + j), new Text("N," + i
                            + "," + val + ""));
                }
            }
        }
    }
    
    private static class MatrixReducer extends
            Reducer<Text, Text, Text, IntWritable> {
        private static int columnM = 0;
        
        @Override
        protected void setup(
                Reducer<Text, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
//            Configuration config = context.getConfiguration();
            columnM = 2;//config.getInt("columnM", 0);
        }
        
        @Override
        protected void reduce(Text key, Iterable<Text> values,
                Reducer<Text, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            System.out.println(key);
            int finalVal = 0;
            int[] mArray = new int[columnM + 1];
            int[] nArray = new int[columnM + 1];
            for (Text value : values) {
                String str = value.toString();
                String[] strs = str.split(",");
                if (strs[0].equals("M")) {
                    mArray[Integer.parseInt(strs[1])] = Integer
                            .parseInt(strs[2]);
                } else { // N
                    nArray[Integer.parseInt(strs[1])] = Integer
                            .parseInt(strs[2]);
                }
            }
            for (int i = 1; i < columnM + 1; i++) {
                finalVal += (mArray[i] * nArray[i]);
            }
            context.write(key, new IntWritable(finalVal));
        }
        
        }
        
        public static void main(String[] args) throws Exception {
        Configuration config = HadoopCfg.getInstance();
//        config.setInt("rowM",100);
//        config.setInt("columnM",90);
//        config.setInt("columnN",70);
        
        Job job = Job.getInstance(config,"矩阵运算");
        job.setJarByClass(Matrix.class);
        job.setMapperClass(MatrixMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        
        job.setReducerClass(MatrixReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        
        FileInputFormat.addInputPath(job, new Path("/MatrixInput"));
        FileOutputFormat.setOutputPath(job,new Path("/output/"));
        System.exit(job.waitForCompletion(true)? 0 : 1);
        
        }
}

 

M-Matrix
1,1 1 1,2 2 2,1 1 2,2 3
N-Matrix
1,1 1 1,2 2 1,3 4 2,1 1 2,2 3 2,3 8

 

posted @ 2016-05-14 09:13  Decmber  阅读(1126)  评论(0编辑  收藏  举报