import org.apache.hadoop.conf.{Configuration, Configured}
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{IntWritable, LongWritable, Text}
import org.apache.hadoop.mapreduce.{Job, Mapper, Reducer}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.util.{Tool, ToolRunner}

// Needed to iterate over the java.lang.Iterable handed to reduce().
import scala.collection.JavaConverters._
/**
 * Classic word count on Hadoop's "new" MapReduce API
 * (org.apache.hadoop.mapreduce), written in Scala: the mapper emits
 * (word, 1) pairs and the reducer sums the counts per word.
 */
object WordCount extends Configured with Tool
{
  class Map extends Mapper[LongWritable, Text, Text, IntWritable]
  {
    private val one: IntWritable = new IntWritable(1)
    // Reused across map() calls to avoid allocating a Text per token; it
    // must be initialised here, or word.set(item) below throws an NPE.
    private val word: Text = new Text()

    override def map(key: LongWritable, rowLine: Text, context: Mapper[LongWritable, Text, Text, IntWritable]#Context): Unit =
    {
      val line = rowLine.toString.trim
      if (line.isEmpty) return
      // Emit (token, 1) for each whitespace-separated token; splitting on
      // \s+ (rather than a single space) avoids counting empty tokens when
      // words are separated by tabs or runs of spaces.
      for (item <- line.split("\\s+")) {
        word.set(item)
        context.write(word, one)
      }
    }
  }
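  // Illustrative example: the line "to be or not to be" makes the mapper
  // emit (to,1), (be,1), (or,1), (not,1), (to,1), (be,1); the framework
  // then groups these pairs by key before the reduce phase.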
  class Reduce extends Reducer[Text, IntWritable, Text, IntWritable]
  {
    private val count: IntWritable = new IntWritable()

    // values must be declared as java.lang.Iterable, not scala.Iterable,
    // or this method does not actually override Reducer.reduce.
    override def reduce(key: Text, values: java.lang.Iterable[IntWritable], context: Reducer[Text, IntWritable, Text, IntWritable]#Context): Unit =
    {
      var sum = 0
      for (i <- values.asScala) sum += i.get
      count.set(sum)
      context.write(key, count)
    }
  }
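  // Illustrative example: after the shuffle, the reducer receives
  // ("to", [1, 1]) and writes ("to", 2). Because its output types match its
  // input types, the same class can safely serve as the combiner set in run().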
  def run(args: Array[String]): Int =
  {
    val conf = getConf
    // Job.getInstance is the non-deprecated factory on Hadoop 2.x;
    // on Hadoop 1.x, use new Job(conf, "WordCount") instead.
    val job = Job.getInstance(conf, "WordCount")
    job.setJarByClass(this.getClass)

    job.setOutputKeyClass(classOf[Text])
    job.setOutputValueClass(classOf[IntWritable])
    job.setMapperClass(classOf[Map])
    job.setReducerClass(classOf[Reduce])
    // Addition is associative and commutative, so the reducer doubles as a
    // combiner, shrinking map output before the shuffle.
    job.setCombinerClass(classOf[Reduce])

    FileInputFormat.addInputPath(job, new Path(args(0)))
    // Note: the output directory must not already exist, or the job fails fast.
    FileOutputFormat.setOutputPath(job, new Path(args(1)))

    if (job.waitForCompletion(true)) 0 else 1
  }
  def main(args: Array[String]): Unit =
  {
    // ToolRunner parses generic Hadoop options (-D key=value, -files, ...)
    // and passes the remaining arguments on to run().
    val conf: Configuration = new Configuration()
    System.exit(ToolRunner.run(conf, this, args))
  }
}
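
// A minimal sketch of how this job might be packaged and launched; the jar
// name and HDFS paths below are illustrative assumptions, not part of the
// original source:
//
//   hadoop jar wordcount.jar WordCount /user/riley/books /user/riley/counts
//
// Thanks to ToolRunner, generic options can be passed before the paths, e.g.:
//
//   hadoop jar wordcount.jar WordCount -D mapreduce.job.reduces=2 in out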