Hadoop Eclipse Environment

 Just import the required libraries into the Eclipse project's build path:

commons-cli-1.2.jar
hadoop-common-3.2.0.jar
hadoop-mapreduce-client-core-3.2.0.jar
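
If the Eclipse project is Maven-based (m2e) rather than built by adding jars by hand, roughly the following dependencies cover the same classes. This is only a sketch; the versions should match the cluster's Hadoop release:

<dependencies>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.2.0</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>3.2.0</version>
  </dependency>
  <dependency>
    <groupId>commons-cli</groupId>
    <artifactId>commons-cli</artifactId>
    <version>1.2</version>
  </dependency>
</dependencies>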

 

package com.hisi.a;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.StringUtils;

public class WordCount2 {

  public static class TokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    static enum CountersEnum { INPUT_WORDS }

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    private boolean caseSensitive;
    private Set<String> patternsToSkip = new HashSet<String>();

    private Configuration conf;
    private BufferedReader fis;

    @Override
    public void setup(Context context) throws IOException, InterruptedException {
      conf = context.getConfiguration();
      // Behaviour is driven by -D options: case sensitivity and skip patterns.
      caseSensitive = conf.getBoolean("wordcount.case.sensitive", true);
      if (conf.getBoolean("wordcount.skip.patterns", false)) {
        URI[] patternsURIs = Job.getInstance(conf).getCacheFiles();
        for (URI patternsURI : patternsURIs) {
          Path patternsPath = new Path(patternsURI.getPath());
          String patternsFileName = patternsPath.getName().toString();
          parseSkipFile(patternsFileName);
        }
      }
    }

    private void parseSkipFile(String fileName) {
      try {
        fis = new BufferedReader(new FileReader(fileName));
        String pattern = null;
        while ((pattern = fis.readLine()) != null) {
          patternsToSkip.add(pattern);
        }
      } catch (IOException ioe) {
        System.err.println("Caught exception while parsing the cached file '"
            + StringUtils.stringifyException(ioe));
      }
    }

    @Override
    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      String line = (caseSensitive) ?
          value.toString() : value.toString().toLowerCase();
      // Remove every skip pattern (treated as a regex) from the line.
      for (String pattern : patternsToSkip) {
        line = line.replaceAll(pattern, "");
      }
      StringTokenizer itr = new StringTokenizer(line);
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
        Counter counter = context.getCounter(CountersEnum.class.getName(),
            CountersEnum.INPUT_WORDS.toString());
        counter.increment(1);
      }
    }
  }

  public static class IntSumReducer
       extends Reducer<Text, IntWritable, Text, IntWritable> {
    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values,
                       Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser optionParser = new GenericOptionsParser(conf, args);
    String[] remainingArgs = optionParser.getRemainingArgs();
    if ((remainingArgs.length != 2) && (remainingArgs.length != 4)) {
      System.err.println("Usage: wordcount <in> <out> [-skip skipPatternFile]");
      System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount2.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < remainingArgs.length; ++i) {
      if ("-skip".equals(remainingArgs[i])) {
        // Ship the skip-pattern file to every task via the distributed cache.
        job.addCacheFile(new Path(remainingArgs[++i]).toUri());
        job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
      } else {
        otherArgs.add(remainingArgs[i]);
      }
    }
    FileInputFormat.addInputPath(job, new Path(otherArgs.get(0)));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
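
Before exporting anything, the job can also be run straight from Eclipse in local mode for a quick test. The class below is a hypothetical helper, not part of the original post: it reuses the -D generic options that WordCount2's GenericOptionsParser already understands, and it assumes the full Hadoop client libraries (not just the three jars listed above) are on the Eclipse classpath. "in" and "out" are local directories; "out" must not exist before the run.

package com.hisi.a;

// Hypothetical local-mode launcher (assumption, for testing from Eclipse only).
public class WordCount2LocalRun {
  public static void main(String[] args) throws Exception {
    WordCount2.main(new String[] {
        "-Dmapreduce.framework.name=local",  // local job runner instead of YARN
        "-Dfs.defaultFS=file:///",           // local filesystem instead of HDFS
        "in",                                // local input directory (hypothetical)
        "out"                                // local output directory, must not exist
    });
  }
}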

 Use Eclipse's Export... to export the project as a Runnable JAR file: wc2.jar
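
Alternatively, the jar can be built without Eclipse; a minimal sketch, assuming the hadoop command is on the PATH and the source file lives under src/com/hisi/a/. Since the class is named explicitly on the hadoop jar command line below, no Main-Class manifest entry is needed:

mkdir -p classes
javac -cp $(hadoop classpath) -d classes src/com/hisi/a/WordCount2.java
jar cf wc2.jar -C classes .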

hadoop@muhe221:~/test/temp$ hadoop fs -ls /input
Found 1 items
-rw-r--r--   1 hadoop supergroup         30 2019-02-28 10:13 /input/hehe.txt
hadoop@muhe221:~/test/temp$ hadoop fs -cat /input/hehe.txt
Hello World
hello sweat heart

 

hadoop@muhe221:~/test$ hadoop jar wc2.jar com.hisi.a.WordCount2 /input /output
......
hadoop@muhe221:~/test$ hadoop fs -cat /output/part-r-00000
Hello 1
World 1
heart 1
hello 1
sweat 1
hadoop@muhe221:~/test$ hadoop fs -rm -r /output
# Ignore case (case-insensitive counting)
hadoop@muhe221:~/test$ hadoop jar wc2.jar com.hisi.a.WordCount2 -Dwordcount.case.sensitive=false /input /output
hadoop@muhe221:~/test$ hadoop fs -cat /output/part-r-00000
heart 1
hello 2
sweat 1
world 1
hadoop@muhe221:~/test$ hadoop fs -rm -r /output
hadoop@muhe221:~/test$ hadoop fs -cat /hehe/patterns.txt
sweat
heart

# Skip the specified patterns
hadoop@muhe221:~/test$ hadoop jar wc2.jar com.hisi.a.WordCount2 -Dwordcount.case.sensitive=false -skip /hehe/patterns.txt /input /output
hadoop@muhe221:~/test$ hadoop fs -cat /output/part-r-00000
hello 2
world 1
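
Note that parseSkipFile and map treat each line of patterns.txt as a Java regular expression passed to String.replaceAll, not as a literal word. A small standalone sketch of that mechanism (the class name is hypothetical, and the input mirrors hehe.txt):

// Standalone sketch (not part of the job) of how skip patterns are applied:
// each line of patterns.txt becomes a regex that is stripped from the input line.
public class SkipPatternDemo {
  public static void main(String[] args) {
    String line = "Hello World hello sweat heart".toLowerCase();
    for (String pattern : new String[] { "sweat", "heart" }) {
      line = line.replaceAll(pattern, "");   // regex replacement, not literal match
    }
    System.out.println(line.trim());          // prints: hello world hello
  }
}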

 
