HDFS自定义处理类实现
1、新建interface: ImoocMapper.java
package com.imooc.bigdata.hadoop.hdfs;

/**
 * Custom mapper contract.
 *
 * <p>Implementations receive the input one line at a time together with a
 * shared context object and decide what to record in that context.
 */
public interface ImoocMapper {

    /**
     * Handles a single line of input.
     *
     * @param line    one line of data that has been read
     * @param context the context/cache used while processing
     */
    void map(String line, ImoocContext context);
}
2、新建class:WordCountMapper.java
package com.imooc.bigdata.hadoop.hdfs; /* * 自定义WordCount实现类 */ public class WordCountMapper implements ImoocMapper { @Override public void map(String line, ImoocContext context) { String[] words = line.split("\t"); for (String word : words){ Object value = context.get(word); if (value == null){ context.write(word, 1); } else { int v = Integer.parseInt(value.toString()); context.write(word, v+1);//去除单词对应的次数+1 } } } }


浙公网安备 33010602011771号