idea+Windows+hadoop本地调试

1、下载hadoop-2.6.0.tar.gz包到本地

我解压后放入D:\hadoop-2.6.0-cdh5.9.3\

Windows下运行Hadoop所需的hadoop.dll、winutils.exe可从以下仓库获取:https://github.com/steveloughran/winutils.git

在仓库中选择与本地Hadoop版本相对应的目录

下载hadoop.dll、winutils.exe文件放入D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0\bin

配置环境变量

HADOOP_HOME=D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0

HADOOP_BIN_PATH=%HADOOP_HOME%\bin

HADOOP_PREFIX=D:\hadoop-2.6.0-cdh5.9.3\hadoop-2.6.0

PATH后增加;%HADOOP_HOME%\bin

基于官方WordCount示例修改:增加了“输出目录已存在则先删除”的逻辑,并在启动时打印系统参数

  1 import org.apache.hadoop.conf.Configuration;
  2 import org.apache.hadoop.fs.FileSystem;
  3 import org.apache.hadoop.fs.Path;
  4 import org.apache.hadoop.io.IntWritable;
  5 import org.apache.hadoop.io.Text;
  6 import org.apache.hadoop.mapreduce.Job;
  7 import org.apache.hadoop.mapreduce.Mapper;
  8 import org.apache.hadoop.mapreduce.Reducer;
  9 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 10 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 11 import org.apache.hadoop.util.GenericOptionsParser;
 12 
 13 import java.io.IOException;
 14 import java.util.Properties;
 15 import java.util.StringTokenizer;
 16 
 17 public class WordCount {
 18 
 19     public static class TokenizerMapper
 20             extends Mapper<Object, Text, Text, IntWritable> {
 21 
 22         private final static IntWritable one = new IntWritable(1);
 23         private Text word = new Text();
 24 
 25         public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
 26             StringTokenizer itr = new StringTokenizer(value.toString());
 27             while (itr.hasMoreTokens()) {
 28                 word.set(itr.nextToken());
 29                 context.write(word, one);
 30             }
 31         }
 32     }
 33 
 34     public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
 35         private IntWritable result = new IntWritable();
 36 
 37         public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
 38             int sum = 0;
 39             for (IntWritable val : values) {
 40                 sum += val.get();
 41             }
 42             result.set(sum);
 43             context.write(key, result);
 44         }
 45     }
 46 
 47 
 48     /**
 49      * 删除指定目录
 50      *
 51      * @param conf
 52      * @param dirPath
 53      * @throws IOException
 54      */
 55     private static void deleteDir(Configuration conf, String dirPath) throws IOException {
 56         FileSystem fs = FileSystem.get(conf);
 57         Path targetPath = new Path(dirPath);
 58         if (fs.exists(targetPath)) {
 59             boolean delResult = fs.delete(targetPath, true);
 60             if (delResult) {
 61                 System.out.println(targetPath + " has been deleted sucessfullly.");
 62             } else {
 63                 System.out.println(targetPath + " deletion failed.");
 64             }
 65         }
 66 
 67     }
 68 
 69     public static void main(String[] args) throws Exception {
 70         Properties props = System.getProperties(); //系统属性
 71         System.out.println("Java的运行环境版本:" + props.getProperty("java.version"));
 72         System.out.println("Java的运行环境供应商:" + props.getProperty("java.vendor"));
 73         System.out.println("Java供应商的URL:" + props.getProperty("java.vendor.url"));
 74         System.out.println("Java的安装路径:" + props.getProperty("java.home"));
 75         System.out.println("Java的虚拟机规范版本:" + props.getProperty("java.vm.specification.version"));
 76         System.out.println("Java的虚拟机规范供应商:" + props.getProperty("java.vm.specification.vendor"));
 77         System.out.println("Java的虚拟机规范名称:" + props.getProperty("java.vm.specification.name"));
 78         System.out.println("Java的虚拟机实现版本:" + props.getProperty("java.vm.version"));
 79         System.out.println("Java的虚拟机实现供应商:" + props.getProperty("java.vm.vendor"));
 80         System.out.println("Java的虚拟机实现名称:" + props.getProperty("java.vm.name"));
 81         System.out.println("Java运行时环境规范版本:" + props.getProperty("java.specification.version"));
 82         System.out.println("Java运行时环境规范供应商:" + props.getProperty("java.specification.vender"));
 83         System.out.println("Java运行时环境规范名称:" + props.getProperty("java.specification.name"));
 84         System.out.println("Java的类格式版本号:" + props.getProperty("java.class.version"));
 85         String jars = props.getProperty("java.class.path");
 86         String[] split = jars.split(";", -1);
 87         for (String jar : split) {
 88             System.out.println("Java的类路径jar: " + jar);
 89         }
 90         //System.out.println("Java的类路径:" + props.getProperty("java.class.path"));
 91         String paths = props.getProperty("java.library.path");
 92         String[] pathsSplit = paths.split(";", -1);
 93         for (String path : pathsSplit) {
 94             System.out.println("加载库时搜索的路径列表:" + path);
 95         }
 96         //System.out.println("加载库时搜索的路径列表:" + props.getProperty("java.library.path"));
 97         System.out.println("默认的临时文件路径:" + props.getProperty("java.io.tmpdir"));
 98         System.out.println("一个或多个扩展目录的路径:" + props.getProperty("java.ext.dirs"));
 99         System.out.println("操作系统的名称:" + props.getProperty("os.name"));
100         System.out.println("操作系统的构架:" + props.getProperty("os.arch"));
101         System.out.println("操作系统的版本:" + props.getProperty("os.version"));
102         System.out.println("文件分隔符:" + props.getProperty("file.separator"));   //在 unix 系统中是"/"
103         System.out.println("路径分隔符:" + props.getProperty("path.separator"));   //在 unix 系统中是":"
104         System.out.println("行分隔符:" + props.getProperty("line.separator"));   //在 unix 系统中是"/n"
105         System.out.println("用户的账户名称:" + props.getProperty("user.name"));
106         System.out.println("用户的主目录:" + props.getProperty("user.home"));
107         System.out.println("用户的当前工作目录:" + props.getProperty("user.dir"));
108         Configuration conf = new Configuration();
109         String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
110         if (otherArgs.length < 2) {
111             System.err.println("Usage: wordcount <in> [<in>...] <out>");
112             System.exit(2);
113         }
114 
115         //先删除output目录
116         deleteDir(conf, otherArgs[otherArgs.length - 1]);
117 
118         Job job = Job.getInstance(conf, "word count");
119         job.setJarByClass(WordCount.class);
120         job.setMapperClass(TokenizerMapper.class);
121         job.setCombinerClass(IntSumReducer.class);
122         job.setReducerClass(IntSumReducer.class);
123         job.setOutputKeyClass(Text.class);
124         job.setOutputValueClass(IntWritable.class);
125         for (int i = 0; i < otherArgs.length - 1; ++i) {
126             FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
127         }
128         FileOutputFormat.setOutputPath(job,
129                 new Path(otherArgs[otherArgs.length - 1]));
130         System.exit(job.waitForCompletion(true) ? 0 : 1);
131     }
132 }
View Code

 pom依赖

 1         <dependency>
 2             <groupId>org.apache.hadoop</groupId>
 3             <artifactId>hadoop-client</artifactId>
 4             <version>2.6.0</version>
 5         </dependency>
 6         <dependency>
 7             <groupId>org.apache.hadoop</groupId>
 8             <artifactId>hadoop-common</artifactId>
 9             <version>2.6.0</version>
10         </dependency>
11         <dependency>
12             <groupId>org.apache.hadoop</groupId>
13             <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
14             <version>2.6.0</version>
15         </dependency>
View Code

添加本地依赖

 

集群信息

hdfs-site.xml

core-site.xml

将以上两个文件放入项目的resources目录

 

给出输入输出参数运行即可

确保本地文件和maven依赖都被加载到

 

posted @ 2018-09-07 19:37  阳光下的me  阅读(2473)  评论(0编辑  收藏  举报