在windows远程提交任务给Hadoop集群（Hadoop 2.6）

我使用3台Centos虚拟机搭建了一个Hadoop2.6的集群。希望在windows7上面使用IDEA开发mapreduce程序，然后提交到远程的Hadoop集群上执行。经过不懈的google终于搞定。

开始我使用hadoop的eclipse插件来执行job，竟然成功了，后来发现mapreduce是在本地执行的，根本没有提交到集群上。我把hadoop的4个配置文件加上后就开始出现了问题。

1：org.apache.hadoop.util.Shell$ExitCodeException: /bin/bash: line 0: fg: no job control

网上说要修改源码，在Hadoop2.6已经合并了那个补丁。这个错误怎么解决的也忘记了

2：Stack trace: ExitCodeException exitCode=1:

3：Error: Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster

4：Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class WordCount$Map not found

按照我的步骤走，这些问题都能解决，我使用的IDE是IDEA

1：复制Hadoop的4个配置文件以及log4j.properties放到src目录下面：core-site.xml,hdfs-site.xml,mapred-site.xml,yarn-site.xml,log4j.properties

2:配置mapred-site.xml

<configuration>
    <!-- Client-side mapred-site.xml placed on the IDE classpath so that jobs
         started on Windows are submitted to the remote cluster. -->

    <!-- Submit jobs to YARN; without the cluster configuration files the
         article reports that the job silently ran locally. -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- NOTE(review): declares the remote (cluster) OS as Linux; confirm that
         this key is actually read by the Hadoop version in use. -->
    <property>
        <name>mapred.remote.os</name>
        <value>Linux</value>
    </property>
    <!-- Enables cross-platform job submission (here: Windows client, Linux cluster). -->
    <property>
        <name>mapreduce.app-submission.cross-platform</name>
        <value>true</value>
    </property>
    <!-- Classpath entries for MapReduce applications. The article states these
         must be absolute paths: entries based on $HADOOP_HOME failed for the
         author. Presumably /opt/hadoop-2.6.0 is the install directory on the
         cluster nodes - adjust to the actual location. -->
    <property>
    <name>mapreduce.application.classpath</name>
    <value>
        /opt/hadoop-2.6.0/etc/hadoop,
        /opt/hadoop-2.6.0/share/hadoop/common/*,
        /opt/hadoop-2.6.0/share/hadoop/common/lib/*,
        /opt/hadoop-2.6.0/share/hadoop/hdfs/*,
        /opt/hadoop-2.6.0/share/hadoop/hdfs/lib/*,
        /opt/hadoop-2.6.0/share/hadoop/mapreduce/*,
        /opt/hadoop-2.6.0/share/hadoop/mapreduce/lib/*,
        /opt/hadoop-2.6.0/share/hadoop/yarn/*,
        /opt/hadoop-2.6.0/share/hadoop/yarn/lib/*
    </value>
</property>    
    <!-- host:port of the job history service on the node named "master". -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
    <!-- host:port of the job history web application on "master". -->
       <property>
                <name>mapreduce.jobhistory.webapp.address</name>
                <value>master:19888</value>
        </property>
</configuration>

注意mapreduce.application.classpath一定是绝对路径，不要搞什么$HADOOP_HOME,我这里反正是报错的

3：修改yarn-site.xml

<configuration>
<!-- Site specific YARN configuration properties -->
  <!-- Registers the mapreduce_shuffle auxiliary service with the NodeManager. -->
  <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- host:port of the ResourceManager on the node named "master". -->
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>master:8032</value>
    </property>
<!-- Classpath entries for YARN applications. The article states these must be
     absolute paths and that $HADOOP_HOME-based entries should not be used.
     Presumably /opt/hadoop-2.6.0 is the install directory on the cluster
     nodes - adjust to the actual location. -->
<property>
    <name>yarn.application.classpath</name>
    <value>
        /opt/hadoop-2.6.0/etc/hadoop,
        /opt/hadoop-2.6.0/share/hadoop/common/*,
        /opt/hadoop-2.6.0/share/hadoop/common/lib/*,
        /opt/hadoop-2.6.0/share/hadoop/hdfs/*,
        /opt/hadoop-2.6.0/share/hadoop/hdfs/lib/*,
        /opt/hadoop-2.6.0/share/hadoop/mapreduce/*,
        /opt/hadoop-2.6.0/share/hadoop/mapreduce/lib/*,
        /opt/hadoop-2.6.0/share/hadoop/yarn/*,
        /opt/hadoop-2.6.0/share/hadoop/yarn/lib/*
    </value>
  </property>
</configuration>

注意yarn.application.classpath一定是绝对路径，不要搞什么$HADOOP_HOME

4:看下我的代码

package com.gaoxing.hadoop;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {
    //继承mapper接口，设置map的输入类型为<Object,Text>
    //输出类型为<Text,IntWritable>
    public static class Map extends Mapper<Object,Text,Text,IntWritable>{
        //one表示单词出现一次
        private static IntWritable one = new IntWritable(1);
        //word存储切下的单词
        private Text word = new Text();
        public void map(Object key,Text value,Context context) throws IOException,InterruptedException{
            //对输入的行切词
            StringTokenizer st = new StringTokenizer(value.toString());
            while(st.hasMoreTokens()){
                word.set(st.nextToken());//切下的单词存入word
                context.write(word, one);
            }
        }
    }
    //继承reducer接口，设置reduce的输入类型<Text,IntWritable>
    //输出类型为<Text,IntWritable>
    public static class Reduce extends Reducer<Text,IntWritable,Text,IntWritable>{
        //result记录单词的频数
        private static IntWritable result = new IntWritable();
        public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException,InterruptedException{
            int sum = 0;
            //对获取的<key,value-list>计算value的和
            for(IntWritable val:values){
                sum += val.get();
            }
            //将频数设置到result
            result.set(sum);
            //收集结果
            context.write(key, result);
        }
    }
    /**
     * @param args
     */
    public static void main(String[] args) throws Exception{
        Configuration conf = new Configuration();
       // conf.set("mapred.remote.os","Linux");
       // conf.set("yarn.resourcemanager.address","master:8032");
       // conf.set("mapreduce.framework.name","yarn");
        conf.set("mapred.jar","D:\\IdeaProjects\\hadooplearn\\out\\artifacts\\hadoo.jar");
        //conf.set("mapreduce.app-submission.cross-platform","true");
        Job job = Job.getInstance(conf);
        job.setJobName("test");
        //配置作业各个类
        job.setJarByClass(WordCount.class);
        job.setMapperClass(Map.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/tmp/hbase-env.sh"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/tmp/out11"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

conf.set("mapred.jar","D:\\IdeaProjects\\hadooplearn\\out\\artifacts\\hadoo.jar");这是最重要的一句，不然会报上面第4个问题

IDEA中有个功能就是编译的时候打包：

下班了。

来自为知笔记(Wiz)

posted @ 2015-04-29 20:04 高兴的博客阅读(12053) 评论(0) 收藏举报

刷新页面返回顶部

高兴

谨言慎行,格物致知

在windows远程提交任务给Hadoop集群（Hadoop 2.6）

公告