yarn

yarn集群主要由一个 ResourceManager（rm）和多个 NodeManager（nm）组成；rm最好独立安装在一个节点上，nm最好和datanode部署在同一个节点。

node manager的主要作用：创建容器，运行程序

resource manager任务调度：指定node manager 开多少个容器来运行

yarn-site.xml

yarn集群中resourcemanager的启动节点
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hdp-01</value>
</property>

mapreduce的shuffle辅助工具
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>

nodemanager可使用的内存资源根据服务器的配置决定

<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>
</property>

cpu 核数一个容器分配一核、最少一G做计算

<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>2</value>
</property>

start-yarn.sh 一键启动：nodemanager会根据slaves文件中配置的节点启动；resourcemanager则必须在yarn.resourcemanager.hostname所配置的节点上执行该脚本才能启动。

网页查看端口 8088

客户端使用api 和resourcemanager交互

配置nodemanager的内存资源最少配置2个G,mrappmaster是一个管理map task和reduce task的程序，在启动时会占用1.5G。

job客户端程序：向yarn提交mr程序

/**
 * Client program that submits the wordcount MapReduce job to YARN.
 *
 * <p>It builds the job configuration, wraps the job parameters and submits the job.
 * The extra cross-platform setting below is what allows it to be launched from a
 * Windows machine.
 *
 * <p>The job client ships the job jar to YARN, so the project has to be packaged
 * as a jar before this class is run.
 */
public class JobSubmitter {
    public static void main(String[] args) throws Exception {
        // The job accesses the file system under a user identity; it is supplied
        // through the HADOOP_USER_NAME JVM system property.
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        // 1. Default file system the job accesses at run time.
        conf.set("fs.defaultFS", "hdfs://hdp-01:9000");
        // 2. Where the job is submitted to run.
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resourcemanager.hostname", "hdp-01");
        // 3. Required when this submitter is launched from a Windows system.
        conf.set("mapreduce.app-submission.cross-platform", "true");

        Job job = Job.getInstance(conf);

        // 1. Location of the job jar, resolved from this class.
        job.setJarByClass(JobSubmitter.class);

        // 2. Mapper and Reducer implementations used by this job.
        job.setMapperClass(WordcountMapper.class);
        job.setReducerClass(WordcountReducer.class);

        // 3. Key/value types produced by the Mapper and by the Reducer.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // The output path must not exist when the job is submitted, so a directory
        // left over from a previous run is removed first (as user root).
        // The FileSystem handle is deliberately not closed here.
        // NOTE(review): Hadoop may hand out shared FileSystem instances - confirm before closing.
        Path output = new Path("/wordcount/output");
        FileSystem fs = FileSystem.get(new URI("hdfs://hdp-01:9000"), conf, "root");
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        // 4. Input data location and final result location.
        FileInputFormat.setInputPaths(job, new Path("/wordcount/input"));
        FileOutputFormat.setOutputPath(job, output);

        // 5. Number of reduce tasks to start.
        job.setNumReduceTasks(2);

        // 6. Submit to YARN and stay attached so progress is visible on the client.
        boolean res = job.waitForCompletion(true);

        System.exit(res ? 0 : 1);
    }
}

package cn.edu360.mr.wc;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Wordcount job submitter intended to be started on a machine of the Hadoop cluster.
 *
 * <p>When launched there with
 * {@code hadoop jar xx.jar cn.edu360.mr.wc.JobSubmitter2}, the {@code hadoop jar}
 * command puts the jars and configuration files of the local Hadoop installation
 * on the runtime classpath. {@code new Configuration()} then loads those files, so
 * settings such as {@code fs.defaultFS}, {@code mapreduce.framework.name} and
 * {@code yarn.resourcemanager.hostname} do not have to be set in code.
 *
 * @author ThinkPad
 */
public class JobSubmitterLinuxToYarn {

	public static void main(String[] args) throws Exception {

		// mapreduce.framework.name is not set here; the file system settings
		// below are still set explicitly.
		Configuration configuration = new Configuration();
		configuration.set("fs.defaultFS", "hdfs://hdp-01:9000");
		configuration.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");

		Job wordcountJob = Job.getInstance(configuration);

		// Jar containing the job classes, located through this class.
		wordcountJob.setJarByClass(JobSubmitterLinuxToYarn.class);

		// Map side: implementation plus its output key/value types.
		wordcountJob.setMapperClass(WordcountMapper.class);
		wordcountJob.setMapOutputKeyClass(Text.class);
		wordcountJob.setMapOutputValueClass(IntWritable.class);

		// Reduce side: implementation, final key/value types, task count.
		wordcountJob.setReducerClass(WordcountReducer.class);
		wordcountJob.setOutputKeyClass(Text.class);
		wordcountJob.setOutputValueClass(IntWritable.class);
		wordcountJob.setNumReduceTasks(3);

		// Input and output locations.
		Path inputDir = new Path("/wordcount/input");
		Path outputDir = new Path("/wordcount/output");
		FileInputFormat.setInputPaths(wordcountJob, inputDir);
		FileOutputFormat.setOutputPath(wordcountJob, outputDir);

		// Submit, wait for completion, and report the outcome as the exit status.
		boolean succeeded = wordcountJob.waitForCompletion(true);
		if (succeeded) {
			System.exit(0);
		} else {
			System.exit(1);
		}
	}

}

/**
 * Wordcount job submitter to be run directly on Linux.
 *
 * <p>No cluster settings are set in code; everything comes from whatever
 * {@code new Configuration()} loads.
 */
public class JobSubmitterLinuxToYarn {
    public static void main(String[] args) throws Exception {
        // 1. Create the Configuration, which loads the configuration files.
        Configuration conf = new Configuration();

        // 2. Create the Job object.
        Job job = Job.getInstance(conf);

        // 3. Job parameters: the jar to submit, located through this class.
        job.setJarByClass(JobSubmitterLinuxToYarn.class);

        // 3.1 Mapper and Reducer implementations.
        job.setMapperClass(WordcountMapper.class);
        job.setReducerClass(WordcountReducer.class);

        // 3.2 Output key/value types of the map stage and of the final result.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // 3.3 Input and output locations, both absolute.
        FileInputFormat.setInputPaths(job, new Path("/wordcount/input"));
        FileOutputFormat.setOutputPath(job, new Path("/wordcount/output"));

        // 3.4 Number of reduce tasks.
        job.setNumReduceTasks(3);

        // 4. Submit, wait for completion, and report the outcome as the exit status.
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}

posted @ 2018-09-06 17:16 赵先先森阅读(299) 评论(0) 收藏举报

刷新页面返回顶部

赵先先森

yarn

公告