3.hadoop完全分布式搭建

3.Hadoop完全分布式搭建

1.完全分布式搭建

  1. 配置

    #cd /soft/hadoop/etc/
    #mv hadoop local
    #cp -r local full
    #ln -s full hadoop
    #cd hadoop
    
    
  2. 修改core-site.xml配置文件

    #vim core-site.xml
    [core-site.xml配置如下]
    	<?xml version="1.0"?>
       <configuration>
       	<property>
       		<name>fs.defaultFS</name>
       		<value>hdfs://hadoop-1</value>
       	</property>
       </configuration>
    
  3. 修改hdfs-site.xml配置文件

    #vim hdfs-site.xml 
    [hdfs-site.xml配置如下]
    <?xml version="1.0"?>
    <configuration>
    	<property>
    		<name>dfs.replication</name>
    		<value>3</value>
    	</property>
    	<property>
    		<name>dfs.namenode.secondary.http-address</name>
    		<value>hadoop-2:50090</value>
    	</property>
    </configuration>
    
  4. 修改mapred-site.xml配置文件

    #cp mapred-site.xml.template mapred-site.xml
    #vim mapred-site.xml
    [mapred-site.xml配置如下]
    <?xml version="1.0"?>
    <configuration>
    	<property>
    		<name>mapreduce.framework.name</name>
    		<value>yarn</value>
    	</property>
    </configuration>
    
  5. 修改yarn-site.xml配置文件

    #vim yarn-site.xml 
      [yarn-site.xml配置如下]
    <?xml version="1.0"?>
    <configuration>
            <property>
                    <name>yarn.resourcemanager.hostname</name>
                    <value>hadoop-1</value>
            </property>
            <property>
                    <name>yarn.nodemanager.aux-services</name>
                    <value>mapreduce_shuffle</value>
            </property>
    </configuration>
    
  6. 修改slaves配置文件

    #vim slaves
    [slaves]
    hadoop-2
    hadoop-3
    hadoop-4
    hadoop-5
    
  7. 同步到其他节点

     #scp -r /soft/hadoop/etc/full  hadoop-2:/soft/hadoop/etc/
     #scp -r /soft/hadoop/etc/full  hadoop-3:/soft/hadoop/etc/
     #scp -r /soft/hadoop/etc/full  hadoop-4:/soft/hadoop/etc/
     #scp -r /soft/hadoop/etc/full  hadoop-5:/soft/hadoop/etc/
     #ssh hadoop-2 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
     #ssh hadoop-3 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
     #ssh hadoop-4 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
     #ssh hadoop-5 ln -s /soft/hadoop/etc/full /soft/hadoop/etc/hadoop
    
  8. 格式化hdfs分布式文件系统

    #hadoop namenode -format
    
  9. 启动服务

    [root@hadoop-1 hadoop]# start-all.sh 
    This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
    Starting namenodes on [hadoop-1]
    hadoop-1: starting namenode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-namenode-hadoop-1.out
    hadoop-2: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-2.out
    hadoop-3: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-3.out
    hadoop-4: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-4.out
    hadoop-5: starting datanode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop-5.out
    Starting secondary namenodes [hadoop-2]
    hadoop-2: starting secondarynamenode, logging to /soft/hadoop-2.7.3/logs/hadoop-root-secondarynamenode-hadoop-2.out
    starting yarn daemons
    starting resourcemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-resourcemanager-hadoop-1.out
    hadoop-3: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-3.out
    hadoop-4: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-4.out
    hadoop-2: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-2.out
    hadoop-5: starting nodemanager, logging to /soft/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop-5.out
    
  10. 查看服务运行状态

        [root@hadoop-1 hadoop]# jps
        16358 ResourceManager
        12807 NodeManager                                                                                                                 
        16011 NameNode
        16204 SecondaryNameNode
        16623 Jps
        
        hadoop-5 | SUCCESS | rc=0 >>
        16993 NodeManager
        16884 DataNode
        17205 Jps
        
        hadoop-1 | SUCCESS | rc=0 >>
        28520 ResourceManager
        28235 NameNode
        29003 Jps
        
        hadoop-2 | SUCCESS | rc=0 >>
        17780 Jps
        17349 DataNode
        17529 NodeManager
        17453 SecondaryNameNode
        
        hadoop-4 | SUCCESS | rc=0 >>
        17105 Jps
        16875 NodeManager
        16766 DataNode
        
        hadoop-3 | SUCCESS | rc=0 >>
        16769 DataNode
        17121 Jps
        16878 NodeManager
    
  11. 登录WEB查看（NameNode: http://hadoop-1:50070 ，ResourceManager: http://hadoop-1:8088 ）

2. 完全分布式单词统计

  1. 通过hadoop自带的demo运行单词统计

    #mkdir /input
    #cd /input/
    #echo "hello world" > file1.txt
    #echo "hello world" > file2.txt
    #echo "hello world" > file3.txt
    #echo "hello hadoop" > file4.txt
    #echo "hello hadoop" > file5.txt
    #echo "hello mapreduce" > file6.txt
    #echo "hello mapreduce" > file7.txt
    #hadoop dfs -mkdir /input
    #hdfs dfs -ls /
    #hadoop fs -ls /
    #hadoop fs -put /input/* /input
    #hadoop fs -ls /input
    
  2. 开始统计

    [root@hadoop-1 ~]# hadoop jar /soft/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /input/ /out/put
    17/05/14 23:01:07 INFO client.RMProxy: Connecting to ResourceManager at hadoop-1/10.31.133.19:8032
    17/05/14 23:01:09 INFO input.FileInputFormat: Total input paths to process : 7
    17/05/14 23:01:10 INFO mapreduce.JobSubmitter: number of splits:7
    17/05/14 23:01:10 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1494773207391_0001
    17/05/14 23:01:10 INFO impl.YarnClientImpl: Submitted application application_1494773207391_0001
    17/05/14 23:01:11 INFO mapreduce.Job: The url to track the job: http://hadoop-1:8088/proxy/application_1494773207391_0001/
    17/05/14 23:01:11 INFO mapreduce.Job: Running job: job_1494773207391_0001
    17/05/14 23:01:23 INFO mapreduce.Job: Job job_1494773207391_0001 running in uber mode : false
    17/05/14 23:01:23 INFO mapreduce.Job:  map 0% reduce 0%
    17/05/14 23:01:56 INFO mapreduce.Job:  map 43% reduce 0%
    17/05/14 23:01:57 INFO mapreduce.Job:  map 100% reduce 0%
    17/05/14 23:02:04 INFO mapreduce.Job:  map 100% reduce 100%
    17/05/14 23:02:05 INFO mapreduce.Job: Job job_1494773207391_0001 completed successfully
    17/05/14 23:02:05 INFO mapreduce.Job: Counters: 50
            File System Counters
                    FILE: Number of bytes read=184
                    FILE: Number of bytes written=949365
                    FILE: Number of read operations=0
                    FILE: Number of large read operations=0
                    FILE: Number of write operations=0
                    HDFS: Number of bytes read=801
                    HDFS: Number of bytes written=37
                HDFS: Number of read operations=24
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=2
        Job Counters 
                Killed map tasks=1
                Launched map tasks=7
                Launched reduce tasks=1
                Data-local map tasks=7
                Total time spent by all maps in occupied slots (ms)=216289
                Total time spent by all reduces in occupied slots (ms)=4827
                Total time spent by all map tasks (ms)=216289
                Total time spent by all reduce tasks (ms)=4827
                Total vcore-milliseconds taken by all map tasks=216289
                Total vcore-milliseconds taken by all reduce tasks=4827
                Total megabyte-milliseconds taken by all map tasks=221479936
                Total megabyte-milliseconds taken by all reduce tasks=4942848
        Map-Reduce Framework
                Map input records=7
                Map output records=14
                Map output bytes=150
                Map output materialized bytes=220
                Input split bytes=707
                Combine input records=14
                Combine output records=14
                Reduce input groups=4
                Reduce shuffle bytes=220
                Reduce input records=14
                Reduce output records=4
                Spilled Records=28
                Shuffled Maps =7
                Failed Shuffles=0
                Merged Map outputs=7
                GC time elapsed (ms)=3616
                CPU time spent (ms)=3970
                Physical memory (bytes) snapshot=1528823808
                Virtual memory (bytes) snapshot=16635846656
                Total committed heap usage (bytes)=977825792
        Shuffle Errors
                BAD_ID=0
                CONNECTION=0
                IO_ERROR=0
                WRONG_LENGTH=0
                WRONG_MAP=0
                WRONG_REDUCE=0
        File Input Format Counters 
                Bytes Read=94
        File Output Format Counters 
                Bytes Written=37
                
    
  3. 查看

    [root@hadoop-1 ~]# hadoop fs -ls /out/put
    Found 2 items
    -rw-r--r--   3 root supergroup          0 2017-05-14 23:02 /out/put/_SUCCESS
    -rw-r--r--   3 root supergroup         37 2017-05-14 23:02 /out/put/part-r-00000
    [root@hadoop-1 ~]# hadoop fs -cat /out/put/part-r-00000
    hadoop  2
    hello   7
    mapreduce       2
    world   3
    [root@hadoop-1 ~]# 
    
posted @ 2017-06-23 00:07  刘耀  阅读(421)  评论(0)  编辑  收藏  举报