hadoop集群搭建教程
1. 相关软件准备:
VMware-workstation-full-15.0.4-12990004.exe
CentOS-7-x86_64-DVD-1810.iso
jdk-8u231-linux-x64.tar.gz 账号:邮箱,密码首字母大写
apache-zookeeper-3.5.6-bin.tar.gz
apache-hive-3.1.2-bin.tar.gz 其他
2. VMware虚拟机及linux系统安装及网络环境配置,参见之前的博客
(1)查看、打开、关闭防火墙
# CentOS 7默认没有了iptables文件 cd /etc/sysconfig ls -l yum install iptables-services systemctl enable iptables
systemctl disable iptables.service
(2)权限不足 : sudo chmod -R 777 /home/hadoop/apps/hadoop-3.2.1/bin/yarn
vim /etc/profile  #也可以 vim ~/.bashrc
export JAVA_HOME="/opt/modules/jdk1.8.0_271"
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME="/opt/modules/hadoop-2.10.1"
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
source /etc/profile
mkdir -p $HADOOP_HOME/tmp
mkdir -p $HADOOP_HOME/hdfs/data
mkdir -p $HADOOP_HOME/hdfs/name
#etc/hadoop/core-site.xml: (注意:Hadoop的XML配置文件不会展开$HADOOP_HOME等shell变量,必须写绝对路径)
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/modules/hadoop-2.10.1/tmp</value>
  </property>
</configuration>
#etc/hadoop/hdfs-site.xml:
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.name.dir</name>
    <value>/opt/modules/hadoop-2.10.1/hdfs/name</value>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>/opt/modules/hadoop-2.10.1/hdfs/data</value>
  </property>
</configuration>
#etc/hadoop/yarn-site.xml:
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
#vim $HADOOP_HOME/etc/hadoop/hadoop-env.sh 添加
export JAVA_HOME=/opt/modules/jdk1.8.0_271
#修改 (注意:HDFS_*_USER/YARN_*_USER 是 Hadoop 3.x 的变量;2.10.1 不需要,3.x 以 root 启动时才要配置)
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
#启动 - 初次启动需先格式化namenode
bin/hdfs namenode -format #关闭防火墙 #https://blog.csdn.net/u011170921/article/details/80437937?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-4.control&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-4.control #./sbin/stop-all.sh #service iptables stop #chkconfig iptables off systemctl disable iptables.service service iptables status #关闭 NetworkManager service NetworkManager stop chkconfig NetworkManager off #./sbin/start-all.sh sbin/start-dfs.sh sbin/start-yarn.sh #检查 9000 端口是否打开并远程访问,一般集群启动就打开了,如为localhost表示只能本地访问 netstat -tlpn #netstat -ap | grep 9000 #检查是否允许远程访问可使用telnet ip port telnet 192.168.100.10 9000 #没启动9000,解决措施 https://www.cnblogs.com/woofwoof/p/10261751.html http://dblab.xmu.edu.cn/blog/2440-2/ http://dblab.xmu.edu.cn/blog/install-mysql/ #安装hive元数据库 - 以mysql为元数据库 ----------------------------------------------------- #安装新版mysql前,需将系统自带的mariadb-lib卸载 #rpm -qa|grep mariadb #mariadb-libs-5.5.60-1.el7_5.x86_64 #rpm -e --nodeps mariadb-libs-5.5.60-1.el7_5.x86_64 #会自动替换掉这个服务 #sudo tar -zxvf ./apache-hive-3.1.2-bin.tar.gz -C /usr/local # 解压到/usr/local中 #cd /usr/local/ #sudo mv apache-hive-3.1.2-bin hive # 将文件夹名改为hive #sudo chown -R dblab:dblab hive # 修改文件权限 wget https://mirrors.tuna.tsinghua.edu.cn/mysql/yum/mysql80-community-el7/mysql80-community-release-el7-3.noarch.rpm rpm -ivh mysql80-community-release-el7-3.noarch.rpm yum update mysql80-community-release-el7-3.noarch.rpm yum install -y mysql-server #启动服务 service mysqld start netstat -tap | grep mysql #mysql节点处于LISTEN状态表示启动成功 systemctl status mysqld.service mysqladmin --version grep 'temporary password' /var/log/mysqld.log #查看root默认密码 &aF%C#+4NMo/ #修改密码 vim /etc/my.cnf [mysqld] skip-grant-tables basedir=/var/lib/mysql datadir=/var/lib/mysql/data socket=/var/lib/mysql/mysql.sock character-set-server=utf8 log-error=/var/log/mysqld.log pid-file=/var/run/mysqld/mysqld.pid #重启 systemctl restart mysqld show databases; use mysql; select 
user,host from user;
update user set host="%" where user="root";
#update user set authentication_string='' where user='root'; #如果直接改不了就先置空 xZg#jK49sIl.
update mysql.user set authentication_string='l123456' where user='root';
#ALTER USER 'root'@'localhost' IDENTIFIED WITH mysql_native_password BY 'l123456';
flush privileges;
quit;
#将这行注释
vim /etc/my.cnf
#skip-grant-tables
#systemctl restart mysqld
service mysqld restart
#忘记密码
ps -ef | grep -i mysql --查看状态
service mysqld stop --关闭
vim /etc/my.cnf --skip-grant-tables
service mysqld start
mysql -u root -p
#创建hive数据库及hive用户
select host, user, plugin from mysql.user;
CREATE DATABASE hive;
USE hive;
CREATE USER 'hive'@'localhost' IDENTIFIED BY 'hive';
CREATE USER 'hive'@'%' IDENTIFIED BY 'hive';
CREATE USER 'root'@'%' IDENTIFIED BY 'iUpoint@123';
#GRANT ALL ON hive.* TO 'hive'@'localhost' IDENTIFIED BY 'hive'; #8.0.19报错
#GRANT ALL privileges ON hive.* TO 'hive'@'%' IDENTIFIED BY 'hive';
GRANT ALL ON *.* TO 'hive'@'localhost';
GRANT ALL ON *.* TO 'hive'@'%';
FLUSH PRIVILEGES;
#安装hive -----------------------------------------------------
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz
tar -xzvf apache-hive-3.1.2-bin.tar.gz
#ln -s /opt/modules/apache-hive-3.1.2-bin hive #类似快捷方式
mv apache-hive-3.1.2-bin hive
vim /etc/profile
export HIVE_HOME="/opt/modules/hive"
export PATH=$HIVE_HOME/bin:$PATH
source /etc/profile
cd hive/conf
cp hive-default.xml.template hive-site.xml
# 将相对路径${system:java.io.tmpdir}/${system:user.name}改为绝对路径/home/user_name/hive/tmp/hive
mv hive-default.xml hive-default.xml.template
#mysql jdbc
wget https://mirrors.tuna.tsinghua.edu.cn/mysql/downloads/Connector-J/mysql-connector-java-8.0.22.tar.gz
tar -zxvf mysql-connector-java-8.0.22.tar.gz
cp mysql-connector-java-8.0.22/mysql-connector-java-8.0.22.jar /opt/modules/hive/lib
#rmdir mysql-connector-java-8.0.22 #删除空目录
rm -rf mysql-connector-java-8.0.22/ #递归删除目录
#hive配置文件
#参考:https://www.jianshu.com/p/02ec73752e1c #grep -n 'datanucleus.autoStartMechanism' conf/hive-site.xml #grep -n '<name>.*dir</name>' conf/hive-site.xml #grep -n '<name>.*Connection.*</name>' conf/hive-site.xml #vim +529 conf/hive-site.xml #打开后esc状态下输入:n可跳转到指定行 #添加 <property> <name>system:java.io.tmpdir</name> <value>/opt/modules/hive/tmp/</value> </property> <property> <name>system:user.name</name> <value>hive</value> </property> #修改 <property> <name>javax.jdo.option.ConnectionURL</name> <!-- createDatabaseIfNotExist=true;characterEncoding=UTF-8; --> <value>jdbc:mysql://localhost:3306/hive?useSSL=false</value> <description> JDBC connect string for a JDBC metastore. To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL. For example, jdbc:postgresql://myhost/db?ssl=true for postgres database. </description> </property> <property> <name>javax.jdo.option.ConnectionDriverName</name> <!-- <value>com.mysql.jdbc.Driver</value> --> <value>com.mysql.cj.jdbc.Driver</value> <description>Driver class name for a JDBC metastore</description> </property> <property> <name>javax.jdo.option.ConnectionUserName</name> <value>hive</value> <description>Username to use against metastore database</description> </property> <property> <name>javax.jdo.option.ConnectionPassword</name> <value>hive</value> <description>password to use against metastore database</description> </property> <property> <name>hive.metastore.warehouse.dir</name> <value>/user/hive/warehouse</value> <description>location of default database for the warehouse</description> </property> <property> <name>datanucleus.autoStartMechanism</name> <value>SchemaTable</value> </property> <!-- 当配置为true时,则表示会强制metastore的版本信息与hive jar一致 --> <property> <name>hive.metastore.schema.verification</name> <value>false</value> <description> Enforce metastore schema version consistency. True: Verify that version information stored in metastore matches with one from Hive jars. 
Also disable automatic schema migration attempt. Users are required to manually migrate schema after
      Hive upgrade which ensures proper metastore schema migration. (Default)
      False: Warn if the version information stored in metastore doesn't match with one from in Hive jars.
    </description>
  </property>
  <property>
    <name>datanucleus.schema.autoCreateAll</name>
    <value>true</value>
  </property>
#配置hive-env.sh
cd $HIVE_HOME/conf
cp hive-env.sh.template hive-env.sh
vim hive-env.sh
# 相应的目录换成自己的(与 /etc/profile 中的 HADOOP_HOME 保持一致)
# hadoop 目录
HADOOP_HOME=/opt/modules/hadoop-2.10.1
# hive 配置目录
export HIVE_CONF_DIR=/opt/modules/hive/conf
# hive 的lib目录
export HIVE_AUX_JARS_PATH=/opt/modules/hive/lib
#Could not create ServerSocket on address 0.0.0.0/0.0.0.0:9083.
# Jps查看Runjar进程号,并杀掉
jps
kill -9 进程号
#$HIVE_HOME/lib 中guava的版本应与 $HADOOP_HOME/share/hadoop/common/lib 中相同
schematool -initSchema -dbType mysql
hive --service metastore &
#hadoop测试 先建目录再改权限
mkdir /opt/data
chmod -R 777 /opt/data #不加-R改变的是该文件的权限
ls -l /opt/data
cd /opt/data
touch text.txt
vim text.txt
Hello World
Hello Hadoop
cd $HADOOP_HOME/share/hadoop/mapreduce
hadoop jar hadoop-mapreduce-examples-2.10.1.jar wordcount /opt/data/text.txt /opt/data/output
cd /opt/data/output
cat part-r-00000
Hadoop 1
Hello 2
World 1