18.hive-spark-hadoop
I. Hadoop Installation
1. For the Hadoop cluster installation, see: http://www.cnblogs.com/makexu/articles/7107422.html
# Create the Hive directories on HDFS
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -mkdir -p /tmp/hive
# If permissions are insufficient, grant them
hdfs dfs -chmod -R 777 /tmp/hive
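A quick, optional check that the directories exist with the expected permissions (assumes the HDFS daemons are already running):
hdfs dfs -ls /user/hive
# -d lists the directory itself; /tmp/hive should show drwxrwxrwx
hdfs dfs -ls -d /tmp/hive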
II. Spark Installation
Download the installation package from https://d3kbcqa49mib13.cloudfront.net/spark-2.2.0-bin-hadoop2.7.tgz
Environment variable configuration
vim /etc/profile
# Set Spark path
export SPARK_HOME=/home/ops/spark-2.2.0
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
# Apply immediately
source /etc/profile
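To verify the Spark install, the bundled SparkPi example can be run locally (run-example ships in every Spark distribution):
$SPARK_HOME/bin/run-example SparkPi 10
# should print a line like: Pi is roughly 3.14...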
III. Hive Installation
1. Environment variable configuration
vim /etc/profile
# Set Hive path
export HIVE_HOME=/home/ops/hive-1.2.2
export HIVE_CONF_DIR=$HIVE_HOME/conf
export PATH=$PATH:$HIVE_HOME/bin
# Apply immediately
source /etc/profile
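An optional sanity check that the shell now resolves Hive:
which hive    # should print /home/ops/hive-1.2.2/bin/hive
hive --version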
2. Hive configuration
## --------------------hive-env.sh-------------------
cd /home/ops/hive-1.2.2/conf
cp hive-env.sh.template hive-env.sh
vim hive-env.sh
# The heap size of the JVM started by the Hive shell script can be controlled via:
export HADOOP_HEAPSIZE=1024
# Set HADOOP_HOME to point to a specific hadoop install directory
HADOOP_HOME=/home/ops/hadoop-2.8.1
# Hive Configuration Directory can be controlled by:
export HIVE_CONF_DIR=/home/ops/hive-1.2.2/conf
# Folder containing extra libraries required for hive compilation/execution can be controlled by:
export HIVE_AUX_JARS_PATH=/home/ops/hive-1.2.2/lib
## --------------------hive-site.xml-------------------
cd /home/ops/hive-1.2.2/conf
cp hive-default.xml.template hive-site.xml
vim hive-site.xml
  <property>
    <name>hive.exec.scratchdir</name>
    <value>hdfs://master:9000/user/hive/warehouse</value>
    <description>HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/&lt;username&gt; is created, with ${hive.scratch.dir.permission}.</description>
  </property>
  <property>
    <name>hive.exec.local.scratchdir</name>
    <value>/home/software/hive/iotmp</value>
    <description>Local scratch space for Hive jobs</description>
  </property>
  <property>
    <name>hive.downloaded.resources.dir</name>
    <value>/home/software/hive/iotmp/${hive.session.id}_resources</value>
    <description>Temporary local directory for added resources in the remote file system.</description>
  </property>
  <property>
    <name>hive.scratch.dir.permission</name>
    <value>777</value>
    <description>The permission for the user specific scratch directories that get created.</description>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>hdfs://master:9000/user/hive/warehouse</value>
    <description>location of default database for the warehouse</description>
  </property>
  <property>
    <name>hive.metastore.uris</name>
    <value></value>
    <description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
  </property>
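Because hive-site.xml above points local scratch space and downloaded resources at /home/software/hive/iotmp, create that directory before starting Hive (the chmod is a permissive assumption, mirroring the HDFS step earlier):
mkdir -p /home/software/hive/iotmp
chmod -R 775 /home/software/hive/iotmp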
IV. Startup Sequence
1. Start the Hadoop cluster:
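A typical start on the master node, assuming the stock Hadoop 2.8 sbin scripts:
$HADOOP_HOME/sbin/start-dfs.sh
$HADOOP_HOME/sbin/start-yarn.sh
# jps should now list NameNode, DataNode, ResourceManager and NodeManager
jps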
2. Write the Java code
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.hive.HiveContext;

/**
 * Created by n7053 on 2017/7/28.
 */
public class SparkHiveTest {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("SparkSQL")
                .set("spark.sql.warehouse.dir", "hdfs://master:9000/user/hive/warehouse");
        JavaSparkContext jsc = new JavaSparkContext(conf);
        // HiveContext is deprecated in Spark 2.x (SparkSession.enableHiveSupport()
        // is the successor) but still works in 2.2.
        HiveContext hiveContext = new HiveContext(jsc);
        // Register the JSON SerDe so Hive can parse the JSON lines in country.txt.
        hiveContext.sql("add jar /home/software/json-serde-1.3.7.jar");
        // Note: array needs an element type; TBLPROPERTIES('transactional'='true')
        // was dropped because ACID tables require ORC storage and reject LOAD DATA.
        hiveContext.sql("CREATE TABLE IF NOT EXISTS hive_test (\n" +
                " country string,\n" +
                " languages array<string>\n" +
                " )\n" +
                " ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'\n" +
                " STORED AS TEXTFILE");
        hiveContext.sql("LOAD DATA LOCAL INPATH '/home/software/country.txt' INTO TABLE hive_test");
        // collectAsList() returns List<Row> directly, avoiding the unchecked (Row[]) cast.
        List<Row> results = hiveContext.sql("SELECT * FROM hive_test").collectAsList();
        for (Row pair : results) {
            System.out.println(pair.get(0).toString() + pair.getList(1).toString());
        }
    }
}
3. Compile, package, then run
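Building needs the Spark core and Hive modules on the compile classpath. A minimal pom.xml dependency sketch (versions are assumptions matched to the Spark 2.2.0 install above; provided scope because spark-submit supplies these jars at runtime):
<dependencies>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.2.0</version>
    <scope>provided</scope>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.11</artifactId>
    <version>2.2.0</version>
    <scope>provided</scope>
  </dependency>
</dependencies>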
mvn compile
mvn package
# Contents of the hive_test.sh script
mv /home/mvnpro/spark-project/target/spark-project-1.0-SNAPSHOT.jar /home/mvnpro/spark-project/target/spark-project.jar
/home/ops/spark-2.2.0/bin/spark-submit \
  --class com.mark.spark.SparkHiveTest \
  --master local[8] \
  /home/mvnpro/spark-project/target/spark-project.jar
# Run the script
./hive_test.sh
# Additional files required
/home/software/json-serde-1.3.7.jar
/home/software/country.txt
# Contents of country.txt
{"country":"Switzerland","languages":["German","French","Italian"]}
{"country":"China","languages":["chinese"]}