Hive 分区表:动态分区的建表、写入与 hive-site.xml 配置示例

-- Partitioned table / dynamic-partition demo: recreate from scratch.
-- IF EXISTS makes the script rerunnable on a clean metastore;
-- PURGE bypasses the trash so the table data is removed immediately.
DROP TABLE IF EXISTS db_hive1.t2a_trans PURGE;

-- Transaction fact table, dynamically partitioned by transaction date.
-- NOTE: the table name is qualified with the database so it always lands in
-- db_hive1 regardless of the session's current database — every other
-- statement in this file (DROP/TRUNCATE/INSERT/SELECT) targets
-- db_hive1.t2a_trans, so an unqualified CREATE would silently diverge.
CREATE TABLE IF NOT EXISTS db_hive1.t2a_trans (
    tr_id        STRING        COMMENT '交易流水号',
    cust_id      STRING        COMMENT '用户ID',
    tr_amt       DECIMAL(10,2) COMMENT '交易金额',
    product_code STRING        COMMENT '产品代码'
)
COMMENT '交易流水事实表'
PARTITIONED BY (tr_dt DATE) -- partition column; string or date types are typical
STORED AS ORC; -- columnar format (ORC/Parquet) recommended for fact tables

-- Empty the table before reloading (removes data in all partitions).
TRUNCATE TABLE db_hive1.t2a_trans;

-- Load synthetic transactions from db_hive1.room4. tr_dt is derived from the
-- first 4 characters of r.version (treated as a year), so rows are dynamically
-- partitioned by the Jan-1st date of that year.
INSERT INTO db_hive1.t2a_trans
(tr_id, cust_id, tr_amt, product_code, tr_dt)
SELECT
    cast(cast(r.id AS BIGINT) AS STRING)               AS tr_id,
    r.sname                                            AS cust_id,
    round(rand(), 2)                                   AS tr_amt,        -- fixed alias typo (was tr_cmt)
    r.ctfid                                            AS product_code,  -- fixed alias typo (was producet_code)
    -- Hive's to_date() takes a single string argument (the two-argument
    -- Oracle-style form is not supported); build 'yyyy-01-01' with concat()
    -- because the || operator only exists in Hive 2.2+.
    to_date(concat(substr(r.version, 1, 4), '-01-01')) AS tr_dt
FROM db_hive1.room4 r
WHERE r.version IS NOT NULL
--  AND r.sname LIKE '%鬼%'
;

-- Verify the dynamic-partition load: row count per partition, then a full dump.
SELECT tr_dt,
       count(*)
FROM db_hive1.t2a_trans
GROUP BY tr_dt;

SELECT * FROM db_hive1.t2a_trans;

 在 Hive 的 conf/hive-site.xml(注意目录是 conf,不是 config)中添加以下配置以支持动态分区

<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://localhost:3306/hive_metadata?allowPublicKeyRetrieval=true&amp;createDatabaseIfNotExist=true&amp;useSSL=false</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.cj.jdbc.Driver</value>  <!-- MySQL 8驱动名 -->
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
  </property>

    <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/hivedata</value> <!-- HDFS存储路径 -->
  </property> 

   <!-- HiveServer2配置 -->
    <property>
        <name>hive.server2.thrift.port</name>
        <value>10000</value>
    </property>
    <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>hadoop01</value>
    </property>
    <property>
        <name>hive.metastore.uris</name>
        <value>thrift://hadoop01:9083</value>
    </property>

<!-- 开启动态分区功能 -->
  <property>
    <name>hive.exec.dynamic.partition</name>
    <value>true</value>
    <description>Whether or not to allow dynamic partitions in DML/DDL.</description>
  </property>
  <!-- 设置动态分区模式为非严格模式 -->
  <property>
    <name>hive.exec.dynamic.partition.mode</name>
    <value>nonstrict</value>
    <description>In nonstrict mode all partitions are allowed to be dynamic.</description>
  </property>

<!-- 设置 Hive 执行引擎(当前值为 mr,即 MapReduce;如需 Tez 请将值改为 tez) -->
  <property>
    <name>hive.execution.engine</name>
    <value>mr</value>
  </property>

<property>
  <name>spark.home</name>
  <value>/opt/spark-2.4.5</value>
</property>

 
<!--
 <property>
    <name>hive.enable.spark.execution.engine</name>
    <value>true</value>
  </property>

 <property>
    <name>spark.home</name>
    <value>/opt/spark-3.3.2</value>
  </property>
<property>
    <name>spark.master</name>
    <value>yarn</value>
  </property>
  -->
<!--
<property>
  <name>spark.master</name>
  <value>yarn</value>
</property>
-->
<property>
  <name>spark.yarn.jars</name>
  <value>hdfs://hadoop01:9000/spark-jars/*</value>
</property>


<!--
 <property>
   <name>hive.exec.parallel</name>
   <value>true</value>
   <description>是否启用并行执行作业</description>
 </property>
 <property>
   <name>hive.exec.parallel.thread.number</name>
   <value>10</value>
   <description>并行执行作业的最大线程数</description>
 </property>
 <property>
   <name>hive.exec.reducers.bytes.per.reducer</name>
   <value>256000000</value>
   <description>每个reducer处理的数据量(字节)</description>
 </property>
 <property>
   <name>hive.exec.reducers.max</name>
   <value>1000</value>
   <description>最大reducer数量</description>
 </property>
 <property>
   <name>hive.exec.memory</name>
   <value>2048</value>
   <description>每个map/reduce任务的内存大小(MB)</description>
 </property>
 <property>
   <name>hive.auto.convert.join</name>
   <value>true</value>
   <description>是否自动将小表转换为map-side join</description>
 </property>
 <property>
   <name>hive.optimize.sort.dynamic.partition</name>
   <value>true</value>
   <description>是否动态排序动态分区</description>
 </property>
-->


</configuration>

  

posted @ 2025-12-14 00:09  ARYOUOK  阅读(2)  评论(0)    收藏  举报