hive数-据库布-隆索引

数据库建表

CREATE TABLE test.test_fi_all_11(
  `company_name` string, 
  `manage_station` string, 
  `country` string, 
  `customer_type` string, 
  `customer_id` string, 
  `work_time` string)
PARTITIONED BY ( 
  `search_date` string)
STORED AS ORC  -- 必须指定ORC/Parquet格式
TBLPROPERTIES (
    -- 开启布隆索引,指定需要索引的列(多个列用逗号分隔)
    'orc.bloom.filter.columns' = 'customer_id',
    'orc.create.index'='true',
    -- 可选:配置布隆过滤器误判率(默认0.05,值越小误判率越低,占用空间略大)
    'orc.bloom.filter.fpp' = '0.05'  
);

插入数据

insert overwrite table test.test_fi_all_11 partition(search_date)
select * from dwd.test_fi_all_11;

查询索引过滤,可以对比两边的查询效率true和false

SET hive.optimize.index.filter=true;  -- 开启索引过滤   false是取消
select * from test.test_fi_all_11 where customer_id='8142997860'

查看执行计划

EXPLAIN EXTENDED
select * from test.test_fi_all_11 where customer_id='8142997860';

 

posted @ 2025-12-17 16:56  所向披靡zz  阅读(2)  评论(0)    收藏  举报