hive数-据库布-隆索引
数据库建表
CREATE TABLE test.test_fi_all_11( `company_name` string, `manage_station` string, `country` string, `customer_type` string, `customer_id` string, `work_time` string) PARTITIONED BY ( `search_date` string) STORED AS ORC -- 必须指定ORC/Parquet格式 TBLPROPERTIES ( -- 开启布隆索引,指定需要索引的列(多个列用逗号分隔) 'orc.bloom.filter.columns' = 'customer_id', 'orc.create.index'='true', -- 可选:配置布隆过滤器误判率(默认0.05,值越小误判率越低,占用空间略大) 'orc.bloom.filter.fpp' = '0.05' );
插入数据
insert overwrite table test.test_fi_all_11 partition(search_date) select * from dwd.test_fi_all_11;
查询索引过滤,可以对比两边的查询效率true和false
SET hive.optimize.index.filter=true; -- 开启索引过滤 false是取消 select * from test.test_fi_all_11 where customer_id='8142997860'
查看执行计划
EXPLAIN EXTENDED select * from test.test_fi_all_11 where customer_id='8142997860';
浙公网安备 33010602011771号