Hive 建表比较
一、create + load
create table if not exists bidata.dep_category ( department int , department_desc_cn string , department_desc_en string , division_offline_l2 string , division_ghs_mini string , category int , category_desc_cn string , category_desc_en string ) row format delimited fields terminated by ',' lines terminated by '\n' stored as textfile ;
hive> load data local inpath '/opt/Files/out/dep_category.csv' overwrite into table bidata.dep_category; Loading data to table bidata.dep_category OK Time taken: 0.989 seconds hive>
[root@centos02 ~]# hdfs dfs -du -s -h /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category 49.6 K /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category [root@centos02 ~]#
二、create table ... as select
create table bidata.dep_category_copy as
select * from bidata.dep_category where department>0 ;
WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. Query ID = root_20200418015419_d59dcac7-0b70-4e0e-9bfc-d985ef82b690 Total jobs = 3 Launching Job 1 out of 3 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_1587138285424_0005, Tracking URL = http://centos02:8088/proxy/application_1587138285424_0005/ Kill Command = /opt/bigdata/hadoop/hadoop-2.8.5/bin/hadoop job -kill job_1587138285424_0005 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0 2020-04-18 01:56:10,208 Stage-1 map = 0%, reduce = 0% 2020-04-18 01:56:58,749 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 3.12 sec MapReduce Total cumulative CPU time: 3 seconds 120 msec Ended Job = job_1587138285424_0005 Stage-4 is selected by condition resolver. Stage-3 is filtered out by condition resolver. Stage-5 is filtered out by condition resolver. Moving data to directory hdfs://centos02:9000/opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/.hive-staging_hive_2020-04-18_01-54-19_026_5037545095391077598-1/-ext-10002 Moving data to directory hdfs://centos02:9000/opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_copy MapReduce Jobs Launched: Stage-Stage-1: Map: 1 Cumulative CPU: 3.12 sec HDFS Read: 56287 HDFS Write: 50207 SUCCESS Total MapReduce CPU Time Spent: 3 seconds 120 msec OK dep_category.department dep_category.department_desc_cn dep_category.department_desc_en dep_category.division_offline_l2 dep_category.division_ghs_mini dep_category.category dep_category.category_desc_cn dep_category.category_desc_en Time taken: 201.44 seconds hive>
[root@centos02 ~]# hdfs dfs -du -s -h /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_copy 48.9 K /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_copy [root@centos02 ~]#
三、create table ... stored as ... as select
create table bidata.dep_category_parquet_1as stored as parquet as
select * from bidata.dep_category_copy where department>0 ;
WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. Query ID = root_20200421161720_1219f51b-f4dd-41db-aaf3-0f287abae8f9 Total jobs = 3 Launching Job 1 out of 3 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_1587448862243_0005, Tracking URL = http://centos02:8088/proxy/application_1587448862243_0005/ Kill Command = /opt/bigdata/hadoop/hadoop-2.8.5/bin/hadoop job -kill job_1587448862243_0005 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0 2020-04-21 16:18:42,496 Stage-1 map = 0%, reduce = 0% 2020-04-21 16:19:30,508 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 4.9 sec MapReduce Total cumulative CPU time: 5 seconds 430 msec Ended Job = job_1587448862243_0005 Stage-4 is selected by condition resolver. Stage-3 is filtered out by condition resolver. Stage-5 is filtered out by condition resolver. Moving data to directory hdfs://centos02:9000/opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/.hive-staging_hive_2020-04-21_16-17-20_475_6508291955273921365-1/-ext-10002 Moving data to directory hdfs://centos02:9000/opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_1as MapReduce Jobs Launched: Stage-Stage-1: Map: 1 Cumulative CPU: 5.43 sec HDFS Read: 55874 HDFS Write: 24840 SUCCESS Total MapReduce CPU Time Spent: 5 seconds 430 msec OK dep_category_copy.department dep_category_copy.department_desc_cn dep_category_copy.department_desc_en dep_category_copy.division_offline_l2 dep_category_copy.division_ghs_mini dep_category_copy.category dep_category_copy.category_desc_cn dep_category_copy.category_desc_en Time taken: 146.527 seconds hive>
[root@centos02 ~]# hdfs dfs -du -s -h /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_1as 24.2 K /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_1as [root@centos02 ~]#
四、create table + insert
create table if not exists bidata.dep_category_parquet_2create ( department int , department_desc_cn string , department_desc_en string , division_offline_l2 string , division_ghs_mini string , category int , category_desc_cn string , category_desc_en string ) row format delimited fields terminated by ',' lines terminated by '\n' stored as parquet ;
insert overwrite table bidata.dep_category_parquet_2create select
department,department_desc_cn,department_desc_en, division_offline_l2,division_ghs_mini, category,category_desc_cn,category_desc_en from bidata.dep_category_copy where department >0
WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. Query ID = root_20200421160954_45bf186e-98a2-4d88-9a6f-70dada1b4c40 Total jobs = 3 Launching Job 1 out of 3 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_1587448862243_0004, Tracking URL = http://centos02:8088/proxy/application_1587448862243_0004/ Kill Command = /opt/bigdata/hadoop/hadoop-2.8.5/bin/hadoop job -kill job_1587448862243_0004 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0 2020-04-21 16:11:28,276 Stage-1 map = 0%, reduce = 0% 2020-04-21 16:11:52,174 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 4.15 sec MapReduce Total cumulative CPU time: 4 seconds 150 msec Ended Job = job_1587448862243_0004 Stage-4 is selected by condition resolver. Stage-3 is filtered out by condition resolver. Stage-5 is filtered out by condition resolver. Moving data to directory hdfs://centos02:9000/opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_2create/.hive-staging_hive_2020-04-21_16-09-54_900_3108301284318244749-1/-ext-10000 Loading data to table bidata.dep_category_parquet_2create MapReduce Jobs Launched: Stage-Stage-1: Map: 1 Cumulative CPU: 4.15 sec HDFS Read: 56434 HDFS Write: 24844 SUCCESS Total MapReduce CPU Time Spent: 4 seconds 150 msec OK department department_desc_cn department_desc_en division_offline_l2 division_ghs_mini category category_desc_cn category_desc_en Time taken: 128.171 seconds hive>
[root@centos02 ~]# hdfs dfs -du -s -h /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_2create 24.2 K /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_2create [root@centos02 ~]#
五、create table + insert + gzip
create table if not exists bidata.dep_category_parquet_3gzip ( department int , department_desc_cn string , department_desc_en string , division_offline_l2 string , division_ghs_mini string , category int , category_desc_cn string , category_desc_en string ) row format delimited fields terminated by ',' lines terminated by '\n' stored as parquet tblproperties('parquet.compression'='GZIP');
insert overwrite table bidata.dep_category_parquet_3gzip select
department,department_desc_cn,department_desc_en, division_offline_l2,division_ghs_mini, category,category_desc_cn,category_desc_en from bidata.dep_category_copy where department >0
WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. Query ID = root_20200421162456_114fab99-e853-4b53-8644-e564c40eecbd Total jobs = 3 Launching Job 1 out of 3 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_1587448862243_0006, Tracking URL = http://centos02:8088/proxy/application_1587448862243_0006/ Kill Command = /opt/bigdata/hadoop/hadoop-2.8.5/bin/hadoop job -kill job_1587448862243_0006 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0 2020-04-21 16:26:39,114 Stage-1 map = 0%, reduce = 0% 2020-04-21 16:27:19,920 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 4.26 sec MapReduce Total cumulative CPU time: 4 seconds 260 msec Ended Job = job_1587448862243_0006 Stage-4 is selected by condition resolver. Stage-3 is filtered out by condition resolver. Stage-5 is filtered out by condition resolver. Moving data to directory hdfs://centos02:9000/opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_3gzip/.hive-staging_hive_2020-04-21_16-24-56_642_8115129675578625480-1/-ext-10000 Loading data to table bidata.dep_category_parquet_3gzip MapReduce Jobs Launched: Stage-Stage-1: Map: 1 Cumulative CPU: 4.26 sec HDFS Read: 56467 HDFS Write: 12580 SUCCESS Total MapReduce CPU Time Spent: 4 seconds 260 msec OK department department_desc_cn department_desc_en division_offline_l2 division_ghs_mini category category_desc_cn category_desc_en Time taken: 147.191 seconds hive>
[root@centos02 ~]# hdfs dfs -du -s -h /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_3gzip 12.2 K /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_3gzip [root@centos02 ~]#
六、create table + insert + snappy
create table if not exists bidata.dep_category_parquet_4snappy ( department int , department_desc_cn string , department_desc_en string , division_offline_l2 string , division_ghs_mini string , category int , category_desc_cn string , category_desc_en string ) row format delimited fields terminated by ',' lines terminated by '\n' stored as parquet tblproperties('parquet.compression'='SNAPPY');
insert overwrite table bidata.dep_category_parquet_4snappy select
department,department_desc_cn,department_desc_en, division_offline_l2,division_ghs_mini, category,category_desc_cn,category_desc_en from bidata.dep_category_copy where department >0
WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. Query ID = root_20200421163306_d3654eea-4b7d-4e22-86b6-ff1415d372c4 Total jobs = 3 Launching Job 1 out of 3 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_1587448862243_0007, Tracking URL = http://centos02:8088/proxy/application_1587448862243_0007/ Kill Command = /opt/bigdata/hadoop/hadoop-2.8.5/bin/hadoop job -kill job_1587448862243_0007 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0 2020-04-21 16:34:36,345 Stage-1 map = 0%, reduce = 0% 2020-04-21 16:35:17,598 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 4.67 sec MapReduce Total cumulative CPU time: 4 seconds 670 msec Ended Job = job_1587448862243_0007 Stage-4 is selected by condition resolver. Stage-3 is filtered out by condition resolver. Stage-5 is filtered out by condition resolver. Moving data to directory hdfs://centos02:9000/opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_4snappy/.hive-staging_hive_2020-04-21_16-33-06_059_6028529886538582108-1/-ext-10000 Loading data to table bidata.dep_category_parquet_4snappy MapReduce Jobs Launched: Stage-Stage-1: Map: 1 Cumulative CPU: 4.67 sec HDFS Read: 56483 HDFS Write: 16391 SUCCESS Total MapReduce CPU Time Spent: 4 seconds 670 msec OK department department_desc_cn department_desc_en division_offline_l2 division_ghs_mini category category_desc_cn category_desc_en Time taken: 140.318 seconds hive>
[root@centos02 ~]# hdfs dfs -du -s -h /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_4snappy 15.9 K /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_parquet_4snappy [root@centos02 ~]#
七、orc
create table if not exists bidata.dep_category_orc ( department int , department_desc_cn string , department_desc_en string , division_offline_l2 string , division_ghs_mini string , category int , category_desc_cn string , category_desc_en string ) row format delimited fields terminated by ',' lines terminated by '\n' stored as orc ;
insert overwrite table bidata.dep_category_orc select
department,department_desc_cn,department_desc_en, division_offline_l2,division_ghs_mini, category,category_desc_cn,category_desc_en from bidata.dep_category_copy where department >0 ;
WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. Query ID = root_20200421165039_d7f8789f-fe9a-4548-89da-f08a491b9498 Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_1587448862243_0008, Tracking URL = http://centos02:8088/proxy/application_1587448862243_0008/ Kill Command = /opt/bigdata/hadoop/hadoop-2.8.5/bin/hadoop job -kill job_1587448862243_0008 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0 2020-04-21 16:52:13,093 Stage-1 map = 0%, reduce = 0% 2020-04-21 16:52:57,130 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 6.45 sec MapReduce Total cumulative CPU time: 6 seconds 450 msec Ended Job = job_1587448862243_0008 Stage-4 is selected by condition resolver. Stage-3 is filtered out by condition resolver. Stage-5 is filtered out by condition resolver. Moving data to directory hdfs://centos02:9000/opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_orc/.hive-staging_hive_2020-04-21_16-50-39_026_4917282774922210728-1/-ext-10000 Loading data to table bidata.dep_category_orc MapReduce Jobs Launched: Stage-Stage-1: Map: 1 Cumulative CPU: 6.45 sec HDFS Read: 56415 HDFS Write: 10894 SUCCESS Total MapReduce CPU Time Spent: 6 seconds 450 msec OK department department_desc_cn department_desc_en division_offline_l2 division_ghs_mini category category_desc_cn category_desc_en Time taken: 144.07 seconds hive>
[root@centos02 ~]# hdfs dfs -du -s -h /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_orc 10.6 K /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_orc [root@centos02 ~]#
八、orc + snappy
create table if not exists bidata.dep_category_orc_snappy ( department int , department_desc_cn string , department_desc_en string , division_offline_l2 string , division_ghs_mini string , category int , category_desc_cn string , category_desc_en string ) row format delimited fields terminated by ',' lines terminated by '\n' stored as orc tblproperties('orc.compression'='SNAPPY');
insert overwrite table bidata.dep_category_orc_snappy select
department,department_desc_cn,department_desc_en, division_offline_l2,division_ghs_mini, category,category_desc_cn,category_desc_en from bidata.dep_category_copy where department >0 ;
WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. Query ID = root_20200421165441_d77a507f-49bb-434d-a1ca-6929ac811dc7 Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_1587448862243_0009, Tracking URL = http://centos02:8088/proxy/application_1587448862243_0009/ Kill Command = /opt/bigdata/hadoop/hadoop-2.8.5/bin/hadoop job -kill job_1587448862243_0009 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0 2020-04-21 16:56:16,591 Stage-1 map = 0%, reduce = 0% 2020-04-21 16:56:45,880 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 3.21 sec MapReduce Total cumulative CPU time: 3 seconds 210 msec Ended Job = job_1587448862243_0009 Stage-4 is selected by condition resolver. Stage-3 is filtered out by condition resolver. Stage-5 is filtered out by condition resolver. Moving data to directory hdfs://centos02:9000/opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_orc_snappy/.hive-staging_hive_2020-04-21_16-54-41_355_6900857626881900088-1/-ext-10000 Loading data to table bidata.dep_category_orc_snappy MapReduce Jobs Launched: Stage-Stage-1: Map: 1 Cumulative CPU: 3.21 sec HDFS Read: 56491 HDFS Write: 10901 SUCCESS Total MapReduce CPU Time Spent: 3 seconds 210 msec OK department department_desc_cn department_desc_en division_offline_l2 division_ghs_mini category category_desc_cn category_desc_en Time taken: 129.596 seconds hive>
[root@centos02 ~]# hdfs dfs -du -s -h /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_orc_snappy 10.6 K /opt/bigdata/hive/hive-2.3.4/warehouse/bidata.db/dep_category_orc_snappy [root@centos02 ~]#


浙公网安备 33010602011771号