Hive表压缩比较

一、TextFile 

-- Text-format test table, partitioned by channel and paid_date.
CREATE TABLE IF NOT EXISTS vn09jj5.test_order_text (
    store_id       SMALLINT,
    order_id       STRING,
    delivery_phone STRING,
    paid_time      TIMESTAMP,
    department     SMALLINT,
    category       SMALLINT,
    upc            STRING,
    upc_desc       STRING,
    gmv            DECIMAL(14,2)
)
PARTITIONED BY (
    channel   STRING,
    paid_date DATE
)
STORED AS TEXTFILE;

-- Print the DDL Hive recorded for the table.
SHOW CREATE TABLE vn09jj5.test_order_text;
+-------------------------------------------------------------------+
|                   createtab_stmt                                  |
+-------------------------------------------------------------------+
| CREATE TABLE `vn09jj5.test_order_text`(                           |
|   `store_id` smallint,                                            |
|   `order_id` string,                                              |
|   `delivery_phone` string,                                        |
|   `paid_time` timestamp,                                          |
|   `department` smallint,                                          |
|   `category` smallint,                                            |
|   `upc` string,                                                   |
|   `upc_desc` string,                                              |
|   `gmv` decimal(14,2))                                            |
| PARTITIONED BY (                                                  |
|   `channel` string,                                               |
|   `paid_date` date)                                               |
| ROW FORMAT SERDE                                                  |
|   'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'            |
| STORED AS INPUTFORMAT                                             |
|   'org.apache.hadoop.mapred.TextInputFormat'                      |
| OUTPUTFORMAT                                                      |
|   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'    |
| LOCATION                                                          |
|   'hdfs://cnprod1ha/user/hive/userdbs/vn09jj5.db/test_order_text' |
| TBLPROPERTIES (                                                   |
|   'bucketing_version'='2',                                        |
|   'transactional'='true',                                         |
|   'transactional_properties'='insert_only',                       |
|   'transient_lastDdlTime'='1597297404')                           |
+-------------------------------------------------------------------+

 

二、Parquet

-- Parquet test table with no parquet.compression property set,
-- so whatever codec the writer defaults to is used.
CREATE TABLE IF NOT EXISTS vn09jj5.test_order_parquet (
    store_id       SMALLINT,
    order_id       STRING,
    delivery_phone STRING,
    paid_time      TIMESTAMP,
    department     SMALLINT,
    category       SMALLINT,
    upc            STRING,
    upc_desc       STRING,
    gmv            DECIMAL(14,2)
)
PARTITIONED BY (
    channel   STRING,
    paid_date DATE
)
STORED AS PARQUET;

-- Print the DDL Hive recorded for the table.
SHOW CREATE TABLE vn09jj5.test_order_parquet;
+----------------------------------------------------------------------+
|                   createtab_stmt                                     |
+----------------------------------------------------------------------+
| CREATE TABLE `vn09jj5.test_order_parquet`(                           |
|   `store_id` smallint,                                               |
|   `order_id` string,                                                 |
|   `delivery_phone` string,                                           |
|   `paid_time` timestamp,                                             |
|   `department` smallint,                                             |
|   `category` smallint,                                               |
|   `upc` string,                                                      |
|   `upc_desc` string,                                                 |
|   `gmv` decimal(14,2))                                               |
| PARTITIONED BY (                                                     |
|   `channel` string,                                                  |
|   `paid_date` date)                                                  |
| ROW FORMAT SERDE                                                     |
|   'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'      |
| STORED AS INPUTFORMAT                                                |
|   'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'    |
| OUTPUTFORMAT                                                         |
|   'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'   |
| LOCATION                                                             |
|   'hdfs://cnprod1ha/user/hive/userdbs/vn09jj5.db/test_order_parquet' |
| TBLPROPERTIES (                                                      |
|   'bucketing_version'='2',                                           |
|   'transactional'='true',                                            |
|   'transactional_properties'='insert_only',                          |
|   'transient_lastDdlTime'='1597297775')                              |
+----------------------------------------------------------------------+

 

-- Parquet test table with the codec pinned to GZIP via a table property.
CREATE TABLE IF NOT EXISTS vn09jj5.test_order_parquet_gzip (
    store_id       SMALLINT,
    order_id       STRING,
    delivery_phone STRING,
    paid_time      TIMESTAMP,
    department     SMALLINT,
    category       SMALLINT,
    upc            STRING,
    upc_desc       STRING,
    gmv            DECIMAL(14,2)
)
PARTITIONED BY (
    channel   STRING,
    paid_date DATE
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression'='GZIP');

-- Print the DDL Hive recorded for the table.
SHOW CREATE TABLE vn09jj5.test_order_parquet_gzip;
+---------------------------------------------------------------------------+
|                   createtab_stmt                                          |
+---------------------------------------------------------------------------+
| CREATE TABLE `vn09jj5.test_order_parquet_gzip`(                           |
|   `store_id` smallint,                                                    |
|   `order_id` string,                                                      |
|   `delivery_phone` string,                                                |
|   `paid_time` timestamp,                                                  |
|   `department` smallint,                                                  |
|   `category` smallint,                                                    |
|   `upc` string,                                                           |
|   `upc_desc` string,                                                      |
|   `gmv` decimal(14,2))                                                    |
| PARTITIONED BY (                                                          |
|   `channel` string,                                                       |
|   `paid_date` date)                                                       |
| ROW FORMAT SERDE                                                          |
|   'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'           |
| STORED AS INPUTFORMAT                                                     |
|   'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'         |
| OUTPUTFORMAT                                                              |
|   'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'        |
| LOCATION                                                                  |
|   'hdfs://cnprod1ha/user/hive/userdbs/vn09jj5.db/test_order_parquet_gzip' |
| TBLPROPERTIES (                                                           |
|   'bucketing_version'='2',                                                |
|   'parquet.compression'='GZIP',                                           |
|   'transactional'='true',                                                 |
|   'transactional_properties'='insert_only',                               |
|   'transient_lastDdlTime'='1597297942')                                   |
+---------------------------------------------------------------------------+

 

-- Parquet test table with the codec pinned to SNAPPY via a table property.
CREATE TABLE IF NOT EXISTS vn09jj5.test_order_parquet_snappy (
    store_id       SMALLINT,
    order_id       STRING,
    delivery_phone STRING,
    paid_time      TIMESTAMP,
    department     SMALLINT,
    category       SMALLINT,
    upc            STRING,
    upc_desc       STRING,
    gmv            DECIMAL(14,2)
)
PARTITIONED BY (
    channel   STRING,
    paid_date DATE
)
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression'='SNAPPY');

-- Print the DDL Hive recorded for the table.
SHOW CREATE TABLE vn09jj5.test_order_parquet_snappy;
+-----------------------------------------------------------------------------+
|                   createtab_stmt                                            |
+-----------------------------------------------------------------------------+
| CREATE TABLE `vn09jj5.test_order_parquet_snappy`(                           |
|   `store_id` smallint,                                                      |
|   `order_id` string,                                                        |
|   `delivery_phone` string,                                                  |
|   `paid_time` timestamp,                                                    |
|   `department` smallint,                                                    |
|   `category` smallint,                                                      |
|   `upc` string,                                                             |
|   `upc_desc` string,                                                        |
|   `gmv` decimal(14,2))                                                      |
| PARTITIONED BY (                                                            |
|   `channel` string,                                                         |
|   `paid_date` date)                                                         |
| ROW FORMAT SERDE                                                            |
|   'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'             |
| STORED AS INPUTFORMAT                                                       |
|   'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'           |
| OUTPUTFORMAT                                                                |
|   'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'          |
| LOCATION                                                                    |
|   'hdfs://cnprod1ha/user/hive/userdbs/vn09jj5.db/test_order_parquet_snappy' |
| TBLPROPERTIES (                                                             |
|   'bucketing_version'='2',                                                  |
|   'parquet.compression'='SNAPPY',                                           |
|   'transactional'='true',                                                   |
|   'transactional_properties'='insert_only',                                 |
|   'transient_lastDdlTime'='1597298128')                                     |
+-----------------------------------------------------------------------------+

 

-- ORC test table with no codec property set, so ORC's default codec applies
-- (ZLIB per the Hive ORC documentation). To pin it explicitly the table
-- property key is 'orc.compress', e.g. TBLPROPERTIES ('orc.compress'='ZLIB').
CREATE TABLE IF NOT EXISTS vn09jj5.test_order_orc (
    store_id       SMALLINT,
    order_id       STRING,
    delivery_phone STRING,
    paid_time      TIMESTAMP,
    department     SMALLINT,
    category       SMALLINT,
    upc            STRING,
    upc_desc       STRING,
    gmv            DECIMAL(14,2)
)
PARTITIONED BY (
    channel   STRING,
    paid_date DATE
)
STORED AS ORC;

-- Print the DDL Hive recorded for the table.
SHOW CREATE TABLE vn09jj5.test_order_orc;
+------------------------------------------------------------------+
|                   createtab_stmt                                 |
+------------------------------------------------------------------+
| CREATE TABLE `vn09jj5.test_order_orc`(                           |
|   `store_id` smallint,                                           |
|   `order_id` string,                                             |
|   `delivery_phone` string,                                       |
|   `paid_time` timestamp,                                         |
|   `department` smallint,                                         |
|   `category` smallint,                                           |
|   `upc` string,                                                  |
|   `upc_desc` string,                                             |
|   `gmv` decimal(14,2))                                           |
| PARTITIONED BY (                                                 |
|   `channel` string,                                              |
|   `paid_date` date)                                              |
| ROW FORMAT SERDE                                                 |
|   'org.apache.hadoop.hive.ql.io.orc.OrcSerde'                    |
| STORED AS INPUTFORMAT                                            |
|   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'              |
| OUTPUTFORMAT                                                     |
|   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'             |
| LOCATION                                                         |
|   'hdfs://cnprod1ha/user/hive/userdbs/vn09jj5.db/test_order_orc' |
| TBLPROPERTIES (                                                  |
|   'bucketing_version'='2',                                       |
|   'transactional'='true',                                        |
|   'transactional_properties'='insert_only',                      |
|   'transient_lastDdlTime'='1597298306')                          |
+------------------------------------------------------------------+

 

-- ORC test table compressed with Snappy.
-- The ORC writer takes its codec from the 'orc.compress' table property. The
-- key used previously, 'orc.compression', is not an ORC property, so the table
-- silently stayed on the default codec; that is why the size listing in this
-- post shows test_order_orc_snappy identical to test_order_orc (54.7 M).
-- NOTE: the SHOW CREATE TABLE output and the sizes recorded in this post were
-- captured before this fix. Because of IF NOT EXISTS, a table that already
-- exists keeps its old properties: recreate it (or use
-- ALTER TABLE ... SET TBLPROPERTIES) and reload before comparing sizes.
create table if not exists vn09jj5.test_order_orc_snappy
(
 store_id          smallint     ,
 order_id          string       ,
 delivery_phone    string       ,
 paid_time         timestamp    ,
 department        smallint     ,
 category          smallint     ,
 upc               string       ,
 upc_desc          string       ,
 gmv               decimal(14,2)
)
partitioned by (channel string,paid_date date)
stored as orc tblproperties('orc.compress'='SNAPPY');
-- Print the DDL Hive recorded for the table.
show create table vn09jj5.test_order_orc_snappy ;
+-------------------------------------------------------------------------+
|                   createtab_stmt                                        |
+-------------------------------------------------------------------------+
| CREATE TABLE `vn09jj5.test_order_orc_snappy`(                           |
|   `store_id` smallint,                                                  |
|   `order_id` string,                                                    |
|   `delivery_phone` string,                                              |
|   `paid_time` timestamp,                                                |
|   `department` smallint,                                                |
|   `category` smallint,                                                  |
|   `upc` string,                                                         |
|   `upc_desc` string,                                                    |
|   `gmv` decimal(14,2))                                                  |
| PARTITIONED BY (                                                        |
|   `channel` string,                                                     |
|   `paid_date` date)                                                     |
| ROW FORMAT SERDE                                                        |
|   'org.apache.hadoop.hive.ql.io.orc.OrcSerde'                           |
| STORED AS INPUTFORMAT                                                   |
|   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'                     |
| OUTPUTFORMAT                                                            |
|   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'                    |
| LOCATION                                                                |
|   'hdfs://cnprod1ha/user/hive/userdbs/vn09jj5.db/test_order_orc_snappy' |
| TBLPROPERTIES (                                                         |
|   'bucketing_version'='2',                                              |
|   'orc.compression'='SNAPPY',                                           |
|   'transactional'='true',                                               |
|   'transactional_properties'='insert_only',                             |
|   'transient_lastDdlTime'='1597298463')                                 |
+-------------------------------------------------------------------------+

 

-- Text-format table that receives the Gzip-compressed load.
-- The DDL carries no codec; compression is chosen by session settings at insert time.
CREATE TABLE IF NOT EXISTS vn09jj5.test_order_text_gzip (
    store_id       SMALLINT,
    order_id       STRING,
    delivery_phone STRING,
    paid_time      TIMESTAMP,
    department     SMALLINT,
    category       SMALLINT,
    upc            STRING,
    upc_desc       STRING,
    gmv            DECIMAL(14,2)
)
PARTITIONED BY (
    channel   STRING,
    paid_date DATE
)
STORED AS TEXTFILE;
-- Text-format table that receives the BZip2-compressed load.
-- The DDL carries no codec; compression is chosen by session settings at insert time.
CREATE TABLE IF NOT EXISTS vn09jj5.test_order_text_bzip2 (
    store_id       SMALLINT,
    order_id       STRING,
    delivery_phone STRING,
    paid_time      TIMESTAMP,
    department     SMALLINT,
    category       SMALLINT,
    upc            STRING,
    upc_desc       STRING,
    gmv            DECIMAL(14,2)
)
PARTITIONED BY (
    channel   STRING,
    paid_date DATE
)
STORED AS TEXTFILE;
-- Text-format table that receives the Snappy-compressed load.
-- The DDL carries no codec; compression is chosen by session settings at insert time.
CREATE TABLE IF NOT EXISTS vn09jj5.test_order_text_snappy (
    store_id       SMALLINT,
    order_id       STRING,
    delivery_phone STRING,
    paid_time      TIMESTAMP,
    department     SMALLINT,
    category       SMALLINT,
    upc            STRING,
    upc_desc       STRING,
    gmv            DECIMAL(14,2)
)
PARTITIONED BY (
    channel   STRING,
    paid_date DATE
)
STORED AS TEXTFILE;

 

同步数据 

-- Dynamic-partition settings: enable dynamic partitioning, allow every
-- partition column to be dynamic (nonstrict), and raise the partition limits.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=100000;
set hive.exec.max.dynamic.partitions=100000;

 

-- Load the channel='SNG', paid_date='2020-08-08' slice into test_order_parquet.
-- The last two selected columns feed the dynamic partitions (channel, paid_date).
INSERT OVERWRITE TABLE vn09jj5.test_order_parquet
PARTITION (channel, paid_date)
SELECT
    store_id,
    order_id,
    delivery_phone,
    paid_time,
    department,
    category,
    upc,
    upc_desc,
    gmv,
    channel,
    paid_date
FROM cn_ec_bi_dl_secure.wm_order_line_o
WHERE channel = 'SNG'
  AND paid_date = '2020-08-08';
-- Load the channel='SNG', paid_date='2020-08-08' slice into test_order_parquet_gzip.
-- The last two selected columns feed the dynamic partitions (channel, paid_date).
INSERT OVERWRITE TABLE vn09jj5.test_order_parquet_gzip
PARTITION (channel, paid_date)
SELECT
    store_id,
    order_id,
    delivery_phone,
    paid_time,
    department,
    category,
    upc,
    upc_desc,
    gmv,
    channel,
    paid_date
FROM cn_ec_bi_dl_secure.wm_order_line_o
WHERE channel = 'SNG'
  AND paid_date = '2020-08-08';
-- Load the channel='SNG', paid_date='2020-08-08' slice into test_order_parquet_snappy.
-- The last two selected columns feed the dynamic partitions (channel, paid_date).
INSERT OVERWRITE TABLE vn09jj5.test_order_parquet_snappy
PARTITION (channel, paid_date)
SELECT
    store_id,
    order_id,
    delivery_phone,
    paid_time,
    department,
    category,
    upc,
    upc_desc,
    gmv,
    channel,
    paid_date
FROM cn_ec_bi_dl_secure.wm_order_line_o
WHERE channel = 'SNG'
  AND paid_date = '2020-08-08';

 

-- Load the channel='SNG', paid_date='2020-08-08' slice into test_order_orc.
-- The last two selected columns feed the dynamic partitions (channel, paid_date).
INSERT OVERWRITE TABLE vn09jj5.test_order_orc
PARTITION (channel, paid_date)
SELECT
    store_id,
    order_id,
    delivery_phone,
    paid_time,
    department,
    category,
    upc,
    upc_desc,
    gmv,
    channel,
    paid_date
FROM cn_ec_bi_dl_secure.wm_order_line_o
WHERE channel = 'SNG'
  AND paid_date = '2020-08-08';
-- Load the channel='SNG', paid_date='2020-08-08' slice into test_order_orc_snappy.
-- The last two selected columns feed the dynamic partitions (channel, paid_date).
INSERT OVERWRITE TABLE vn09jj5.test_order_orc_snappy
PARTITION (channel, paid_date)
SELECT
    store_id,
    order_id,
    delivery_phone,
    paid_time,
    department,
    category,
    upc,
    upc_desc,
    gmv,
    channel,
    paid_date
FROM cn_ec_bi_dl_secure.wm_order_line_o
WHERE channel = 'SNG'
  AND paid_date = '2020-08-08';

 

 

+---------------------------------+
|               set               |
+---------------------------------+
| hive.exec.compress.output=true  |
+---------------------------------+
+---------------------------------+
|             set                 |
+---------------------------------+
| mapred.output.compress=true     |
+---------------------------------+
+----------------------------------------------------------------------------+
|                        set                                                 |
+----------------------------------------------------------------------------+
| mapred.output.compression.codec=org.apache.hadoop.io.compress.DefaultCodec |
+----------------------------------------------------------------------------+
+----------------------------------+
|               set                |
+----------------------------------+
| mapred.compress.map.output=true  |
+----------------------------------+
+----------------------------------------------------+
|                        set                         |
+----------------------------------------------------+
| io.compression.codecs=                             |
|org.apache.hadoop.io.compress.GzipCodec,            |
|org.apache.hadoop.io.compress.DefaultCodec,         |
|org.apache.hadoop.io.compress.SnappyCodec           |
+----------------------------------------------------+

 

-- Load the plain-text baseline table.
-- The session defaults printed above have hive.exec.compress.output=true with
-- DefaultCodec, so without an override this "uncompressed" baseline is written
-- as .deflate files and ends up the same size as the gzip table (117.5 M in
-- the listing recorded in this post). Turn output compression off for this
-- load so the baseline really is uncompressed text. The codec-specific loads
-- that follow switch compression back on themselves.
-- NOTE: the file listing and sizes recorded in this post were captured before
-- this fix (compression still on).
set hive.exec.compress.output=false;
set mapred.output.compress=false;

insert overwrite table vn09jj5.test_order_text
partition(channel,paid_date)
select
 store_id,order_id,delivery_phone,paid_time,department,category,upc,upc_desc,gmv,
 channel,paid_date
from cn_ec_bi_dl_secure.wm_order_line_o
where paid_date = '2020-08-08'
and channel='SNG'
;

 

-- Gzip text load: switch on output compression, select GzipCodec for the
-- session, then write the same slice into test_order_text_gzip.
SET mapred.output.compress=true;
SET hive.exec.compress.output=true;
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
-- (left disabled) io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec;

INSERT OVERWRITE TABLE vn09jj5.test_order_text_gzip
PARTITION (channel, paid_date)
SELECT
    store_id,
    order_id,
    delivery_phone,
    paid_time,
    department,
    category,
    upc,
    upc_desc,
    gmv,
    channel,
    paid_date
FROM cn_ec_bi_dl_secure.wm_order_line_o
WHERE channel = 'SNG'
  AND paid_date = '2020-08-08';

 

-- BZip2 text load: switch on output compression, select BZip2Codec for the
-- session, then write the same slice into test_order_text_bzip2.
SET mapred.output.compress=true;
SET hive.exec.compress.output=true;
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.BZip2Codec;
-- (left disabled) io.compression.codecs=org.apache.hadoop.io.compress.BZip2Codec;

INSERT OVERWRITE TABLE vn09jj5.test_order_text_bzip2
PARTITION (channel, paid_date)
SELECT
    store_id,
    order_id,
    delivery_phone,
    paid_time,
    department,
    category,
    upc,
    upc_desc,
    gmv,
    channel,
    paid_date
FROM cn_ec_bi_dl_secure.wm_order_line_o
WHERE channel = 'SNG'
  AND paid_date = '2020-08-08';

 

-- Snappy text load: switch on output compression, select SnappyCodec for the
-- session, then write the same slice into test_order_text_snappy.
SET mapred.output.compress=true;
SET hive.exec.compress.output=true;
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
-- (left disabled) io.compression.codecs=org.apache.hadoop.io.compress.SnappyCodec;

INSERT OVERWRITE TABLE vn09jj5.test_order_text_snappy
PARTITION (channel, paid_date)
SELECT
    store_id,
    order_id,
    delivery_phone,
    paid_time,
    department,
    category,
    upc,
    upc_desc,
    gmv,
    channel,
    paid_date
FROM cn_ec_bi_dl_secure.wm_order_line_o
WHERE channel = 'SNG'
  AND paid_date = '2020-08-08';

 

# List the contents of every test_order_* table directory.
# The glob is quoted so HDFS expands it instead of the local shell.
hdfs dfs -ls '/user/hive/userdbs/vn09jj5.db/test_order_*/'
drwxr-x---   - hive hadoop    /user/hive/userdbs/vn09jj5.db/test_order_orc/channel=SNG
drwxr-x---   - hive hadoop    /user/hive/userdbs/vn09jj5.db/test_order_orc_snappy/channel=SNG
drwxr-x---   - hive hadoop    /user/hive/userdbs/vn09jj5.db/test_order_parquet/channel=SNG
drwxr-x---   - hive hadoop    /user/hive/userdbs/vn09jj5.db/test_order_parquet_gzip/channel=SNG
drwxr-x---   - hive hadoop    /user/hive/userdbs/vn09jj5.db/test_order_parquet_snappy/channel=SNG
drwxr-x---   - hive hadoop    /user/hive/userdbs/vn09jj5.db/test_order_text/channel=SNG
drwxr-x---   - hive hadoop    /user/hive/userdbs/vn09jj5.db/test_order_text_bzip2/channel=SNG
drwxr-x---   - hive hadoop    /user/hive/userdbs/vn09jj5.db/test_order_text_gzip/channel=SNG
drwxr-x---   - hive hadoop    /user/hive/userdbs/vn09jj5.db/test_order_text_snappy/channel=SNG

 

# List three levels below each test_order_* table directory (the data files
# under each base_* directory in the recorded listing).
# The glob is quoted so HDFS expands it instead of the local shell.
hdfs dfs -ls '/user/hive/userdbs/vn09jj5.db/test_order_*/*/*/*'
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_orc/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_orc/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0
Found 2 items               
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_orc_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_orc_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0
Found 2 items               
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_parquet/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_parquet/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0
Found 2 items               
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_parquet_gzip/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_parquet_gzip/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0
Found 2 items               
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_parquet_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_parquet_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0
Found 2 items               
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_text/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0.deflate
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_text/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0.deflate
Found 2 items               
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_text_bzip2/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0.bz2
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_text_bzip2/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0.bz2
Found 2 items               
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_text_gzip/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0.gz
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_text_gzip/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0.gz
Found 2 items               
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_text_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0.snappy
-rw-r-----   3 hive hadoop   /user/hive/userdbs/vn09jj5.db/test_order_text_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0.snappy

 

# Human-readable sizes of the entries directly under each test_order_* table.
# The glob is quoted so HDFS expands it instead of the local shell.
hdfs dfs -du -h '/user/hive/userdbs/vn09jj5.db/test_order_*/'
54.7 M   164.2 M  /user/hive/userdbs/vn09jj5.db/test_order_orc/channel=SNG
54.7 M   164.2 M  /user/hive/userdbs/vn09jj5.db/test_order_orc_snappy/channel=SNG
76.8 M   230.5 M  /user/hive/userdbs/vn09jj5.db/test_order_text_bzip2/channel=SNG
80.7 M   242.0 M  /user/hive/userdbs/vn09jj5.db/test_order_parquet_gzip/channel=SNG
117.5 M  352.6 M  /user/hive/userdbs/vn09jj5.db/test_order_text/channel=SNG
117.5 M  352.6 M  /user/hive/userdbs/vn09jj5.db/test_order_text_gzip/channel=SNG
131.2 M  393.7 M  /user/hive/userdbs/vn09jj5.db/test_order_parquet_snappy/channel=SNG
184.1 M  552.4 M  /user/hive/userdbs/vn09jj5.db/test_order_text_snappy/channel=SNG
328.3 M  984.9 M  /user/hive/userdbs/vn09jj5.db/test_order_parquet/channel=SNG

 

# Human-readable sizes two levels below each test_order_* table directory.
# The glob is quoted so HDFS expands it instead of the local shell.
hdfs dfs -du -h '/user/hive/userdbs/vn09jj5.db/test_order_*/*/*'
54.7 M   164.2 M  /user/hive/userdbs/vn09jj5.db/test_order_orc/channel=SNG/paid_date=2020-08-08/base_0000001
54.7 M   164.2 M  /user/hive/userdbs/vn09jj5.db/test_order_orc_snappy/channel=SNG/paid_date=2020-08-08/base_0000001
76.8 M   230.5 M  /user/hive/userdbs/vn09jj5.db/test_order_text_bzip2/channel=SNG/paid_date=2020-08-08/base_0000001
80.7 M   242.0 M  /user/hive/userdbs/vn09jj5.db/test_order_parquet_gzip/channel=SNG/paid_date=2020-08-08/base_0000001
117.5 M  352.6 M  /user/hive/userdbs/vn09jj5.db/test_order_text/channel=SNG/paid_date=2020-08-08/base_0000001
117.5 M  352.6 M  /user/hive/userdbs/vn09jj5.db/test_order_text_gzip/channel=SNG/paid_date=2020-08-08/base_0000001
131.2 M  393.7 M  /user/hive/userdbs/vn09jj5.db/test_order_parquet_snappy/channel=SNG/paid_date=2020-08-08/base_0000001
184.1 M  552.4 M  /user/hive/userdbs/vn09jj5.db/test_order_text_snappy/channel=SNG/paid_date=2020-08-08/base_0000001
328.3 M  984.9 M  /user/hive/userdbs/vn09jj5.db/test_order_parquet/channel=SNG/paid_date=2020-08-08/base_0000001

 

# Human-readable sizes three levels below each test_order_* table directory.
# The glob is quoted so HDFS expands it instead of the local shell.
hdfs dfs -du -h '/user/hive/userdbs/vn09jj5.db/test_order_*/*/*/*'
43.3 M   130.0 M  /user/hive/userdbs/vn09jj5.db/test_order_orc/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0
11.4 M   34.1 M   /user/hive/userdbs/vn09jj5.db/test_order_orc/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0
43.3 M   130.0 M  /user/hive/userdbs/vn09jj5.db/test_order_orc_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0
11.4 M   34.1 M   /user/hive/userdbs/vn09jj5.db/test_order_orc_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0

269.7 M  809.0 M  /user/hive/userdbs/vn09jj5.db/test_order_parquet/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0
58.6 M   175.9 M  /user/hive/userdbs/vn09jj5.db/test_order_parquet/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0
65.8 M   197.3 M  /user/hive/userdbs/vn09jj5.db/test_order_parquet_gzip/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0
14.9 M   44.7 M   /user/hive/userdbs/vn09jj5.db/test_order_parquet_gzip/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0
108.2 M  324.7 M  /user/hive/userdbs/vn09jj5.db/test_order_parquet_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0
23.0 M   69.0 M   /user/hive/userdbs/vn09jj5.db/test_order_parquet_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0

94.5 M   283.5 M  /user/hive/userdbs/vn09jj5.db/test_order_text/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0.deflate
23.1 M   69.2 M   /user/hive/userdbs/vn09jj5.db/test_order_text/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0.deflate
61.5 M   184.4 M  /user/hive/userdbs/vn09jj5.db/test_order_text_bzip2/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0.bz2
15.4 M   46.2 M   /user/hive/userdbs/vn09jj5.db/test_order_text_bzip2/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0.bz2
94.5 M   283.5 M  /user/hive/userdbs/vn09jj5.db/test_order_text_gzip/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0.gz
23.1 M   69.2 M   /user/hive/userdbs/vn09jj5.db/test_order_text_gzip/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0.gz
148.0 M  444.1 M  /user/hive/userdbs/vn09jj5.db/test_order_text_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000000_0.snappy
36.1 M   108.2 M  /user/hive/userdbs/vn09jj5.db/test_order_text_snappy/channel=SNG/paid_date=2020-08-08/base_0000001/000001_0.snappy

 

posted @ 2021-01-25 20:41  茗::流  阅读(243)  评论(0)    收藏  举报
如有雷同,纯属参考。如有侵犯你的版权,请联系我。