OushuDB 体验新执行器

 

 

本章节通过TPCH lineitem 表来展示新执行器的使用。

建立e_lineitem外部表用来生成TPCH lineitem 数据,

CREATE EXTERNAL WEB TABLE E_LINEITEM ( L_ORDERKEY    INT8 ,
                                       L_PARTKEY     INTEGER ,
                                       L_SUPPKEY     INTEGER ,
                                       L_LINENUMBER  INTEGER ,
                                       L_QUANTITY    FLOAT ,
                                       L_EXTENDEDPRICE  FLOAT ,
                                       L_DISCOUNT    FLOAT ,
                                       L_TAX         FLOAT ,
                                       L_RETURNFLAG  VARCHAR(1) ,
                                       L_LINESTATUS  VARCHAR(1) ,
                                       L_SHIPDATE    TEXT ,
                                       L_COMMITDATE  TEXT ,
                                       L_RECEIPTDATE TEXT ,
                                       L_SHIPINSTRUCT CHAR(25) ,
                                       L_SHIPMODE     VARCHAR(10) ,
                                       L_COMMENT      VARCHAR(44) )
                                       EXECUTE 'bash -c "$GPHOME/bin/dbgen -b $GPHOME/bin/dists.dss -T L -s 1 -N 6 -n $((GP_SEGMENT_ID + 1))"'
                                       on 6 format 'text' (delimiter '|');

创建ORC 表

CREATE TABLE lineitem
       (  L_ORDERKEY    INT8,
          L_PARTKEY     INTEGER,
          L_SUPPKEY     INTEGER,
          L_LINENUMBER  INTEGER,
          L_QUANTITY    FLOAT,
          L_EXTENDEDPRICE  FLOAT,
          L_DISCOUNT    FLOAT,
          L_TAX         FLOAT,
          L_RETURNFLAG  TEXT,
          L_LINESTATUS  TEXT,
          L_SHIPDATE    TEXT,
          L_COMMITDATE  TEXT,
          L_RECEIPTDATE TEXT,
          L_SHIPINSTRUCT TEXT,
          L_SHIPMODE     TEXT,
          L_COMMENT      TEXT)
   WITH (APPENDONLY = true, OIDS = FALSE, ORIENTATION = orc);

插入数据

INSERT INTO lineitem SELECT * FROM e_lineitem;

从下面的例子可以看到新执行器对于性能的大幅改进。

-----获取表行数------
postgres=# set new_executor = on;
SET
postgres=# SELECT COUNT(*) FROM lineitem;
count
---------
6001215
(1 row)
Time: 17.006 ms

postgres=# set new_executor = off;
SET
postgres=# SELECT COUNT(*) FROM lineitem;
count
---------
6001215
(1 row)
Time: 213.248 ms

-----TPCH 查询 1 ------
postgres=# set new_executor = on;
SET
postgres=#  SELECT
        l_returnflag,
        l_linestatus,
        sum(l_quantity)::bigint as sum_qty,
        sum(l_extendedprice)::bigint as sum_base_price,
        sum(l_extendedprice * (1 - l_discount))::bigint as sum_disc_price,
        sum(l_extendedprice * (1 - l_discount) * (1 + l_tax))::bigint as sum_charge,
        avg(l_quantity)::bigint as avg_qty,
        avg(l_extendedprice)::bigint as avg_price,
        avg(l_discount)::bigint as avg_disc,
        count(*) as count_order
FROM
        lineitem
WHERE
        l_shipdate <= '1998-08-20'
GROUP BY
        l_returnflag,
        l_linestatus;

l_returnflag | l_linestatus | sum_qty  | sum_base_price | sum_disc_price |  sum_charge  | avg_qty | avg_price | avg_disc | count_order
--------------+--------------+----------+----------------+----------------+--------------+---------+-----------+----------+-------------
R            | F            | 37719753 |    56568041381 |    53741292685 |  55889619120 |      26 |     38251 |        0 |     1478870
N            | F            |   991417 |     1487504710 |     1413082168 |   1469649223 |      26 |     38284 |        0 |       38854
A            | F            | 37734107 |    56586554401 |    53758257135 |  55909065223 |      26 |     38273 |        0 |     1478493
N            | O            | 73808911 |   110700990251 |   105167436999 | 109377979031 |      26 |     38248 |        0 |     2894278
(4 rows)

Time: 234.376 ms

postgres=# set new_executor = off;
SET
postgres=#  SELECT
        l_returnflag,
        l_linestatus,
        sum(l_quantity)::bigint as sum_qty,
        sum(l_extendedprice)::bigint as sum_base_price,
        sum(l_extendedprice * (1 - l_discount))::bigint as sum_disc_price,
        sum(l_extendedprice * (1 - l_discount) * (1 + l_tax))::bigint as sum_charge,
        avg(l_quantity)::bigint as avg_qty,
        avg(l_extendedprice)::bigint as avg_price,
        avg(l_discount)::bigint as avg_disc,
        count(*) as count_order
FROM
        lineitem
WHERE
        l_shipdate <= '1998-08-20'
GROUP BY
        l_returnflag,
        l_linestatus;

l_returnflag | l_linestatus | sum_qty  | sum_base_price | sum_disc_price |  sum_charge  | avg_qty | avg_price | avg_disc | count_order
--------------+--------------+----------+----------------+----------------+--------------+---------+-----------+----------+-------------
R            | F            | 37719753 |    56568041381 |    53741292685 |  55889619120 |      26 |     38251 |        0 |     1478870
N            | F            |   991417 |     1487504710 |     1413082168 |   1469649223 |      26 |     38284 |        0 |       38854
A            | F            | 37734107 |    56586554401 |    53758257135 |  55909065223 |      26 |     38273 |        0 |     1478493
N            | O            | 73808911 |   110700990251 |   105167436999 | 109377979031 |      26 |     38248 |        0 |     2894278
(4 rows)

Time: 2341.147 ms
posted @ 2021-12-27 18:29  数据舱酷  阅读(12)  评论(0)    收藏  举报