【源码阅读】3. 建表

| KW_CREATE opt_external:isExternal KW_TABLE opt_if_not_exists:ifNotExists table_name:name
            LPAREN column_definition_list:columns COMMA index_definition_list:indexes RPAREN opt_engine:engineName
            opt_keys:keys
            opt_comment:tableComment
            opt_partition:partition
            opt_distribution:distribution
            opt_rollup:index
            opt_properties:tblProperties
            opt_ext_properties:extProperties
    {:
        RESULT = new CreateTableStmt(ifNotExists, isExternal, name, columns, indexes, engineName, keys, partition,
        distribution, tblProperties, extProperties, tableComment, index);
    :}                           
    
 CreateTableStmt最终解析出的属性如下所示:
    // Fields held by CreateTableStmt once the CREATE TABLE statement has been parsed.
    protected TableName tableName;                                              // table name
    protected List<ColumnDef> columnDefs;                                 // column definitions
    private List<IndexDef> indexDefs;                                            // index definitions
  protected PartitionDesc partitionDesc;               // partition description
  protected DistributionDesc distributionDesc;         // distribution (bucketing) description
    protected KeysDesc keysDesc;                                                          // keys description (data model)
    protected Map<String, String> properties;                                                     // table properties
    private String comment;                                                                                   // table comment (the COMMENT clause, not a user review)
    private List<AlterClause> rollupAlterClauseList = Lists.newArrayList();     // rollup clauses
 

建表执行

总体视角

创建时序图 - createOlapTable

 

细化过程

排序key选择逻辑Env.calcShortKeyColumnCount

默认最多3个排序字段,遍历每个key字段

● 如果加上此字段,key长度超过36个字节

○ 如果是char家族,这个字段算,然后退出遍历

○ 如果不是char家族,这个字段不算,然后退出遍历

● 如果加上此字段,key长度没有超过36个字节

○ 如果是varchar:这个字段算,退出

○ 否则这个字段算

 

表结构细化

每个分区下都存在相同索引

每个索引下都存在相同数量Tablet

Tablet总数为:分区数*索引数*分桶数

Replica总数为:Tablet总数*副本数,即 分区数*索引数*分桶数*副本数

 

BE选择逻辑

每个(Partition, Rollup, Tablet)组合根据副本数去选择BE存放副本

 

RPC逻辑

每个(Partition, Rollup)把下面累积的CreateReplicaTask组装到AgentBatchTask中并通过AgentTaskExecutor发送

 

properties应用情况

建表时可指定的属性如下:

    // Property keys that can be specified in the PROPERTIES clause when creating a table.
    // Each constant holds the literal key string as the user writes it.
    public static final String PROPERTIES_REPLICATION_NUM = "replication_num";
    public static final String PROPERTIES_REPLICATION_ALLOCATION = "replication_allocation";
    public static final String PROPERTIES_SHORT_KEY = "short_key";    
    public static final String PROPERTIES_ENABLE_LIGHT_SCHEMA_CHANGE = "light_schema_change";
    public static final String PROPERTIES_STORAGE_FORMAT = "storage_format";                            //V2 
    public static final String PROPERTIES_DISABLE_AUTO_COMPACTION = "disable_auto_compaction";
    public static final String PROPERTIES_COMPRESSION = "compression";
    public static final String ENABLE_UNIQUE_KEY_MERGE_ON_WRITE = "enable_unique_key_merge_on_write";
    public static final String PROPERTIES_BF_COLUMNS = "bloom_filter_columns";
    public static final String PROPERTIES_BF_FPP = "bloom_filter_fpp";
    public static final String PROPERTIES_AUTO_BUCKET = "_auto_bucket";
    public static final String PROPERTIES_ESTIMATE_PARTITION_SIZE = "estimate_partition_size";
    public static final String PROPERTIES_INMEMORY = "in_memory";
    public static final String PROPERTIES_STORAGE_POLICY = "storage_policy";                            // Policy
    public static final String PROPERTIES_TABLET_TYPE = "tablet_type";
    public static final String PROPERTIES_STORAGE_MEDIUM = "storage_medium";                            // SSD, HDD
    public static final String PROPERTIES_STORAGE_COOLDOWN_TIME = "storage_cooldown_time";
    public static final String PROPERTIES_DATA_BASE_TIME = "data_base_time_ms";
    public static final String PROPERTIES_COLOCATE_WITH = "colocate_with";
    public static final String PROPERTIES_STORAGE_TYPE = "storage_type";                                // COLUMN
    public static final String PROPERTIES_SCHEMA_VERSION = "schema_version";
    public static final String PROPERTIES_FUNCTION_COLUMN = "function_column";
    public static final String PROPERTIES_SEQUENCE_TYPE = "sequence_type";
    public static final String PROPERTIES_SEQUENCE_COL = "sequence_col";
    public static final String PROPERTIES_VERSION_INFO = "version_info";
 

BE交互

 

 

其他

分区Partition

Partition的示例如下

PARTITION BY RANGE(`date`)
(
    PARTITION `p201701` VALUES LESS THAN ("2017-02-01"),
    PARTITION `p201702` VALUES LESS THAN ("2017-03-01"),
    PARTITION `p201703` VALUES LESS THAN ("2017-04-01")
)

PARTITION BY LIST(`city`)
(
    PARTITION `p_cn` VALUES IN ("Beijing", "Shanghai", "Hong Kong"),
    PARTITION `p_usa` VALUES IN ("New York", "San Francisco"),
    PARTITION `p_jp` VALUES IN ("Tokyo")
)
 

总体定义

/*
 * Optional PARTITION BY clause of CREATE TABLE.
 * Yields null when the clause is absent, a RangePartitionDesc for
 * PARTITION BY RANGE (...) and a ListPartitionDesc for PARTITION BY LIST (...).
 * In both non-empty forms `columns` is the identifier list inside the first
 * pair of parentheses and `list` is the result of opt_all_partition_desc_list
 * from the second pair (null when that pair is empty).
 */
opt_partition ::=
    /* Empty: no partition */
    {:
        RESULT = null;
    :}
    /* Range partition */
    | KW_PARTITION KW_BY KW_RANGE LPAREN ident_list:columns RPAREN
            LPAREN opt_all_partition_desc_list:list RPAREN
    {:
        RESULT = new RangePartitionDesc(columns, list);
    :}
    /* List partition */
    | KW_PARTITION KW_BY KW_LIST LPAREN ident_list:columns RPAREN
            LPAREN opt_all_partition_desc_list:list RPAREN
    {:
        RESULT = new ListPartitionDesc(columns, list);
    :}
    ;
 

Partition范围块的顶层设计

###   多个partition的定义    
/*
 * Optional list of partition descriptions, used inside the second pair of
 * parentheses of PARTITION BY RANGE/LIST. Yields null when nothing is listed,
 * otherwise passes through the list built by all_partition_desc_list.
 */
opt_all_partition_desc_list ::=
    /* Empty */
    {:
        RESULT = null;
    :}
    | all_partition_desc_list:list
    {:
        RESULT = list;
    :}
    ;
/*
 * Non-empty, comma-separated list of partition descriptions (left-recursive).
 * Each element is either a single_partition_desc or a multi_partition_desc
 * (the latter is defined outside this excerpt); both kinds may appear in the
 * same list. The list object is created by the first element and then
 * appended to in place by each recursive alternative.
 */
all_partition_desc_list ::=
    /* append a single-partition description to an existing list */
    all_partition_desc_list:list COMMA single_partition_desc:desc
    {:
        list.add(desc);
        RESULT = list;
    :}
    /* first element is a single-partition description: start a new list */
    | single_partition_desc:desc
    {:
        RESULT = Lists.newArrayList(desc);
    :}
    /* append a multi-partition description to an existing list */
    | all_partition_desc_list:list COMMA multi_partition_desc:desc
    {:
        list.add(desc);
        RESULT = list;
    :}
    /* first element is a multi-partition description: start a new list */
    | multi_partition_desc:desc
    {:
        RESULT = Lists.newArrayList(desc);
    :}
    ;
 

单partition块的设计

## 单个partition行定义    
## PARTITION `xx` VALUES LESS THAN ("2017-02-01")=>SinglePartitionDesc(PartitionKeyDesc(List<PartitionValue>))
## PARTITION `xx` VALUES [("a","b"),("a","b")) => SinglePartitionDesc(PartitionKeyDesc(List<PartitionValue>,List<PartitionValue>))
## PARTITION `p_cn` VALUES IN ("A", "B") => SinglePartitionDesc(PartitionKeyDesc(List<List<PartitionValue>>))

/*
 * One PARTITION entry. Three syntactic forms are accepted, distinguished by
 * what follows KW_VALUES; all of them build a SinglePartitionDesc from the
 * optional IF NOT EXISTS flag, the partition name, the parsed key description
 * and the trailing optional key/value properties (opt_key_value_map is
 * defined outside this excerpt).
 */
single_partition_desc ::=
    /* range partition, upper bound only: VALUES LESS THAN <partition_key_desc> */
    KW_PARTITION opt_if_not_exists:ifNotExists ident:partName KW_VALUES KW_LESS KW_THAN partition_key_desc:desc
        opt_key_value_map:properties
    {:
        RESULT = new SinglePartitionDesc(ifNotExists, partName, desc, properties);
    :}
    /* range partition, explicit bounds: VALUES <fixed_partition_key_desc> */
    | KW_PARTITION opt_if_not_exists:ifNotExists ident:partName KW_VALUES fixed_partition_key_desc:desc
        opt_key_value_map:properties
    {:
        RESULT = new SinglePartitionDesc(ifNotExists, partName, desc, properties);
    :}
    /* list partition */
    | KW_PARTITION opt_if_not_exists:ifNotExists ident:partName KW_VALUES KW_IN list_partition_key_desc:desc
        opt_key_value_map:properties
    {:
        RESULT = new SinglePartitionDesc(ifNotExists, partName, desc, properties);
    :}
    ; 
 

range分区值定义部分partition_key_desc,fixed_partition_key_desc

###   LESS THAN分区值("a","b","c")   =>   PartitionKeyDesc(List<PartitionValue>)
/*
 * Bound that follows VALUES LESS THAN in a range partition.
 * Either the bare KW_MAX_VALUE token (-> PartitionKeyDesc.createMaxKeyDesc())
 * or a parenthesized partition_key_list (-> PartitionKeyDesc.createLessThan(keys)).
 */
partition_key_desc ::=
    KW_MAX_VALUE
    {:
        RESULT = PartitionKeyDesc.createMaxKeyDesc();
    :}
    | LPAREN partition_key_list:keys RPAREN
    {:
        RESULT = PartitionKeyDesc.createLessThan(keys);
    :}
    ;    
###  VALUES左闭右开区间[("a","b","c"),("a","b","c")) => PartitionKeyDesc(List<PartitionValue>,List<PartitionValue>)
/*
 * Range partition given by explicit lower and upper key lists.
 * The whole expression opens with LBRACKET and closes with RPAREN, i.e. it is
 * written as [(lower), (upper)). Both key lists are handed to
 * PartitionKeyDesc.createFixed(lower, upper).
 */
fixed_partition_key_desc ::=
    /* format: [(lower), (upper))*/
    LBRACKET LPAREN partition_key_list:lower RPAREN COMMA LPAREN partition_key_list:upper RPAREN RPAREN
    {:
        RESULT = PartitionKeyDesc.createFixed(lower, upper);
    :}
    ; 
    
###   "a","b","c"   =>   List<PartitionValue>
/*
 * Comma-separated list of range partition key values, yielding a
 * List<PartitionValue>. Each element is either a STRING_LITERAL (wrapped in a
 * new PartitionValue) or the KW_MAX_VALUE token (mapped to
 * PartitionValue.MAX_VALUE). The list may be empty.
 *
 * NOTE(review): because the empty alternative can serve as the left operand
 * of the recursive alternatives, the grammar as written also derives a list
 * that begins with a comma, e.g. `, "a"` — confirm whether that is intended.
 */
partition_key_list ::=
    /* empty */
    {:
        List<PartitionValue> l = new ArrayList<PartitionValue>();
        RESULT = l;
    :}
    /* append a string literal value */
    | partition_key_list:l COMMA STRING_LITERAL:item
    {:
        l.add(new PartitionValue(item));
        RESULT = l;
    :}
    /* append the max-value marker */
    | partition_key_list:l COMMA KW_MAX_VALUE
    {:
        l.add(PartitionValue.MAX_VALUE);
        RESULT = l;
    :}
    /* single string literal: start a new list */
    | STRING_LITERAL:item
    {:
        RESULT = Lists.newArrayList(new PartitionValue(item));
    :}
    /* single max-value marker: start a new list */
    | KW_MAX_VALUE
    {:
        RESULT = Lists.newArrayList(PartitionValue.MAX_VALUE);
    :}
    ;
 

list分区值定义部分list_partition_key_desc

###  LIST分区部分        
###   (("abc","dbf"), ("abc","dbf"))   =>   PartitionKeyDesc(List<List<PartitionValue>>) 
###  加上最外层()
/*
 * Value set that follows VALUES IN in a list partition: a parenthesized
 * list_partition_values_list, converted with PartitionKeyDesc.createIn(keys).
 */
list_partition_key_desc ::=
    LPAREN list_partition_values_list:keys RPAREN
    {:
        RESULT = PartitionKeyDesc.createIn(keys);
    :}
    ;    
###  单个partition_value_list,或以逗号分隔的多个partition_value_list   =>   List<List<PartitionValue>>
/*
 * Non-empty, comma-separated list of partition_value_list items
 * (left-recursive). Each item is itself a List<PartitionValue>, so the result
 * is a list of lists. The outer list is created by the first item and then
 * appended to in place.
 */
list_partition_values_list ::=
    /* first item: start a new outer list */
    partition_value_list:item
    {:
        ArrayList<List<PartitionValue>> l = new ArrayList();
        l.add(item);
        RESULT = l;
    :}
    /* append a further item to the existing outer list */
    | list_partition_values_list:l COMMA partition_value_list:item
    {:
        l.add(item);
        RESULT = l;
    :}
    ;
###   "abc"或者("abc")或者("abc","def")    =>  List<PartitionValue>
/*
 * One entry of a list partition's value set, yielding a List<PartitionValue>.
 * A bare STRING_LITERAL becomes a one-element list; a parenthesized
 * partition_key_item_list is passed through unchanged.
 */
partition_value_list ::=
    /* single partition key */
    STRING_LITERAL:item
    {:
        RESULT = Lists.newArrayList(new PartitionValue(item));
    :}
    /* multi partition keys : (1, "beijing") */
    | LPAREN partition_key_item_list:l RPAREN
    {:
        RESULT = l;
    :}
    ;  
/*
 * Non-empty, comma-separated list of STRING_LITERALs (left-recursive), each
 * wrapped in a new PartitionValue. Used for the parenthesized form of
 * partition_value_list.
 */
partition_key_item_list ::=
    /* first literal: start a new list */
    STRING_LITERAL:item
    {:
        RESULT = Lists.newArrayList(new PartitionValue(item));
    :}
    /* append a further literal to the existing list */
    | partition_key_item_list:l COMMA STRING_LITERAL:item
    {:
        l.add(new PartitionValue(item));
        RESULT = l;
    :}
    ;     

 

 

posted @ 2023-06-25 18:40  xutao_ustc  阅读(32)  评论(0)    收藏  举报