***********************************************************************************
/* Allocate a new rax and return its pointer. On out of memory the function
* returns NULL. */
创建一个新的基树并且返回指向基树的指针,内存不足返回空
rax *raxNew(void) {
rax *rax = rax_malloc(sizeof(*rax));
//这里分配24个字节,一个指针8字节,两个64位数各8字节,共24字节
if (rax == NULL) return NULL;
rax->numele = 0;
rax->numnodes = 1;
rax->head = raxNewNode(0,0); //创建一个没有子节点的基数,只有头结点
if (rax->head == NULL) {
rax_free(rax);
return NULL;
} else {
return rax;
}
}
***********************************************************************************
/* Allocate a new non compressed node with the specified number of children.
* If datafiled is true, the allocation is made large enough to hold the
* associated data pointer.
* Returns the new node pointer. On out of memory NULL is returned. */
创建一个带特定数量子节点的非压缩的节点。
如果数据域是有值的,那么需要分配足够的空间报存放关联的数据指正
成功返回指向新节点的指正,内存不足返回空
raxNode *raxNewNode(size_t children, int datafield) {
size_t nodesize = sizeof(raxNode)+children+raxPadding(children)+
sizeof(raxNode*)*children;
if (datafield) nodesize += sizeof(void*);
raxNode *node = rax_malloc(nodesize);
if (node == NULL) return NULL;
node->iskey = 0;
node->isnull = 0;
node->iscompr = 0;
node->size = children;
return node;
}
***********************************************************************************
/* Low level function that walks the tree looking for the string
* 's' of 'len' bytes. The function returns the number of characters
* of the key that was possible to process: if the returned integer
* is the same as 'len', then it means that the node corresponding to the
* string was found (however it may not be a key in case the node->iskey is
* zero or if simply we stopped in the middle of a compressed node, so that
* 'splitpos' is non zero).
这是一个比较底层的函数,通过遍历基树查找长度为len的字符串s(即为键值)。这个函数返回可能要处理键值的字符个数:
如果返回的整数等同于len,意味着字符串对应的节点被找到了(然而它可能不是一个键,假如node->iskey的值是0
或者我们只是停留在了一个压缩节点的内部,那样的话splitpos的值就不是0了,意味着需要分拆)
* Otherwise if the returned integer is not the same as 'len', there was an
* early stop during the tree walk because of a character mismatch.
否则如果返回的整数不等同于len,那么在查找过程中必定停止的更早,因为没有匹配合适的字符
* The node where the search ended (because the full string was processed
* or because there was an early stop) is returned by reference as
* '*stopnode' if the passed pointer is not NULL. This node link in the
* parent's node is returned as '*plink' if not NULL. Finally, if the
* search stopped in a compressed node, '*splitpos' returns the index
* inside the compressed node where the search ended. This is useful to
* know where to split the node for insertion.
查找结束的节点(因为整个字符串要处理或者因为更早的停止)通过引用指针*stopnode返回,前提是传入的指正非空。
这个节点关联的父节点中的指正通过引用指针*plink返回,前提是指针非空。
最终,如果该次查找停留在压缩节点,*splitpos返回查找在压缩节点内部终止的索引。
这个有利于知道哪里插入一个分裂的新节点。
* Note that when we stop in the middle of a compressed node with
* a perfect match, this function will return a length equal to the
* 'len' argument (all the key matched), and will return a *splitpos which is
* always positive (that will represent the index of the character immediately
* *after* the last match in the current compressed node).
注意到如果我们完美匹配的停留在一个压缩节点内部,那么这个函数将会返回一个长度等同于len(所有键匹配),
并且将会返回一个正的*splitpos值(这个值将代表压缩节点最后匹配字符后的第一个位置索引)
* When instead we stop at a compressed node and *splitpos is zero, it
* means that the current node represents the key (that is, none of the
* compressed node characters are needed to represent the key, just all
* its parents nodes). */
相反如果我们停在一个压缩节点但是*splitpos是0,就表示当前节点代表了这个键
(意味着没有压缩节点的字符需要去表示这个键,就是全部它的父节点)
static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts)
{
参数依次解释: 基树,待查找字符串,待查找字符串长度,停留的节点,子节点在父节点的位置指针,拆分的位置,保存父节点的栈
raxNode *h = rax->head;
raxNode **parentlink = &rax->head;
size_t i = 0; /* Position in the string. */ 字符串中的位置
size_t j = 0; /* Position in the node children (or bytes if compressed).*/ 子节点的位置
while(h->size && i < len) { 如果对应节点的子节点数量大于0并且当前寻找的字符串没有结束
debugnode("Lookup current node",h);
unsigned char *v = h->data; 取出当前数组的子节点字符
if (h->iscompr) { 当前节点是压缩节点
for (j = 0; j < h->size && i < len; j++, i++) {
if (v[j] != s[i]) break; 一直比较直到不相等才退出循环
}
if (j != h->size) break; 如果不是比较到当前节点最后,就退出(意味着需要分裂节点)
} else { 非压缩节点,每个字符都有自己的子节点指针
/* Even when h->size is large, linear scan provides good
* performances compared to other approaches that are in theory
* more sounding, like performing a binary search. */
即使节点的子节点很多,线性扫描也能提供良好的性能,即使相对于其他理论上性能更好的算法,比如二分搜索。
for (j = 0; j < h->size; j++) {
if (v[j] == s[i]) break; //就比较一个字符,相等就找下一个节点再比较,不相等需要一个个往下比较
}
if (j == h->size) break; 没有子节点中的字符和当前字符相等,退出
i++; 正常情况下,寻找下一个字符相等的节点
}
if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */ 保存当前节点
raxNode **children = raxNodeFirstChildPtr(h);获取当前节点的第一个子节点
if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */
压缩节点的子节点就一个,在索引0的位置
memcpy(&h,children+j,sizeof(h)); 将下一个要对比的节点赋值给变量h(因为我们使用的遍历h)
parentlink = children+j; parentlink表示子节点在父节点中的指针位置
j = 0; /* If the new node is compressed and we do not
iterate again (since i == l) set the split
position to 0 to signal this node represents
the searched key. */
}
debugnode("Lookup stop node is",h);
if (stopnode) *stopnode = h; 将值返回给调用者
if (plink) *plink = parentlink;
if (splitpos && h->iscompr) *splitpos = j;
return i; 返回匹配的字符个数
}
***********************************************************************************
/* Stack data structure used by raxLowWalk() in order to, optionally, return
 * a list of parent nodes to the caller. The nodes do not have a "parent"
 * field for space concerns, so we use the auxiliary stack when needed. */
#define RAX_STACK_STATIC_ITEMS 32 /* Capacity of the inline 'static_items' array. */
typedef struct raxStack {
    void **stack; /* Points to static_items or an heap allocated array. */
    size_t items, maxitems; /* Number of items contained and total space. */
    /* Up to RAX_STACK_STATIC_ITEMS items we avoid to allocate on the heap
     * and use this static array of pointers instead. Beyond that number
     * the items are moved to a heap allocated array. */
    void *static_items[RAX_STACK_STATIC_ITEMS];
    int oom; /* True if pushing into this stack failed for OOM at some point. */
} raxStack;
***********************************************************************************
/* Push an item into the stack, returns 1 on success, 0 on out of memory. */
压入一个项到栈,返回1如果成功,返回0如果内存不足
static inline int raxStackPush(raxStack *ts, void *ptr) {
if (ts->items == ts->maxitems) { 如果已经到达上限值
if (ts->stack == ts->static_items) { 使用的是静态指针数组
ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2); 重新独立在堆上分配内存
if (ts->stack == NULL) { 分配内存失败
ts->stack = ts->static_items; 将原来的指向的值赋回来
ts->oom = 1; 写上内存分配失败标志
errno = ENOMEM;
return 0; 返回失败
}
内存分配成功的情况下,将原来的数据拷贝到新的空间
memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems);
} else { 已经使用了堆内存的分配,将原先分配的内存尝试扩大(在原来位置的后面增加长度)
void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2);
if (newalloc == NULL) { 原来位置后面已经没有空间了,失败
ts->oom = 1; 写上内存分配失败标志
errno = ENOMEM;
return 0; 返回失败
}
ts->stack = newalloc; 成功就使用新的指针,这个时候不需要拷贝数据
}
ts->maxitems *= 2; 无论哪种情况,分配的空间都是原来的2倍
}
ts->stack[ts->items] = ptr; 将新的项添加进来
ts->items++; 项数加1
return 1; 返回成功
}
***********************************************************************************
/* Return the pointer to the first child pointer.
 *
 * The address is computed as: start of the edge characters array
 * ((n)->data), plus the number of characters ((n)->size), plus the
 * alignment padding (raxPadding()). That is exactly where the first child
 * pointer is stored, as the following layouts show:
 *
 * [header iscompr=0][abc]     [a-ptr][b-ptr][c-ptr](value-ptr?)
 * [header iscompr=1][xyz]     [z-ptr](value-ptr?)
 *
 * Note: nothing may follow the trailing backslashes below, otherwise the
 * line continuation is broken and the macro gets truncated. */
#define raxNodeFirstChildPtr(n) ((raxNode**) ( \
    (n)->data + \
    (n)->size + \
    raxPadding((n)->size)))
***********************************************************************************
/* Insert the element 's' of size 'len', setting as auxiliary data
* the pointer 'data'. If the element is already present, the associated
* data is updated (only if 'overwrite' is set to 1), and 0 is returned,
* otherwise the element is inserted and 1 is returned. On out of memory the
* function returns 0 as well but sets errno to ENOMEM, otherwise errno will
* be set to 0.
*/
插入元素长度为len的元素s,设置指针data指向辅助数据。如果元素已经存在,那么相关联的数据会被更新(如果允许写),
同时返回0,否则元素被插入返回1.当内存不足时候也返回0同时设置errno为ENOMEM,否则erron设置为0
int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) {
size_t i;
int j = 0; /* Split position. If raxLowWalk() stops in a compressed
node, the index 'j' represents the char we stopped within the
compressed node, that is, the position where to split the
node for insertion. */
分裂的位置,如果函数raxLowWalk停在了一个压缩节点,那么索引j表示这个字符我们停在压缩节点中间,
那就是说这个位置是插入新节点的分裂位置
raxNode *h, **parentlink;
debugf("### Insert %.*s with value %p\n", (int)len, s, data);
i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL);
/* If i == len we walked following the whole string. If we are not
* in the middle of a compressed node, the string is either already
* inserted or this middle node is currently not a key, but can represent
* our key. We have just to reallocate the node and make space for the
* data pointer. */
如果返回的i长度和len相等,意味着我们遍历了整个待查找的字符串。如果我们不是在一个压缩节点的中间,
那么这个字符串要不就是已经存在的元素,要不就是一个中间非元素节点,但是能够表示我们的键。
我们值需要重新分配节点空间和创建辅助数据所需的指针
if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
i==len表示我们遍历了整个待查找字符串, !h->iscompr 找到的节点为非压缩节点或者 不在压缩节点的中间
debugf("### Insert: node representing key exists\n");
/* Make space for the value pointer if needed. */
if (!h->iskey || (h->isnull && overwrite)) { 如果不是元素节点 或者 节点为空并且允许覆盖
h = raxReallocForData(h,data); 我们重新分配节点空间
if (h) memcpy(parentlink,&h,sizeof(h)); 将新创建的节点赋给父节点中的链接地址
}
if (h == NULL) { 新节点没有创建成功,就返回错误
errno = ENOMEM;
return 0;
}
/* Update the existing key if there is already one. */
更新已经存在的键
if (h->iskey) { 如果是一个键
if (old) *old = raxGetData(h); 获取旧的数据
if (overwrite) raxSetData(h,data); 设置新的数据
errno = 0;
return 0; /* Element already exists. */ 没有插入新键
}
/* Otherwise set the node as a key. Note that raxSetData()
* will set h->iskey. */
这个键之前不存在,那么把这个节点设置为键
raxSetData(h,data);
rax->numele++; 键元素加1
return 1; /* Element inserted. */
}
/* If the node we stopped at is a compressed node, we need to
* split it before to continue.
如果我们停留在一个压缩节点中,在继续操作前我们需要分裂这个节点
* Splitting a compressed node have a few possible cases.
* Imagine that the node 'h' we are currently at is a compressed
* node contaning the string "ANNIBALE" (it means that it represents
* nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
* pointer of this node pointing at the 'E' node, because remember that
* we have characters at the edges of the graph, not inside the nodes
* themselves.
分裂一个压缩节点有几种可能的情况。想象下如果我们正在操作的节点h是一个压缩节点,
包含字符串ANNIBALE(表示这个字符串代表了节点A -> N -> N -> I -> B -> A -> L -> E,
而且只有一个在E节点的孩子指针节点,因为我们需要牢记在边缘才有数据,数据不在节点本身)
* In order to show a real case imagine our node to also point to
* another compressed node, that finally points at the node without
* children, representing 'O':
为了展示一个实际的例子,想象我们的节点还指向了另外一个压缩节点SCO,然后最终指向了一个没有孩子的节点表示空
* "ANNIBALE" -> "SCO" -> []
* When inserting we may face the following cases. Note that all the cases
* require the insertion of a non compressed node with exactly two
* children, except for the last case which just requires splitting a
* compressed node.
当我们插入节点时会遇到如下情况,注意到所有的例子插入一个非压缩节点需要两个孩子节点,
除了最后一种例子外,它值需要分裂一个压缩节点。
* 1) Inserting "ANNIENTARE" 插入ANNIENTARE
* |B| -> "ALE" -> "SCO" -> []
* "ANNI" -> |-|
* |E| -> (... continue algo ...) "NTARE" -> []
* 2) Inserting "ANNIBALI"
* |E| -> "SCO" -> []
* "ANNIBAL" -> |-|
* |I| -> (... continue algo ...) []
* 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node)
这种情况等同于第一种情况,不过需要将原来节点设置压缩属性为0
* |N| -> "NIBALE" -> "SCO" -> []
* |A| -> |-|
* |G| -> (... continue algo ...) |O| -> []
* 4) Inserting "CIAO"
* |A| -> "NNIBALE" -> "SCO" -> []
* |-|
* |C| -> (... continue algo ...) "IAO" -> []
* 5) Inserting "ANNI"
* "ANNI" -> "BALE" -> "SCO" -> []
* The final algorithm for insertion covering all the above cases is as
* follows.
1是中间不同 2是最后一个不同 3是第二个不同 4 是第一个不同 5是全部找到
最后覆盖上面所有案例的插入新节点算法总结如下:
*
* ============================= ALGO 1 =============================
* For the above cases 1 to 4, that is, all cases where we stopped in
* the middle of a compressed node for a character mismatch, do:
对于上述案例1到4,那就是我们停止在了一个压缩节点因为找不到匹配字符,所以按如下操作:
* Let $SPLITPOS be the zero-based index at which, in the
* compressed node array of characters, we found the mismatching
* character. For example if the node contains "ANNIBALE" and we add
* "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the
* mismatching character is found.
让$SPLITPOS是从0开始的索引,表示压缩节点中不匹配字符的位置。举例如下如果节点包含ANNIBALE,
而我们添加一个新字符串ANNIENTARE,$SPLITPOS的值就是4,这就意味着索引所在的位置是找到第一个不匹配字符的位置
* 1. Save the current compressed node $NEXT pointer (the pointer to the
* child element, that is always present in compressed nodes).
1保存当前压缩节点的后向指针(就是指向子节点的指针,总是在压缩节点中表示)
* 2. Create "split node" having as child the non common letter
* at the compressed node. The other non common letter (at the key)
* will be added later as we continue the normal insertion algorithm
* at step "6".
创建分裂节点,用压缩节点中非公共字符创建的节点当做子节点,
另外一个非公共字符(在插入的键中)将在插入算法的第6步被添加。
* 3a. IF $SPLITPOS == 0:
* Replace the old node with the split node, by copying the auxiliary
* data if any. Fix parent's reference. Free old node eventually
* (we still need its data for the next steps of the algorithm).
如果$SPLITPOS的值为0,用分裂节点代替旧的节点,如果有辅助数据,拷贝之。修复父节点对子节点的引用。
最后释放旧节点(我们仍然需要这个旧节点的数据处理算法的下一步,所以才是最后才释放)
* 3b. IF $SPLITPOS != 0:
* Trim the compressed node (reallocating it as well) in order to
* contain $splitpos characters. Change chilid pointer in order to link
* to the split node. If new compressed node len is just 1, set
* iscompr to 0 (layout is the same). Fix parent's reference.
如果$SPLITPOS不为0,那么我们需要截取原节点的前面$splitpos个字符做成新节点(需要重新分配)。
改变子节点的指针保证连接到分裂节点。如果压缩节点的长度只剩下1了,需要设置压缩属性为0(存储格式同样如此)。
修复父节点对子节点的引用。
* 4a. IF the postfix len (the length of the remaining string of the
* original compressed node after the split character) is non zero,
* create a "postfix node". If the postfix node has just one character
* set iscompr to 0, otherwise iscompr to 1. Set the postfix node
* child pointer to $NEXT.
如果后缀的长度(原节点中分裂字符之后的数据长度)非0,那么需要创建一个后缀节点。
如果后缀节点只有一个字符,将压缩属性设置为0,否则压缩属性设置为1.设置后缀节点的子指针指向第一步中的节点$NEXT
* 4b. IF the postfix len is zero, just use $NEXT as postfix pointer.
如果后缀节点的长度是0值需要将$NEXT设置为后缀指针即可
* 5. Set child[0] of split node to postfix node.
将分裂节点的第一个子节点设置为后缀节点
* 6. Set the split node as the current node, set current index at child[1]
* and continue insertion algorithm as usually.
设置分裂节点为当前节点,设置当前索引在子节点1的位置上,继续按照算法插入剩余字符
* ============================= ALGO 2 =============================
* For case 5, that is, if we stopped in the middle of a compressed
* node but no mismatch was found, do:
对案例5,那就是如果我们停止在一个压缩节点中间,但是全部匹配了待查找的字符串。按如下操作
* Let $SPLITPOS be the zero-based index at which, in the
* compressed node array of characters, we stopped iterating because
* there were no more keys character to match. So in the example of
* the node "ANNIBALE", addig the string "ANNI", the $SPLITPOS is 4.
让$SPLITPOS是从0开始的在压缩节点数组中的字符位置,我们停止迭代因为待匹配字符串已经没有字符了。
所以在本例中,在节点ANNIBALE添加字符串ANNI,$SPLITPOS的值为4.
* 1. Save the current compressed node $NEXT pointer (the pointer to the
* child element, that is always present in compressed nodes).
1.保存当前压缩节点$NEXT指针(这个指针指向子元素,总是在压缩节点中)
* 2. Create a "postfix node" containing all the characters from $SPLITPOS
* to the end. Use $NEXT as the postfix node child pointer.
* If the postfix node length is 1, set iscompr to 0.
* Set the node as a key with the associated value of the new
* inserted key.
2创建一个后缀节点包含从$SPLITPOS位置开始到结束的所有字符。使用$NEXT当做后缀几点的子指针。
如果后缀节点的长度是1,那么设置压缩属性为0.设置这个节点位一个键,同时关联新插入键的关联值。
* 3. Trim the current node to contain the first $SPLITPOS characters.
* As usually if the new node length is just 1, set iscompr to 0.
* Take the iskey / associated value as it was in the orignal node.
* Fix the parent's reference.
3裁剪当前节点包含开始的$SPLITPOS个字符(从0开始到$SPLITPOS-1)。如果新节点的长度是1,设置压缩属性为0.
将原始节点中的键和关联值赋给这个节点。修复父节点的引用(即其中的子几点指针指向新的这个裁剪出来的节点)
* 4. Set the postfix node as the only child pointer of the trimmed
* node created at step 1.
*/
4设置后缀节点为第一步中创建节点的子指针指向的节点
/* ------------------------- ALGORITHM 1 --------------------------- */
if (h->iscompr && i != len) {
debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n",
h->size, h->data, (void*)h);
debugf("Still to insert: %.*s\n", (int)(len-i), s+i);
debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]);
debugf("Other (key) letter is '%c'\n", s[i]);
/* 1: Save next pointer. */
raxNode **childfield = raxNodeLastChildPtr(h); 获取最后一个孩子节点的位置
raxNode *next;
memcpy(&next,childfield,sizeof(next)); 将其中的内容拷贝到NEXT
debugf("Next is %p\n", (void*)next);
debugf("iskey %d\n", h->iskey);
if (h->iskey) {
debugf("key value is %p\n", raxGetData(h));
}
/* Set the length of the additional nodes we will need. */
size_t trimmedlen = j; 如果停在的位置是j
0 1 2 3 4 5 共六个字符,假如j=3,那么 6-3-1 =2 ,剩余就是2个字符
size_t postfixlen = h->size - j - 1; 那么j以后的字符长度就是这个值
int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;
从前往后依次判断是否停在中间,是否是键,是否有数据,全部满足的话分裂节点就是一个键
size_t nodesize;
/* 2: Create the split node. Also allocate the other nodes we'll need
* ASAP, so that it will be simpler to handle OOM. */
raxNode *splitnode = raxNewNode(1, split_node_is_key); 创建带有一个孩子的新节点
raxNode *trimmed = NULL;
raxNode *postfix = NULL;
if (trimmedlen) { 如果停留在中间
nodesize = sizeof(raxNode)+trimmedlen+raxPadding(trimmedlen)+
sizeof(raxNode*); 原来节点前面部分字符串转化成新节点的长度
if (h->iskey && !h->isnull) nodesize += sizeof(void*); 如果有辅助数据
trimmed = rax_malloc(nodesize);
}
if (postfixlen) {
nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
sizeof(raxNode*); 原来节点后面部分字符串转化成新节点的长度
postfix = rax_malloc(nodesize);
}
/* OOM? Abort now that the tree is untouched. */
if (splitnode == NULL ||
(trimmedlen && trimmed == NULL) ||
(postfixlen && postfix == NULL))
{ 内存分配不足,全部释放
rax_free(splitnode);
rax_free(trimmed);
rax_free(postfix);
errno = ENOMEM;
return 0;
}
splitnode->data[0] = h->data[j]; 第j个字符(就是第一个不匹配的字符)赋给新建的节点的数据
if (j == 0) { 如果不在压缩节点中,则直接用分裂节点替换原节点的指针位置
/* 3a: Replace the old node with the split node. */
if (h->iskey) {
void *ndata = raxGetData(h);
raxSetData(splitnode,ndata);
}
memcpy(parentlink,&splitnode,sizeof(splitnode));
直接将分裂节点变成子节点,取代了原来节点h子节点的位置
} else { 如果在压缩节点中,那么需要分裂压缩节点
/* 3b: Trim the compressed node. */
trimmed->size = j;
memcpy(trimmed->data,h->data,j); 相同部分字符拷贝到新节点
trimmed->iscompr = j > 1 ? 1 : 0;
trimmed->iskey = h->iskey; 继承原先节点的键情况,如果原来前面部分是键,现在还是键
trimmed->isnull = h->isnull;
if (h->iskey && !h->isnull) {
void *ndata = raxGetData(h);
raxSetData(trimmed,ndata);
}
raxNode **cp = raxNodeLastChildPtr(trimmed); 获取新裁剪的节点最后一个子节点的指针
memcpy(cp,&splitnode,sizeof(splitnode)); 将分裂节点的值赋给新裁剪的节点最后一个子节点的指针的内容
memcpy(parentlink,&trimmed,sizeof(trimmed));将取新裁剪的节点的值赋给父节点,这样父节点就指向了裁剪节点
parentlink = cp; /* Set parentlink to splitnode parent. */ 现在的父几点变成了新裁剪出来的节点
rax->numnodes++; 节点数目多了1个
}
/* 4: Create the postfix node: what remains of the original
* compressed node after the split. */
用原始节点剩余的字符创建后缀节点
if (postfixlen) { 后缀的字符不为空
/* 4a: create a postfix node. */
postfix->iskey = 0;
postfix->isnull = 0;
postfix->size = postfixlen; 长度为len-j-1
postfix->iscompr = postfixlen > 1; 是否压缩,多余一个字符就压缩
memcpy(postfix->data,h->data+j+1,postfixlen); 将字符拷贝到新键的后缀节点
raxNode **cp = raxNodeLastChildPtr(postfix); 获取新建后缀节点的最后一个子节点的指针
memcpy(cp,&next,sizeof(next));将原来指向后面节点的指针内容赋值给新键后缀节点最后一个子节点的指针内容
rax->numnodes++;节点数加1
} else {
/* 4b: just use next as postfix node. */ 后缀没有字符了,之间使用原来的后面节点即可
postfix = next;
}
/* 5: Set splitnode first child as the postfix node. */
这里来处理分裂节点指向的后面节点情况,
raxNode **splitchild = raxNodeLastChildPtr(splitnode);获取分裂节点的最后一个子节点的指针
memcpy(splitchild,&postfix,sizeof(postfix)); 让分裂分裂节点的指针指向子节点,
这样就完成了上面两步的链接,原先是splitnode->? 和 postfixnode->
这里将问号补上splitnode->postfixnode->
/* 6. Continue insertion: this will cause the splitnode to
* get a new child (the non common character at the currently
* inserted key). */
继续插入:这个将导致分裂节点获得一个新的孩子(当前插入键中不同的字符,这个字符就是和原节点字符比较中不同的那个)
rax_free(h); 这里可以释放h了
h = splitnode; 将分裂节点赋给h
} else if (h->iscompr && i == len) { 待查找节点的键在压缩节点中被匹配到
/* ------------------------- ALGORITHM 2 --------------------------- */
debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n",
h->size, h->data, (void*)h, j);
/* Allocate postfix & trimmed nodes ASAP to fail for OOM gracefully. */
尽快分配前面裁剪和后缀节点的空间,失败就优雅回退
size_t postfixlen = h->size - j; 后缀裁剪
size_t nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
sizeof(raxNode*);
if (data != NULL) nodesize += sizeof(void*);
raxNode *postfix = rax_malloc(nodesize);
nodesize = sizeof(raxNode)+j+raxPadding(j)+sizeof(raxNode*); 匹配部分进行裁剪
if (h->iskey && !h->isnull) nodesize += sizeof(void*);
raxNode *trimmed = rax_malloc(nodesize);
if (postfix == NULL || trimmed == NULL) {
rax_free(postfix);
rax_free(trimmed);
errno = ENOMEM;
return 0;
}
/* 1: Save next pointer. */ 保存指向子节点的指针
raxNode **childfield = raxNodeLastChildPtr(h);
raxNode *next;
memcpy(&next,childfield,sizeof(next));
/* 2: Create the postfix node. */ 创建后缀节点
postfix->size = postfixlen;
postfix->iscompr = postfixlen > 1;
postfix->iskey = 1;
postfix->isnull = 0;
memcpy(postfix->data,h->data+j,postfixlen);
raxSetData(postfix,data);
raxNode **cp = raxNodeLastChildPtr(postfix);
memcpy(cp,&next,sizeof(next)); 将子指针指向后面的节点
rax->numnodes++; 新的节点加1
/* 3: Trim the compressed node. */ 裁剪原来的压缩节点
trimmed->size = j;
trimmed->iscompr = j > 1;
trimmed->iskey = 0;
trimmed->isnull = 0;
memcpy(trimmed->data,h->data,j);
memcpy(parentlink,&trimmed,sizeof(trimmed));
if (h->iskey) { 如果原来的节点是一个键,那么新裁剪的节点也是一个键
void *aux = raxGetData(h);
raxSetData(trimmed,aux);
}
/* Fix the trimmed node child pointer to point to
* the postfix node. */
让裁剪的压缩节点的子指针指向后缀节点
cp = raxNodeLastChildPtr(trimmed);
memcpy(cp,&postfix,sizeof(postfix));
/* Finish! We don't need to continue with the insertion
* algorithm for ALGO 2. The key is already inserted. */
在这种情况下我们不需要再执行插入算法,因为键已经被插入
rax->numele++; 元素加1
rax_free(h); 释放原来的节点
return 1; /* Key inserted. */
}
/* We walked the radix tree as far as we could, but still there are left
* chars in our string. We need to insert the missing nodes. */
我们尽可能远的遍历基树,但是还是留下了剩下了部分不匹配字符。我们需要插入包含这部分不匹配字符的节点
while(i < len) { 如果匹配之后还有部分字符剩余
raxNode *child;
/* If this node is going to have a single child, and there
* are other characters, so that that would result in a chain
* of single-childed nodes, turn it into a compressed node. */
如果这个节点将拥有一个子节点,并且还有其它字符,那么将会导致一个单子节点链,可以转化为一个压缩节点
if (h->size == 0 && len-i > 1) { 超过一个字符,并且是个空节点,那么添加压缩节点
debugf("Inserting compressed node\n");
size_t comprsize = len-i;
if (comprsize > RAX_NODE_MAX_SIZE)
comprsize = RAX_NODE_MAX_SIZE;
raxNode *newh = raxCompressNode(h,s+i,comprsize,&child); 添加一个压缩节点
if (newh == NULL) goto oom;
h = newh;
memcpy(parentlink,&h,sizeof(h));
parentlink = raxNodeLastChildPtr(h);
i += comprsize;
} else { 只有一个字符的节点或者非空节点,添加子节点
debugf("Inserting normal node\n");
raxNode **new_parentlink;
raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink);
在当前节点新增一个子节点,只包含插入键的第一个非公共字符(后面的字符新建一个节点)
if (newh == NULL) goto oom;
h = newh;
memcpy(parentlink,&h,sizeof(h));
parentlink = new_parentlink;
i++;
}
rax->numnodes++;
h = child; 这个步骤非常重要,用于拆分剩下的不匹配字符串的两部分
}
raxNode *newh = raxReallocForData(h,data); 将新节点的数据保存起来
if (newh == NULL) goto oom;
h = newh;
if (!h->iskey) rax->numele++; 如果新节点原先不是键,那么键的个数就加1
raxSetData(h,data); 注意 这个函里面将节点设置为键
memcpy(parentlink,&h,sizeof(h)); 让父节点指针指向新的这个子节点
return 1; /* Element inserted. */
oom:
/* This code path handles out of memory after part of the sub-tree was
* already modified. Set the node as a key, and then remove it. However we
* do that only if the node is a terminal node, otherwise if the OOM
* happened reallocating a node in the middle, we don't need to free
* anything. */
if (h->size == 0) {
h->isnull = 1;
h->iskey = 1;
rax->numele++; /* Compensate the next remove. */
assert(raxRemove(rax,s,i,NULL) != 0); 这里的函数remove留到下次再分析
}
errno = ENOMEM;
return 0;
}
***********************************************************************************
/* Return the pointer to the last child pointer in a node. For the compressed
 * nodes this is the only child pointer.
 *
 * The address is computed from the end of the node: start address (as a
 * byte pointer), plus the current total length of the node, minus the size
 * of one child pointer, minus the size of the value pointer when the node
 * is a key holding a non-NULL value.
 *
 * Note: nothing may follow the trailing backslashes below, otherwise the
 * line continuation is broken and the macro gets truncated. */
#define raxNodeLastChildPtr(n) ((raxNode**) ( \
    ((char*)(n)) + \
    raxNodeCurrentLength(n) - \
    sizeof(raxNode*) - \
    (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \
))
***********************************************************************************
/* Turn the node 'n', that must be a node without any children, into a
* compressed node representing a set of nodes linked one after the other
* and having exactly one child each. The node can be a key or not: this
* property and the associated value if any will be preserved.
将节点n(没有子节点)转变为一个压缩节点,代表一些列节点集合,这些节点是一个接一个连接着的。
这个节点可以是一个key也可以不是:这个属性和相关联的值将被保留
* The function also returns a child node, since the last node of the
* compressed chain cannot be part of the chain: it has zero children while
* we can only compress inner nodes with exactly one child each. */
这个函数总是返回一个子节点,因为压缩链上最后的节点不是链的一部分:它只有0个节点,
而我们只能压缩内部只有一个孩子的节点。
raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) {
assert(n->size == 0 && n->iscompr == 0);
void *data = NULL; /* Initialized only to avoid warnings. */
size_t newsize;
debugf("Compress node: %.*s\n", (int)len,s);
/* Allocate the child to link to this node. */
*child = raxNewNode(0,0); 创建结尾空节点(无孩节点)
if (*child == NULL) return NULL;
/* Make space in the parent node. */ 创建新节点所需的空间
newsize = sizeof(raxNode)+len+raxPadding(len)+sizeof(raxNode*);
if (n->iskey) {
data = raxGetData(n); /* To restore it later. */ 如果有数据,先保存
if (!n->isnull) newsize += sizeof(void*); 非空,加上数据指针的大小
}
raxNode *newn = rax_realloc(n,newsize); 这里真正分配内存
if (newn == NULL) { 失败的情况下,就释放之前申请的内存
rax_free(*child);
return NULL;
}
n = newn; 将新节点的地址赋给n
n->iscompr = 1;
n->size = len; 新的长度
memcpy(n->data,s,len); 将新的字符串拷贝过来
if (n->iskey) raxSetData(n,data); 如果原来有数据,就把原来的数据重新复制过来
raxNode **childfield = raxNodeLastChildPtr(n);获取原来节点尾部的位置
memcpy(childfield,child,sizeof(*child)); 将新创建的空节点放在尾巴上
return n;
}
***********************************************************************************
/* realloc the node to make room for auxiliary data in order
* to store an item in that node. On out of memory NULL is returned. */
对节点重新分配内存空间,用来存储辅助数据,如果OOM就返回NULL
raxNode *raxReallocForData(raxNode *n, void *data) {
if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */
无数据,不需要分配,直接返回
size_t curlen = raxNodeCurrentLength(n); 获取当前节点的现有长度
return rax_realloc(n,curlen+sizeof(void*)); 加上存储指针所需的字节数,重新在原地扩展分配
}
***********************************************************************************
/* Add a new child to the node 'n' representing the character 'c' and return
* its new pointer, as well as the child pointer by reference. Additionally
* '***parentlink' is populated with the raxNode pointer-to-pointer of where
* the new child was stored, which is useful for the caller to replace the
* child pointer if it gets reallocated.
对表示字符C的节点n添加一个新节点,返回它的新指针以及通过引用返回子指针。额外的,
参数***parentlink填充了指向新子节点存储的指针的指针,这个对调用者替换重新分配的子节点很有用
* On success the new parent node pointer is returned (it may change because
* of the realloc, so the caller should discard 'n' and use the new value).
* On out of memory NULL is returned, and the old node is still valid. */
成功的情况下,这个新的父节点指针会返回(注意这个新节点可能是重新分配过的,所以我们不能用n而应该用新值)
当OOM时返回NULL,并且旧节点仍然有效
raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) {
assert(n->iscompr == 0); 确认非压缩节点
size_t curlen = raxNodeCurrentLength(n); 获取当前节点的字节长度
n->size++; 如果当前节点新增一个字符
size_t newlen = raxNodeCurrentLength(n); 计算新增一个字符之后所需的长度,因为有填充,所以长度会变动
n->size--; /* For now restore the orignal size. We'll update it only on success at the end. */
现在恢复原来的大小,我们将在最后成功的时候更新这个值
/* Alloc the new child we will link to 'n'. */
raxNode *child = raxNewNode(0,0); 新增一个空节点用于收尾
if (child == NULL) return NULL;
/* Make space in the original node. */
raxNode *newn = rax_realloc(n,newlen);
if (newn == NULL) { 对原来节点原地扩展失败
rax_free(child); 需要释放已经申请的内存,否则就会有内存泄漏
return NULL;
}
n = newn; 指向新申请的地址
/* After the reallocation, we have up to 8/16 (depending on the system
* pointer size, and the required node padding) bytes at the end, that is,
* the additional char in the 'data' section, plus one pointer to the new
* child, plus the padding needed in order to store addresses into aligned
* locations.
经过重新分配,我们达到了末尾以8或者16字节对齐(这个依赖于系统指针大小和所需节点的填充字节)。
这就是说在data域增加字符,增加了一个指针指向新的子几点,再加上将地址存储到对齐位置所需的填充
* So if we start with the following node, having "abde" edges.
*假如我们以一下的节点开始,拥有abde的边。
* Note:
* - We assume 4 bytes pointer for simplicity.
* - Each space below corresponds to one byte
注意我们假设指针是4个字节的,每个空格代表一个字节
* [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|
*
* After the reallocation we need: 1 byte for the new edge character
* plus 4 bytes for a new child pointer (assuming 32 bit machine).
* However after adding 1 byte to the edge char, the header + the edge
* characters are no longer aligned, so we also need 3 bytes of padding.
* In total the reallocation will add 1+4+3 bytes = 8 bytes:
在重分配后我需要: 1个字节的新边字符加上一个4字节的子指针(假设是32位机器).
然而经过边字符增加一个字节,头+边的字符不再对齐,所以我们也需要3个字节的填充。
所以在重新分配后总共需要1+4+3共8个字节。
* (Blank bytes are represented by ".")
*空字节用.表示
* [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|[....][....]
*
* Let's find where to insert the new child in order to make sure
* it is inserted in-place lexicographically. Assuming we are adding
* a child "c" in our case pos will be = 2 after the end of the following
* loop. */
让我们找到哪里可以插入新的子节点,确认这个位置是字典序的。假设我们的案例中新增一个子节点c,
经过下面的循环之后,新增的位置(pos)就是2,
int pos;
for (pos = 0; pos < n->size; pos++) {
if (n->data[pos] > c) break;
}
/* Now, if present, move auxiliary data pointer at the end
* so that we can mess with the other data without overwriting it.
* We will obtain something like that:
现在,如果有(数据),移动辅助数据指针到结尾,这样我们就可以在不覆盖数据的情况下处理其它数据。
* [HDR*][abde][Aptr][Bptr][Dptr][Eptr][....][....]|AUXP|
*/
unsigned char *src, *dst;
if (n->iskey && !n->isnull) {
src = ((unsigned char*)n+curlen-sizeof(void*)); 原数据的指针地址,即末尾
dst = ((unsigned char*)n+newlen-sizeof(void*)); 新数据的指针地址,即末尾
memmove(dst,src,sizeof(void*));将原来数据迁移到新地址
}
/* Compute the "shift", that is, how many bytes we need to move the
* pointers section forward because of the addition of the new child
* byte in the string section. Note that if we had no padding, that
* would be always "1", since we are adding a single byte in the string
* section of the node (where now there is "abde" basically).
计算偏移量,那就是,我们需要把指针区域向前移动多少字节,因为在字符串区域增加新的子节点字节。
注意到如果没有填充,这个值总是1,因为我们对节点的字符串区域中增加一个单字节(本例的字符串区域就是abde)
* However we have padding, so it could be zero, or up to 8.
实际上我们有填充字节,所以结果可能是0或者最大到8.
* Another way to think at the shift is, how many bytes we need to
* move child pointers forward *other than* the obvious sizeof(void*)
* needed for the additional pointer itself. */
另外一个考虑偏移量方法就是,我们需要向前移动子指针多少个字节用来保存额外的新增子节点指针
size_t shift = newlen - curlen - sizeof(void*);
/* We said we are adding a node with edge 'c'. The insertion
* point is between 'b' and 'd', so the 'pos' variable value is
* the index of the first child pointer that we need to move forward
* to make space for our new pointer.
我们说我们增加了一个用用边c的节点。插入的位置就在节点b和d之间,
所以这个位置变量的值就是第一个子节点指针的索引值,那就是我们需要向前移动为新节点指针创建的空间地址。
* To start, move all the child pointers after the insertion point
* of shift+sizeof(pointer) bytes on the right, to obtain:
首先,需要移动所有插入位置后面的子节点指针shift+sizeof(pointer)个字节,获得如下结构
* [HDR*][abde][Aptr][Bptr][....][....][Dptr][Eptr]|AUXP|
*/
src = n->data+n->size+ 初始位置 + 字符串长度
raxPadding(n->size)+ 填充字符长度
sizeof(raxNode*)*pos; 到当前位置的子节点指针长度
这个位置就是我们将要插入的位置
memmove(src+shift+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos));
这里把插入位置后面的所有字节指针全部重新拷贝到新的位置
/* Move the pointers to the left of the insertion position as well. Often
* we don't need to do anything if there was already some padding to use. In
* that case the final destination of the pointers will be the same, however
* in our example there was no pre-existing padding, so we added one byte
* plus thre bytes of padding. After the next memmove() things will look
* like thata:
将插入位置左侧的数据也进行移动。通常我们不需要做任何事情,如果存在填充空间。
在这种情况下,最终指针的目的地址将保持不变。然而在我们的例子中,他们存在没有填充的轻,
所以我们需要增加一个字节外加三个字节的填充。经过接下来的函数memmove调用获得如下结构:
* [HDR*][abde][....][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
*/
if (shift) { 偏移量大于0,说明头部长度有变动,那么需要移动前面的插入位置前面的数据
src = (unsigned char*) raxNodeFirstChildPtr(n); 找出第一个子节点的指针位置
memmove(src+shift,src,sizeof(raxNode*)*pos);头部腾出新增空间的位置
}
/* Now make the space for the additional char in the data section,
* but also move the pointers before the insertion point to the right
* by shift bytes, in order to obtain the following:
在数据区域留出新增字符的空间,同时向右移动了插入位置前面的指针偏移量个字节,
得到如下结构:
* [HDR*][ab.d][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
*/
src = n->data+pos; 字符串中插入位置后面的字符
memmove(src+1,src,n->size-pos); 将插入位置后面的字符往后挪一位(给新字符腾出一个字符的空间)
/* We can now set the character and its child node pointer to get:
*我们现在设置字符及其子节点指针获得如下结构:
* [HDR*][abcd][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
* [HDR*][abcd][e...][Aptr][Bptr][Cptr][Dptr][Eptr]|AUXP|
*/
n->data[pos] = c; 将新字符放入上面腾出的位置
n->size++; 这里才真是的将字符串中的个数增加1
src = (unsigned char*) raxNodeFirstChildPtr(n); 获取第一个子节点的指针位置
raxNode **childfield = (raxNode**)(src+sizeof(raxNode*)*pos); 找到新增子节点指针的位置
memcpy(childfield,&child,sizeof(child)); 将新创建的空节点拷贝到新增子节点
*childptr = child; 将子节点返回给引用参数childptr
*parentlink = childfield; 将指向子节点的指针返回给引用参数parentlink
return n; 返回这个新增过字符的新节点
}
***********************************************************************************
/* Set the node auxiliary data to the specified pointer.
 *
 * The node is always flagged as a key. A non-NULL 'data' is stored in the
 * last sizeof(void*) bytes of the node, as computed from
 * raxNodeCurrentLength(), and 'isnull' is cleared; a NULL 'data' only sets
 * 'isnull', nothing is written in the node body. */
void raxSetData(raxNode *n, void *data) {
    n->iskey = 1;
    if (data != NULL) {
        /* Clear 'isnull' before computing the length: the node length is
         * evaluated with the flags already describing a stored value. */
        n->isnull = 0;
        void **ndata = (void**)
            ((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
        memcpy(ndata,&data,sizeof(data));
    } else {
        n->isnull = 1;
    }
}
/* Get the node auxiliary data.
 *
 * Returns NULL when the node is flagged as holding no value ('isnull'),
 * otherwise the pointer stored in the last sizeof(void*) bytes of the node,
 * as computed from raxNodeCurrentLength(). */
void *raxGetData(raxNode *n) {
    if (n->isnull) return NULL;
    void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
    void *data;
    memcpy(&data,ndata,sizeof(data));
    return data;
}
***********************************************************************************