B+树的一个实现

B+树是现在很多数据库索引技术的基础,也是讨论比较多的一种数据结构。而很多书上对它只有描述性的定义,并无伪代码的描述。因此,在真正实现B+树的时候,难免会掉入一些思维的陷阱。所以本文主要来讲讲笔者在实现一个简单的B+树时需要注意的问题。最后将会给出一个实现好的B+树。(未完)

 

首先还是谈谈什么是B+树?

 

B+ 树是一种树数据结构,通常用于数据库和操作系统的文件系统中。B+ 树的特点是能够保持数据稳定有序,其插入与修改拥有较稳定的对数时间复杂度。B+ 树元素自底向上插入,这与二叉树恰好相反。

B+ 树在节点访问时间远远超过节点内部访问时间的时候,比可作为替代的实现有着实在的优势。这通常在多数节点在次级存储比如硬盘中的时候出现。通过最大化在每个内部节点内的子节点的数目减少树的高度,平衡操作不经常发生,而且效率增加了。这种价值得以确立通常需要每个节点在次级存储中占据完整的磁盘块或近似的大小。

B+ 树背后的想法是内部节点可以有在预定范围内的可变数目的子节点。因此,B+ 树不需要像其他自平衡二叉查找树那样经常地重新平衡。对于特定的实现,子节点数目的下界和上界是固定的。例如,在 2-3 B 树(常简称为2-3 树)中,每个内部节点只可能有 2 或 3 个子节点。如果节点的子节点数目不在有效范围内,则被当作处于违规状态。

B+ 树由 B 树演变而来。B 树的创造者 Rudolf Bayer 没有解释B代表什么。最常见的观点是B代表平衡(balanced),因为所有的叶子节点在树中都在相同的级别上。B也可能代表Bayer,或者是波音(Boeing),因为他曾经工作于波音科学研究实验室。

 

  1 #include <stdlib.h>
  2 #include <memory>
  3 #include <queue>
  4 
  5 using namespace std;
  6 
  7 #define N 200
  8 #define NUM (2*N)
  9 
 10 struct BTreeNode;
 11 
 12 typedef struct BTreeInnerNode
 13 {
 14     BTreeNode * child[NUM+1];
 15 } BTreeInnerNode;
 16 
 17 typedef struct BTreeLeafNode
 18 {
 19     int value[NUM];
 20     int isDelete[NUM];
 21     BTreeNode * next;
 22 } BTreeLeafNode;
 23 
 24 typedef union Node
 25 {
 26     BTreeInnerNode innerNode;
 27     BTreeLeafNode leafNode;
 28 } Node;
 29 
 30 typedef struct BTreeNode
 31 {
 32     int number;
 33     int isLeaf;
 34     int key[NUM];
 35     int level;
 36     Node valueNode;
 37 } BTreeNode;
 38 
 39 BTreeNode * root = NULL;
 40 
 41 void Insert(int key, int value);
 42 int Delete(int key);
 43 int Query(int key);
 44 
 45 BTreeNode * InnerTravel(BTreeNode * node, int key);
 46 BTreeNode * CreateNode();
 47 void SplitNodeNPutLeafItem(BTreeNode * node, int key, int value);
 48 void SplitNodeNPutInnerItem(BTreeNode * fnode, BTreeNode * node);
 49 
 50 void PutLeafItem(BTreeNode * node, int key, int value);
 51 int GetLeafItem(BTreeNode * node, int key);
 52 int DelLeafItem(BTreeNode * node, int key);
 53 
 54 
 55 void PutInnerItem(BTreeNode * node, BTreeNode * cnode, int key);
 56 int GetInnerItem(BTreeNode * node, int key);
 57 void DelInnerItem(BTreeNode * node, int key);
 58 
 59 int GetMin(BTreeNode * node);
 60 BTreeNode * GetParent(BTreeNode * node);
 61 
 62 void Print();
 63 void PrintTree();
 64 bool ValidateTree();
 65 
 66 int _tmain(int argc, _TCHAR* argv[])
 67 {
 68     //Insert(2, 2);
 69     //Insert(3, 3);
 70     //Insert(5, 5);
 71     //Insert(7, 7);
 72 
 73     //Insert(11, 11);
 74     //Insert(13, 13);
 75     //Insert(17, 17);
 76     //Insert(19, 19);
 77 
 78     //Insert(41, 41);
 79     //Insert(43, 43);
 80     //Insert(47, 47);
 81 
 82     ////Insert(40, 40);
 83 
 84     //Insert(23, 23);
 85     //Insert(29, 29);
 86     //Insert(31, 31);
 87     //Insert(37, 37);
 88 
 89     //PrintTree();
 90 
 91     //Insert(40, 40);
 92 
 93     //PrintTree();
 94 
 95     int isError;
 96 
 97     //////////////////////////////////
 98 
 99     isError = 0;
100 
101     for(int i=20000; i>10000; i--)
102     {
103         Insert(i, i+1);
104         //if(!ValidateTree())
105         //{
106         //    printf("Error in %d\n", i);
107         //    isError = 1;
108         //    break;
109         //}
110     }
111 
112     //if(isError == 0)
113     //{
114     //    PrintTree();
115     //}
116 
117     /////////////////////////////////
118 
119     isError = 0;
120 
121     for(int i=0; i<10000; i++)
122     {
123         Insert(i, i+1);
124         //if(!ValidateTree())
125         //{
126         //    printf("Error in %d\n", i);
127         //    break;
128         //}
129     }
130 
131     //if(isError == 0)
132     //{
133     //    PrintTree();
134     //}
135 
136     printf("%d\n", Query(100));
137 
138     printf("%d\n", Delete(100));
139 
140     printf("%d\n", Query(100));
141 
142     return 0;
143 }
144 
145 void Insert(int key, int value)
146 {
147     BTreeNode * node = NULL;
148     BTreeNode * nnode = NULL;
149     BTreeNode * rnode = NULL;
150     
151     if(root != NULL)
152     {
153         node = InnerTravel(root, key);
154     }
155     else
156     {
157         node = root = CreateNode();
158         node->isLeaf = 1;
159     }
160 
161     if(node->number < NUM)
162     {
163         PutLeafItem(node, key, value);
164     }
165     else
166     {
167         SplitNodeNPutLeafItem(node, key, value);
168     }
169 }
170 
171 int Delete(int key)
172 {
173     BTreeNode * node = NULL;
174     if(root != NULL)
175     {
176         node = InnerTravel(root, key);
177     }
178     else
179     {
180         return -1;
181     }
182 
183     return DelLeafItem(node, key);
184 }
185 
186 int Query(int key)
187 {
188     BTreeNode * node = NULL;
189     if(root != NULL)
190     {
191         node = InnerTravel(root, key);
192     }
193     else
194     {
195         return -1;
196     }
197 
198     return GetLeafItem(node, key);
199 }
200 
201 
202 BTreeNode * CreateNode()
203 {
204     BTreeNode * node = (BTreeNode *)malloc(sizeof(BTreeNode));
205     memset(node, 0, sizeof(BTreeNode));
206 
207     return node;
208 }
209 
210 BTreeNode * InnerTravel(BTreeNode * node, int key)
211 {
212     if(node->isLeaf == 1)
213     {
214         return node;
215     }
216 
217     for(int i=0; i<node->number; i++)
218     {
219         if(key < node->key[i])
220         {
221             return InnerTravel(node->valueNode.innerNode.child[i], key);
222         }
223     }
224     return InnerTravel(node->valueNode.innerNode.child[node->number], key);
225 }
226 
227 void SplitNodeNPutLeafItem(BTreeNode * node, int key, int value)
228 {
229     BTreeNode * fnode = NULL;
230 
231     BTreeNode * nnode = CreateNode();
232     nnode->isLeaf = node->isLeaf;
233     nnode->level = node->level;
234 
235     for(int i=0; i<N; i++)
236     {
237         nnode->key[i] = node->key[N+i];
238         nnode->valueNode.leafNode.value[i] = node->valueNode.leafNode.value[N+i];
239     }
240 
241     nnode->number = N;
242     node->number = N;
243 
244     nnode->valueNode.leafNode.next = node->valueNode.leafNode.next;
245     node->valueNode.leafNode.next = nnode;
246 
247     if(key < nnode->key[0])
248     {
249         PutLeafItem(node, key, value);
250     }
251     else
252     {
253         PutLeafItem(nnode, key, value);
254     }
255 
256     fnode = GetParent(node);
257 
258     if(fnode == NULL)
259     {
260         fnode = CreateNode();
261         fnode->valueNode.innerNode.child[0] = root;
262         fnode->level = root->level + 1;
263         root = fnode;
264         
265     }
266 
267     if(fnode->number < NUM)
268     {
269         PutInnerItem(fnode, nnode, nnode->key[0]);
270     }
271     else
272     {
273         SplitNodeNPutInnerItem(fnode, nnode);
274     }
275     
276 }
277 
278 void PutLeafItem(BTreeNode * node, int key, int value)
279 {
280     int insertIndex = node->number;
281     for(int i=0; i<node->number; i++)
282     {
283         if(key < node->key[i])
284         {
285             insertIndex = i;
286             break;
287         }
288     }
289 
290     for(int i=node->number-1; i>=insertIndex; i--)
291     {
292         node->key[i+1] = node->key[i];
293         node->valueNode.leafNode.value[i+1] = node->valueNode.leafNode.value[i];
294     }
295 
296     node->key[insertIndex] = key;
297     node->valueNode.leafNode.value[insertIndex] = value;
298     node->number++;
299 }
300 
301 int DelLeafItem(BTreeNode * node, int key)
302 {
303     int deleteIndex = -1;
304     for(int i=0; i<node->number; i++)
305     {
306         if(key == node->key[i])
307         {
308             deleteIndex = i;
309             break;
310         }
311     }
312     if(deleteIndex != -1)
313     {
314         node->valueNode.leafNode.isDelete[deleteIndex] = 1;
315     }
316     return deleteIndex;
317 }
318 
319 int GetLeafItem(BTreeNode * node, int key)
320 {
321     for(int i=0; i<node->number; i++)
322     {
323         if(key == node->key[i] && node->valueNode.leafNode.isDelete[i] == 0)
324         {
325             return node->valueNode.leafNode.value[i];
326         }
327     }
328     return -1;
329 }
330 
331 void PutInnerItem(BTreeNode * node, BTreeNode * cnode, int key)
332 {
333     int insertIndex = node->number;
334     for(int i=0; i<node->number; i++)
335     {
336         if(key < node->key[i])
337         {
338             insertIndex = i;
339             break;
340         }
341     }
342 
343     for(int i=node->number-1; i>=insertIndex; i--)
344     {
345         node->key[i+1] = node->key[i];
346         node->valueNode.innerNode.child[i+2] = node->valueNode.innerNode.child[i+1];
347     }
348 
349     node->valueNode.innerNode.child[insertIndex+1] = cnode;
350     node->key[insertIndex] = key;
351 
352     node->number++;
353 }
354 
355 
356 void SplitNodeNPutInnerItem(BTreeNode * fnode, BTreeNode * node)
357 {
358     BTreeNode * nfnode = CreateNode();
359     BTreeNode * ffnode = NULL;
360     nfnode->isLeaf = fnode->isLeaf;
361     nfnode->level = fnode->level;
362 
363     int splitValue = fnode->valueNode.innerNode.child[N]->key[0];
364     int splitValueAfter = fnode->valueNode.innerNode.child[N+1]->key[0];
365 
366     for(int i=0; i<N; i++)
367     {
368         nfnode->key[i] = fnode->key[N+i];
369     }
370 
371     nfnode->number = N;
372     fnode->number = N;
373 
374     if(node->key[0] < splitValue)
375     {
376         for(int i=0; i<=N; i++)
377         {
378             nfnode->valueNode.innerNode.child[i] = fnode->valueNode.innerNode.child[N+i];
379         }
380         
381         fnode->number--;
382         
383         PutInnerItem(fnode, node, GetMin(node));
384     }
385     else
386     {
387         if(node->key[0] < splitValueAfter)
388         {
389             for(int i=0; i<N; i++)
390             {
391                 nfnode->valueNode.innerNode.child[i+1] = fnode->valueNode.innerNode.child[N+i+1];
392             }
393             nfnode->valueNode.innerNode.child[0] = node;
394             nfnode->key[0] = GetMin(nfnode->valueNode.innerNode.child[1]);
395         }
396         else
397         {
398             for(int i=0; i<N; i++)
399             {
400                 nfnode->valueNode.innerNode.child[i] = fnode->valueNode.innerNode.child[N+i+1];
401             }
402 
403             for(int i=0; i<N-1; i++)
404             {
405                 nfnode->key[i] = nfnode->key[i+1];
406             }
407 
408             nfnode->number--;
409 
410             PutInnerItem(nfnode, node, GetMin(node));
411         }
412     }
413 
414     ffnode = GetParent(fnode);
415 
416     if(ffnode == NULL)
417     {
418         ffnode = CreateNode();
419         ffnode->valueNode.innerNode.child[0] = root;
420         ffnode->level = root->level + 1;
421         root = ffnode;
422     }
423 
424     if(ffnode->number < NUM)
425     {
426         PutInnerItem(ffnode, nfnode, GetMin(nfnode));
427     }
428     else
429     {
430         SplitNodeNPutInnerItem(ffnode, nfnode);
431     }
432 }
433 
434 BTreeNode * GetParent(BTreeNode * node)
435 {
436     BTreeNode * fnode = root;
437     int minKey = node->key[0];
438 
439     int num = 0;
440 
441     if(node == root)
442         return NULL;
443 
444     if(root->isLeaf == 1)
445         return NULL;
446 
447     int isHit = 0;
448     
449     while(fnode->isLeaf == 0)
450     {
451         num = fnode->number;
452         isHit = 0;
453 
454         for(int i=0; i<num; i++)
455         {
456             if(minKey < fnode->key[i])
457             {
458                 if(fnode->valueNode.innerNode.child[i] == node)
459                     return fnode;
460                 else
461                 {
462                     fnode = fnode->valueNode.innerNode.child[i];
463                     isHit = 1;
464                     break;
465                 }
466             }
467         }
468 
469         if(isHit == 0)
470         {
471             if(fnode->valueNode.innerNode.child[fnode->number] == node)
472                 return fnode;
473             else
474             {
475                 fnode = fnode->valueNode.innerNode.child[fnode->number];
476             }
477         }
478     }
479     return NULL;
480 }
481 
482 void Print()
483 {
484     BTreeNode * node = root;
485     while(node->isLeaf == 0)
486     {
487         node = node->valueNode.innerNode.child[0];
488     }
489 
490     do
491     {
492         for(int i=0; i<node->number; i++)
493         {
494             printf("%d ", node->valueNode.leafNode.value[i]);
495         }
496         printf("|");
497         node = node->valueNode.leafNode.next;
498     }while(node != NULL);
499 
500     printf("\n");
501 }
502 
503 void PrintTree()
504 {
505     queue<BTreeNode*> nodeQueue;
506     BTreeNode * node = root;
507     int level = node->level;
508 
509     nodeQueue.push(node);
510 
511     printf("| ");
512     while(!nodeQueue.empty())
513     {
514         node = nodeQueue.front();
515 
516         nodeQueue.pop();
517 
518         if(node->isLeaf == 0)
519         {
520             for(int i=0; i<=node->number; i++)
521             {
522                 nodeQueue.push(node->valueNode.innerNode.child[i]);
523             }
524         }
525 
526         if(level != node->level)
527         {
528             level--;
529             printf("\n| ");
530         }
531 
532         for(int i=0; i<node->number; i++)
533         {
534             printf("%d ", node->key[i]);
535         }
536         printf("| ");
537 
538     }
539     printf("\n===================================\n");
540 
541 }
542 
543 bool ValidateTree()
544 {
545     queue<BTreeNode*> nodeQueue;
546     BTreeNode * node = root;
547     int level = node->level;
548 
549     int value = node->key[0];
550 
551     nodeQueue.push(node);
552 
553     while(!nodeQueue.empty())
554     {
555         node = nodeQueue.front();
556         nodeQueue.pop();
557 
558         if(node->isLeaf == 0)
559         {
560             for(int i=0; i<=node->number; i++)
561             {
562                 nodeQueue.push(node->valueNode.innerNode.child[i]);
563             }
564         }
565 
566         if(level != node->level)
567         {
568             value = node->key[0];
569             level--;
570         }
571 
572         for(int i=0; i<node->number; i++)
573         {
574             if(node->key[i] >= value)
575             {
576                 value = node->key[i];
577             }
578             else
579             {
580                 printf("node->key[%d]:%d, value:%d\n", i, node->key[i], value);
581                 return false;
582             }
583 
584             if(node->isLeaf == 0)
585             {
586                 if(node->key[i] <= node->valueNode.innerNode.child[i]->key[0] || node->key[i] > node->valueNode.innerNode.child[i+1]->key[0])
587                 {
588                     printf("node->key[%d]:%d, child[%d]:%d, child[%d]:%d\n", i, node->key[i], i, node->valueNode.innerNode.child[i]->key[0], i+1, node->valueNode.innerNode.child[i+1]->key[0]);
589                     return false;
590                 }
591             }
592         }
593     }
594     return true;
595 }
596 
597 int GetMin(BTreeNode * node)
598 {
599     while(node->isLeaf == 0)
600     {
601         node = node->valueNode.innerNode.child[0];
602     }
603     return node->valueNode.leafNode.value[0];
604 }

 

posted @ 2013-02-04 11:51  fivewind  阅读(404)  评论(0)    收藏  举报