B+树的一个实现
B+树是现在很多数据库索引技术的基础,也是讨论比较多的一种数据结构。而很多书上对它只有描述性的定义,并无伪代码的描述。因此,在真正实现B+树的时候,难免会掉入一些思维的陷阱。因此,本文主要讲讲笔者在实现一个简单的B+树时注意到的问题,最后将会给出一个实现好的B+树。(未完)
首先还是谈谈什么是B+树?
B+ 树是一种树数据结构,通常用于数据库和操作系统的文件系统中。B+ 树的特点是能够保持数据稳定有序,其插入与修改拥有较稳定的对数时间复杂度。B+ 树元素自底向上插入,这与二叉树恰好相反。
B+ 树在节点访问时间远远超过节点内部访问时间的时候,比可作为替代的实现有着实在的优势。这通常在多数节点在次级存储比如硬盘中的时候出现。通过最大化在每个内部节点内的子节点的数目减少树的高度,平衡操作不经常发生,而且效率增加了。这种价值得以确立通常需要每个节点在次级存储中占据完整的磁盘块或近似的大小。
B+ 树背后的想法是内部节点可以有在预定范围内的可变数目的子节点。因此,B+ 树不需要像其他自平衡二叉查找树那样经常地重新平衡。对于特定的实现,子节点数目的下界和上界是固定的。例如,在 2-3 B 树(常简称为2-3 树)中,每个内部节点只可能有 2 或 3 个子节点。如果节点的子节点数目不在有效范围内,则该节点被视为处于违规状态。
B+ 树的创造者 Rudolf Bayer 没有解释B代表什么。最常见的观点是B代表平衡(balanced),因为所有的叶子节点在树中都在相同的级别上。B也可能代表Bayer,或者是波音(Boeing),因为他曾经工作于波音科学研究实验室。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <memory>
#include <queue>

#if defined(_MSC_VER)
#include <tchar.h>
#else
/* <tchar.h> is MSVC-specific. Elsewhere map _TCHAR to plain char; _tmain is
 * then an ordinary function, so build with -D_tmain=main to use it as the
 * program entry point. */
typedef char _TCHAR;
#endif

/* Number of keys each half keeps when a full node is split. */
#define N 200
/* Maximum number of keys a node can hold. */
#define NUM (2*N)

struct BTreeNode;

/* Payload of an inner node: child[i] is followed while the search key is
 * smaller than key[i]; child[number] is followed otherwise. */
typedef struct BTreeInnerNode
{
    BTreeNode * child[NUM+1];
} BTreeInnerNode;

/* Payload of a leaf node. */
typedef struct BTreeLeafNode
{
    int value[NUM];     /* value[i] is stored for key[i] of the owning node */
    int isDelete[NUM];  /* 1 when entry i has been marked deleted, else 0 */
    BTreeNode * next;   /* leaf created to the right by a split, or NULL */
} BTreeLeafNode;

typedef union Node
{
    BTreeInnerNode innerNode;
    BTreeLeafNode leafNode;
} Node;

typedef struct BTreeNode
{
    int number;     /* number of keys currently stored in key[] */
    int isLeaf;     /* 1 for a leaf, 0 for an inner node */
    int key[NUM];   /* keys in ascending order */
    int level;      /* 0 for leaves, one more than the children otherwise */
    Node valueNode; /* leafNode when isLeaf is 1, innerNode otherwise */
} BTreeNode;

/* Root of the single tree managed by this file; NULL while the tree is empty. */
BTreeNode * root = NULL;

void Insert(int key, int value);
int Delete(int key);
int Query(int key);

BTreeNode * InnerTravel(BTreeNode * node, int key);
BTreeNode * CreateNode();
void SplitNodeNPutLeafItem(BTreeNode * node, int key, int value);
void SplitNodeNPutInnerItem(BTreeNode * fnode, BTreeNode * node);

void PutLeafItem(BTreeNode * node, int key, int value);
int GetLeafItem(BTreeNode * node, int key);
int DelLeafItem(BTreeNode * node, int key);

void PutInnerItem(BTreeNode * node, BTreeNode * cnode, int key);
/* NOTE(review): GetInnerItem and DelInnerItem are declared but no definition
 * is present in this file. */
int GetInnerItem(BTreeNode * node, int key);
void DelInnerItem(BTreeNode * node, int key);

int GetMin(BTreeNode * node);
BTreeNode * GetParent(BTreeNode * node);

void Print();
void PrintTree();
bool ValidateTree();

/* Returns the index of the first element of the sorted array keys[0..count)
 * that is strictly greater than key, or count when there is none. */
static int UpperBound(const int * keys, int count, int key)
{
    return static_cast<int>(std::upper_bound(keys, keys + count, key) - keys);
}

/* Demo driver: loads 20000..10001 in descending order, then 0..9999 in
 * ascending order (each key k with value k+1), and prints the result of
 * querying, deleting and re-querying key 100. ValidateTree() and PrintTree()
 * can be called between the steps while debugging. */
int _tmain(int argc, _TCHAR* argv[])
{
    (void)argc;
    (void)argv;

    for(int i=20000; i>10000; i--)
    {
        Insert(i, i+1);
    }

    for(int i=0; i<10000; i++)
    {
        Insert(i, i+1);
    }

    printf("%d\n", Query(100));

    printf("%d\n", Delete(100));

    printf("%d\n", Query(100));

    return 0;
}

/* Adds the pair (key, value) to the tree, creating the root leaf on first use
 * and splitting the target leaf when it already holds NUM entries. An existing
 * entry with the same key is not replaced; the new entry is stored after it. */
void Insert(int key, int value)
{
    if(root == NULL)
    {
        root = CreateNode();
        root->isLeaf = 1;
    }

    BTreeNode * leaf = InnerTravel(root, key);

    if(leaf->number < NUM)
    {
        PutLeafItem(leaf, key, value);
    }
    else
    {
        SplitNodeNPutLeafItem(leaf, key, value);
    }
}

/* Marks the entry for key as deleted (the slot itself is kept).
 * Returns the entry's index inside its leaf, or -1 when the tree is empty or
 * holds no live entry for key. */
int Delete(int key)
{
    if(root == NULL)
    {
        return -1;
    }

    return DelLeafItem(InnerTravel(root, key), key);
}

/* Returns the value stored for key, or -1 when the tree is empty or holds no
 * live entry for key. A stored value of -1 cannot be told apart from a miss. */
int Query(int key)
{
    if(root == NULL)
    {
        return -1;
    }

    return GetLeafItem(InnerTravel(root, key), key);
}

/* Allocates a zero-filled node: number 0, isLeaf 0, level 0, all pointers NULL.
 * Terminates the program when the allocation fails. Nodes are never freed in
 * this file. */
BTreeNode * CreateNode()
{
    BTreeNode * node = static_cast<BTreeNode *>(calloc(1, sizeof(BTreeNode)));
    if(node == NULL)
    {
        fprintf(stderr, "CreateNode: out of memory\n");
        abort();
    }

    return node;
}

/* Descends from node to the leaf responsible for key. At every inner node the
 * child in front of the first separator greater than key is followed, so a key
 * equal to a separator goes to the right of it. */
BTreeNode * InnerTravel(BTreeNode * node, int key)
{
    while(node->isLeaf != 1)
    {
        const int slot = UpperBound(node->key, node->number, key);
        node = node->valueNode.innerNode.child[slot];
    }
    return node;
}

/* Splits the full leaf `node` (NUM entries) and inserts (key, value).
 * The upper N entries move to a new right sibling that is linked in through
 * `next`; the new entry goes to whichever half covers key; finally the sibling
 * is registered in the parent, creating a new root when `node` was the root. */
void SplitNodeNPutLeafItem(BTreeNode * node, int key, int value)
{
    BTreeNode * nnode = CreateNode();
    nnode->isLeaf = node->isLeaf;
    nnode->level = node->level;

    BTreeLeafNode * lower = &node->valueNode.leafNode;
    BTreeLeafNode * upper = &nnode->valueNode.leafNode;

    memcpy(nnode->key, node->key + N, N * sizeof(int));
    memcpy(upper->value, lower->value + N, N * sizeof(int));
    /* Deletion marks must travel with their entries. */
    memcpy(upper->isDelete, lower->isDelete + N, N * sizeof(int));

    nnode->number = N;
    node->number = N;

    upper->next = lower->next;
    lower->next = nnode;

    if(key < nnode->key[0])
    {
        PutLeafItem(node, key, value);
    }
    else
    {
        PutLeafItem(nnode, key, value);
    }

    BTreeNode * fnode = GetParent(node);

    if(fnode == NULL)
    {
        /* `node` was the root: grow the tree by one level. */
        fnode = CreateNode();
        fnode->valueNode.innerNode.child[0] = root;
        fnode->level = root->level + 1;
        root = fnode;
    }

    if(fnode->number < NUM)
    {
        PutInnerItem(fnode, nnode, nnode->key[0]);
    }
    else
    {
        SplitNodeNPutInnerItem(fnode, nnode);
    }
}

/* Inserts (key, value) into the leaf `node`, keeping the keys sorted; an equal
 * key is placed after the existing ones. The caller must ensure
 * node->number < NUM. */
void PutLeafItem(BTreeNode * node, int key, int value)
{
    BTreeLeafNode * leaf = &node->valueNode.leafNode;
    const int insertIndex = UpperBound(node->key, node->number, key);
    const size_t tailBytes = static_cast<size_t>(node->number - insertIndex) * sizeof(int);

    /* Open a gap at insertIndex in all three parallel arrays. */
    memmove(node->key + insertIndex + 1, node->key + insertIndex, tailBytes);
    memmove(leaf->value + insertIndex + 1, leaf->value + insertIndex, tailBytes);
    memmove(leaf->isDelete + insertIndex + 1, leaf->isDelete + insertIndex, tailBytes);

    node->key[insertIndex] = key;
    leaf->value[insertIndex] = value;
    /* The slot may still carry a stale mark from an entry that used to be here. */
    leaf->isDelete[insertIndex] = 0;
    node->number++;
}

/* Marks the first live entry of the leaf `node` whose key equals `key` as
 * deleted. Returns its index, or -1 when there is no such entry. */
int DelLeafItem(BTreeNode * node, int key)
{
    int * deleted = node->valueNode.leafNode.isDelete;

    for(int i=0; i<node->number; i++)
    {
        if(node->key[i] == key && deleted[i] == 0)
        {
            deleted[i] = 1;
            return i;
        }
    }
    return -1;
}

/* Returns the value of the first live entry of the leaf `node` whose key
 * equals `key`, or -1 when there is no such entry. */
int GetLeafItem(BTreeNode * node, int key)
{
    const BTreeLeafNode * leaf = &node->valueNode.leafNode;

    for(int i=0; i<node->number; i++)
    {
        if(node->key[i] == key && leaf->isDelete[i] == 0)
        {
            return leaf->value[i];
        }
    }
    return -1;
}

/* Inserts separator `key` into the inner node `node` and places `cnode` as the
 * child immediately to the right of it. The caller must ensure
 * node->number < NUM. */
void PutInnerItem(BTreeNode * node, BTreeNode * cnode, int key)
{
    BTreeNode ** children = node->valueNode.innerNode.child;
    const int insertIndex = UpperBound(node->key, node->number, key);
    const size_t tail = static_cast<size_t>(node->number - insertIndex);

    memmove(node->key + insertIndex + 1, node->key + insertIndex, tail * sizeof(int));
    /* Children to the right of the new separator move one slot up as well. */
    memmove(children + insertIndex + 2, children + insertIndex + 1, tail * sizeof(BTreeNode *));

    children[insertIndex + 1] = cnode;
    node->key[insertIndex] = key;

    node->number++;
}

/* Splits the full inner node `fnode` (NUM keys, NUM+1 children) while adding
 * the new child `node`. A new right sibling receives the upper part of
 * `fnode`; `node` is placed into whichever half covers its keys, and the
 * sibling is then registered in the parent of `fnode` (recursively splitting
 * it, or creating a new root, when needed). */
void SplitNodeNPutInnerItem(BTreeNode * fnode, BTreeNode * node)
{
    BTreeNode * nfnode = CreateNode();
    nfnode->isLeaf = fnode->isLeaf;
    nfnode->level = fnode->level;

    BTreeNode ** oldChildren = fnode->valueNode.innerNode.child;
    BTreeNode ** newChildren = nfnode->valueNode.innerNode.child;

    /* First keys of the two children around the split point decide where
     * `node` belongs. */
    const int splitValue = oldChildren[N]->key[0];
    const int splitValueAfter = oldChildren[N+1]->key[0];

    memcpy(nfnode->key, fnode->key + N, N * sizeof(int));

    nfnode->number = N;
    fnode->number = N;

    if(node->key[0] < splitValue)
    {
        /* `node` belongs to the lower half: children N..2N move over, the
         * lower half gives up its last separator and takes `node` instead. */
        memcpy(newChildren, oldChildren + N, (N + 1) * sizeof(BTreeNode *));

        fnode->number--;

        PutInnerItem(fnode, node, GetMin(node));
    }
    else if(node->key[0] < splitValueAfter)
    {
        /* `node` sits exactly at the split point: it becomes the first child
         * of the new sibling, in front of children N+1..2N. */
        newChildren[0] = node;
        memcpy(newChildren + 1, oldChildren + N + 1, N * sizeof(BTreeNode *));
        nfnode->key[0] = GetMin(newChildren[1]);
    }
    else
    {
        /* `node` belongs further right: children N+1..2N move over, the first
         * copied separator is dropped, then `node` is inserted normally. */
        memcpy(newChildren, oldChildren + N + 1, N * sizeof(BTreeNode *));
        memmove(nfnode->key, nfnode->key + 1, (N - 1) * sizeof(int));

        nfnode->number--;

        PutInnerItem(nfnode, node, GetMin(node));
    }

    BTreeNode * ffnode = GetParent(fnode);

    if(ffnode == NULL)
    {
        /* `fnode` was the root: grow the tree by one level. */
        ffnode = CreateNode();
        ffnode->valueNode.innerNode.child[0] = root;
        ffnode->level = root->level + 1;
        root = ffnode;
    }

    if(ffnode->number < NUM)
    {
        PutInnerItem(ffnode, nfnode, GetMin(nfnode));
    }
    else
    {
        SplitNodeNPutInnerItem(ffnode, nfnode);
    }
}

/* Finds the parent of `node` by descending from the root along the path that
 * node->key[0] selects. Returns NULL when `node` is the root, when the root is
 * a leaf, or when the descent reaches a leaf without meeting `node`. */
BTreeNode * GetParent(BTreeNode * node)
{
    if(root == NULL || node == root || root->isLeaf == 1)
    {
        return NULL;
    }

    const int probe = node->key[0];
    BTreeNode * current = root;

    while(current->isLeaf == 0)
    {
        const int slot = UpperBound(current->key, current->number, probe);
        BTreeNode * below = current->valueNode.innerNode.child[slot];

        if(below == node)
        {
            return current;
        }
        current = below;
    }
    return NULL;
}

/* Prints the values of all leaves from left to right, one '|' after each leaf,
 * followed by a newline. Entries marked deleted are printed too. Prints
 * nothing for an empty tree. */
void Print()
{
    if(root == NULL)
    {
        return;
    }

    BTreeNode * node = root;
    while(node->isLeaf == 0)
    {
        node = node->valueNode.innerNode.child[0];
    }

    for(; node != NULL; node = node->valueNode.leafNode.next)
    {
        for(int i=0; i<node->number; i++)
        {
            printf("%d ", node->valueNode.leafNode.value[i]);
        }
        printf("|");
    }

    printf("\n");
}

/* Prints the keys of every node in breadth-first order, one output line per
 * tree level, with "| " separating the nodes. Prints nothing for an empty
 * tree. */
void PrintTree()
{
    if(root == NULL)
    {
        return;
    }

    std::queue<BTreeNode*> pending;
    int level = root->level;

    pending.push(root);

    printf("| ");
    while(!pending.empty())
    {
        BTreeNode * node = pending.front();
        pending.pop();

        if(node->isLeaf == 0)
        {
            for(int i=0; i<=node->number; i++)
            {
                pending.push(node->valueNode.innerNode.child[i]);
            }
        }

        if(level != node->level)
        {
            /* First node of the next level down: start a new output line. */
            level--;
            printf("\n| ");
        }

        for(int i=0; i<node->number; i++)
        {
            printf("%d ", node->key[i]);
        }
        printf("| ");
    }
    printf("\n===================================\n");
}

/* Checks, level by level, that keys never decrease from left to right and
 * that every separator of an inner node is greater than the first key of the
 * child on its left and not greater than the first key of the child on its
 * right. Prints the offending values and returns false on the first
 * violation; returns true otherwise, including for an empty tree. */
bool ValidateTree()
{
    if(root == NULL)
    {
        return true;
    }

    std::queue<BTreeNode*> pending;
    int level = root->level;
    int value = root->key[0];   /* largest key seen so far on this level */

    pending.push(root);

    while(!pending.empty())
    {
        BTreeNode * node = pending.front();
        pending.pop();

        if(node->isLeaf == 0)
        {
            for(int i=0; i<=node->number; i++)
            {
                pending.push(node->valueNode.innerNode.child[i]);
            }
        }

        if(level != node->level)
        {
            /* A new level starts: restart the ordering check. */
            value = node->key[0];
            level--;
        }

        for(int i=0; i<node->number; i++)
        {
            if(node->key[i] < value)
            {
                printf("node->key[%d]:%d, value:%d\n", i, node->key[i], value);
                return false;
            }
            value = node->key[i];

            if(node->isLeaf == 0)
            {
                const int leftFirst = node->valueNode.innerNode.child[i]->key[0];
                const int rightFirst = node->valueNode.innerNode.child[i+1]->key[0];

                if(node->key[i] <= leftFirst || node->key[i] > rightFirst)
                {
                    printf("node->key[%d]:%d, child[%d]:%d, child[%d]:%d\n", i, node->key[i], i, leftFirst, i+1, rightFirst);
                    return false;
                }
            }
        }
    }
    return true;
}

/* Returns the smallest key stored in the subtree rooted at `node`, i.e. the
 * first key of its leftmost leaf. The result is used as a separator key, so it
 * must be a key and not the value stored with it. */
int GetMin(BTreeNode * node)
{
    while(node->isLeaf == 0)
    {
        node = node->valueNode.innerNode.child[0];
    }
    return node->key[0];
}

浙公网安备 33010602011771号