/*
 * Disk-backed B+ tree index.
 *
 * The tree is stored in an ordinary file of the host file system; it has no
 * file system of its own. All nodes live in one file. Every node has the same
 * size, so each node is numbered (its id, starting at 1) and its byte offset
 * is HEADER_SIZE + sizeof(BPNode) * (id - 1). The file header holds two
 * unsigned ints: the node counter `num` and the root id `locate`.
 *
 * BPTree.root points to the in-memory copy of the root node, `locate` is the
 * root's id in the file, `num` is the number of node slots allocated so far
 * (a new node always gets id num + 1 and is appended at the end of the file),
 * `name` is the path of the index file and `fp` the open stream.
 *
 * File space of nodes that are unlinked by deletions is not reclaimed, so
 * `num` only ever grows.
 *
 * Note on open modes: the file must be opened in a binary update mode
 * ("rb+") so that fseek() can be followed by either a read or a write;
 * text mode may corrupt the fixed-size records.
 */
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <memory.h>
#include <string.h>

#define T 3                                     /* minimum degree of the B+ tree */
#define MAX_KEYS (2 * T)                        /* capacity of one node */
#define HEADER_SIZE (2 * sizeof(unsigned int))  /* bytes of file header before node 1 */

typedef int KeyType;    /* type of the indexed keys */
typedef int Pointer;    /* node id (inner nodes) or record id (leaves) */

/* One tree node, stored on disk exactly as laid out here. */
typedef struct BPNode
{
    unsigned int id;            /* number of this node inside the file (1-based) */
    unsigned int n;             /* number of keys currently stored */
    int leaf;                   /* non-zero when this node is a leaf */
    KeyType key[MAX_KEYS];      /* keys; in an inner node key[i] is the smallest key of child[i] */
    Pointer child[MAX_KEYS];    /* inner node: child node ids; leaf: ids stored with the keys */
    Pointer next;               /* id of the next sibling, -1 when there is none */
}BPNode,*P_BPNode;

/* Bookkeeping for one open tree. */
typedef struct BPTree
{
    P_BPNode root;          /* in-memory copy of the root node */
    unsigned int locate;    /* id of the root node inside the file */
    unsigned int num;       /* number of node ids handed out so far */
    char name[100];         /* path of the file holding the nodes */
    FILE *fp;               /* stream opened on `name` */
    int start;              /* id of the leaf holding the smallest key */
}BPTree,*P_BPTree;

BPTree indexBPTree;     /* the single, global tree instance */

int pNode(P_BPNode n);

/* Byte offset of node `id` (1-based) inside the index file. */
static long nodeOffset(unsigned int id)
{
    return (long)(HEADER_SIZE + sizeof(BPNode) * (size_t)(id - 1));
}

/*
 * Builds ".\table\<tableName>.<attr>" in indexBPTree.name.
 * Returns 0 on success, -1 when an argument is NULL or the path does not fit.
 */
static int buildIndexPath(const char *tableName, const char *attr)
{
    int len;

    if (tableName == NULL || attr == NULL)
        return -1;
    len = snprintf(indexBPTree.name, sizeof indexBPTree.name,
                   ".\\table\\%s.%s", tableName, attr);
    if (len < 0 || (size_t)len >= sizeof indexBPTree.name)
    {
        indexBPTree.name[0] = '\0';
        return -1;
    }
    return 0;
}

/*
 * Writes node `w` to its slot (selected by w->id) in indexBPTree.fp.
 * Returns 0 on success, -1 when no file is open, the id is 0, or the
 * seek/write fails.
 */
int writeNode(P_BPNode w)
{
    if (indexBPTree.fp == NULL || w == NULL || w->id == 0)
        return -1;
    if (fseek(indexBPTree.fp, nodeOffset(w->id), SEEK_SET) != 0)
        return -1;
    if (fwrite(w, sizeof *w, 1, indexBPTree.fp) != 1)
        return -1;
    return 0;
}

/*
 * Reads the node with number `id` from indexBPTree.fp into `r`.
 * Returns 0 on success, -1 when no file is open, id < 1, the seek/read
 * fails, or the stored key count exceeds the node capacity.
 */
int readNode(P_BPNode r, Pointer id)
{
    if (indexBPTree.fp == NULL || r == NULL || id < 1)
        return -1;
    if (fseek(indexBPTree.fp, nodeOffset((unsigned int)id), SEEK_SET) != 0)
        return -1;
    if (fread(r, sizeof *r, 1, indexBPTree.fp) != 1)
        return -1;
    if (r->n > MAX_KEYS)    /* corrupt record: would index past key[]/child[] */
        return -1;
    return 0;
}

/*
 * Creates (or truncates) the index file for tableName.attr and writes the
 * header plus an empty leaf root with id 1. The path is echoed on stdout.
 * The file is closed again; call initIndexBPTree() to work with the tree.
 * Must not be called while an index is open (the global state is reset).
 * Returns 0 on success, -1 on a path, open or write error.
 */
int createIndexBPTree (char *tableName, char *attr)
{
    BPNode root;
    int rc = 0;

    if (buildIndexPath(tableName, attr) != 0)
        return -1;
    puts(indexBPTree.name);

    indexBPTree.root = NULL;
    indexBPTree.num = 1;        /* only the root exists */
    indexBPTree.start = 1;      /* the smallest leaf is the root itself */
    indexBPTree.locate = 1;

    memset(&root, 0, sizeof root);  /* no uninitialised bytes reach the disk */
    root.id = 1;
    root.n = 0;
    root.leaf = 1;
    root.next = -1;

    indexBPTree.fp = fopen(indexBPTree.name, "wb");
    if (indexBPTree.fp == NULL)
        return -1;

    if (fwrite(&indexBPTree.num, sizeof indexBPTree.num, 1, indexBPTree.fp) != 1
        || fwrite(&indexBPTree.locate, sizeof indexBPTree.locate, 1, indexBPTree.fp) != 1
        || writeNode(&root) != 0)
        rc = -1;

    if (fclose(indexBPTree.fp) != 0)
        rc = -1;
    indexBPTree.fp = NULL;
    return rc;
}

/*
 * Splits the full child `c` (c->n == 2*T), which is child `i` of `p`.
 * The upper T entries of `c` move to a newly numbered node that is linked
 * behind `c` and inserted into `p` at position i + 1. `p` must have room
 * for one more entry. All three nodes are written to disk.
 * Returns 0 on success, -1 on violated preconditions or a write error.
 */
int splitBPNode (P_BPNode p, P_BPNode c, int i)
{
    BPNode b;
    int j;

    if (p->n >= MAX_KEYS || c->n != MAX_KEYS || i < 0 || (unsigned int)i >= p->n)
        return -1;

    memset(&b, 0, sizeof b);
    b.leaf = c->leaf;
    b.n = T;
    b.id = indexBPTree.num + 1;     /* the id doubles as the position in the file */
    b.next = c->next;
    for (j = 0; j < T; j++)
    {
        b.key[j] = c->key[j + T];
        b.child[j] = c->child[j + T];
    }
    indexBPTree.num++;

    c->n = T;                       /* c keeps the lower half */
    c->next = (Pointer)b.id;

    /* open a slot behind position i in the parent */
    for (j = (int)p->n - 1; j > i; j--)
    {
        p->key[j + 1] = p->key[j];
        p->child[j + 1] = p->child[j];
    }
    p->key[i + 1] = b.key[0];
    p->child[i + 1] = (Pointer)b.id;
    p->n++;

    if (writeNode(p) != 0 || writeNode(c) != 0 || writeNode(&b) != 0)
        return -1;
    return 0;
}//splitBPNode

/*
 * Inserts key `k` with record id `id` into the subtree rooted at `s`.
 * `s` must not be full; full children met on the way down are split first.
 * Returns 0 on success, -1 when `s` is full/empty-inner or on an I/O error.
 */
int insertBPNodeNotFull(P_BPNode s, KeyType k, unsigned int id)
{
    int i = (int)s->n - 1;
    BPNode child;

    if (s->n >= MAX_KEYS)
        return -1;

    if (s->leaf)
    {
        /* shift larger keys right and drop k into the gap */
        while (i >= 0 && s->key[i] > k)
        {
            s->key[i + 1] = s->key[i];
            s->child[i + 1] = s->child[i];
            i--;
        }
        s->key[i + 1] = k;
        s->child[i + 1] = (Pointer)id;
        s->n++;
        return writeNode(s);
    }

    if (s->n == 0)      /* an inner node without children cannot be descended */
        return -1;

    while (i >= 0 && s->key[i] > k)
        i--;
    if (i < 0)
    {
        /* k becomes the new minimum: it goes into the first child and
           the separator of that child is lowered to k */
        i = 0;
        s->key[0] = k;
    }
    if (writeNode(s) != 0)
        return -1;

    if (readNode(&child, s->child[i]) != 0)
        return -1;
    if (child.n == MAX_KEYS)
    {
        if (splitBPNode(s, &child, i) != 0)
            return -1;
        if (k > s->key[i + 1])
        {
            /* k belongs to the new right half; load it */
            i++;
            if (readNode(&child, s->child[i]) != 0)
                return -1;
        }
    }
    return insertBPNodeNotFull(&child, k, id);
}

/*
 * Exact-match lookup. Returns the id stored with key `k`, or -1 when the
 * key is not in the tree (including an empty tree) or a node cannot be read.
 */
Pointer equalSearch(P_BPTree tree, KeyType k)
{
    BPNode buf;
    P_BPNode cur = tree->root;
    int i;

    if (cur == NULL || cur->n == 0 || k < cur->key[0])
        return -1;

    for (;;)
    {
        i = (int)cur->n - 1;
        while (i >= 0 && cur->key[i] > k)
            i--;
        if (i < 0)
            return -1;
        if (cur->leaf)
            break;
        if (readNode(&buf, cur->child[i]) != 0)
            return -1;
        cur = &buf;
    }
    return (cur->key[i] == k) ? cur->child[i] : -1;
}//equalSearch

/*
 * Range lookup: returns the number of keys k with low <= k <= high,
 * walking the leaf chain through the `next` links.
 */
int rangeSearch (P_BPTree tree, KeyType low, KeyType high)
{
    BPNode buf;
    P_BPNode cur = tree->root;
    unsigned int i;
    int count = 0;

    if (cur == NULL || cur->n == 0)
        return 0;
    if (high < low || high < cur->key[0])
        return 0;
    if (low < cur->key[0])
        low = cur->key[0];      /* clamp so the descent always finds a slot */

    /* descend to the leaf that may contain `low` */
    for (;;)
    {
        if (cur->n == 0)
            return 0;
        i = cur->n - 1;
        while (i > 0 && cur->key[i] > low)
            i--;
        if (cur->leaf)
            break;
        if (readNode(&buf, cur->child[i]) != 0)
            return 0;
        cur = &buf;
    }
    if (cur->key[i] < low)
        i++;

    /* count matches leaf by leaf */
    for (;;)
    {
        for (; i < cur->n && cur->key[i] <= high; i++)
            count++;
        if (i < cur->n || cur->next < 0)
            break;              /* met a key above `high`, or ran out of leaves */
        if (readNode(&buf, cur->next) != 0)
            break;
        cur = &buf;
        i = 0;
    }
    return count;
}//rangeSearch

/*
 * Inserts key `k` with record id `id`. Prints a message and returns -1 when
 * the key already exists. When the root is full a new root is created first;
 * tree->root always ends up holding the current root node.
 * Returns 0 on success, -1 on a duplicate or an I/O error.
 */
int insertKeyInBPTree (P_BPTree tree, KeyType k, Pointer id)
{
    P_BPNode r = tree->root;

    if (equalSearch(tree, k) >= 0)
    {
        printf("元素已存在!");
        return -1;
    }

    if (r->n == MAX_KEYS)
    {
        /* grow the tree by one level: the old root becomes child 0 */
        BPNode s;

        memset(&s, 0, sizeof s);
        s.leaf = 0;
        s.n = 1;
        s.key[0] = r->key[0];
        s.child[0] = (Pointer)r->id;
        s.id = tree->num + 1;
        s.next = -1;
        tree->num++;

        if (splitBPNode(&s, r, 0) != 0)
            return -1;

        /* keep the in-memory root current, then insert through it so that
           later changes to the root are not lost in a temporary copy */
        *tree->root = s;
        tree->locate = s.id;
    }
    return insertBPNodeNotFull(tree->root, k, (unsigned int)id);
}//insertBPNode

/*
 * Opens the existing index file for tableName.attr in "rb+" mode, reads the
 * header and loads the root node into freshly allocated memory.
 * Returns 0 on success; on failure returns -1 and leaves no file open.
 */
int initIndexBPTree(char *tableName, char *attr)
{
    indexBPTree.root = NULL;
    indexBPTree.fp = NULL;
    indexBPTree.start = 1;

    if (buildIndexPath(tableName, attr) != 0)
        return -1;

    indexBPTree.fp = fopen(indexBPTree.name, "rb+");
    if (indexBPTree.fp == NULL)
        return -1;

    if (fread(&indexBPTree.num, sizeof indexBPTree.num, 1, indexBPTree.fp) != 1
        || fread(&indexBPTree.locate, sizeof indexBPTree.locate, 1, indexBPTree.fp) != 1)
        goto fail;

    indexBPTree.root = calloc(1, sizeof *indexBPTree.root);
    if (indexBPTree.root == NULL)
        goto fail;

    if (readNode(indexBPTree.root, (Pointer)indexBPTree.locate) != 0)
        goto fail;
    return 0;

fail:
    free(indexBPTree.root);
    indexBPTree.root = NULL;
    fclose(indexBPTree.fp);
    indexBPTree.fp = NULL;
    return -1;
}

/*
 * Writes the header (num, locate) back, closes the file and releases the
 * in-memory root. Returns 0 on success, -1 when no file was open or the
 * header could not be written.
 */
int endBPTree(void)
{
    int rc = 0;

    if (indexBPTree.fp == NULL)
    {
        rc = -1;
    }
    else
    {
        if (fseek(indexBPTree.fp, 0, SEEK_SET) != 0
            || fwrite(&indexBPTree.num, sizeof indexBPTree.num, 1, indexBPTree.fp) != 1
            || fwrite(&indexBPTree.locate, sizeof indexBPTree.locate, 1, indexBPTree.fp) != 1)
            rc = -1;
        if (fclose(indexBPTree.fp) != 0)
            rc = -1;
        indexBPTree.fp = NULL;
    }
    free(indexBPTree.root);
    indexBPTree.root = NULL;
    return rc;
}

/* Prints one node (kind, id, next link, key count, keys, children) on stdout. */
int pNode(P_BPNode n)
{
    unsigned int i;
    unsigned int count = (n->n > MAX_KEYS) ? MAX_KEYS : n->n;

    printf("%s id:%d next:%d 个数:%d\n" ,n->leaf?"是叶节点":"不是叶节点", (int)n->id, n->next, (int)n->n);
    for (i = 0; i < count; i++)
        printf(" key[%d]:%d\t", (int)i, n->key[i]);
    puts("");
    for (i = 0; i < count; i++)
        printf("child[%d]:%d\t", (int)i, n->child[i]);
    puts("");
    return 0;
}//pNode

/*
 * Walks from the root towards the leaf that would hold `oldkey` and rewrites
 * every entry equal to `oldkey` on that path to `newkey` (each changed node
 * is written back). The walk stops at a node whose keys are all larger than
 * `oldkey`. Returns 0, or -1 when a node cannot be read.
 */
int replaceKeyInBPTree(P_BPTree tree, KeyType oldkey, KeyType newkey)
{
    BPNode buf;
    P_BPNode cur = tree->root;
    int i;

    for (;;)
    {
        i = (int)cur->n - 1;
        while (i >= 0 && cur->key[i] > oldkey)
            i--;
        if (i < 0)
            break;          /* nothing at or below oldkey in this node */
        if (cur->key[i] == oldkey)
        {
            cur->key[i] = newkey;
            writeNode(cur);
        }
        if (cur->leaf)
            break;
        if (readNode(&buf, cur->child[i]) != 0)
            return -1;
        cur = &buf;
    }
    return 0;
}

/*
 * Prepares `x`, child number `i` of `p`, for a deletion when it holds only
 * T entries: borrow one entry from the left sibling, else from the right
 * sibling, else merge (with the right sibling when there is one, otherwise
 * into the left sibling). After a merge into the left sibling `*x` is
 * replaced by the merged node. All modified nodes are written to disk.
 * Returns 0 on success (also when x needs no adjustment), -1 on bad
 * arguments, a missing sibling or a read error.
 */
int adjustToDel(P_BPNode p, P_BPNode x, unsigned int i)
{
    BPNode left;
    BPNode right;
    int hasLeft = (i > 0);
    int hasRight;
    unsigned int j;

    if (i >= p->n)
        return -1;
    if (x->n > T)
        return 0;
    hasRight = (i + 1 < p->n);

    if (hasLeft)
    {
        if (readNode(&left, p->child[i - 1]) != 0)
            return -1;
        if (left.n > T)
        {
            /* move the largest entry of the left sibling to the front of x */
            for (j = x->n; j > 0; j--)
            {
                x->key[j] = x->key[j - 1];
                x->child[j] = x->child[j - 1];
            }
            x->key[0] = left.key[left.n - 1];
            x->child[0] = left.child[left.n - 1];
            x->n++;
            writeNode(x);

            left.n--;
            writeNode(&left);

            p->key[i] = x->key[0];  /* x has a new minimum */
            writeNode(p);
            return 0;
        }
    }

    if (hasRight)
    {
        if (readNode(&right, p->child[i + 1]) != 0)
            return -1;
        if (right.n > T)
        {
            /* move the smallest entry of the right sibling to the end of x */
            x->key[x->n] = right.key[0];
            x->child[x->n] = right.child[0];
            x->n++;
            writeNode(x);

            for (j = 0; j + 1 < right.n; j++)
            {
                right.key[j] = right.key[j + 1];
                right.child[j] = right.child[j + 1];
            }
            right.n--;
            writeNode(&right);

            p->key[i + 1] = right.key[0];   /* right sibling has a new minimum */
            writeNode(p);
            return 0;
        }

        /* merge the right sibling into x and drop its entry from p */
        if (x->n + right.n > MAX_KEYS)
            return -1;
        for (j = 0; j < right.n; j++)
        {
            x->key[x->n + j] = right.key[j];
            x->child[x->n + j] = right.child[j];
        }
        x->n += right.n;
        x->next = right.next;
        writeNode(x);

        for (j = i + 1; j + 1 < p->n; j++)
        {
            p->key[j] = p->key[j + 1];
            p->child[j] = p->child[j + 1];
        }
        p->n--;
        writeNode(p);
        return 0;
    }

    if (hasLeft)
    {
        /* no right sibling: merge x into the left sibling */
        if (left.n + x->n > MAX_KEYS)
            return -1;
        for (j = 0; j < x->n; j++)
        {
            left.key[left.n + j] = x->key[j];
            left.child[left.n + j] = x->child[j];
        }
        left.n += x->n;
        left.next = x->next;
        writeNode(&left);

        for (j = i; j + 1 < p->n; j++)
        {
            p->key[j] = p->key[j + 1];
            p->child[j] = p->child[j + 1];
        }
        p->n--;
        writeNode(p);
        *x = left;      /* the caller continues in the merged node */
        return 0;
    }

    return -1;          /* x is an only child: nothing to borrow from or merge with */
}

/*
 * Deletes `k` from the subtree rooted at `p`; `k` is expected to exist.
 * Requirements on `p`:
 *   1. a root that is also a leaf: none;
 *   2. a non-leaf root: at least 2 children;
 *   3. any other node: more than T entries.
 * When the smallest key of a leaf is removed, the separators equal to `k`
 * are rewritten through replaceKeyInBPTree() on the global indexBPTree.
 * Returns the key that now sits at the position of `k` in the leaf, or `k`
 * itself when nothing was deleted.
 */
KeyType delKeyInBPNode(P_BPNode p, KeyType k)
{
    unsigned int i;
    unsigned int j;
    BPNode x;

    if (p->n == 0)
        return k;

    i = p->n - 1;
    while (i > 0 && p->key[i] > k)
        i--;

    if (p->leaf)
    {
        if (p->key[i] != k)
            return k;       /* key not present: leave the leaf untouched */
        for (j = i; j + 1 < p->n; j++)
        {
            p->key[j] = p->key[j + 1];
            p->child[j] = p->child[j + 1];
        }
        p->n--;
        writeNode(p);
        if (i == 0 && p->n > 0)     /* the leaf's minimum changed */
            replaceKeyInBPTree(&indexBPTree, k, p->key[0]);
        return p->key[i];
    }

    /* inner node: make sure the child can lose an entry, then descend */
    if (readNode(&x, p->child[i]) != 0)
        return k;
    if (x.n <= T && adjustToDel(p, &x, i) != 0)
        return k;
    return delKeyInBPNode(&x, k);
}//delKeyInNode

/*
 * Deletes key `k` from the tree.
 *   1. A root that is also a leaf may hold any number of keys.
 *   2. A non-leaf root keeps at least two children, every other node at
 *      least T; when the non-leaf root is left with a single child, that
 *      child becomes the new root.
 * The slot of the old root is not reclaimed and `num` is not decreased,
 * because new nodes are numbered num + 1 and must not reuse a live slot.
 * Returns 0 on success, -1 when `k` is not in the tree or the new root
 * cannot be read.
 */
int delKeyInBPTree(P_BPTree tree, KeyType k)
{
    P_BPNode r;

    if (equalSearch(tree, k) < 0)
        return -1;

    r = tree->root;
    delKeyInBPNode(r, k);

    if (!r->leaf && r->n == 1)
    {
        BPNode newRoot;
        Pointer newRootId = r->child[0];

        if (readNode(&newRoot, newRootId) != 0)
            return -1;
        tree->locate = (unsigned int)newRootId;
        *tree->root = newRoot;
    }
    return 0;
}//delKeyInBPTree

/*
 * Demo driver: opens the index "student.sid", prints the root, deletes key
 * 100, then dumps every node slot of the file.
 * The index file must already exist; create it once with
 * createIndexBPTree("student", "sid") and fill it with insertKeyInBPTree().
 */
int main (void)
{
    unsigned int i;
    BPNode t;

    if (initIndexBPTree("student", "sid") != 0)
    {
        fprintf(stderr, "cannot open index file\n");
        return 1;
    }

    printf("个数:%d 根id:%d\n", (int)indexBPTree.num, (int)indexBPTree.locate);
    pNode(indexBPTree.root);

    delKeyInBPTree(&indexBPTree,100);
    printf("%d\n\n", (int)indexBPTree.num);

    pNode(indexBPTree.root);

    /* node ids run from 1 to num inclusive */
    for (i = 1; i <= indexBPTree.num; i++)
    {
        if (readNode(&t, (Pointer)i) == 0)
            pNode(&t);
    }
    endBPTree();
    return 0;
}