1 //  2 //
  3 /****************************************************************
  4     B+树的实现。这个B+树是建立在操作系统的文件系统之上的,并没有自己的文件系统。
  5 B+树的节点全部存储在一个文件中。由于每个节点的大小是相同的,所以我对每个节点
  6 进行编号,即每个节点的id。文件开头先存放两个int(节点个数num和根节点编号locate),因此每个节点在文件中的字节位置可以通过计算2*sizeof(int) + sizeof(BPNode)*(c->id - 1)得到。
  7 所以,每个B+树的节点有一个id属性,就是记录自己的标号。
  8 
  9     同时对B+树建立一个结构体,这个结构体中的root属性,用于指向读入内存后的树的根节点。
 10 locate属性记录树的根节点的在文件中的标号。num属性记录这棵树的节点个数,每次新增一个节点都会加一。
 11 name属性记录用于存储这个B+树的文件名(相对cpp文件所在的文件夹),fp属性用于记录打开这个文件时的文件指针。
 12 
 13     因为这个文件只记录B+树的节点,所以每次插入的时候只需要直接插入最后(只有num个节点,同时新插入的节点的id是num + 1)
 14 暂时不考虑删除节点的文件空间回收。
 15 *****************************************************************/
 16
 17 
 18 
 19 /*******************************************************************************************************
 20 打开文件时要注意打开模式,只有r+模式才可以在用fseek()定位之后随意读或写。
 21 fseek()和fread()和fwrite()是用于二进制打开的文件,如果文本模式打开,可能出现问题,如:覆盖。
 22 *******************************************************************************************************/
 23 #include <stdio.h>
 24 #include <malloc.h>
 25 #include <memory.h>
 26 #include <string.h>
 27 #define T 3            //b+树的度数
 28 #define KeyType int    //数据类型
 29 #define Pointer int
 30 //节点结构体
 31 typedef struct BPNode
 32 {
 33     unsigned int id;//记录这个节点在文件的中的编号
 34     unsigned int n;    //记录这个节点有多少个关键字
 35     int leaf;        //判断是否为页节点
 36     KeyType key[2*T];//该结点的关键字数组(及对应每个孩子节点的中关键字最小的关键字)
 37     Pointer child[2*T];//节点上的子节点指针数组,记录每个孩子在文件的第几个位置
 38     Pointer next;//指针,记录下一个兄弟
 39 }BPNode,*P_BPNode;
 40 
 41 //树的结构体
 42 typedef struct BPTree
 43 {
 44     P_BPNode root;
 45     unsigned int locate;//记录根节点的在文件中的标号,即id
 46     unsigned int num;    //记录更有多少个节点
 47     char name[100];        //用于存储B+树的节点文件的名字
 48     FILE *fp;            //打开写入name文件时,使用
 49     int start;        //最小的数据所在的叶节点
 50 }BPTree,*P_BPTree;
 51 
 52 BPTree indexBPTree;        //全局变量,b+树
 53 
 54 
 55 int writeNode(P_BPNode w)
 56 {
 57     fseek(indexBPTree.fp, sizeof(BPNode)*(w->id - 1) + 2*sizeof(int), SEEK_SET);
 58     fwrite(w, sizeof(BPNode),1,indexBPTree.fp);
 59 
 60     return 0;
 61 }
 62 
 63 int readNode(P_BPNode r, Pointer id)
 64 {
 65     fseek(indexBPTree.fp, (sizeof(BPNode))*(id - 1) + 2*sizeof(int), SEEK_SET);
 66     fread(r, (sizeof(BPNode)),1,indexBPTree.fp);
 67     return 0;
 68 }
 69 int pNode(P_BPNode n);
 70 
 71 int createIndexBPTree (char *tableName, char *attr)    
 72 {//创建B+树,并进行相应的初始化,B+树的结构体是一个全局变量。
 73     P_BPNode root;//创建B+树的根节点
 74     indexBPTree.root = (P_BPNode)malloc(sizeof(BPNode));
 75     indexBPTree.num = 1;//创建时,默认现在B+树仅有一个节点,即根节点
 76     indexBPTree.start = 1;//创建时,默认现在B+树最小叶节点在id=1的根节点上
 77     memcpy(indexBPTree.name, ".\\table\\", sizeof(".\\table\\"));//创建时,默认存储B+树文件的名字
 78     strcat(indexBPTree.name, tableName);
 79     strcat(indexBPTree.name, ".");
 80     strcat(indexBPTree.name, attr);
 81     puts(indexBPTree.name);
 82     root = indexBPTree.root; //创建B+树时,初始化根节点的相关信息
 83     root->n = 0;
 84     root->leaf = 1;
 85     root->next = -1;
 86     root->id = 1;
 87     indexBPTree.locate = 1;
 88 
 89     indexBPTree.fp = fopen(indexBPTree.name, "wb");//将B+树的相关信息写到文件中,根节点信息也落盘
 90     fwrite(&indexBPTree.num, sizeof(int),1,indexBPTree.fp);
 91     fwrite(&indexBPTree.locate, sizeof(int),1,indexBPTree.fp);
 92     writeNode(root);
 93     
 94     
 95     fclose(indexBPTree.fp);
 96     /*
 97     printf("原始:%d\n", indexBPTree.root->next);
 98     memset(indexBPTree.root,0,sizeof(BPNode));
 99     printf("memset后:%d\n", indexBPTree.root->next);
100     indexBPTree.fp = fopen(indexBPTree.name,"r");
101     fread(indexBPTree.root, sizeof(BPNode),1,indexBPTree.fp);
102     fclose(indexBPTree.fp);
103     printf("读取文件后:%d\n", indexBPTree.root->next);
104 
105     */
106     free(root);
107     indexBPTree.root = NULL;
108     return 0;
109 }
110 
111 
112 int splitBPNode (P_BPNode p, P_BPNode c, int i)
113 {//节点的分裂,要求p节点至少还能插入一个节点,c节点是满的,即n为2*T;
114     int j;
115     P_BPNode b;
116     b = (P_BPNode)malloc(sizeof(BPNode));
117     b->leaf = c->leaf;
118     b->n = T;
119     b->id = indexBPTree.num+1;    //为b赋值id号,用于表示该节点,,同时id号就是这个节点在文件的位置
120     b->next = c->next;            //为b的next赋值,即原来的c节点的next
121     //将c节点的后半部分关键字复制给b
122     for (j = 0; j < T; j++)
123     {
124         b->key[j] = c->key[j+T];
125         b->child[j] = c->child[j+T];
126     }
127 
128     //至此b节点的对应元素已经建立好了,但还需要写入文件
129 
130     indexBPTree.num++;
131     c->n = T;    //c节点的关键字数目减半
132     c->next = b->id;
133 
134 
135 
136     //将p节点的i之后的节点后移
137     for (j = p->n - 1; j > i; j--)
138     {
139         p->key[j+1] = p->key[j];
140         p->child[j+1] = p->child[j];
141     }
142     
143     //将b节点插入p中
144     p->key[i+1] = b->key[0];
145     p->child[i+1] = b->id;
146 
147     p->n++;    //p关键字个数加一
148     //写入p
149     writeNode(p);
150     writeNode(c);
151     writeNode(b);
152     free(b);
153     return 0;
154 }//splitBPNode
155 
156 
157 int insertBPNodeNotFull(P_BPNode s, KeyType k, unsigned int id)
158 {//插入,要求s节点不是满的
159 
160     int i = s->n-1;
161 
162     if (s->leaf)
163     {//叶节点,找的合适的位置
164         while (i >= 0 && s->key[i] > k)
165         {
166             s->key[i+1] = s->key[i];
167             s->child[i+1] = s->child[i];
168             i--;
169         }
170 
171         s->key[i+1] = k;
172         s->child[i+1] = id;
173         s->n++;
174         writeNode(s);
175     }
176     else
177     {
178         P_BPNode tmp = (P_BPNode)malloc(sizeof(BPNode));
179         while (i >= 0 && s->key[i] > k)
180         {
181             i--;
182         }
183         if (i < 0)
184         {//插入的元素最小,则把这个元素插入第一个节点,并修改对应的key
185             i++;
186             s->key[i] = k;
187         }
188         writeNode(s);
189         readNode(tmp, s->child[i]);        //读取对应的
190         if (tmp->n == 2*T)
191         {
192             splitBPNode(s, tmp, i);
193             if (k > s->key[i+1])
194                 i++;
195             readNode(tmp, s->child[i]);        //重新读取,,有待优化    
196         }
197         insertBPNodeNotFull(tmp, k, id);
198         free(tmp);
199     }
200     
201     return 0;
202 }
203 
204 Pointer equalSearch(P_BPTree tree, KeyType k)
205 {//等值查询,给出key值,查找对应的id,并返回。如果不存在该节点,返回一个负数
206     int i;
207     int result;
208     P_BPNode r;
209     r = tree->root;
210     if (k < r->key[0])    //比最小的节点小
211         return -1;
212     P_BPNode tmp = (P_BPNode)malloc(sizeof(BPNode));
213     while (1)
214     {
215         i = r->n - 1;
216         while (i >= 0 && r->key[i] > k)
217             i--;
218 
219         if (r->leaf)    //是叶子,结束
220             break;
221         readNode(tmp, r->child[i]);
222         r = tmp;
223     }//while
224 
225     if (r->key[i] < k)
226         return -1;
227     result = r->child[i];
228 
229     free(tmp);
230     tmp = NULL;
231     return result;
232 }//equalSearch
233 
234 int rangeSearch (P_BPTree tree, KeyType low, KeyType high)
235 {//范围查找,key值大于等于low,小于等于high。返回范围内的个数,
236     unsigned int i;
237     P_BPNode r = tree->root;
238     Pointer *result;
239     P_BPNode tmp = (P_BPNode)malloc(sizeof(BPNode));
240 
241     if (high < low)    //low <= high才有能有结果
242         return 0;
243     if (high < r->key[0])
244         return 0;
245 
246     if (low < r->key[0])
247         low = r->key[0];
248     
249     while (1)
250     {
251         i = r->n - 1;
252         while (i >= 0 && r->key[i] > low)
253             i--;
254         if (r->leaf)    //是叶子,结束
255             break;
256         readNode(tmp, r->child[i]);
257         r = tmp;
258     }//while
259     
260     if (r->key[i] < low)
261         i++;
262     unsigned int num=100;
263     result = (Pointer *)malloc(sizeof(Pointer)*num);
264     unsigned int j = 0;
265 
266     while (1)
267     {
268         for (; i < r->n && r->key[i] <= high; i++)
269         {
270             if (j >= num)
271             {
272                 num += 100;
273                 realloc(result, sizeof(Pointer)*num);
274             }
275             result[j++] = r->child[i];
276         //    printf("sid:%d  iid: %d     id:%d\n", r->key[i],r->id, r->child[i]);
277         }
278         if (i <r->n || r->next < 0)
279             break;
280         
281         readNode(tmp, r->next);
282         r = tmp;
283         i = 0;
284     }//while 
285     free(tmp);
286     tmp = NULL;
287     return j;
288 }//rangeSearch
289 
290 int insertKeyInBPTree (P_BPTree tree, KeyType k, Pointer id)
291 {//向树中插入节点
292     P_BPNode r = tree->root;
293 
294     if (equalSearch(tree, k) > 0)
295     {
296         printf("元素已存在!");
297         return -1;
298     }
299 
300     if (tree->root->n == 2*T)
301     {//根节点满了,重新分配根节点,并进行初始化
302         P_BPNode s = (P_BPNode)malloc(sizeof(BPNode));
303         s->leaf = 0;
304         s->n = 1;
305         s->key[0] = r->key[0];
306         s->child[0] = r->id;
307         s->id = tree->num + 1;
308     
309         s->next = -1;
310         //将新的根写入磁盘
311         writeNode(s);
312     
313         tree->num++;
314     
315         writeNode(r);
316 
317         splitBPNode (s, r, 0);
318 
319         //根变为s,所以将新根copy到tree->root指针所指向的内存。(tree->root将一直指向一片开辟了的内存,且时刻保存树根的整个节点)
320         memcpy(tree->root, s, sizeof(BPNode));
321         tree->locate = s->id;
322 
323         insertBPNodeNotFull(s, k, id);
324         free(s);    //释放内存        
325     }
326     else
327         insertBPNodeNotFull(r, k, id);
328     return 0;
329 }//insertBPNode
330 
331 int initIndexBPTree(char *tableName, char *attr)
332 {//初始化BPTree,打开相应文件,fp记录;为root分配内存可以存储一个节点的内存,并读入根节点
333     indexBPTree.root = (P_BPNode)malloc(sizeof(BPNode));
334     indexBPTree.start = 1;
335 
336     memcpy(indexBPTree.name, ".\\table\\", sizeof(".\\table\\"));
337     strcat(indexBPTree.name, tableName);
338     strcat(indexBPTree.name, ".");
339     strcat(indexBPTree.name, attr);
340     indexBPTree.fp = fopen(indexBPTree.name, "rb+");
341     fread(&indexBPTree.num,sizeof(int),1, indexBPTree.fp);
342     fread(&indexBPTree.locate,sizeof(int),1,indexBPTree.fp);
343 
344     readNode(indexBPTree.root, indexBPTree.locate);
345     return 0;
346 }
347 
348 int endBPTree()
349 {//将建立的树结束
350     fseek(indexBPTree.fp, 0, SEEK_SET);
351     fwrite(&indexBPTree.num, sizeof(int),1,indexBPTree.fp);
352     fwrite(&indexBPTree.locate, sizeof(int),1,indexBPTree.fp);
353     free(indexBPTree.root);
354     fclose(indexBPTree.fp);
355     return 0;
356 }
357 
358 int pNode(P_BPNode n)
359 {//输出节点
360     printf("%s  id:%d  next:%d  个数:%d\n" ,n->leaf?"是叶节点":"不是叶节点", n->id, n->next, n->n);
361     for(unsigned int i = 0; i < n->n; i++)
362         printf("  key[%d]:%d\t",i,n->key[i]);
363     puts("");
364     for(i = 0; i < n->n; i++)
365         printf("child[%d]:%d\t",i,n->child[i]);
366     puts("");
367     return 0;
368 }//pNode
369 
370 int replaceKeyInBPTree(P_BPTree tree, KeyType oldkey, KeyType newkey)
371 {//将oldkey替换为newkey
372     P_BPNode r = tree->root;
373     int i;
374     P_BPNode tmp = (P_BPNode)malloc(sizeof(BPNode));
375 
376     while (1)
377     {
378         i = r->n - 1;
379         while (i >= 0 && r->key[i] > oldkey)
380             i--;
381         if (r->key[i] == oldkey)
382         {
383             r->key[i] = newkey;
384             writeNode(r);
385         }
386 
387         if (r->leaf)
388             break;
389         readNode(tmp, r->child[i]);
390         r = tmp;
391     }
392     free(tmp);
393     return 0;
394 }
395 
396 int adjustToDel(P_BPNode p, P_BPNode x, unsigned int i)
397 {//p指向x的父节点,i指的是,x是p的下标
398     unsigned int j;
399     P_BPNode left = NULL;
400     P_BPNode right = NULL;
401     P_BPNode tmp = (P_BPNode)malloc(sizeof(BPNode));
402 
403     if    (i > 0 )    //x有左兄弟
404     {
405         readNode(tmp, p->child[i-1]);
406         left = tmp;
407         if (left->n > T)
408         {
409             for (j = x->n; j > 0; j--)
410             {
411                 x->key[j] = x->key[j-1];
412                 x->child[j] = x->child[j-1];
413             }
414             x->n++;
415             x->key[0] = left->key[left->n-1];
416             x->child[0] = left->child[left->n-1];
417             writeNode(x);
418 
419             left->n--;
420             writeNode(left);
421             
422             p->key[i] = x->key[0];
423             writeNode(p);
424 
425             return 0;
426         }
427     }//if
428 
429     if (i < p->n - 1) //x有又兄弟
430     {
431         readNode(tmp, p->child[i+1]);
432         right = tmp;
433         left = NULL;
434         if (right->n > T)
435         {
436             x->key[x->n] = right->key[0];
437             x->child[x->n] = right->child[0];
438             x->n++;
439             writeNode(x);
440 
441             for (j = 0; j < right->n-1; j++)
442             {
443                 right->key[j] = right->key[j+1];
444                 right->child[j] = right->child[j+1];
445             }
446             right->n--;
447             writeNode(right);
448 
449             p->key[i+1] = right->key[0];
450             writeNode(p);
451             return 0;
452         }
453     }
454 
455     if (left == tmp)
456     {
457         for (j = 0; j < T; j++)
458         {
459             left->key[T+j] = x->key[j];
460             left->child[T+j] = x->child[j];
461         }
462         left->n += T;
463         left->next = x->next;
464         writeNode(left);
465 
466         for (j = i; j < p->n - 1; j++)
467         {
468             p->key[j] = p->key[j+1];
469             p->child[j] = p->child[j+1];
470         }
471         p->n--;
472         writeNode(p);
473         memcpy(x, left, sizeof(BPNode));
474     }
475     else
476     {
477         for (j = 0; j < T; j++)
478         {
479             x->key[T+j] = right->key[j];
480             x->child[T+j] = right->child[j];
481         }
482         x->n += T;
483         x->next = right->next;
484         writeNode(x);
485 
486         for (j = i+1; j < p->n -1; j++)
487         {
488             p->key[j] = p->key[j+1];
489             p->child[j] = p->child[j+1];
490         }
491         p->n--;
492         writeNode(p);
493     }
494     free(tmp);
495     left = right = tmp = NULL;
496     return 0;
497 }
498 //调用这个函数是,参数节点p,必须满足相应的要求:
499 //①如果p是根节点且是叶子节点,则没有要求
500 //②如果p是根节点(非叶),则p节点的子节点个数不小于2(B+树本身就满足这个要求)。
501 //③如果p是非根节点,则节点p的子节点个数必须大于T
502 KeyType delKeyInBPNode(P_BPNode p, KeyType k)
503 {//以这个节点为起点,找到k并删除。要求确保k存在
504     unsigned int i;
505     unsigned int j;
506     i = p->n - 1;
507 
508     while (p->key[i] > k)
509         i--;
510 
511     //是叶节点(如果p本身又是根节点,则这个是情况①)
512     if (p->leaf)
513     {
514         for (j = i; j < p->n-1; j++)
515         {
516             p->key[j] = p->key[j+1];
517             p->child[j] = p->child[j+1];
518         }//while
519         p->n--;
520         writeNode(p);
521         if (i == 0)    //说明删除的关键字是该节点中最小的
522             replaceKeyInBPTree(&indexBPTree, k, p->key[i]);
523         return p->key[i];
524     }//if
525     
526 
527     //p是内节点
528     P_BPNode x;
529 
530     x = (P_BPNode)malloc(sizeof(BPNode));
531     
532     readNode(x, p->child[i]);
533     if (x->n > T)    //x的子节点的个数大于T,则直接调用
534         return delKeyInBPNode(x, k);
535     else            //x的子节点的个数等于T,需要调整
536     {
537         adjustToDel(p, x, i);
538         return delKeyInBPNode(x, k);
539     }//else
540 
541 }//delKeyInNode
542 
543 int delKeyInBPTree(P_BPTree tree, KeyType k)
544 {//1.如果一个根节点同时又是叶节点,则没有子节点限制(这个子节点指向的不再是树的节点)
545  //2.非叶根节点至少保持有两个子节点,其他的节点至少有T个子节点。 
546  
547     if (equalSearch(tree, k) < 0)    //检查是否有k这个关键字
548         return -1;
549 
550     P_BPNode r = tree->root;
551 
552     delKeyInBPNode(r, k);
553     if (r->n == 1)
554     {
555         tree->locate = r->child[0];
556         readNode(tree->root,r->child[0]);    //更换根节点
557         tree->num--;
558         //还应该将原始的根节点从磁盘上删除
559     }
560     r = NULL;
561     return 0;
562 }//delKeyInBPTree
563 
564 int main ()
565 {
566     unsigned int i = 1;
567 //    createIndexBPTree("student","sid");
568     initIndexBPTree("student", "sid");
569 /*
570     insertKeyInBPTree(&indexBPTree, 50, i++);
571     insertKeyInBPTree(&indexBPTree, 30, i++);
572     insertKeyInBPTree(&indexBPTree, 60, i++);
573     insertKeyInBPTree(&indexBPTree, 10, i++);
574     insertKeyInBPTree(&indexBPTree, 90, i++);
575     insertKeyInBPTree(&indexBPTree, 40, i++);
576     insertKeyInBPTree(&indexBPTree, 100, i++);
577 
578 
579 
580     insertKeyInBPTree(&indexBPTree, 110, i++);
581     insertKeyInBPTree(&indexBPTree, 150, i++);
582     insertKeyInBPTree(&indexBPTree, 200, i++);
583     insertKeyInBPTree(&indexBPTree, 0, i++);
584     insertKeyInBPTree(&indexBPTree, 49, i++);
585     insertKeyInBPTree(&indexBPTree, 45, i++);
586     insertKeyInBPTree(&indexBPTree, -1, i++);
587     insertKeyInBPTree(&indexBPTree, 210, i++);
588     insertKeyInBPTree(&indexBPTree, 220, i++);
589     insertKeyInBPTree(&indexBPTree, 230, i++);
590     insertKeyInBPTree(&indexBPTree, 240, i++);
591     insertKeyInBPTree(&indexBPTree, 250, i++);
592     insertKeyInBPTree(&indexBPTree, 260, i++);
593     insertKeyInBPTree(&indexBPTree, 270, i++);
594     insertKeyInBPTree(&indexBPTree, 280, i++);
595     insertKeyInBPTree(&indexBPTree, 290, i++);
596 
597 
598     insertKeyInBPTree(&indexBPTree, 300, i++);
599     insertKeyInBPTree(&indexBPTree, 310, i++);
600     insertKeyInBPTree(&indexBPTree, 320, i++);
601     insertKeyInBPTree(&indexBPTree, 330, i++);
602     insertKeyInBPTree(&indexBPTree, 340, i++);
603     insertKeyInBPTree(&indexBPTree, 350, i++);
604     insertKeyInBPTree(&indexBPTree, 360, i++);
605     insertKeyInBPTree(&indexBPTree, -360, i++);
606 */
607     /*
608     KeyType k;
609     k = 340;
610 
611     int s = equalSearch(&indexBPTree, k);
612     if (s > 0)
613         printf("id:%d\n",s);
614     else
615         printf("不存在元素:%d\n",k); 
616     */
617 
618 //    rangeSearch(&indexBPTree, 500, 500);
619 /*
620     insertKeyInBPTree(&indexBPTree, 50, i++);
621     insertKeyInBPTree(&indexBPTree, 55, i++);
622     insertKeyInBPTree(&indexBPTree, 60, i++);
623     insertKeyInBPTree(&indexBPTree, 100, i++);
624     insertKeyInBPTree(&indexBPTree, 110, i++);
625     insertKeyInBPTree(&indexBPTree, 115, i++);
626     insertKeyInBPTree(&indexBPTree, 200, i++);
627 
628     insertKeyInBPTree(&indexBPTree, 210, i++);
629     insertKeyInBPTree(&indexBPTree, 215, i++);
630     insertKeyInBPTree(&indexBPTree, 220, i++);
631 */
632     
633     P_BPNode t = (P_BPNode)malloc(sizeof(BPNode));
634 
635     printf("个数:%d  根id:%d\n", indexBPTree.num, indexBPTree.locate);
636     pNode(indexBPTree.root);
637     
638     
639     delKeyInBPTree(&indexBPTree,100);
640     printf("%d\n\n",indexBPTree.num);
641     
642     pNode(indexBPTree.root);
643     
644     for (i = 1; i < indexBPTree.num; i++)
645     {
646         readNode(t,i); 
647         pNode(t);
648     }
649     endBPTree();
650     return 0;
651 }