B+Tree原理及实现

一, B+Tree的定义:

  • 分为叶子节点, 分支节点 (非叶子节点) 和根节点
  • 分支节点只储存索引值和节点指针, 指针是指向具体节点的内存地址; 叶子节点存有索引值和数据; 根节点和分支节点类似, 不同的是每次检索数据都是从根节点开始
  • 叶子节点之间会构成一个单向链表

二, B+Tree的结构图:

B+Tree结构图

三, B+Tree的应用场景:

一般应用在MySQL的InnoDB存储引擎中, 其优点是检索效率高。相比二叉树: 普通二叉查找树在顺序插入时会退化成一个链表, 检索性能降低; 即使是平衡二叉树, 每个节点也只存一个索引值, 在数据量多的情况下, 层级较深, 检索速度慢。使用B+Tree则可以避免上述问题, 因为其每个节点可以存放多个索引值(有上限), 树的层级更少; 比如设置最大度数为3阶, 意味着每个节点最多有两个索引值和三个指针

四, B+Tree的代码实现:

代码分析:

索引类:

  • data : 存储的具体数据( 只有叶子节点才会有值)
  • indexVal : 索引值
  • left : 当前索引左边的节点指针
  • right : 当前索引右边的节点指针

节点类:

  • id : 节点的唯一标识, 充当节点指针(代码中以该ID为key在节点池中查找节点, 并非真实的内存地址)
  • parent : 父节点的指针
  • isLeaf : 是否是叶子节点
  • indexNum : 当前节点中的索引数量, 该值大于等于树的阶(order)时节点分裂
  • indexMap : 索引对象数组
  • next : 下一个兄弟节点的ID值(该属性仅针对叶子节点)

使用PHP代码实现B+Tree结构:

代码参考网上的文章, 但是跟实际结构有偏差, 所以自己做了优化调整, 支持多阶指针, 符合B+Tree官方结构图(https://www.cs.usfca.edu/~galles/visualization/BPlusTree.html)

<?php
/**
 * Class Data
 *
 * A single record: the raw field array plus the name of the field whose
 * value is used as the record's index value.
 */
class Data
{
    /**
     * @var array
     * Raw record fields, keyed by field name.
     */
    public $data = [];

    /**
     * @var string
     * Name of the field in $data that holds the index value.
     */
    private $indexKey;

    /**
     * @param array  $data     Record fields.
     * @param string $indexKey Field name to read the index value from.
     */
    public function __construct(array $data, $indexKey = 'index')
    {
        $this->data = $data;
        $this->indexKey = $indexKey;
    }

    /**
     * @return mixed The value of the index field, or 0 when the field is
     *               missing (or null) in the record.
     */
    public function getIndexVal()
    {
        return isset($this->data[$this->indexKey]) ? $this->data[$this->indexKey] : 0;
    }

    /**
     * JSON representation of the record.
     *
     * json_encode() returns false when the data cannot be encoded (for
     * example malformed UTF-8), and __toString() must always return a string.
     * Unencodable values are therefore substituted instead of failing, and an
     * empty string is the last-resort fallback.
     *
     * @return string
     */
    public function __toString()
    {
        $json = json_encode($this->data, JSON_PARTIAL_OUTPUT_ON_ERROR);

        return $json === false ? '' : $json;
    }
}

/**
 * Class Index
 *
 * One entry of a tree node: an index value, an optional payload, and the
 * IDs of the nodes found to the left and to the right of this entry.
 */
class Index
{
    /**
     * @var mixed
     * The index value this entry is ordered by.
     */
    private $indexVal;

    /**
     * @var mixed
     * Payload attached to this entry. BPlusTree::insert() passes the record's
     * raw data array for leaf entries; entries created for non-leaf nodes keep
     * the default null.
     */
    private $data;

    /**
     * @var mixed
     * ID of the BTNode on the left side of this entry (0 by default).
     */
    private $left;

    /**
     * @var mixed
     * ID of the BTNode on the right side of this entry (0 by default).
     */
    private $right;

    /**
     * @param mixed $indexVal Index value of the entry.
     * @param mixed $left     ID of the left-hand node.
     * @param mixed $right    ID of the right-hand node.
     * @param mixed $data     Payload stored with the entry.
     */
    public function __construct($indexVal = 0, $left = 0, $right = 0, $data = null)
    {
        $this->data = $data;
        $this->right = $right;
        $this->left = $left;
        $this->indexVal = $indexVal;
    }

    /**
     * @return mixed The index value of this entry.
     */
    public function getIndexVal()
    {
        return $this->indexVal;
    }

    /**
     * @return mixed The payload given at construction time (null if none).
     */
    public function getData()
    {
        return $this->data;
    }

    /**
     * @return mixed ID of the left-hand node.
     */
    public function getLeft()
    {
        return $this->left;
    }

    /**
     * Re-point the left side of this entry.
     *
     * @param mixed $pointer ID of the new left-hand node.
     */
    public function updateLeft($pointer)
    {
        $this->left = $pointer;
    }

    /**
     * @return mixed ID of the right-hand node.
     */
    public function getRight()
    {
        return $this->right;
    }

    /**
     * Re-point the right side of this entry.
     *
     * @param mixed $pointer ID of the new right-hand node.
     */
    public function updateRight($pointer)
    {
        $this->right = $pointer;
    }
}

/**
 * Class BTNode
 *
 * One node of the B+ tree: an ordered list of Index entries plus the IDs
 * that link the node to its parent and, for leaves, to the next leaf.
 */
class BTNode
{
    /**
     * @var int
     * The node ID handed out most recently. IDs come from this per-process
     * counter rather than uniqid(), which does not guarantee unique values.
     * The first ID is 1, so 0 stays free as the "no node" value.
     */
    private static $lastId = 0;

    /**
     * @var int
     * Unique ID of this node.
     */
    private $id = 0;

    /**
     * @var int
     * ID of the parent node; 0 when the node has no parent.
     */
    public $parent = 0;

    /**
     * @var bool
     * Whether this node is a leaf.
     */
    public $isLeaf = false;

    /**
     * @var int
     * Number of index entries currently held by this node. isFull() compares
     * it with the tree order to decide whether the node has to be split.
     */
    public $indexNum = 0;

    /**
     * @var array
     * Index entries of this node, kept sorted by ascending index value.
     */
    private $indexMap = [];

    /**
     * @var int
     * ID of the next sibling node (only meaningful for leaves); 0 when unset.
     */
    public $next = 0;

    /**
     * @param bool $isLeaf Whether the node is a leaf.
     * @param int  $parent ID of the parent node, 0 for none.
     */
    public function __construct($isLeaf = false, $parent = 0)
    {
        $this->init($isLeaf, $parent);
    }

    /**
     * Assign a fresh ID and reset the node to an empty state.
     *
     * @param bool $isLeaf
     * @param int  $parent
     */
    protected function init($isLeaf, $parent)
    {
        $this->id = ++self::$lastId;
        $this->isLeaf = $isLeaf;
        $this->parent = $parent;
        $this->indexMap = [];
        $this->indexNum = 0;
    }

    /**
     * @return int The unique ID of this node.
     */
    public function getID()
    {
        return $this->id;
    }

    /**
     * Add an index entry and re-sort the entries by ascending index value.
     *
     * @param Index $index
     */
    public function addIndex(Index $index)
    {
        $this->indexMap[] = $index;
        // usort() also re-numbers the keys 0..n-1, which the position-based
        // iterator and deleteMap() rely on.
        usort($this->indexMap, function (Index $a, Index $b) {
            if ($a->getIndexVal() == $b->getIndexVal()) {
                return 0;
            }
            return $a->getIndexVal() > $b->getIndexVal() ? 1 : -1;
        });
        $this->indexNum = count($this->indexMap);
    }

    /**
     * Create an iterator over a snapshot of this node's entries. The snapshot
     * array is a copy, but the Index objects in it are the node's own.
     *
     * @return IndexMapIterator
     */
    public function generateIndexMapIterator()
    {
        return new IndexMapIterator($this->indexMap);
    }

    /**
     * Whether the node has reached the given order and must be split.
     *
     * @param int $order Order of the tree.
     * @return bool True when the entry count is greater than or equal to $order.
     */
    public function isFull($order)
    {
        return $this->indexNum >= $order;
    }

    /**
     * Drop every entry from position $start (inclusive) to the end.
     *
     * A negative $start is treated as 0, and a $start past the last entry is
     * a no-op, so the entry count can never go out of sync with the list.
     *
     * @param int $start Position of the first entry to remove.
     */
    public function deleteMap($start)
    {
        $start = max(0, (int) $start);
        if ($start < count($this->indexMap)) {
            $this->indexMap = array_slice($this->indexMap, 0, $start);
        }

        $this->indexNum = count($this->indexMap);
    }

    /**
     * @param int $id ID of the new parent node.
     */
    public function updateParent($id)
    {
        $this->parent = $id;
    }

    /**
     * @param int $id ID of the next sibling node.
     */
    public function setNext($id)
    {
        $this->next = $id;
    }
}

/**
 * Class BPlusTree
 *
 * B+ tree keyed by the index value of the inserted records. Nodes live in a
 * pool keyed by node ID, and all links between nodes (children, parent, next
 * leaf) are stored as IDs into that pool. Payloads are kept only in leaf
 * entries; leaves are chained through BTNode::$next for range scans.
 */
class BPlusTree
{
    /**
     * @var int
     * ID of the root node; 0 while the tree is empty.
     */
    public $root = 0;

    /**
     * @var array
     * Node pool: node ID => BTNode.
     */
    private $nodeMap = [];

    /**
     * @var int
     * Order of the tree. A node is split as soon as it holds this many entries.
     */
    private $order;

    /**
     * @param int $order Order of the tree, at least 3.
     * @throws \InvalidArgumentException When $order is below 3. Splitting an
     *         internal node of a smaller order would leave the new sibling
     *         without any entry and break later lookups.
     */
    public function __construct($order = 3)
    {
        if ((int) $order < 3) {
            throw new \InvalidArgumentException('B+ tree order must be an integer >= 3');
        }
        $this->order = $order;
    }

    /**
     * Insert a record under its index value.
     *
     * @param Data $record
     * @return bool True when the record was stored, false when an entry with
     *              the same index value already exists (the tree is unchanged).
     */
    public function insert(Data $record)
    {
        $indexVal = $record->getIndexVal();

        if ($this->isEmpty()) {
            // Empty tree: the first node is both the root and a leaf.
            $node = new BTNode(true, 0);
            $node->addIndex(new Index($indexVal, 0, 0, $record->data));
            $this->storeNode($node);
            $this->root = $node->getID();

            return true;
        }

        $leaf = $this->locateLeaf($indexVal);
        if ($this->findIndexInNode($leaf, $indexVal) !== null) {
            // Every separator in an internal node is also present in a leaf,
            // so checking the target leaf is enough to detect duplicates.
            return false;
        }

        // Only leaf entries carry the payload.
        $leaf->addIndex(new Index($indexVal, 0, 0, $record->data));

        if ($leaf->isFull($this->order)) {
            $this->split($leaf);
        }

        return true;
    }

    /**
     * Split a full node in two and register the split in its parent,
     * splitting the parent in turn when it becomes full.
     *
     * For a leaf, the middle entry and everything after it move to the new
     * node, and the middle value is copied into the parent. For an internal
     * node only the entries after the middle move; the middle value itself is
     * promoted to the parent and removed from this level.
     *
     * @param BTNode $node The node to split.
     */
    protected function split(BTNode $node)
    {
        $middle = intval($node->indexNum / 2);
        $middleIndexValue = 0;

        // A node without a resolvable parent is the root: grow the tree by one
        // level and make the new node the root.
        $parent = $this->getNodeByID($node->parent);
        if ($parent === null) {
            $parent = new BTNode(false, 0);
            $this->storeNode($parent);
            $this->root = $parent->getID();
        }
        $pid = $parent->getID();

        $newNode = new BTNode($node->isLeaf, $pid);
        $this->storeNode($newNode);

        $indexMapIterator = $node->generateIndexMapIterator();
        while ($indexMapIterator->valid()) {
            $indexObj = $indexMapIterator->current();
            $position = $indexMapIterator->key();

            if ($position === $middle) {
                $middleIndexValue = $indexObj->getIndexVal();
            }

            $moves = $node->isLeaf ? $position >= $middle : $position > $middle;
            if ($moves) {
                $newNode->addIndex(new Index(
                    $indexObj->getIndexVal(),
                    $indexObj->getLeft(),
                    $indexObj->getRight(),
                    $indexObj->getData()
                ));
                if (!$node->isLeaf) {
                    // Children referenced by a moved entry now belong to the new node.
                    $this->reparent($indexObj->getLeft(), $newNode->getID());
                    $this->reparent($indexObj->getRight(), $newNode->getID());
                }
            }

            $indexMapIterator->next();
        }

        // When the root was split, the original node gets the new root as parent.
        $node->updateParent($pid);

        // Everything from the middle on was moved or promoted; remove it here.
        $node->deleteMap($middle);

        // Keep the leaf chain intact: node -> newNode -> (old next of node).
        if ($node->isLeaf) {
            $newNode->setNext($node->next);
            $node->setNext($newNode->getID());
        }

        // The promoted value separates the original node (left) from the new one (right).
        $parent->addIndex(new Index($middleIndexValue, $node->getID(), $newNode->getID()));

        // Neighbouring separators share a child: each entry's left child must
        // be the right child of the entry before it.
        $previous = null;
        $parentIndexMapIterator = $parent->generateIndexMapIterator();
        while ($parentIndexMapIterator->valid()) {
            $current = $parentIndexMapIterator->current();
            if ($previous !== null) {
                $current->updateLeft($previous->getRight());
            }
            $previous = $current;
            $parentIndexMapIterator->next();
        }

        if ($parent->isFull($this->order)) {
            $this->split($parent);
        }
    }

    /**
     * Look up a single index value.
     *
     * @param mixed $indexVal
     * @return mixed The payload stored with the matching leaf entry, or the
     *               message string 'record [<indexVal>] is not exists!' when
     *               there is no such entry.
     */
    public function find($indexVal)
    {
        $leaf = $this->locateLeaf($indexVal);
        if ($leaf !== null && $leaf->isLeaf) {
            $hit = $this->findIndexInNode($leaf, $indexVal);
            if ($hit !== null) {
                return $hit->getData();
            }
        }

        return 'record ['.$indexVal. '] is not exists!';
    }

    /**
     * Range query, inclusive on both ends.
     *
     * @param mixed $start Lowest index value to return.
     * @param mixed $end   Highest index value to return.
     * @return array Payloads of all entries with $start <= index value <= $end,
     *               in ascending index order.
     */
    public function rangeFind($start, $end)
    {
        $resultData = [];

        // Start at the leaf that would hold $start, then follow the leaf chain.
        $leaf = $this->locateLeaf($start);
        while ($leaf !== null) {
            $indexMapIterator = $leaf->generateIndexMapIterator();
            while ($indexMapIterator->valid()) {
                $indexObj = $indexMapIterator->current();
                if ($indexObj->getIndexVal() > $end) {
                    // Entries are sorted, so nothing further can match.
                    break 2;
                }
                if ($indexObj->getIndexVal() >= $start) {
                    $resultData[] = $indexObj->getData();
                }
                $indexMapIterator->next();
            }
            $leaf = $this->getNodeByID($leaf->next);
        }

        return $resultData;
    }

    /**
     * @return bool True while no record has been inserted.
     */
    public function isEmpty()
    {
        return $this->root === 0;
    }

    /**
     * Put a node into the pool under its own ID.
     *
     * @param BTNode $node
     */
    private function storeNode(BTNode $node)
    {
        $this->nodeMap[$node->getID()] = $node;
    }

    /**
     * @param mixed $id
     * @return BTNode|null The node with that ID, or null when there is none.
     */
    public function getNodeByID($id)
    {
        return isset($this->nodeMap[$id]) ? $this->nodeMap[$id] : null;
    }

    /**
     * Print the nodes of the pool with print_r().
     *
     * @param bool $onlyLeafNode When true, only leaf nodes are printed;
     *                           otherwise every node is printed.
     */
    public function dumpNodeMap($onlyLeafNode = false)
    {
        foreach ($this->nodeMap as $eachNode) {
            if (!$onlyLeafNode || $eachNode->isLeaf) {
                print_r($eachNode);
            }
        }
    }

    /**
     * Descend from the root to the node that holds, or would hold, $indexVal.
     *
     * At every internal node the search takes the left child of the first
     * separator greater than $indexVal, or the right child of the last
     * separator when there is none (a value equal to a separator goes right).
     *
     * @param mixed $indexVal
     * @return BTNode|null The reached node; null only when the tree is empty.
     */
    private function locateLeaf($indexVal)
    {
        $node = $this->getNodeByID($this->root);

        while ($node !== null && !$node->isLeaf) {
            $childId = 0;
            $indexMapIterator = $node->generateIndexMapIterator();
            while ($indexMapIterator->valid()) {
                $separator = $indexMapIterator->current();
                if ($indexVal >= $separator->getIndexVal()) {
                    $childId = $separator->getRight();
                    $indexMapIterator->next();
                } else {
                    $childId = $separator->getLeft();
                    break;
                }
            }

            $child = $this->getNodeByID($childId);
            if ($child === null) {
                // No child to follow: stop at the deepest node that exists.
                break;
            }
            $node = $child;
        }

        return $node;
    }

    /**
     * Find the entry of $node whose index value equals $indexVal.
     *
     * @param BTNode $node
     * @param mixed  $indexVal
     * @return Index|null The matching entry, or null when the node has none.
     */
    private function findIndexInNode(BTNode $node, $indexVal)
    {
        $indexMapIterator = $node->generateIndexMapIterator();
        while ($indexMapIterator->valid()) {
            $indexObj = $indexMapIterator->current();
            if ($indexVal > $indexObj->getIndexVal()) {
                $indexMapIterator->next();
            } elseif ($indexVal == $indexObj->getIndexVal()) {
                return $indexObj;
            } else {
                // Entries are sorted; a larger value means there is no match.
                break;
            }
        }

        return null;
    }

    /**
     * Set the parent of the node with ID $childId, if that node exists.
     *
     * @param mixed $childId
     * @param mixed $parentId
     */
    private function reparent($childId, $parentId)
    {
        $child = $this->getNodeByID($childId);
        if ($child !== null) {
            $child->updateParent($parentId);
        }
    }
}

/**
 * Class IndexMapIterator
 *
 * Position-based cursor over a BTNode's list of index entries. Unlike a
 * forward-only iterator it can also step backwards with prev().
 */
class IndexMapIterator
{
    /** @var array Entries being iterated, addressed by integer position. */
    private $indexMap = [];

    /** @var int Current cursor position. */
    private $position = 0;

    /**
     * @param array $indexMap Entries to iterate over; the cursor starts at 0.
     */
    public function __construct($indexMap = [])
    {
        $this->indexMap = $indexMap;
        $this->rewind();
    }

    /**
     * Move the cursor back to the first position.
     */
    public function rewind()
    {
        $this->position = 0;
    }

    /**
     * @return bool Whether an entry is set at the cursor position.
     */
    public function valid()
    {
        $slot = $this->position;

        return isset($this->indexMap[$slot]);
    }

    /**
     * @return int The current cursor position.
     */
    public function key()
    {
        return $this->position;
    }

    /**
     * @return Index The entry at the cursor position. Call valid() first: the
     *               position is not checked here.
     */
    public function current()
    {
        $slot = $this->position;

        return $this->indexMap[$slot];
    }

    /**
     * Advance the cursor by one position.
     */
    public function next()
    {
        $this->position += 1;
    }

    /**
     * Move the cursor back by one position.
     */
    public function prev()
    {
        $this->position -= 1;
    }
}

// Demo data: [id, age] pairs expanded into full records; 'id' is the index key.
$dataList = array_map(
    function (array $pair) {
        return ['id' => $pair[0], 'name' => 'name_' . $pair[0], 'age' => $pair[1]];
    },
    [
        [10, 28],
        [20, 23],
        [30, 25],
        [40, 24],
        [5, 18],
        [15, 22],
        [50, 21],
        [35, 24],
        [60, 23],
        [45, 23],
        [22, 19],
    ]
);

// Build a tree of order 5 from the demo records.
$btree = new BPlusTree(5);
foreach ($dataList as $row) {
    $btree->insert(new Data($row, 'id'));
}

// Print the whole tree, then every record whose id lies in [40, 70].
print_r($btree);
var_dump($btree->rangeFind(40, 70));
exit;

posted @ 2022-05-11 17:32  吹_神  阅读(1060)  评论(1)    收藏  举报