B+Tree原理及实现
一, B+Tree的定义:
- 分为叶子节点, 分支节点 (非叶子节点) 和根节点
- 分支节点只储存索引值和节点指针, 指针是指向具体节点的内存地址; 叶子节点存有索引值和数据; 根节点和分支节点类似, 不同的是每次检索数据都是从根节点开始
- 叶子节点之间会构成一个单向链表
二, B+Tree的结构图:


三, B+Tree的应用场景:
一般应用在 MySQL 的存储引擎 InnoDB 中, 其优点是检索数据效率高; 相比二叉树: 普通的二叉查找树在顺序插入时会退化成一个链表, 检索性能降低; 即使是二叉平衡树, 每个节点也只能存放一个索引值, 在数据量多的情况下, 层级较深, 检索速度慢; 使用B+Tree则可以避免上述问题, 因为其每个节点可以存放多个索引值, 并且有上限, 比如设置最大度数为3阶, 意味着每个节点最多两个索引值和三个指针
四, B+Tree的代码实现:
代码分析:
索引类:
- data : 存储的具体数据( 只有叶子节点才会有值)
- indexVal : 索引值
- left : 当前索引左边的节点指针
- right : 当前索引右边的节点指针
节点类:
- id : 节点指针(节点的内存地址)
- parent : 父节点的指针
- isLeaf : 是否是叶子节点
- indexNum : 当前节点已存放的索引数量, 索引数大于等于树的阶时该节点分裂
- indexMap : 索引对象数组
- next : 下一个兄弟节点的ID值(该属性仅针对叶子节点)
使用PHP代码实现B+Tree结构:
代码参考网上的文章, 但是跟实际结构有偏差, 所以自己做了优化调整, 支持多阶指针, 符合B+Tree官方结构图(https://www.cs.usfca.edu/~galles/visualization/BPlusTree.html)
<?php
/**
 * Class Data
 *
 * One record to be stored in the tree: an associative array of fields plus
 * the name of the field whose value serves as the record's index value.
 */
class Data
{
    /**
     * @var array Raw fields of the record.
     */
    public $data = [];

    /**
     * @var string Name of the field in $data that holds the index value.
     */
    private $indexKey;

    /**
     * @param array  $data     Fields of the record.
     * @param string $indexKey Field name used to look up the index value.
     */
    public function __construct(array $data, $indexKey = 'index')
    {
        $this->indexKey = $indexKey;
        $this->data = $data;
    }

    /**
     * Returns the value of the index field, or 0 when the field is absent
     * (or set to null) in the record.
     *
     * @return mixed
     */
    public function getIndexVal()
    {
        $field = $this->indexKey;
        if (!isset($this->data[$field])) {
            return 0;
        }

        return $this->data[$field];
    }

    /**
     * Renders the record as a JSON string.
     */
    public function __toString()
    {
        $encoded = json_encode($this->data);

        return $encoded;
    }
}
/**
 * Class Index
 *
 * One index entry inside a tree node: an index value, the IDs of the child
 * nodes on either side of it, and (for leaf entries) the stored payload.
 */
class Index
{
    /**
     * @var mixed Index value (the key this entry is ordered by).
     */
    private $indexVal;

    /**
     * @var mixed Payload attached to this entry; null unless one was passed
     *            to the constructor.
     */
    private $data;

    /**
     * @var mixed ID of the node to the left of this entry (0 = none).
     */
    private $left;

    /**
     * @var mixed ID of the node to the right of this entry (0 = none).
     */
    private $right;

    /**
     * @param mixed $indexVal Index value.
     * @param mixed $left     ID of the left child node.
     * @param mixed $right    ID of the right child node.
     * @param mixed $data     Payload for this entry.
     */
    public function __construct($indexVal = 0, $left = 0, $right = 0, $data = null)
    {
        $this->data = $data;
        $this->right = $right;
        $this->left = $left;
        $this->indexVal = $indexVal;
    }

    /**
     * Re-points the left child of this entry.
     */
    public function updateLeft($pointer)
    {
        $this->left = $pointer;
    }

    /**
     * Re-points the right child of this entry.
     */
    public function updateRight($pointer)
    {
        $this->right = $pointer;
    }

    /**
     * @return mixed The payload given at construction time.
     */
    public function getData()
    {
        return $this->data;
    }

    /**
     * @return mixed ID of the right child node.
     */
    public function getRight()
    {
        return $this->right;
    }

    /**
     * @return mixed ID of the left child node.
     */
    public function getLeft()
    {
        return $this->left;
    }

    /**
     * @return mixed The index value of this entry.
     */
    public function getIndexVal()
    {
        return $this->indexVal;
    }
}
/**
 * Class BTNode
 *
 * A single B+ tree node. It keeps its Index entries sorted ascending by
 * index value and refers to other nodes (parent, next sibling) by ID.
 */
class BTNode
{
    /**
     * @var int Unique, non-zero ID of this node.
     */
    private $id = 0;

    /**
     * @var int ID of the parent node (0 = no parent).
     */
    public $parent = 0;

    /**
     * @var bool Whether this node is a leaf.
     */
    public $isLeaf = false;

    /**
     * @var int Number of Index entries currently held by this node.
     *          isFull() compares it against the tree order.
     */
    public $indexNum = 0;

    /**
     * @var array List of Index entries, sorted ascending by index value.
     */
    private $indexMap = [];

    /**
     * @var int ID of the next sibling node (0 = none).
     */
    public $next = 0;

    /**
     * @var int Last ID handed out. IDs start at 1 so that 0 can keep its
     *          meaning of "no node" in parent/next/child pointers.
     */
    private static $lastId = 0;

    /**
     * @param bool $isLeaf Whether the node is a leaf.
     * @param int  $parent ID of the parent node (0 = none).
     */
    public function __construct($isLeaf = false, $parent = 0)
    {
        $this->init($isLeaf, $parent);
    }

    /**
     * Assigns a fresh ID and resets the node to an empty state.
     *
     * The ID comes from a per-process counter instead of uniqid(): uniqid()
     * does not guarantee unique values, and a collision would make one node
     * silently overwrite another in an ID-keyed node pool.
     */
    protected function init($isLeaf, $parent)
    {
        $this->id = ++self::$lastId;
        $this->isLeaf = $isLeaf;
        $this->parent = $parent;
        $this->indexMap = [];
    }

    /**
     * @return int The unique ID of this node.
     */
    public function getID()
    {
        return $this->id;
    }

    /**
     * Adds an Index entry and re-sorts the entries ascending by index value.
     *
     * @param Index $index
     */
    public function addIndex(Index $index)
    {
        $this->indexMap[] = $index;
        usort($this->indexMap, function (Index $a, Index $b) {
            if ($a->getIndexVal() == $b->getIndexVal()) {
                return 0;
            }
            return $a->getIndexVal() > $b->getIndexVal() ? 1 : -1;
        });
        $this->indexNum = count($this->indexMap);
    }

    /**
     * Creates an iterator over a snapshot of the current Index entries.
     *
     * @return IndexMapIterator
     */
    public function generateIndexMapIterator()
    {
        return new IndexMapIterator($this->indexMap);
    }

    /**
     * Tells whether the node has reached the given order, i.e. holds at
     * least $order Index entries.
     *
     * @param int $order
     * @return bool
     */
    public function isFull($order)
    {
        return $this->indexNum >= $order;
    }

    /**
     * Removes every Index entry from position $start (0-based) to the end.
     *
     * @param int $start First position to remove; negative values are
     *                   treated as 0 so the entry count can never go negative.
     */
    public function deleteMap($start)
    {
        $keep = max(0, (int) $start);
        $this->indexMap = array_slice($this->indexMap, 0, $keep);
        $this->indexNum = count($this->indexMap);
    }

    /**
     * Sets the ID of the parent node.
     */
    public function updateParent($id)
    {
        $this->parent = $id;
    }

    /**
     * Sets the ID of the next sibling node.
     */
    public function setNext($id)
    {
        $this->next = $id;
    }
}
/**
 * Class BPlusTree
 *
 * In-memory B+ tree. Nodes live in a pool keyed by node ID and refer to each
 * other by ID; the ID 0 means "no node". Leaf nodes carry the stored payloads
 * and are chained through their `next` pointer for range scans.
 */
class BPlusTree
{
    /**
     * @var mixed ID of the root node; 0 while the tree is empty.
     */
    public $root = 0;

    /**
     * @var array Node pool: node ID => BTNode.
     */
    private $nodeMap = [];

    /**
     * @var int Order of the tree: a node holding this many indexes is split.
     */
    private $order;

    /**
     * @param int $order Order of the tree, at least 3.
     *
     * @throws \InvalidArgumentException When $order is below 3. With a smaller
     *         order, splitting an internal node leaves the new sibling without
     *         any index and drops a child subtree, so later lookups fail.
     */
    public function __construct($order = 3)
    {
        if ($order < 3) {
            throw new \InvalidArgumentException(
                'B+ tree order must be at least 3, got ' . var_export($order, true)
            );
        }
        $this->order = $order;
    }

    /**
     * Inserts a record, keyed by the record's index value.
     *
     * @param Data $record
     * @return bool true when the record was stored, false when its index
     *              value is already present in the tree (nothing is changed).
     */
    public function insert(Data $record)
    {
        $indexVal = $record->getIndexVal();

        if ($this->isEmpty()) {
            // Empty tree: the first node is both the root and a leaf.
            $node = new BTNode(true, 0);
            $node->addIndex(new Index($indexVal, 0, 0, $record->data));
            $this->storeNode($node);
            $this->root = $node->getID();
            return true;
        }

        // Walk down from the root to the leaf that should receive the value.
        $node = $this->getNodeByID($this->root);
        $leaf = $node;
        while ($node !== null) {
            $leaf = $node;
            list($childId, $match) = $this->scanNode($node, $indexVal);
            if ($match !== null) {
                // The index value already exists in the tree.
                return false;
            }
            $node = $this->getNodeByID($childId);
        }

        // Only leaf entries carry the payload.
        $leaf->addIndex(new Index($indexVal, 0, 0, $record->data));
        if ($leaf->isFull($this->order)) {
            $this->split($leaf);
        }

        return true;
    }

    /**
     * Scans one node for $indexVal.
     *
     * @param BTNode $node
     * @param mixed  $indexVal
     * @return array Two elements: the ID of the child to descend into (the
     *               left pointer of the first entry greater than $indexVal,
     *               otherwise the right pointer of the last entry; 0 when
     *               there is none), and the entry whose index value equals
     *               $indexVal, or null.
     */
    private function scanNode(BTNode $node, $indexVal)
    {
        $match = null;
        $last = null;
        $iterator = $node->generateIndexMapIterator();

        while ($iterator->valid()) {
            $entry = $iterator->current();
            $key = $entry->getIndexVal();
            $isGreater = $indexVal > $key;
            $isEqual = !$isGreater && $indexVal == $key;

            if (!$isGreater && !$isEqual) {
                // First entry beyond the searched value: go to its left child.
                return [$entry->getLeft(), $match];
            }
            if ($isEqual) {
                $match = $entry;
            }
            $last = $entry;
            $iterator->next();
        }

        // Ran off the end: go to the right child of the last entry.
        $childId = $last === null ? 0 : $last->getRight();

        return [$childId, $match];
    }

    /**
     * Splits a full node in two and registers the split in the parent,
     * creating a new root when the split node was the root. Splits the
     * parent in turn if it becomes full.
     *
     * @param BTNode $node
     */
    protected function split(BTNode $node)
    {
        // Position of the middle entry; it becomes the separator in the parent.
        $middle = intval($node->indexNum / 2);
        $middleIndexValue = 0;
        $pid = $node->parent;

        // Splitting the root grows the tree by one level: create a new root.
        if ($pid === 0) {
            $parent = new BTNode(false, 0);
            $this->storeNode($parent);
            $pid = $parent->getID();
            $this->root = $pid;
        }
        $parent = $this->getNodeByID($pid);

        $newNode = new BTNode($node->isLeaf, $pid);
        $this->storeNode($newNode);

        $indexMapIterator = $node->generateIndexMapIterator();
        while ($indexMapIterator->valid()) {
            $indexObj = $indexMapIterator->current();
            $position = $indexMapIterator->key();

            if ($newNode->isLeaf) {
                // Leaf: the middle entry and everything after it move over,
                // so the separator value stays available in a leaf.
                if ($position >= $middle) {
                    $newNode->addIndex(new Index(
                        $indexObj->getIndexVal(),
                        $indexObj->getLeft(),
                        $indexObj->getRight(),
                        $indexObj->getData()
                    ));
                }
            } elseif ($position > $middle) {
                // Internal node: only entries after the middle move over;
                // the middle entry itself moves up into the parent.
                $newNode->addIndex(new Index(
                    $indexObj->getIndexVal(),
                    $indexObj->getLeft(),
                    $indexObj->getRight(),
                    $indexObj->getData()
                ));
                // The children under a moved entry now belong to the new node.
                $sonLeftNode = $this->getNodeByID($indexObj->getLeft());
                $sonLeftNode->updateParent($newNode->getID());
                $sonRightNode = $this->getNodeByID($indexObj->getRight());
                $sonRightNode->updateParent($newNode->getID());
            }

            if ($position == $middle) {
                $middleIndexValue = $indexObj->getIndexVal();
            }
            $indexMapIterator->next();
        }

        // When a new root was created above, the split node hangs under it.
        $node->updateParent($pid);
        // Drop the middle entry and everything after it from the split node.
        $node->deleteMap($middle);

        // Keep the leaf chain intact: node -> newNode -> node's old next.
        if ($node->isLeaf) {
            $newNode->setNext($node->next);
            $node->setNext($newNode->getID());
        }

        // Register the separator in the parent, pointing at both halves.
        $parent->addIndex(new Index($middleIndexValue, $node->getID(), $newNode->getID()));

        // Re-link neighbours in the parent: each entry's left child is the
        // right child of the entry before it.
        $parentIndexMapIterator = $parent->generateIndexMapIterator();
        while ($parentIndexMapIterator->valid()) {
            $currentIndexObj = $parentIndexMapIterator->current();
            if ($parentIndexMapIterator->key() > 0) {
                $parentIndexMapIterator->prev();
                $prevIndexObj = $parentIndexMapIterator->current();
                $currentIndexObj->updateLeft($prevIndexObj->getRight());
                $parentIndexMapIterator->next();
            }
            $parentIndexMapIterator->next();
        }

        // The parent may have become full in turn.
        if ($parent->isFull($this->order)) {
            $this->split($parent);
        }
    }

    /**
     * Looks up a single record by index value.
     *
     * @param mixed $indexVal
     * @return mixed The payload stored with the matching leaf entry, or the
     *               string 'record [<indexVal>] is not exists!' when there
     *               is no such entry.
     */
    public function find($indexVal)
    {
        $node = $this->getNodeByID($this->root);
        while ($node !== null) {
            list($childId, $match) = $this->scanNode($node, $indexVal);
            // Only a match inside a leaf carries the payload; a match in an
            // internal node is just a separator, so keep descending.
            if ($match !== null && $node->isLeaf) {
                return $match->getData();
            }
            $node = $this->getNodeByID($childId);
        }

        return 'record ['.$indexVal. '] is not exists!';
    }

    /**
     * Range query: collects the payloads of all leaf entries whose index
     * value lies in [$start, $end], in ascending order.
     *
     * @param mixed $start
     * @param mixed $end
     * @return array
     */
    public function rangeFind($start, $end)
    {
        // Descend to the leaf where $start would be located.
        $node = $this->getNodeByID($this->root);
        $leaf = $node;
        while ($node !== null) {
            $leaf = $node;
            list($childId) = $this->scanNode($node, $start);
            $node = $this->getNodeByID($childId);
        }

        // Follow the leaf chain until an entry beyond $end shows up.
        $resultData = [];
        while ($leaf !== null) {
            $indexMapIterator = $leaf->generateIndexMapIterator();
            while ($indexMapIterator->valid()) {
                $indexObj = $indexMapIterator->current();
                if ($indexObj->getIndexVal() > $end) {
                    break 2;
                }
                if ($indexObj->getIndexVal() >= $start) {
                    $resultData[] = $indexObj->getData();
                }
                $indexMapIterator->next();
            }
            $leaf = $this->getNodeByID($leaf->next);
        }

        return $resultData;
    }

    /**
     * @return bool true while no root node has been created.
     */
    public function isEmpty()
    {
        return $this->root === 0;
    }

    /**
     * Stores a node in the pool under its own ID.
     *
     * @param BTNode $node
     */
    private function storeNode(BTNode $node)
    {
        $id = $node->getID();
        $this->nodeMap[$id] = $node;
    }

    /**
     * @param mixed $id
     * @return BTNode|null The node with that ID, or null when the pool has
     *                     no such node (this includes the ID 0).
     */
    public function getNodeByID($id)
    {
        return isset($this->nodeMap[$id]) ? $this->nodeMap[$id] : null;
    }

    /**
     * Prints the nodes of the pool with print_r().
     *
     * @param bool $onlyLeafNode When true only leaf nodes are printed;
     *                           otherwise every node is printed.
     */
    public function dumpNodeMap($onlyLeafNode = false)
    {
        foreach ($this->nodeMap as $eachNode) {
            if (!$onlyLeafNode || $eachNode->isLeaf) {
                print_r($eachNode);
            }
        }
    }
}
/**
 * Class IndexMapIterator
 *
 * Bidirectional cursor over the list of Index entries it was constructed
 * with. The cursor may be moved outside the list; valid() reports whether
 * it currently points at an existing entry.
 */
class IndexMapIterator
{
    /**
     * @var array The entries being iterated.
     */
    private $indexMap = [];

    /**
     * @var int Current cursor position.
     */
    private $position = 0;

    /**
     * @param array $indexMap Entries to iterate; the cursor starts at 0.
     */
    public function __construct($indexMap = [])
    {
        $this->position = 0;
        $this->indexMap = $indexMap;
    }

    /**
     * Moves the cursor back to the first position.
     */
    public function rewind()
    {
        $this->position = 0;
    }

    /**
     * @return bool Whether an entry exists at the cursor position.
     */
    public function valid()
    {
        $cursor = $this->position;

        return isset($this->indexMap[$cursor]);
    }

    /**
     * @return int The cursor position.
     */
    public function key()
    {
        return $this->position;
    }

    /**
     * @return Index The entry at the cursor position.
     */
    public function current()
    {
        return $this->indexMap[$this->position];
    }

    /**
     * Moves the cursor one position forward.
     */
    public function next()
    {
        $this->position += 1;
    }

    /**
     * Moves the cursor one position backward.
     */
    public function prev()
    {
        $this->position -= 1;
    }
}
// Demo: build a tree of order 5 from sample records keyed by 'id',
// dump the whole tree, then run a range query and stop the script.
$dataList = [
    ['id' => 10, 'name' => 'name_10', 'age' => 28],
    ['id' => 20, 'name' => 'name_20', 'age' => 23],
    ['id' => 30, 'name' => 'name_30', 'age' => 25],
    ['id' => 40, 'name' => 'name_40', 'age' => 24],
    ['id' => 5, 'name' => 'name_5', 'age' => 18],
    ['id' => 15, 'name' => 'name_15', 'age' => 22],
    ['id' => 50, 'name' => 'name_50', 'age' => 21],
    ['id' => 35, 'name' => 'name_35', 'age' => 24],
    ['id' => 60, 'name' => 'name_60', 'age' => 23],
    ['id' => 45, 'name' => 'name_45', 'age' => 23],
    ['id' => 22, 'name' => 'name_22', 'age' => 19],
];

$btree = new BPlusTree(5);
foreach ($dataList as $row) {
    $btree->insert(new Data($row, 'id'));
}

// Other things to try here: $btree->find(22) or $btree->dumpNodeMap().
print_r($btree);
var_dump($btree->rangeFind(40, 70));
exit;

// Page-footer residue from the source web page (not code): 浙公网安备 33010602011771号