BT(3) - B+树完整版以及B*性质
继承前两篇 B+树(Vec) 和 B+树(TreeMap) 的完整版 B+ 树 (Vec) 实现。系列所有代码可以在这里
数据结构
树
为了 pop_first
/ pop_last
增加了最小、最大节点。
impl_tree!(
/// B+ Trees
///
BPT {
cnt: usize,
min_node: WeakNode<K, V>,
max_node: WeakNode<K, V>
}
);
新增简单方法
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
#[inline(always)]
pub fn len(&self) -> usize {
self.cnt
}
fn nodes(&self) -> impl Iterator<Item = Node<K, V>> {
let mut cur = self.min_node.upgrade();
std::iter::from_generator(move || {
while cur.is_some() {
yield cur.clone();
cur = succ!(cur);
}
})
}
pub fn entries(&self) -> impl Iterator<Item = (&K, &V)> {
std::iter::from_generator(move || {
for x in self.nodes() {
for ent in entries!(x) {
yield (&ent.0, &ent.1)
}
}
})
}
pub fn keys(&self) -> impl Iterator<Item = &K> {
self.entries().map(|ent| ent.0)
}
pub fn values(&self) -> impl Iterator<Item = &V> {
self.entries().map(|ent| ent.1)
}
pub fn into_iter(self) -> impl Iterator<Item = (K, V)> {
let mut cur = self.min_node.upgrade();
std::iter::from_generator(move || {
while cur.is_some() {
let nxt = succ!(cur);
for ent in entries_mut!(cur).drain(..) {
yield ent.drain();
}
cur = nxt;
}
drop(self);
})
}
/// collect all item without drop itself used for BPT2::remove
pub(crate) fn drain_all(&mut self) -> impl Iterator<Item = (K, V)> {
let mut cur = self.min_node.upgrade();
std::iter::from_generator(move || {
while cur.is_some() {
let nxt = succ!(cur);
for ent in entries_mut!(cur).drain(..) {
yield ent.drain();
}
cur = nxt;
}
})
}
#[inline]
pub fn min_key(&self) -> Option<&K> {
self.min_node
.upgrade()
.min_key()
.map(|k| unsafe { &*(k as *const K) })
}
#[inline]
pub fn max_key(&self) -> Option<&K> {
self.max_node
.upgrade()
.max_key()
.map(|k| unsafe { &*(k as *const K) })
}
}
修改的既有方法
插入
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
pub fn insert(&mut self, k: K, v: V) -> Option<V>
where
K: Clone,
{
let x = Self::search_to_leaf(&self.root, &k);
self.insert_into_leaf(x, k, v)
}
fn insert_into_leaf(&mut self, x: Node<K, V>, k: K, v: V) -> Option<V>
where
K: Clone,
{
/* NonInternal Node */
debug_assert!(!x.is_internal());
/* Nil */
if x.is_none() {
self.root = node!(kv | k, v);
self.min_node = self.root.downgrade();
self.max_node = self.root.downgrade();
}
/* Leaf */
else {
let idx = match entries!(x).binary_search_by_key(&&k, |ent| &ent.0)
{
Ok(idx) => {
return Some(replace(&mut entries_mut!(x)[idx].1, v))
}
Err(idx) => idx,
};
/* insert into leaf */
entries_mut!(x).insert(idx, KVEntry(k, v));
self.insert_retracing(x);
}
self.cnt += 1;
None
}
fn insert_retracing(&mut self, x: Node<K, V>)
where
K: Clone,
{
if entries!(x).len() == M {
self.promote(x);
}
}
fn promote(&mut self, x: Node<K, V>)
where
K: Clone,
{
debug_assert!(x.is_some());
let p = paren!(x);
/* split node */
let lpos = M.div_floor(2);
let hpos = M.div_ceil(2);
let head_key;
let x2;
if x.is_leaf() {
debug_assert_eq!(entries!(x).len(), Self::entries_high_bound());
// keep key for leaf
let entries_x2 = entries_mut!(x).split_off(lpos);
head_key = entries_x2[0].0.clone();
x2 = node!(
basic - leaf | entries_x2,
succ!(x).downgrade(),
WeakNode::none()
);
succ!(x, x2.clone());
if x.rc_eq(&self.max_node.upgrade()) {
self.max_node = x2.downgrade();
}
} else {
debug_assert_eq!(keys!(x).len(), Self::entries_high_bound());
let keys_x2 = keys_mut!(x).split_off(hpos);
head_key = keys_mut!(x).pop().unwrap();
let children_x2 = children_mut!(x).split_off(hpos);
x2 = node!(
basic - internal | keys_x2,
children_x2,
WeakNode::none()
);
children_revref!(&x2);
}
/* push new level */
if p.is_none() {
let keys = vec![head_key];
let children = vec![x, x2];
self.root =
node!(basic - internal | keys, children, WeakNode::none());
children_revref!(&self.root);
}
/* insert into paren node */
else {
let x_idx = index_of_child!(p, &x);
keys_mut!(p).insert(x_idx, head_key);
children_mut!(p).insert(x_idx + 1, x2.clone());
paren!(x2, p.clone());
if keys!(p).len() == Self::entries_high_bound() {
self.promote(p);
}
}
}
}
删除
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
pub fn remove<Q>(&mut self, k: &Q) -> Option<V>
where
K: Borrow<Q> + Clone,
Q: Ord + ?Sized,
V: Debug,
{
let mut x = &self.root;
let mut internal_and_idx = None;
while x.is_internal() {
match keys!(x).binary_search_by_key(&k, |k_| k_.borrow()) {
Ok(idx) => {
debug_assert!(internal_and_idx.is_none());
internal_and_idx = Some((x.clone(), idx));
x = &children!(x)[idx + 1];
}
Err(idx) => {
x = &children!(x)[idx];
}
}
}
if x.is_none() {
return None;
}
let idx;
match entries!(x).binary_search_by_key(&k, |ent| ent.0.borrow()) {
Ok(idx_) => idx = idx_,
Err(_idx) => return None,
}
self.remove_on_leaf(internal_and_idx, x.clone(), idx)
.map(|(_, v)| v)
}
fn remove_on_leaf(
&mut self,
internal_and_idx: Option<(Node<K, V>, usize)>,
x: Node<K, V>,
idx: usize,
) -> Option<(K, V)>
where
K: Clone,
{
/* Update internal key with its succsessor key */
if let Some((internal, i_idx)) = internal_and_idx {
self.update_internal_key(&internal, i_idx, &x, idx);
}
let popped = entries_mut!(x).remove(idx);
self.remove_retracing(x.clone());
self.cnt -= 1;
Some((popped.0, popped.1))
}
fn remove_retracing(&mut self, x: Node<K, V>)
where
K: Clone,
{
if entries!(x).is_empty() {
if self.root.rc_eq(&x) {
self.root = Node::none();
self.min_node = WeakNode::none();
self.max_node = WeakNode::none();
} else {
self.unpromote(x);
}
}
}
fn unpromote(&mut self, x: Node<K, V>)
where
K: Clone,
{
debug_assert!(
x.is_leaf() || keys!(x).len() == Self::entries_low_bound() - 1
);
let p = paren!(x);
debug_assert!(p.is_some());
let idx = index_of_child_by_rc!(p, x);
if Self::try_rebalancing(&p, idx) {
return;
}
/* merge node */
if idx > 0 {
self.merge_node(&p, idx - 1);
} else {
self.merge_node(&p, idx);
}
if paren!(p).is_none() {
debug_assert!(p.is_internal());
if keys!(p).is_empty() {
// pop new level
self.root = children_mut!(p).pop().unwrap();
paren!(self.root, Node::none());
}
} else {
if keys!(p).len() < Self::entries_low_bound() {
self.unpromote(p);
}
}
}
/// (parent, left-idx)
fn merge_node(&mut self, p: &Node<K, V>, idx: usize) {
let children = children!(p);
let left = children[idx].clone();
let right = children[idx + 1].clone();
// for leaf node
if left.is_leaf() {
keys_mut!(p).remove(idx);
entries_mut!(left).extend(entries_mut!(right).drain(..));
// update succ
succ!(left, succ!(right));
// update max_node
if right.rc_eq(&self.max_node.upgrade()) {
// update max_node
self.max_node = left.downgrade();
}
}
// for internal node
else {
keys_mut!(left).push(keys_mut!(p).remove(idx));
keys_mut!(left).extend(keys_mut!(right).drain(..));
// merge right's children to the left
let left_children = children_mut!(left);
for child in children_mut!(right).drain(..) {
paren!(child, left.clone());
left_children.push(child);
}
}
// remove right from p
children_mut!(p).remove(idx + 1);
}
/// parent, x idx of parent
fn try_rebalancing(p: &Node<K, V>, idx: usize) -> bool
where
K: Clone,
{
// Try left first
if idx > 0 && Self::try_node_redistribution(&p, idx - 1, Left) {
return true;
}
// Try right then
if idx < children!(p).len() - 1
&& Self::try_node_redistribution(&p, idx, Right)
{
return true;
}
false
}
fn try_node_redistribution(
p: &Node<K, V>,
idx: usize,
sib_dir: Dir,
) -> bool
where
K: Clone,
{
let children = children!(p);
let left = &children[idx];
let right = &children[idx + 1];
let sib = if sib_dir.is_left() { left } else { right };
if sib.is_leaf() && entries!(sib).len() > 1 {
if sib_dir.is_left() {
entries_mut!(right)
.insert(0, entries_mut!(left).pop().unwrap());
}
// sib is right
else {
entries_mut!(left).push(entries_mut!(right).remove(0));
keys_mut!(p)[idx] = entries!(right)[0].0.clone();
}
return true;
}
if sib.is_internal() && keys!(sib).len() > Self::entries_low_bound() {
if sib_dir.is_left() {
keys_mut!(right).insert(
0,
replace(
&mut keys_mut!(p)[idx],
keys_mut!(left).pop().unwrap(),
),
);
let child = children_mut!(left).pop().unwrap();
paren!(child, right.clone());
children_mut!(right).insert(0, child);
} else {
keys_mut!(left).push(replace(
&mut keys_mut!(p)[idx],
keys_mut!(right).remove(0),
));
let child = children_mut!(right).remove(0);
paren!(child, left.clone());
children_mut!(left).push(child);
}
return true;
}
false
}
}
测试
impl<K: Ord + Debug, V, const M: usize> Debug for BPT<K, V, M> {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.debug_struct("BPT")
.field("root", &self.root)
.field("cnt", &self.cnt)
.field("min_node", &self.min_node.upgrade())
.field("max_node", &self.max_node.upgrade())
.finish()
}
}
新增方法
push_back
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
/// push into max
pub fn push_back(&mut self, k: K, v: V)
where
K: Clone,
V: Debug,
{
let x = self.max_node.upgrade();
self.insert_into_leaf(x, k, v);
}
}
push_front
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
/// push into min
pub fn push_front(&mut self, k: K, v: V)
where
K: Clone,
V: Debug,
{
let x = self.min_node.upgrade();
// self.insert_into_leaf(x, k, v);
self.cnt += 1;
/* Nil */
if x.is_none() {
self.root = node!(kv | k, v);
self.min_node = self.root.downgrade();
self.max_node = self.root.downgrade();
}
/* Leaf */
else {
/* insert into leaf */
entries_mut!(x).insert(0, KVEntry(k, v));
self.insert_retracing(x);
}
}
}
pop_first
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
pub fn pop_first(&mut self) -> Option<(K, V)>
where
K: Clone,
{
let x = self.min_node.upgrade();
if x.is_none() {
return None;
}
/* min-key has no internal index */
self.remove_on_leaf(None, x.clone(), 0)
}
}
pop_last
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
pub fn pop_last(&mut self) -> Option<(K, V)>
where
K: Clone,
{
let x = self.max_node.upgrade();
if x.is_none() {
return None;
}
let p = paren!(x);
let internal_and_idx;
if p.is_some() && entries!(x).len() == 1 {
internal_and_idx = Some((p.clone(), keys!(p).len() - 1));
} else {
internal_and_idx = None;
}
self.remove_on_leaf(internal_and_idx, x.clone(), entries!(x).len() - 1)
}
}
rank
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
/// Start from 0 O(n/M)
pub fn rank<Q>(&self, key: &Q) -> std::result::Result<usize, usize>
where
K: Borrow<Q>,
Q: Ord + ?Sized,
{
if self.root.is_none() {
return Err(0);
}
let x = Self::search_to_leaf(&self.root, key);
debug_assert!(x.is_some());
let idx;
let mut is_err = false;
match entries!(x).binary_search_by_key(&key, |ent| ent.0.borrow()) {
Ok(idx_) => idx = idx_,
Err(idx_) => {
idx = idx_;
is_err = true;
}
}
let mut rem = entries!(x).len() - (idx + 1);
let mut cur = succ!(x);
while cur.is_some() {
rem += entries!(cur).len();
cur = succ!(cur);
}
let rk = self.cnt - rem - 1;
if !is_err {
Ok(rk)
}
else {
Err(rk)
}
}
}
nth
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
/// return Nth child (start from 0), O(n/M)
pub fn nth(&self, mut idx: usize) -> Option<(&K, &V)> {
let mut cur = self.min_node.upgrade();
while cur.is_some() {
if idx < entries!(cur).len() {
let ent = &entries!(cur)[idx];
return Some((&ent.0, &ent.1));
} else {
idx -= entries!(cur).len();
cur = succ!(cur);
}
}
None
}
}
split_off
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
/// return [at, ...)
pub fn split_off(&mut self, at: usize) -> Self
where
K: Clone,
V: Debug,
{
let mut oth = Self::new();
if self.cnt == 0 || at >= self.cnt {
return oth;
}
oth.bulk_push_front(self.bulk_pop(self.cnt - at));
oth
}
}
B* 性质
B* 就是在键值插入的时候如果发生节点溢出时,不急着先分裂节点,而是和删除时一样,首先尝试从邻居节点进行平衡操作。
try_balacing
impl<K: Ord, V, const M: usize> BPT<K, V, M> {
fn promote(&mut self, x: Node<K, V>)
where
K: Clone,
{
debug_assert!(x.is_some());
let p = paren!(x);
if p.is_some() {
let idx = index_of_child_by_rc!(p, x);
if Self::try_rebalancing(&p, idx) {
return;
}
}
// ...
}
/// parent, x idx of parent
fn try_rebalancing(p: &Node<K, V>, idx: usize) -> bool
where
K: Clone,
{
// Try left first
if idx > 0 && Self::try_node_redistribution_eager(&p, idx - 1, Left) {
return true;
}
// Try right then
if idx < children!(p).len() - 1
&& Self::try_node_redistribution_eager(&p, idx, Right)
{
return true;
}
false
}
fn try_node_redistribution_eager(
p: &Node<K, V>,
idx: usize,
sib_dir: Dir,
) -> bool
where
K: Clone,
{
use common::vec_even_up;
let children = children!(p);
let left = &children[idx];
let right = &children[idx + 1];
let (sib, x) = if sib_dir.is_left() {
(left, right)
} else {
(right, left)
};
if sib.is_leaf()
&& (entries!(x).len() == 0 && entries!(sib).len() > 1
|| entries!(x).len() == Self::entries_high_bound()
&& entries!(sib).len() < Self::entries_high_bound() - 1)
{
vec_even_up(entries_mut!(left), entries_mut!(right));
keys_mut!(p)[idx] = entries!(right)[0].0.clone();
return true;
}
if sib.is_internal()
&& (keys!(x).len() < Self::entries_low_bound()
&& keys!(sib).len() > Self::entries_low_bound()
|| keys!(x).len() == Self::entries_high_bound()
&& keys!(sib).len() < Self::entries_high_bound() - 1)
{
let left_old_len = children!(left).len();
let right_old_len = children!(right).len();
keys_mut!(left).push(keys_mut!(p)[idx].clone());
vec_even_up(keys_mut!(left), keys_mut!(right));
keys_mut!(p)[idx] = keys_mut!(left).pop().unwrap();
vec_even_up(children_mut!(left), children_mut!(right));
if (children!(left).len() + children!(right).len()) % 2 > 0 {
children_mut!(right)
.insert(0, children_mut!(left).pop().unwrap());
}
if left_old_len < right_old_len {
children_revref!(left, left_old_len..children!(left).len());
} else {
children_revref!(right, 0..children!(right).len()-right_old_len);
}
return true;
}
false
}
}
vec_even_up
保持左右的顺序前提下,平分左右两个数组。
/// Even up two vector using clone
///
/// (This implementation is inspired by Vec::remove)
///
/// ```
/// use m6_common::vec_even_up;
///
/// /* case-0 left max for pop is easy (odd) */
///
/// let mut v0 = (0..2).collect();
/// let mut v1 = (4..7).collect();
/// vec_even_up(&mut v0, &mut v1);
/// assert_eq!(v0, vec![0, 1, 4]);
/// assert_eq!(v1, vec![5, 6]);
///
/// /* case-1 the left is samller (even) */
///
/// let mut v0 = (0..2).collect();
/// let mut v1 = (4..8).collect();
/// vec_even_up(&mut v0, &mut v1);
/// assert_eq!(v0, vec![0, 1, 4]);
/// assert_eq!(v1, vec![5, 6, 7]);
///
/// /* case-2 the left is larger (even) */
///
/// let mut v0 = (0..4).collect();
/// let mut v1 = (4..6).collect();
/// vec_even_up(&mut v0, &mut v1);
/// assert_eq!(v0, vec![0, 1, 2]);
/// assert_eq!(v1, vec![3, 4, 5]);
///
///
/// /* case-3-0 the left is larger (given more than old_len) */
///
/// let mut v0 = (0..7).collect();
/// let mut v1 = (7..9).collect();
/// vec_even_up(&mut v0, &mut v1);
/// assert_eq!(v0, vec![0, 1, 2, 3, 4,]);
/// assert_eq!(v1, vec![5, 6, 7, 8]);
///
/// ```
///
pub fn vec_even_up<T>(v0: &mut Vec<T>, v1: &mut Vec<T>) {
let v0_old_len = v0.len();
let v1_old_len = v1.len();
let tnt = v0_old_len + v1_old_len;
let cnt_lf = tnt.div_ceil(2);
let cnt_rh = tnt - cnt_lf;
/* Check if it's balanced? */
if v0_old_len == cnt_lf {
debug_assert_eq!(v1.len(), cnt_rh);
return;
}
debug_assert!(v0_old_len > 0 || v1_old_len > 0);
use std::ptr::{ copy, copy_nonoverlapping, read };
// V0 is smaller
if v0_old_len < cnt_lf {
// let v0_get = cnt_lf - v0_old_len;
let v1_given = v1_old_len - cnt_rh;
let v1_ptr = v1.as_mut_ptr();
let mut i = 0;
v0.resize_with(cnt_lf, move || unsafe {
let v = read(v1_ptr.add(i));
i += 1;
v
});
unsafe {
copy(
v1_ptr.add(v1_given),
v1_ptr,
cnt_rh
);
v1.set_len(cnt_rh);
}
}
// V0 is larger
else {
let v0_given = v0_old_len - cnt_lf;
let v0_ptr = v0.as_mut_ptr();
v1.resize_with(cnt_rh, move || unsafe {
// Fill with dirty data
// read(v0_ptr)
std::mem::zeroed()
});
// get ptr after resize to avoid realloc issue
let v1_ptr = v1.as_mut_ptr();
unsafe {
copy(
v1_ptr,
v1_ptr.add(v0_given),
v1_old_len
);
copy_nonoverlapping(
v0_ptr.add(cnt_lf),
v1_ptr,
v0_given
);
v0.set_len(cnt_lf);
}
}
}
总结
B* 没什么用,对插入的性能改进没有什么帮助