From 30004a344b41f812c694dcdf51d0e7aeac48e255 Mon Sep 17 00:00:00 2001 From: Christopher Berner Date: Sun, 19 Nov 2023 16:10:56 -0800 Subject: [PATCH] Improve performance and fix panic When selecting the page size to allocate when building a leaf(s) page, space for offsets was still being allocated for fixed width types. This lead to those types having leaves that were much larger than necessary, and worse they could grow without bound eventually leading to a panic when the page overflowed the maximum 2^16 storable pairs. --- src/multimap_table.rs | 17 +++++++++++--- src/tree_store/btree_base.rs | 39 +++++++++++++++++++++++++-------- src/tree_store/btree_mutator.rs | 10 ++++++--- tests/integration_tests.rs | 23 +++++++++++++++++++ 4 files changed, 74 insertions(+), 15 deletions(-) diff --git a/src/multimap_table.rs b/src/multimap_table.rs index 0c8ce299..e3a7d9eb 100644 --- a/src/multimap_table.rs +++ b/src/multimap_table.rs @@ -791,8 +791,12 @@ impl<'db, 'txn, K: RedbKey + 'static, V: RedbKey + 'static> MultimapTable<'db, ' accessor.length_of_pairs(0, accessor.num_pairs()) + value_bytes_ref.len(); let new_key_bytes = accessor.length_of_keys(0, accessor.num_pairs()) + value_bytes_ref.len(); - let required_inline_bytes = - RawLeafBuilder::required_bytes(new_pairs, new_pair_bytes); + let required_inline_bytes = RawLeafBuilder::required_bytes( + new_pairs, + new_pair_bytes, + V::fixed_width(), + <() as RedbValue>::fixed_width(), + ); if required_inline_bytes < self.mem.get_page_size() / 2 { let mut data = vec![0; required_inline_bytes]; @@ -866,7 +870,12 @@ impl<'db, 'txn, K: RedbKey + 'static, V: RedbKey + 'static> MultimapTable<'db, ' } } else { drop(get_result); - let required_inline_bytes = RawLeafBuilder::required_bytes(1, value_bytes_ref.len()); + let required_inline_bytes = RawLeafBuilder::required_bytes( + 1, + value_bytes_ref.len(), + V::fixed_width(), + <() as RedbValue>::fixed_width(), + ); if required_inline_bytes < self.mem.get_page_size() / 2 { let mut data = vec![0; required_inline_bytes]; let mut builder = RawLeafBuilder::new( @@ -935,6 +944,8 @@ impl<'db, 'txn, K: RedbKey + 'static, V: RedbKey + 'static> MultimapTable<'db, ' let required = RawLeafBuilder::required_bytes( old_num_pairs - 1, old_pairs_len - removed_value_len, + V::fixed_width(), + <() as RedbValue>::fixed_width(), ); let mut new_data = vec![0; required]; let new_key_len = diff --git a/src/tree_store/btree_base.rs b/src/tree_store/btree_base.rs index 2e7f8182..0547a02f 100644 --- a/src/tree_store/btree_base.rs +++ b/src/tree_store/btree_base.rs @@ -441,8 +441,13 @@ pub(super) struct LeafBuilder<'a, 'b> { } impl<'a, 'b> LeafBuilder<'a, 'b> { - pub(super) fn required_bytes(num_pairs: usize, keys_values_bytes: usize) -> usize { - RawLeafBuilder::required_bytes(num_pairs, keys_values_bytes) + pub(super) fn required_bytes(&self, num_pairs: usize, keys_values_bytes: usize) -> usize { + RawLeafBuilder::required_bytes( + num_pairs, + keys_values_bytes, + self.fixed_key_size, + self.fixed_value_size, + ) } pub(super) fn new( @@ -484,7 +489,7 @@ impl<'a, 'b> LeafBuilder<'a, 'b> { } pub(super) fn should_split(&self) -> bool { - let required_size = Self::required_bytes( + let required_size = self.required_bytes( self.pairs.len(), self.total_key_bytes + self.total_value_bytes, ); @@ -506,7 +511,7 @@ impl<'a, 'b> LeafBuilder<'a, 'b> { } let required_size = - Self::required_bytes(division, first_split_key_bytes + first_split_value_bytes); + self.required_bytes(division, first_split_key_bytes + first_split_value_bytes); let mut page1 = self.mem.allocate(required_size, CachePriority::Low)?; let mut builder = RawLeafBuilder::new( page1.memory_mut(), @@ -520,7 +525,7 @@ impl<'a, 'b> LeafBuilder<'a, 'b> { } drop(builder); - let required_size = Self::required_bytes( + let required_size = self.required_bytes( self.pairs.len() - division, self.total_key_bytes + self.total_value_bytes - first_split_key_bytes @@ -543,7 +548,7 @@ impl<'a, 'b> LeafBuilder<'a, 'b> { } pub(super) fn build(self) -> Result> { - let required_size = Self::required_bytes( + let required_size = self.required_bytes( self.pairs.len(), self.total_key_bytes + self.total_value_bytes, ); @@ -587,11 +592,21 @@ pub(crate) struct RawLeafBuilder<'a> { } impl<'a> RawLeafBuilder<'a> { - pub(crate) fn required_bytes(num_pairs: usize, keys_values_bytes: usize) -> usize { + pub(crate) fn required_bytes( + num_pairs: usize, + keys_values_bytes: usize, + key_size: Option, + value_size: Option, + ) -> usize { // Page id & header; let mut result = 4; // key & value lengths - result += num_pairs * 2 * size_of::(); + if key_size.is_none() { + result += num_pairs * size_of::(); + } + if value_size.is_none() { + result += num_pairs * size_of::(); + } result += keys_values_bytes; result @@ -609,7 +624,13 @@ impl<'a> RawLeafBuilder<'a> { #[cfg(debug_assertions)] { // Poison all the key & value offsets, in case the caller forgets to write them - let last = 4 + 2 * size_of::() * num_pairs; + let mut last = 4; + if fixed_key_size.is_none() { + last += size_of::() * num_pairs; + } + if fixed_value_size.is_none() { + last += size_of::() * num_pairs; + } for x in &mut page[4..last] { *x = 0xFF; } diff --git a/src/tree_store/btree_mutator.rs b/src/tree_store/btree_mutator.rs index 4ead0d59..ec852b87 100644 --- a/src/tree_store/btree_mutator.rs +++ b/src/tree_store/btree_mutator.rs @@ -6,7 +6,7 @@ use crate::tree_store::btree_mutator::DeletionResult::{ DeletedBranch, DeletedLeaf, PartialBranch, PartialLeaf, Subtree, }; use crate::tree_store::page_store::{Page, PageImpl}; -use crate::tree_store::{AccessGuardMut, PageNumber, TransactionalMemory}; +use crate::tree_store::{AccessGuardMut, PageNumber, RawLeafBuilder, TransactionalMemory}; use crate::types::{RedbKey, RedbValue}; use crate::{AccessGuard, Result}; use std::cmp::{max, min}; @@ -484,8 +484,12 @@ impl<'a, 'b, K: RedbKey, V: RedbValue> MutateHelper<'a, 'b, K, V> { } let new_kv_bytes = accessor.length_of_pairs(0, accessor.num_pairs()) - accessor.length_of_pairs(position, position + 1); - let new_required_bytes = - LeafBuilder::required_bytes(accessor.num_pairs() - 1, new_kv_bytes); + let new_required_bytes = RawLeafBuilder::required_bytes( + accessor.num_pairs() - 1, + new_kv_bytes, + K::fixed_width(), + V::fixed_width(), + ); let uncommitted = self.mem.uncommitted(page.get_page_number()); // Fast-path for dirty pages diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 02771922..59a61bdf 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -225,6 +225,29 @@ fn large_values() { txn.commit().unwrap(); } +#[test] +fn many_pairs() { + let tmpfile = create_tempfile(); + const TABLE: TableDefinition = TableDefinition::new("TABLE"); + + let db = Database::create(tmpfile.path()).unwrap(); + let wtx = db.begin_write().unwrap(); + + let mut table = wtx.open_table(TABLE).unwrap(); + + for i in 0..200_000 { + table.insert(i, i).unwrap(); + + if i % 10_000 == 0 { + eprintln!("{i}"); + } + } + + drop(table); + + wtx.commit().unwrap(); +} + #[test] fn large_keys() { let tmpfile = create_tempfile();