From 4bd8e1d8430ee17c7e3be47732d1786074ecb592 Mon Sep 17 00:00:00 2001 From: max-dfinity <100170574+max-dfinity@users.noreply.github.com> Date: Tue, 15 Oct 2024 15:07:57 -0700 Subject: [PATCH] refactor(nns): Improve range neurons performance for stable store (#2033) When scanning neurons in the main StableBTreeMap, instead of doing a separate traversal of the auxiliary StableBTreeMaps for EACH neuron in the result set, ranges of each auxiliary StableBTreeMap are used (and scanned in parallel). This gets around a 40% performance improvement. This method, while not currently extensively used, will be used more after the migration of neurons to stable storage. Benchmark results before the change: ``` // Output from benchmark test Benchmark: range_neurons_performance total: instructions: 47.07 M (improved by 41.47%) heap_increase: 0 pages (no change) stable_memory_increase: 0 pages (no change) ``` ``` range_neurons_performance: total: instructions: 80415437 heap_increase: 0 stable_memory_increase: 0 scopes: {} ``` After the change (what's in PR): ``` range_neurons_performance: total: instructions: 47070657 heap_increase: 0 stable_memory_increase: 0 scopes: {} ``` --- .../governance/canbench/canbench_results.yml | 16 +- rs/nns/governance/src/neuron_store/benches.rs | 33 +++- rs/nns/governance/src/storage/neurons.rs | 141 +++++++++++++++++- .../src/storage/neurons/neurons_tests.rs | 51 +++++++ 4 files changed, 225 insertions(+), 16 deletions(-) diff --git a/rs/nns/governance/canbench/canbench_results.yml b/rs/nns/governance/canbench/canbench_results.yml index 2794e5723b3..314b17dae62 100644 --- a/rs/nns/governance/canbench/canbench_results.yml +++ b/rs/nns/governance/canbench/canbench_results.yml @@ -1,26 +1,32 @@ benches: add_neuron_active_maximum: total: - instructions: 45003037 + instructions: 35991301 heap_increase: 1 stable_memory_increase: 0 scopes: {} add_neuron_active_typical: total: - instructions: 2703302 + instructions: 1836362 heap_increase: 0 stable_memory_increase: 0 scopes: {} add_neuron_inactive_maximum: total: - instructions: 122465697 + instructions: 98457016 heap_increase: 1 stable_memory_increase: 0 scopes: {} add_neuron_inactive_typical: total: - instructions: 10581822 + instructions: 7574043 heap_increase: 0 stable_memory_increase: 0 scopes: {} -version: 0.1.3 + range_neurons_performance: + total: + instructions: 47070657 + heap_increase: 0 + stable_memory_increase: 0 + scopes: {} +version: 0.1.7 diff --git a/rs/nns/governance/src/neuron_store/benches.rs b/rs/nns/governance/src/neuron_store/benches.rs index 6199aae64eb..529dea07275 100644 --- a/rs/nns/governance/src/neuron_store/benches.rs +++ b/rs/nns/governance/src/neuron_store/benches.rs @@ -117,13 +117,17 @@ fn build_neuron(rng: &mut impl RngCore, location: NeuronLocation, size: NeuronSi neuron } -fn set_up_neuron_store(rng: &mut impl RngCore) -> NeuronStore { +fn set_up_neuron_store( + rng: &mut impl RngCore, + active_count: u64, + inactive_count: u64, +) -> NeuronStore { // We insert 200 inactive neurons and 100 active neurons. They are not very realistic sizes, but // it would take too long to prepare those neurons for each benchmark. - let inactive_neurons: Vec<_> = (0..200) + let inactive_neurons: Vec<_> = (0..inactive_count) .map(|_| build_neuron(rng, NeuronLocation::Stable, NeuronSize::Typical)) .collect(); - let active_neurons: Vec<_> = (0..100) + let active_neurons: Vec<_> = (0..active_count) .map(|_| build_neuron(rng, NeuronLocation::Heap, NeuronSize::Typical)) .collect(); let neurons: BTreeMap = inactive_neurons @@ -138,7 +142,7 @@ fn set_up_neuron_store(rng: &mut impl RngCore) -> NeuronStore { #[bench(raw)] fn add_neuron_active_typical() -> BenchResult { let mut rng = new_rng(); - let mut neuron_store = set_up_neuron_store(&mut rng); + let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200); let neuron = build_neuron(&mut rng, NeuronLocation::Heap, NeuronSize::Typical); bench_fn(|| { @@ -149,7 +153,7 @@ fn add_neuron_active_typical() -> BenchResult { #[bench(raw)] fn add_neuron_active_maximum() -> BenchResult { let mut rng = new_rng(); - let mut neuron_store = set_up_neuron_store(&mut rng); + let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200); let neuron = build_neuron(&mut rng, NeuronLocation::Heap, NeuronSize::Maximum); bench_fn(|| { @@ -160,7 +164,7 @@ fn add_neuron_active_maximum() -> BenchResult { #[bench(raw)] fn add_neuron_inactive_typical() -> BenchResult { let mut rng = new_rng(); - let mut neuron_store = set_up_neuron_store(&mut rng); + let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200); let neuron = build_neuron(&mut rng, NeuronLocation::Stable, NeuronSize::Typical); bench_fn(|| { @@ -171,10 +175,25 @@ fn add_neuron_inactive_typical() -> BenchResult { #[bench(raw)] fn add_neuron_inactive_maximum() -> BenchResult { let mut rng = new_rng(); - let mut neuron_store = set_up_neuron_store(&mut rng); + let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200); let neuron = build_neuron(&mut rng, NeuronLocation::Stable, NeuronSize::Maximum); bench_fn(|| { neuron_store.add_neuron(neuron).unwrap(); }) } + +#[bench(raw)] +fn range_neurons_performance() -> BenchResult { + let mut rng = new_rng(); + let _neuron_store = set_up_neuron_store(&mut rng, 100, 200); + + bench_fn(|| { + with_stable_neuron_store(|stable_store| { + let iter = stable_store.range_neurons(..); + for n in iter { + n.id(); + } + }); + }) +} diff --git a/rs/nns/governance/src/storage/neurons.rs b/rs/nns/governance/src/storage/neurons.rs index e2a8bde8bd0..7cfb5e81081 100644 --- a/rs/nns/governance/src/storage/neurons.rs +++ b/rs/nns/governance/src/storage/neurons.rs @@ -17,7 +17,8 @@ use prost::Message; use std::{ borrow::Cow, collections::{BTreeMap as HeapBTreeMap, BTreeSet as HeapBTreeSet, HashMap}, - ops::RangeBounds, + iter::Peekable, + ops::{Bound as RangeBound, RangeBounds}, }; // Because many arguments are needed to construct a StableNeuronStore, there is @@ -343,10 +344,85 @@ where /// Returns the next neuron_id equal to or higher than the provided neuron_id pub fn range_neurons(&self, range: R) -> impl Iterator + '_ where - R: RangeBounds, + R: RangeBounds + Clone, { - self.main.range(range).map(|(neuron_id, neuron)| { - self.reconstitute_neuron(neuron_id, neuron, NeuronSections::all()) + let (start, end) = get_range_boundaries(range.clone()); + + // We want our ranges for sub iterators to include start and end + let hotkeys_range = (start, u64::MIN)..=(end, u64::MAX); + let ballots_range = (start, u64::MIN)..=(end, u64::MAX); + + let followees_range = FolloweesKey { + follower_id: start, + ..FolloweesKey::MIN + }..=FolloweesKey { + follower_id: end, + ..FolloweesKey::MAX + }; + + // Instead of randomly accessing each map (which is expensive for StableBTreeMaps), we + // use a range query on each map, and iterate all of the ranges at the same time. This + // uses 40% less instructions compared to just iterating on the top level range, and + // accessing the other maps for each neuron_id. + // This is only possible because EVERY range begins with NeuronId, so that their ordering + // is the same in respect to the main range's neurons. + + let main_range = self.main.range(range.clone()); + let mut hot_keys_iter = self.hot_keys_map.range(hotkeys_range).peekable(); + let mut recent_ballots_iter = self.recent_ballots_map.range(ballots_range).peekable(); + let mut followees_iter = self.followees_map.range(followees_range).peekable(); + let mut known_neuron_data_iter = self.known_neuron_data_map.range(range.clone()).peekable(); + let mut transfer_iter = self.transfer_map.range(range).peekable(); + + main_range.map(move |(main_neuron_id, abridged_neuron)| { + // We'll collect data from all relevant maps for this neuron_id + + let hot_keys = collect_values_for_neuron_from_peekable_range( + &mut hot_keys_iter, + main_neuron_id, + |((neuron_id, _), _)| *neuron_id, + |((_, _), principal)| PrincipalId::from(principal), + ); + + let ballots = collect_values_for_neuron_from_peekable_range( + &mut recent_ballots_iter, + main_neuron_id, + |((neuron_id, _), _)| *neuron_id, + |((_, _), ballot_info)| ballot_info, + ); + + let followees = collect_values_for_neuron_from_peekable_range( + &mut followees_iter, + main_neuron_id, + |(followees_key, _)| followees_key.follower_id, + |x| x, + ); + + let current_known_neuron_data = collect_values_for_neuron_from_peekable_range( + &mut known_neuron_data_iter, + main_neuron_id, + |(neuron_id, _)| *neuron_id, + |(_, known_neuron_data)| known_neuron_data, + ) + .pop(); + + let current_transfer = collect_values_for_neuron_from_peekable_range( + &mut transfer_iter, + main_neuron_id, + |(neuron_id, _)| *neuron_id, + |(_, transfer)| transfer, + ) + .pop(); + + Neuron::from(DecomposedNeuron { + id: main_neuron_id, + main: abridged_neuron, + hot_keys, + recent_ballots: ballots, + followees: self.reconstitute_followees_from_range(followees.into_iter()), + known_neuron_data: current_known_neuron_data, + transfer: current_transfer, + }) }) } @@ -438,6 +514,13 @@ where }; let range = self.followees_map.range(first..=last); + self.reconstitute_followees_from_range(range) + } + + fn reconstitute_followees_from_range( + &self, + range: impl Iterator, + ) -> HashMap { range // create groups for topics .group_by(|(followees_key, _followee_id)| followees_key.topic) @@ -612,6 +695,56 @@ impl Storable for NeuronStakeTransfer { // Private Helpers // =============== +fn get_range_boundaries(range_bound: R) -> (NeuronId, NeuronId) +where + R: RangeBounds, +{ + let start = match range_bound.start_bound() { + RangeBound::Included(start) => *start, + RangeBound::Excluded(start) => *start, + RangeBound::Unbounded => NeuronId::MIN, + }; + let end = match range_bound.end_bound() { + RangeBound::Included(end) => *end, + RangeBound::Excluded(end) => *end, + RangeBound::Unbounded => NeuronId::MAX, + }; + + (start, end) +} + +/// Skips until extract_neuron_id(item) == target_neuron_id, then maps corresponding items through +/// extract_value and returns the result as a vector. +fn collect_values_for_neuron_from_peekable_range( + iter: &mut Peekable, + target_neuron_id: NeuronId, + extract_neuron_id: FNeuronId, + extract_value: FValue, +) -> Vec +where + Iter: Iterator, + FNeuronId: Fn(&T) -> NeuronId, + FValue: Fn(T) -> R, +{ + let mut result = vec![]; + + while let Some(item) = iter.peek() { + let neuron_id = extract_neuron_id(item); + if neuron_id > target_neuron_id { + break; + } + let item = iter + .next() + .expect("Peek had a value, but next did not! This should be impossible."); + + if neuron_id == target_neuron_id { + result.push(extract_value(item)); + } + } + + result +} + // This is copied from candid/src. Seems like their definition should be public, // but it's not. Seems to be an oversight. const PRINCIPAL_MAX_LENGTH_IN_BYTES: usize = 29; diff --git a/rs/nns/governance/src/storage/neurons/neurons_tests.rs b/rs/nns/governance/src/storage/neurons/neurons_tests.rs index 71d9d5622c4..2da3e35fe9c 100644 --- a/rs/nns/governance/src/storage/neurons/neurons_tests.rs +++ b/rs/nns/governance/src/storage/neurons/neurons_tests.rs @@ -512,3 +512,54 @@ fn test_abridged_neuron_size() { // headroom. assert_eq!(abridged_neuron.encoded_len(), 184); } + +#[test] +fn test_range_neurons_reconstitutes_fully() { + let mut store = new_heap_based(); + let neurons = { + let mut neurons = vec![]; + for i in 1..10 { + let neuron = create_model_neuron(i); + store.create(neuron.clone()).unwrap(); + neurons.push(neuron); + } + neurons + }; + + let result = store.range_neurons(..).collect::>(); + + assert_eq!(result, neurons); +} + +#[test] +fn test_range_neurons_ranges_work_correctly() { + // This test is here to ensure that the conversions that happen inside range_neurons are correct. + let mut store = new_heap_based(); + let neurons = { + let mut neurons = vec![]; + for i in 1..=10 { + let neuron = create_model_neuron(i); + store.create(neuron.clone()).unwrap(); + neurons.push(neuron); + } + neurons + }; + + let result = store + .range_neurons(NeuronId::from_u64(2)..NeuronId::from_u64(9)) + .collect::>(); + assert_eq!(result, neurons[1..8]); + + let result = store + .range_neurons(NeuronId::from_u64(2)..=NeuronId::from_u64(3)) + .collect::>(); + assert_eq!(result, neurons[1..3]); + + let result = store + .range_neurons(( + std::ops::Bound::Excluded(NeuronId::from_u64(2)), + std::ops::Bound::Included(NeuronId::from_u64(3)), + )) + .collect::>(); + assert_eq!(result, neurons[2..3]); +}