Skip to content

Commit

Permalink
refactor(nns): Improve range neurons performance for stable store (#2033)

Browse files Browse the repository at this point in the history

When scanning neurons in the main StableBTreeMap, instead of doing a
separate traversal of the auxiliary StableBTreeMaps for EACH neuron in
the result set, ranges of each auxiliary StableBTreeMap are used (and
scanned in parallel).

This yields roughly a 40% performance improvement.

This method, while not currently extensively used, will be used more
after the migration of neurons to stable storage.

Benchmark comparison (canbench output, showing the improvement):

```
// Output from benchmark test
Benchmark: range_neurons_performance
  total:
    instructions: 47.07 M (improved by 41.47%)
    heap_increase: 0 pages (no change)
    stable_memory_increase: 0 pages (no change)

```

Raw results before the change:

```
range_neurons_performance:
  total:
    instructions: 80415437
    heap_increase: 0
    stable_memory_increase: 0
  scopes: {}
```

After the change (what's in PR):

```
  range_neurons_performance:
    total:
      instructions: 47070657
      heap_increase: 0
      stable_memory_increase: 0
    scopes: {}
```
  • Loading branch information
max-dfinity authored Oct 15, 2024
1 parent 9fc5ab4 commit 4bd8e1d
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 16 deletions.
16 changes: 11 additions & 5 deletions rs/nns/governance/canbench/canbench_results.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,32 @@
benches:
add_neuron_active_maximum:
total:
instructions: 45003037
instructions: 35991301
heap_increase: 1
stable_memory_increase: 0
scopes: {}
add_neuron_active_typical:
total:
instructions: 2703302
instructions: 1836362
heap_increase: 0
stable_memory_increase: 0
scopes: {}
add_neuron_inactive_maximum:
total:
instructions: 122465697
instructions: 98457016
heap_increase: 1
stable_memory_increase: 0
scopes: {}
add_neuron_inactive_typical:
total:
instructions: 10581822
instructions: 7574043
heap_increase: 0
stable_memory_increase: 0
scopes: {}
version: 0.1.3
range_neurons_performance:
total:
instructions: 47070657
heap_increase: 0
stable_memory_increase: 0
scopes: {}
version: 0.1.7
33 changes: 26 additions & 7 deletions rs/nns/governance/src/neuron_store/benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,17 @@ fn build_neuron(rng: &mut impl RngCore, location: NeuronLocation, size: NeuronSi
neuron
}

fn set_up_neuron_store(rng: &mut impl RngCore) -> NeuronStore {
fn set_up_neuron_store(
rng: &mut impl RngCore,
active_count: u64,
inactive_count: u64,
) -> NeuronStore {
// The numbers of active and inactive neurons are supplied by the caller (typically 100
// active and 200 inactive). They are not very realistic sizes, but it would take too long
// to prepare a realistic number of neurons for each benchmark.
let inactive_neurons: Vec<_> = (0..200)
let inactive_neurons: Vec<_> = (0..inactive_count)
.map(|_| build_neuron(rng, NeuronLocation::Stable, NeuronSize::Typical))
.collect();
let active_neurons: Vec<_> = (0..100)
let active_neurons: Vec<_> = (0..active_count)
.map(|_| build_neuron(rng, NeuronLocation::Heap, NeuronSize::Typical))
.collect();
let neurons: BTreeMap<u64, Neuron> = inactive_neurons
Expand All @@ -138,7 +142,7 @@ fn set_up_neuron_store(rng: &mut impl RngCore) -> NeuronStore {
#[bench(raw)]
fn add_neuron_active_typical() -> BenchResult {
let mut rng = new_rng();
let mut neuron_store = set_up_neuron_store(&mut rng);
let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200);
let neuron = build_neuron(&mut rng, NeuronLocation::Heap, NeuronSize::Typical);

bench_fn(|| {
Expand All @@ -149,7 +153,7 @@ fn add_neuron_active_typical() -> BenchResult {
#[bench(raw)]
fn add_neuron_active_maximum() -> BenchResult {
let mut rng = new_rng();
let mut neuron_store = set_up_neuron_store(&mut rng);
let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200);
let neuron = build_neuron(&mut rng, NeuronLocation::Heap, NeuronSize::Maximum);

bench_fn(|| {
Expand All @@ -160,7 +164,7 @@ fn add_neuron_active_maximum() -> BenchResult {
#[bench(raw)]
fn add_neuron_inactive_typical() -> BenchResult {
let mut rng = new_rng();
let mut neuron_store = set_up_neuron_store(&mut rng);
let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200);
let neuron = build_neuron(&mut rng, NeuronLocation::Stable, NeuronSize::Typical);

bench_fn(|| {
Expand All @@ -171,10 +175,25 @@ fn add_neuron_inactive_typical() -> BenchResult {
#[bench(raw)]
fn add_neuron_inactive_maximum() -> BenchResult {
let mut rng = new_rng();
let mut neuron_store = set_up_neuron_store(&mut rng);
let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200);
let neuron = build_neuron(&mut rng, NeuronLocation::Stable, NeuronSize::Maximum);

bench_fn(|| {
neuron_store.add_neuron(neuron).unwrap();
})
}

#[bench(raw)]
fn range_neurons_performance() -> BenchResult {
    let mut rng = new_rng();
    // Populates the stable neuron store as a side effect; the binding itself is unused.
    let _neuron_store = set_up_neuron_store(&mut rng, 100, 200);

    bench_fn(|| {
        with_stable_neuron_store(|stable_store| {
            // Walk the full range, touching each reconstituted neuron so the work
            // of materializing it cannot be skipped.
            for neuron in stable_store.range_neurons(..) {
                neuron.id();
            }
        });
    })
}
141 changes: 137 additions & 4 deletions rs/nns/governance/src/storage/neurons.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ use prost::Message;
use std::{
borrow::Cow,
collections::{BTreeMap as HeapBTreeMap, BTreeSet as HeapBTreeSet, HashMap},
ops::RangeBounds,
iter::Peekable,
ops::{Bound as RangeBound, RangeBounds},
};

// Because many arguments are needed to construct a StableNeuronStore, there is
Expand Down Expand Up @@ -343,10 +344,85 @@ where
/// Returns the next neuron_id equal to or higher than the provided neuron_id
pub fn range_neurons<R>(&self, range: R) -> impl Iterator<Item = Neuron> + '_
where
R: RangeBounds<NeuronId>,
R: RangeBounds<NeuronId> + Clone,
{
self.main.range(range).map(|(neuron_id, neuron)| {
self.reconstitute_neuron(neuron_id, neuron, NeuronSections::all())
let (start, end) = get_range_boundaries(range.clone());

// We want our ranges for sub iterators to include start and end
let hotkeys_range = (start, u64::MIN)..=(end, u64::MAX);
let ballots_range = (start, u64::MIN)..=(end, u64::MAX);

let followees_range = FolloweesKey {
follower_id: start,
..FolloweesKey::MIN
}..=FolloweesKey {
follower_id: end,
..FolloweesKey::MAX
};

// Instead of randomly accessing each map (which is expensive for StableBTreeMaps), we
// use a range query on each map, and iterate all of the ranges at the same time. This
// uses 40% less instructions compared to just iterating on the top level range, and
// accessing the other maps for each neuron_id.
// This is only possible because EVERY range begins with NeuronId, so that their ordering
// is the same in respect to the main range's neurons.

let main_range = self.main.range(range.clone());
let mut hot_keys_iter = self.hot_keys_map.range(hotkeys_range).peekable();
let mut recent_ballots_iter = self.recent_ballots_map.range(ballots_range).peekable();
let mut followees_iter = self.followees_map.range(followees_range).peekable();
let mut known_neuron_data_iter = self.known_neuron_data_map.range(range.clone()).peekable();
let mut transfer_iter = self.transfer_map.range(range).peekable();

main_range.map(move |(main_neuron_id, abridged_neuron)| {
// We'll collect data from all relevant maps for this neuron_id

let hot_keys = collect_values_for_neuron_from_peekable_range(
&mut hot_keys_iter,
main_neuron_id,
|((neuron_id, _), _)| *neuron_id,
|((_, _), principal)| PrincipalId::from(principal),
);

let ballots = collect_values_for_neuron_from_peekable_range(
&mut recent_ballots_iter,
main_neuron_id,
|((neuron_id, _), _)| *neuron_id,
|((_, _), ballot_info)| ballot_info,
);

let followees = collect_values_for_neuron_from_peekable_range(
&mut followees_iter,
main_neuron_id,
|(followees_key, _)| followees_key.follower_id,
|x| x,
);

let current_known_neuron_data = collect_values_for_neuron_from_peekable_range(
&mut known_neuron_data_iter,
main_neuron_id,
|(neuron_id, _)| *neuron_id,
|(_, known_neuron_data)| known_neuron_data,
)
.pop();

let current_transfer = collect_values_for_neuron_from_peekable_range(
&mut transfer_iter,
main_neuron_id,
|(neuron_id, _)| *neuron_id,
|(_, transfer)| transfer,
)
.pop();

Neuron::from(DecomposedNeuron {
id: main_neuron_id,
main: abridged_neuron,
hot_keys,
recent_ballots: ballots,
followees: self.reconstitute_followees_from_range(followees.into_iter()),
known_neuron_data: current_known_neuron_data,
transfer: current_transfer,
})
})
}

Expand Down Expand Up @@ -438,6 +514,13 @@ where
};
let range = self.followees_map.range(first..=last);

self.reconstitute_followees_from_range(range)
}

fn reconstitute_followees_from_range(
&self,
range: impl Iterator<Item = (FolloweesKey, NeuronId)>,
) -> HashMap</* topic ID */ i32, Followees> {
range
// create groups for topics
.group_by(|(followees_key, _followee_id)| followees_key.topic)
Expand Down Expand Up @@ -612,6 +695,56 @@ impl Storable for NeuronStakeTransfer {
// Private Helpers
// ===============

/// Widens an arbitrary `RangeBounds<NeuronId>` into an inclusive `(start, end)` pair.
///
/// `Excluded` bounds are deliberately treated the same as `Included` ones: the result is
/// only used to build (possibly slightly-too-wide) inclusive ranges over the auxiliary
/// maps, while the main map's range still honors the exact bounds. Auxiliary entries for a
/// neuron id outside the main range are never matched, so the widening does not affect
/// results.
fn get_range_boundaries<R>(range_bound: R) -> (NeuronId, NeuronId)
where
    R: RangeBounds<NeuronId>,
{
    let start = match range_bound.start_bound() {
        RangeBound::Included(start) | RangeBound::Excluded(start) => *start,
        RangeBound::Unbounded => NeuronId::MIN,
    };
    let end = match range_bound.end_bound() {
        RangeBound::Included(end) | RangeBound::Excluded(end) => *end,
        RangeBound::Unbounded => NeuronId::MAX,
    };

    (start, end)
}

/// Consumes items from `iter` while `extract_neuron_id(item) <= target_neuron_id`, and
/// returns (via `extract_value`) the values of the items whose id equals
/// `target_neuron_id`.
///
/// The iterator is left positioned at the first item whose id is greater than the target,
/// which is what makes repeated calls with ascending targets efficient. Items are assumed
/// to be in ascending id order (as produced by a BTreeMap range).
///
/// The id type is generic (`K: PartialOrd`) so the helper can be reused (and unit-tested)
/// with any ordered key; call sites in this file use `NeuronId`.
fn collect_values_for_neuron_from_peekable_range<Iter, T, R, K, FNeuronId, FValue>(
    iter: &mut Peekable<Iter>,
    target_neuron_id: K,
    extract_neuron_id: FNeuronId,
    extract_value: FValue,
) -> Vec<R>
where
    K: PartialOrd,
    Iter: Iterator<Item = T>,
    FNeuronId: Fn(&T) -> K,
    FValue: Fn(T) -> R,
{
    let mut result = vec![];

    // `next_if` consumes an item only while the predicate holds, so the first item past the
    // target stays in the iterator for the next call (no peek-then-expect dance needed).
    while let Some(item) = iter.next_if(|item| !(extract_neuron_id(item) > target_neuron_id)) {
        if extract_neuron_id(&item) == target_neuron_id {
            result.push(extract_value(item));
        }
    }

    result
}

// This is copied from candid/src. Seems like their definition should be public,
// but it's not. Seems to be an oversight.
const PRINCIPAL_MAX_LENGTH_IN_BYTES: usize = 29;
Expand Down
51 changes: 51 additions & 0 deletions rs/nns/governance/src/storage/neurons/neurons_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -512,3 +512,54 @@ fn test_abridged_neuron_size() {
// headroom.
assert_eq!(abridged_neuron.encoded_len(), 184);
}

#[test]
fn test_range_neurons_reconstitutes_fully() {
    let mut store = new_heap_based();

    // Create neurons with ids 1..10, keeping a copy of each in creation order.
    let created: Vec<_> = (1..10)
        .map(|id| {
            let neuron = create_model_neuron(id);
            store.create(neuron.clone()).unwrap();
            neuron
        })
        .collect();

    // Reading everything back through range_neurons must reproduce the neurons exactly,
    // proving that all auxiliary data is reconstituted.
    let read_back: Vec<_> = store.range_neurons(..).collect();

    assert_eq!(read_back, created);
}

#[test]
fn test_range_neurons_ranges_work_correctly() {
    // Ensures the bound conversions that happen inside range_neurons are correct.
    let mut store = new_heap_based();
    let neurons: Vec<_> = (1..=10)
        .map(|i| {
            let neuron = create_model_neuron(i);
            store.create(neuron.clone()).unwrap();
            neuron
        })
        .collect();

    // Half-open range [2, 9).
    let half_open: Vec<_> = store
        .range_neurons(NeuronId::from_u64(2)..NeuronId::from_u64(9))
        .collect();
    assert_eq!(half_open, neurons[1..8]);

    // Inclusive range [2, 3].
    let inclusive: Vec<_> = store
        .range_neurons(NeuronId::from_u64(2)..=NeuronId::from_u64(3))
        .collect();
    assert_eq!(inclusive, neurons[1..3]);

    // Excluded start, included end: (2, 3].
    let bounds = (
        std::ops::Bound::Excluded(NeuronId::from_u64(2)),
        std::ops::Bound::Included(NeuronId::from_u64(3)),
    );
    let excluded_start: Vec<_> = store.range_neurons(bounds).collect();
    assert_eq!(excluded_start, neurons[2..3]);
}

0 comments on commit 4bd8e1d

Please sign in to comment.