Skip to content

Commit

Permalink
refactor(nns): Improve range neurons performance for stable store (#2033)

Browse files Browse the repository at this point in the history

When scanning neurons in the main StableBTreeMap, instead of doing a
separate traversal of the auxiliary StableBTreeMaps for EACH neuron in
the result set, ranges of each auxiliary StableBTreeMap are used (and
scanned in parallel).

This yields roughly a 40% performance improvement.

This method, while not currently extensively used, will be used more
after the migration of neurons to stable storage.

Benchmark comparison (canbench output, showing the improvement):

```
// Output from benchmark test
Benchmark: range_neurons_performance
  total:
    instructions: 47.07 M (improved by 41.47%)
    heap_increase: 0 pages (no change)
    stable_memory_increase: 0 pages (no change)

```

Raw results before the change:

```
range_neurons_performance:
  total:
    instructions: 80415437
    heap_increase: 0
    stable_memory_increase: 0
  scopes: {}
```

After the change (what's in PR):

```
  range_neurons_performance:
    total:
      instructions: 47070657
      heap_increase: 0
      stable_memory_increase: 0
    scopes: {}
```
  • Loading branch information
max-dfinity authored Oct 15, 2024
1 parent 9fc5ab4 commit 4bd8e1d
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 16 deletions.
16 changes: 11 additions & 5 deletions rs/nns/governance/canbench/canbench_results.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,32 @@
benches:
add_neuron_active_maximum:
total:
instructions: 45003037
instructions: 35991301
heap_increase: 1
stable_memory_increase: 0
scopes: {}
add_neuron_active_typical:
total:
instructions: 2703302
instructions: 1836362
heap_increase: 0
stable_memory_increase: 0
scopes: {}
add_neuron_inactive_maximum:
total:
instructions: 122465697
instructions: 98457016
heap_increase: 1
stable_memory_increase: 0
scopes: {}
add_neuron_inactive_typical:
total:
instructions: 10581822
instructions: 7574043
heap_increase: 0
stable_memory_increase: 0
scopes: {}
version: 0.1.3
range_neurons_performance:
total:
instructions: 47070657
heap_increase: 0
stable_memory_increase: 0
scopes: {}
version: 0.1.7
33 changes: 26 additions & 7 deletions rs/nns/governance/src/neuron_store/benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,17 @@ fn build_neuron(rng: &mut impl RngCore, location: NeuronLocation, size: NeuronSi
neuron
}

fn set_up_neuron_store(rng: &mut impl RngCore) -> NeuronStore {
fn set_up_neuron_store(
rng: &mut impl RngCore,
active_count: u64,
inactive_count: u64,
) -> NeuronStore {
// The numbers of active and inactive neurons are supplied by the caller (typically 100
// active and 200 inactive). They are not very realistic sizes, but it would take too long
// to prepare a realistic number of neurons for each benchmark.
let inactive_neurons: Vec<_> = (0..200)
let inactive_neurons: Vec<_> = (0..inactive_count)
.map(|_| build_neuron(rng, NeuronLocation::Stable, NeuronSize::Typical))
.collect();
let active_neurons: Vec<_> = (0..100)
let active_neurons: Vec<_> = (0..active_count)
.map(|_| build_neuron(rng, NeuronLocation::Heap, NeuronSize::Typical))
.collect();
let neurons: BTreeMap<u64, Neuron> = inactive_neurons
Expand All @@ -138,7 +142,7 @@ fn set_up_neuron_store(rng: &mut impl RngCore) -> NeuronStore {
#[bench(raw)]
fn add_neuron_active_typical() -> BenchResult {
let mut rng = new_rng();
let mut neuron_store = set_up_neuron_store(&mut rng);
let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200);
let neuron = build_neuron(&mut rng, NeuronLocation::Heap, NeuronSize::Typical);

bench_fn(|| {
Expand All @@ -149,7 +153,7 @@ fn add_neuron_active_typical() -> BenchResult {
#[bench(raw)]
fn add_neuron_active_maximum() -> BenchResult {
let mut rng = new_rng();
let mut neuron_store = set_up_neuron_store(&mut rng);
let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200);
let neuron = build_neuron(&mut rng, NeuronLocation::Heap, NeuronSize::Maximum);

bench_fn(|| {
Expand All @@ -160,7 +164,7 @@ fn add_neuron_active_maximum() -> BenchResult {
#[bench(raw)]
fn add_neuron_inactive_typical() -> BenchResult {
let mut rng = new_rng();
let mut neuron_store = set_up_neuron_store(&mut rng);
let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200);
let neuron = build_neuron(&mut rng, NeuronLocation::Stable, NeuronSize::Typical);

bench_fn(|| {
Expand All @@ -171,10 +175,25 @@ fn add_neuron_inactive_typical() -> BenchResult {
#[bench(raw)]
fn add_neuron_inactive_maximum() -> BenchResult {
let mut rng = new_rng();
let mut neuron_store = set_up_neuron_store(&mut rng);
let mut neuron_store = set_up_neuron_store(&mut rng, 100, 200);
let neuron = build_neuron(&mut rng, NeuronLocation::Stable, NeuronSize::Maximum);

bench_fn(|| {
neuron_store.add_neuron(neuron).unwrap();
})
}

#[bench(raw)]
fn range_neurons_performance() -> BenchResult {
    let mut rng = new_rng();
    // Populates the stable neuron store as a side effect; the binding itself is unused.
    let _neuron_store = set_up_neuron_store(&mut rng, 100, 200);

    bench_fn(|| {
        with_stable_neuron_store(|stable_store| {
            // Walk the full range, touching each reconstituted neuron so the work
            // of materializing it cannot be skipped.
            for neuron in stable_store.range_neurons(..) {
                neuron.id();
            }
        });
    })
}
141 changes: 137 additions & 4 deletions rs/nns/governance/src/storage/neurons.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ use prost::Message;
use std::{
borrow::Cow,
collections::{BTreeMap as HeapBTreeMap, BTreeSet as HeapBTreeSet, HashMap},
ops::RangeBounds,
iter::Peekable,
ops::{Bound as RangeBound, RangeBounds},
};

// Because many arguments are needed to construct a StableNeuronStore, there is
Expand Down Expand Up @@ -343,10 +344,85 @@ where
/// Returns the next neuron_id equal to or higher than the provided neuron_id
pub fn range_neurons<R>(&self, range: R) -> impl Iterator<Item = Neuron> + '_
where
R: RangeBounds<NeuronId>,
R: RangeBounds<NeuronId> + Clone,
{
self.main.range(range).map(|(neuron_id, neuron)| {
self.reconstitute_neuron(neuron_id, neuron, NeuronSections::all())
let (start, end) = get_range_boundaries(range.clone());

// We want our ranges for sub iterators to include start and end
let hotkeys_range = (start, u64::MIN)..=(end, u64::MAX);
let ballots_range = (start, u64::MIN)..=(end, u64::MAX);

let followees_range = FolloweesKey {
follower_id: start,
..FolloweesKey::MIN
}..=FolloweesKey {
follower_id: end,
..FolloweesKey::MAX
};

// Instead of randomly accessing each map (which is expensive for StableBTreeMaps), we
// use a range query on each map, and iterate all of the ranges at the same time. This
// uses 40% less instructions compared to just iterating on the top level range, and
// accessing the other maps for each neuron_id.
// This is only possible because EVERY range begins with NeuronId, so that their ordering
// is the same in respect to the main range's neurons.

let main_range = self.main.range(range.clone());
let mut hot_keys_iter = self.hot_keys_map.range(hotkeys_range).peekable();
let mut recent_ballots_iter = self.recent_ballots_map.range(ballots_range).peekable();
let mut followees_iter = self.followees_map.range(followees_range).peekable();
let mut known_neuron_data_iter = self.known_neuron_data_map.range(range.clone()).peekable();
let mut transfer_iter = self.transfer_map.range(range).peekable();

main_range.map(move |(main_neuron_id, abridged_neuron)| {
// We'll collect data from all relevant maps for this neuron_id

let hot_keys = collect_values_for_neuron_from_peekable_range(
&mut hot_keys_iter,
main_neuron_id,
|((neuron_id, _), _)| *neuron_id,
|((_, _), principal)| PrincipalId::from(principal),
);

let ballots = collect_values_for_neuron_from_peekable_range(
&mut recent_ballots_iter,
main_neuron_id,
|((neuron_id, _), _)| *neuron_id,
|((_, _), ballot_info)| ballot_info,
);

let followees = collect_values_for_neuron_from_peekable_range(
&mut followees_iter,
main_neuron_id,
|(followees_key, _)| followees_key.follower_id,
|x| x,
);

let current_known_neuron_data = collect_values_for_neuron_from_peekable_range(
&mut known_neuron_data_iter,
main_neuron_id,
|(neuron_id, _)| *neuron_id,
|(_, known_neuron_data)| known_neuron_data,
)
.pop();

let current_transfer = collect_values_for_neuron_from_peekable_range(
&mut transfer_iter,
main_neuron_id,
|(neuron_id, _)| *neuron_id,
|(_, transfer)| transfer,
)
.pop();

Neuron::from(DecomposedNeuron {
id: main_neuron_id,
main: abridged_neuron,
hot_keys,
recent_ballots: ballots,
followees: self.reconstitute_followees_from_range(followees.into_iter()),
known_neuron_data: current_known_neuron_data,
transfer: current_transfer,
})
})
}

Expand Down Expand Up @@ -438,6 +514,13 @@ where
};
let range = self.followees_map.range(first..=last);

self.reconstitute_followees_from_range(range)
}

fn reconstitute_followees_from_range(
&self,
range: impl Iterator<Item = (FolloweesKey, NeuronId)>,
) -> HashMap</* topic ID */ i32, Followees> {
range
// create groups for topics
.group_by(|(followees_key, _followee_id)| followees_key.topic)
Expand Down Expand Up @@ -612,6 +695,56 @@ impl Storable for NeuronStakeTransfer {
// Private Helpers
// ===============

/// Widens an arbitrary `RangeBounds<NeuronId>` into an inclusive `(start, end)` pair.
///
/// `Excluded` bounds are deliberately treated the same as `Included` ones: the result is
/// only used to build (possibly slightly-too-wide) inclusive ranges over the auxiliary
/// maps, while the main map's range still honors the exact bounds. Auxiliary entries for a
/// neuron id outside the main range are never matched, so the widening does not affect
/// results.
fn get_range_boundaries<R>(range_bound: R) -> (NeuronId, NeuronId)
where
    R: RangeBounds<NeuronId>,
{
    let start = match range_bound.start_bound() {
        RangeBound::Included(start) | RangeBound::Excluded(start) => *start,
        RangeBound::Unbounded => NeuronId::MIN,
    };
    let end = match range_bound.end_bound() {
        RangeBound::Included(end) | RangeBound::Excluded(end) => *end,
        RangeBound::Unbounded => NeuronId::MAX,
    };

    (start, end)
}

/// Consumes items from `iter` while `extract_neuron_id(item) <= target_neuron_id`, and
/// returns (via `extract_value`) the values of the items whose id equals
/// `target_neuron_id`.
///
/// The iterator is left positioned at the first item whose id is greater than the target,
/// which is what makes repeated calls with ascending targets efficient. Items are assumed
/// to be in ascending id order (as produced by a BTreeMap range).
///
/// The id type is generic (`K: PartialOrd`) so the helper can be reused (and unit-tested)
/// with any ordered key; call sites in this file use `NeuronId`.
fn collect_values_for_neuron_from_peekable_range<Iter, T, R, K, FNeuronId, FValue>(
    iter: &mut Peekable<Iter>,
    target_neuron_id: K,
    extract_neuron_id: FNeuronId,
    extract_value: FValue,
) -> Vec<R>
where
    K: PartialOrd,
    Iter: Iterator<Item = T>,
    FNeuronId: Fn(&T) -> K,
    FValue: Fn(T) -> R,
{
    let mut result = vec![];

    // `next_if` consumes an item only while the predicate holds, so the first item past the
    // target stays in the iterator for the next call (no peek-then-expect dance needed).
    while let Some(item) = iter.next_if(|item| !(extract_neuron_id(item) > target_neuron_id)) {
        if extract_neuron_id(&item) == target_neuron_id {
            result.push(extract_value(item));
        }
    }

    result
}

// This is copied from candid/src. Seems like their definition should be public,
// but it's not. Seems to be an oversight.
const PRINCIPAL_MAX_LENGTH_IN_BYTES: usize = 29;
Expand Down
51 changes: 51 additions & 0 deletions rs/nns/governance/src/storage/neurons/neurons_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -512,3 +512,54 @@ fn test_abridged_neuron_size() {
// headroom.
assert_eq!(abridged_neuron.encoded_len(), 184);
}

#[test]
fn test_range_neurons_reconstitutes_fully() {
    let mut store = new_heap_based();

    // Create neurons with ids 1..10, keeping a copy of each in creation order.
    let created: Vec<_> = (1..10)
        .map(|id| {
            let neuron = create_model_neuron(id);
            store.create(neuron.clone()).unwrap();
            neuron
        })
        .collect();

    // Reading everything back through range_neurons must reproduce the neurons exactly,
    // proving that all auxiliary data is reconstituted.
    let read_back: Vec<_> = store.range_neurons(..).collect();

    assert_eq!(read_back, created);
}

#[test]
fn test_range_neurons_ranges_work_correctly() {
    // Ensures the bound conversions that happen inside range_neurons are correct.
    let mut store = new_heap_based();
    let neurons: Vec<_> = (1..=10)
        .map(|i| {
            let neuron = create_model_neuron(i);
            store.create(neuron.clone()).unwrap();
            neuron
        })
        .collect();

    // Half-open range [2, 9).
    let half_open: Vec<_> = store
        .range_neurons(NeuronId::from_u64(2)..NeuronId::from_u64(9))
        .collect();
    assert_eq!(half_open, neurons[1..8]);

    // Inclusive range [2, 3].
    let inclusive: Vec<_> = store
        .range_neurons(NeuronId::from_u64(2)..=NeuronId::from_u64(3))
        .collect();
    assert_eq!(inclusive, neurons[1..3]);

    // Excluded start, included end: (2, 3].
    let bounds = (
        std::ops::Bound::Excluded(NeuronId::from_u64(2)),
        std::ops::Bound::Included(NeuronId::from_u64(3)),
    );
    let excluded_start: Vec<_> = store.range_neurons(bounds).collect();
    assert_eq!(excluded_start, neurons[2..3]);
}

0 comments on commit 4bd8e1d

Please sign in to comment.