Skip to content

Commit

Permalink
Merge branch 'dderler/improve-xnet-test' into 'master'
Browse files Browse the repository at this point in the history
test: Improve XNet compatibility tests

With this change there will be constant XNet load throughout the entire compatibility test. This way it is ensured that there will be messages around during upgrade/downgrade of the subnets and we implicitly test backward/forward compatibility of the related structures. 

See merge request dfinity-lab/public/ic!19831
  • Loading branch information
derlerd-dfinity committed Jun 28, 2024
2 parents d5319fa + 037c679 commit 88f1a3c
Show file tree
Hide file tree
Showing 2 changed files with 204 additions and 87 deletions.
71 changes: 53 additions & 18 deletions rs/tests/src/message_routing/compatibility.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@ Runbook::
0. Deploy 1 root and 2 app subnets and install NNS canisters onto root,
all running mainnet version.
1. Bless current version.
2. Run XNet test between two app subnets (success criteria same as for SLO test).
3. Upgrade one app subnet to current version.
4. Run XNet test again.
5. Downgrade back to mainnet.
6. Run XNet test again.
2. Deploy and start XNet test canisters for long running XNet test
3. Run XNet test between two app subnets (success criteria same as for SLO test).
4. Upgrade one app subnet to current version.
5. Run XNet test again.
6. Downgrade back to mainnet.
7. Run XNet test again.
8. Tear down XNet test canisters for long running XNet test and check success
(success conditions for the long running test are more generous, as the main
expected signal is that upgrade/downgrade with messages around will succeed
and no messages are lost)
Success::
1. XNet test successfully completes for all version combinations
Expand All @@ -34,7 +39,7 @@ use crate::orchestrator::utils::{
UpdateImageType,
},
};
use crate::util::block_on;
use crate::util::{block_on, runtime_from_url};
use ic_registry_subnet_type::SubnetType;
use slog::{info, Logger};
use std::time::Duration;
Expand Down Expand Up @@ -118,7 +123,30 @@ pub async fn test_async(env: TestEnv) {
.map(|s| (s.subnet_id, s.clone(), s.nodes().next().unwrap()))
.collect();

let xnet_test_config = xnet_slo_test::Config::new(2, 1, Duration::from_secs(30), 10);
let app_subnet_runtimes = app_subnets
.clone()
.into_iter()
.map(|(_, _, node)| node)
.map(|node| runtime_from_url(node.get_public_url(), node.effective_canister_id()));

let xnet_config = xnet_slo_test::Config::new(2, 1, Duration::from_secs(30), 10);
let long_xnet_config = xnet_slo_test::Config::new_with_custom_thresholds(
2,
1,
// Given that we use `deploy_and_start` and `tear_down` directly
// the runtime parameter will be ignored for the main test run
// and only used when checking the success of the test. We set
// it conservatively low so that the success evaluation is more
// generous.
Duration::from_secs(90),
10,
0.3,
// Given that there are a couple of subnet upgrades happening
// while the long running test is running we are generous
// with error thresholds.
50.0,
40,
);

let mainnet_version = env
.read_dependency_to_string("testnet/mainnet_nns_revision.txt")
Expand Down Expand Up @@ -147,12 +175,17 @@ pub async fn test_async(env: TestEnv) {

info!(&logger, "Blessed all versions.");

info!(&logger, "Starting long running XNet load");
let runtimes = app_subnet_runtimes.clone().collect::<Vec<_>>();
let long_running_canisters =
xnet_slo_test::deploy_and_start(env.clone(), &runtimes, &long_xnet_config, &logger).await;

info!(&logger, "Starting XNet test between 2 app subnets.");

xnet_slo_test::test_async_impl(
env.clone(),
app_subnets.clone().into_iter().map(|(_, _, node)| node),
xnet_test_config.clone(),
app_subnet_runtimes.clone(),
xnet_config.clone(),
&logger,
)
.await;
Expand All @@ -172,8 +205,8 @@ pub async fn test_async(env: TestEnv) {

xnet_slo_test::test_async_impl(
env.clone(),
app_subnets.clone().into_iter().map(|(_, _, node)| node),
xnet_test_config.clone(),
app_subnet_runtimes.clone(),
xnet_config.clone(),
&logger,
)
.await;
Expand All @@ -191,13 +224,15 @@ pub async fn test_async(env: TestEnv) {

info!(&logger, "Starting XNet test between 2 app subnets.");

xnet_slo_test::test_async_impl(
env,
app_subnets.clone().into_iter().map(|(_, _, node)| node),
xnet_test_config,
&logger,
)
.await;
xnet_slo_test::test_async_impl(env, app_subnet_runtimes, xnet_config.clone(), &logger).await;

info!(&logger, "Tearing down long running canisters.");

let metrics = xnet_slo_test::tear_down(&long_running_canisters, &logger).await;
assert!(
xnet_slo_test::check_success(metrics, &long_xnet_config, &logger),
"Long running canisters didn't meet success conditions."
);
}

async fn upgrade_to(
Expand Down
Loading

0 comments on commit 88f1a3c

Please sign in to comment.