From 6b09303dadb4ef0461fb541b65a7138b187669d8 Mon Sep 17 00:00:00 2001 From: Leo Eichhorn <99166915+eichhorl@users.noreply.github.com> Date: Tue, 21 Jan 2025 13:32:58 +0100 Subject: [PATCH 1/3] refactor: CON-1434 Serialize current and next transcripts into `pb::NiDkgTranscript` (#3039) As the `NiDkgId` (and therefore the `NiDkgTag`) is already part of the `NiDkgTranscript`, serializing the `NiDkgTag` separately as part of `pb::TaggedNiDkgTranscript` is redundant. This PR instead serializes the current and next transcripts directly as `pb::NiDkgTranscript`. https://github.com/dfinity/ic/pull/2838 introduced the new block payload proto fields of type `pb::NiDkgTranscript`, and implemented the deserialization function to consider both new and old fields. This ensures the backwards/forwards compatibility of the change made by this PR. During upgrade: - The old summary still contains `pb::TaggedNiDkgTranscript`s, but they can still be read as the deserialization function supports both types. - Newly created summary blocks will instead contain `pb::NiDkgTranscript`s directly. During downgrade: - As the deserialization function supporting both types already exists in the previous version, Summary blocks created by the version of this PR (using `pb::NiDkgTranscript`) will be deserialized successfully. - Newly created summary blocks on the old version will then again contain `pb::TaggedNiDkgTranscript`s instead. Once this change is deployed to all mainnet subnets, we may remove the deprecated `pb::TaggedNiDkgTranscript` fields entirely. Additionally, this change forces us to fix a state machine setup function in `rs/state_machine_tests/src/lib.rs`, which incorrectly marked both genesis NiDkg transcripts as "high threshold". Therefore, this test setup function broke the (now explicit) invariant that for each (tag, transcript) in `block.dkg_transcript_map` it holds that `tag == transcript.tag`. --- rs/state_machine_tests/src/lib.rs | 8 ++++++-- rs/types/types/src/consensus/dkg.rs | 25 ++++++++----------------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/rs/state_machine_tests/src/lib.rs b/rs/state_machine_tests/src/lib.rs index 89545047162..d05a36ab8a6 100644 --- a/rs/state_machine_tests/src/lib.rs +++ b/rs/state_machine_tests/src/lib.rs @@ -422,9 +422,13 @@ fn make_nodes_registry( .build(); // Insert initial DKG transcripts + let mut high_threshold_transcript = ni_dkg_transcript.clone(); + high_threshold_transcript.dkg_id.dkg_tag = NiDkgTag::HighThreshold; + let mut low_threshold_transcript = ni_dkg_transcript; + low_threshold_transcript.dkg_id.dkg_tag = NiDkgTag::LowThreshold; let cup_contents = CatchUpPackageContents { - initial_ni_dkg_transcript_high_threshold: Some(ni_dkg_transcript.clone().into()), - initial_ni_dkg_transcript_low_threshold: Some(ni_dkg_transcript.into()), + initial_ni_dkg_transcript_high_threshold: Some(high_threshold_transcript.into()), + initial_ni_dkg_transcript_low_threshold: Some(low_threshold_transcript.into()), ..Default::default() }; registry_data_provider diff --git a/rs/types/types/src/consensus/dkg.rs b/rs/types/types/src/consensus/dkg.rs index 11615ed5719..5447b27b5cf 100644 --- a/rs/types/types/src/consensus/dkg.rs +++ b/rs/types/types/src/consensus/dkg.rs @@ -293,19 +293,12 @@ impl Summary { } } -fn build_tagged_transcripts_vec( +fn build_transcripts_vec( transcripts: &BTreeMap, -) -> Vec { +) -> Vec { transcripts - .iter() - .map(|(tag, transcript)| pb::TaggedNiDkgTranscript { - tag: pb::NiDkgTag::from(tag) as i32, - transcript: Some(pb::NiDkgTranscript::from(transcript)), - key_id: match tag { - NiDkgTag::HighThresholdForKey(k) => Some(pb::MasterPublicKeyId::from(k)), - _ => None, - }, - }) + .values() + .map(pb::NiDkgTranscript::from) .collect() } @@ -355,12 +348,10 @@ impl From<&Summary> for pb::Summary { .values() .map(pb::NiDkgConfig::from) .collect(), - current_transcripts_deprecated: build_tagged_transcripts_vec( - &summary.current_transcripts, - ), - current_transcripts_new: Vec::new(), - next_transcripts_deprecated: build_tagged_transcripts_vec(&summary.next_transcripts), - next_transcripts_new: Vec::new(), + current_transcripts_new: build_transcripts_vec(&summary.current_transcripts), + current_transcripts_deprecated: Vec::new(), + next_transcripts_new: build_transcripts_vec(&summary.next_transcripts), + next_transcripts_deprecated: Vec::new(), interval_length: summary.interval_length.get(), next_interval_length: summary.next_interval_length.get(), height: summary.height.get(), From 0b30ddf64b77adf41ac0404abc5b67874998c6f9 Mon Sep 17 00:00:00 2001 From: Marko Kosmerl Date: Tue, 21 Jan 2025 09:52:09 -0300 Subject: [PATCH 2/3] chore(system-tests-k8s): misc changes (#3541) * remove docker login * run system tests 4 times during the night time * also run longer system tests on manual dispatch * set concurrency --- .github/workflows/system-tests-k8s.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/system-tests-k8s.yml b/.github/workflows/system-tests-k8s.yml index 7eab15df6d5..15901a233ad 100644 --- a/.github/workflows/system-tests-k8s.yml +++ b/.github/workflows/system-tests-k8s.yml @@ -8,7 +8,10 @@ name: System Tests K8s on: schedule: - - cron: "0 3 * * *" + - cron: "0 1 * * *" # Run at 1 AM + - cron: "0 3 * * *" # Run at 3 AM + - cron: "0 5 * * *" # Run at 5 AM + - cron: "0 7 * * *" # Run at 7 AM pull_request: paths: - '.github/workflows/system-tests-k8s.yml' @@ -23,6 +26,10 @@ on: required: false default: '10' +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name != 'schedule' }} + env: TARGETS: | ${{ github.event_name == 'schedule' && '//...' || @@ -108,20 +115,13 @@ jobs: --privileged --cgroupns host -v /cache:/cache -v /var/sysimage:/var/sysimage -v /var/tmp:/var/tmp timeout-minutes: 150 + # always run after 'system-tests-k8s' job only on manual dispatch and schedule + if: ${{ always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') }} needs: [system-tests-k8s] - # always run after 'system-tests-k8s' job but on schedule event only - if: ${{ always() && github.event_name == 'schedule' }} steps: - name: Checkout uses: actions/checkout@v4 - - name: Login to Dockerhub - shell: bash - run: ./ci/scripts/docker-login.sh - env: - DOCKER_HUB_USER: ${{ vars.DOCKER_HUB_USER }} - DOCKER_HUB_PASSWORD_RO: ${{ secrets.DOCKER_HUB_PASSWORD_RO }} - - name: Set KUBECONFIG shell: bash run: | @@ -158,7 +158,7 @@ jobs: BAZEL_COMMAND: "test" BAZEL_TARGETS: "${{ env.TARGETS }}" BAZEL_CI_CONFIG: "--config=ci --repository_cache=/cache/bazel" - BAZEL_EXTRA_ARGS: "--local_test_jobs=${{ env.JOBS }} --test_tag_filters=k8s --k8s" + BAZEL_EXTRA_ARGS: "--local_test_jobs=${{ env.JOBS }} --test_tag_filters=k8s --k8s --flaky_test_attempts=3" BUILDEVENT_APIKEY: ${{ secrets.HONEYCOMB_API_TOKEN }} - name: Upload bazel-bep From 82576adb0eb2f5ec740bf85e885d111d730db9a7 Mon Sep 17 00:00:00 2001 From: Dimitris Sarlis Date: Tue, 21 Jan 2025 14:02:01 +0100 Subject: [PATCH 3/3] fix: Correct the value returned for replicated vs non replicated execution in system api (#3540) This PR fixes some bugs in how the system API was reporting replicated vs non-replicated mode. Specifically, it was incorrectly reporting that `inspect_message`, a composite query `reply_callback` and `reject_callback` were running in replicated mode, although they are really running in non-replicated mode. --- rs/system_api/src/lib.rs | 11 +++++---- rs/system_api/tests/common/mod.rs | 25 +++++++++++++++++++++ rs/system_api/tests/system_api.rs | 37 +++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/rs/system_api/src/lib.rs b/rs/system_api/src/lib.rs index 4925252e6f7..aa12d435620 100644 --- a/rs/system_api/src/lib.rs +++ b/rs/system_api/src/lib.rs @@ -3396,15 +3396,18 @@ impl SystemApi for SystemApiImpl { let result = match &self.api_type { ApiType::Start { .. } | ApiType::Init { .. } - | ApiType::ReplyCallback { .. } - | ApiType::RejectCallback { .. } | ApiType::Cleanup { .. } | ApiType::PreUpgrade { .. } - | ApiType::InspectMessage { .. } | ApiType::Update { .. } | ApiType::SystemTask { .. } | ApiType::ReplicatedQuery { .. } => Ok(1), - ApiType::NonReplicatedQuery { .. } => Ok(0), + ApiType::ReplyCallback { .. } | ApiType::RejectCallback { .. } => { + match self.execution_parameters.execution_mode { + ExecutionMode::NonReplicated => Ok(0), + ExecutionMode::Replicated => Ok(1), + } + } + ApiType::InspectMessage { .. } | ApiType::NonReplicatedQuery { .. } => Ok(0), }; trace_syscall!(self, ic0_in_replicated_execution, result); result diff --git a/rs/system_api/tests/common/mod.rs b/rs/system_api/tests/common/mod.rs index 84511a5dc29..043f096a127 100644 --- a/rs/system_api/tests/common/mod.rs +++ b/rs/system_api/tests/common/mod.rs @@ -99,6 +99,10 @@ impl ApiTypeBuilder { ) } + pub fn build_replicated_query_api() -> ApiType { + ApiType::replicated_query(UNIX_EPOCH, vec![], user_test_id(1).get()) + } + pub fn build_non_replicated_query_api() -> ApiType { ApiType::non_replicated_query( UNIX_EPOCH, @@ -175,6 +179,27 @@ impl ApiTypeBuilder { 0.into(), ) } + + pub fn build_inspect_message_api() -> ApiType { + ApiType::inspect_message( + PrincipalId::new_anonymous(), + "test".to_string(), + vec![], + UNIX_EPOCH, + ) + } + + pub fn build_start_api() -> ApiType { + ApiType::start(UNIX_EPOCH) + } + + pub fn build_init_api() -> ApiType { + ApiType::init(UNIX_EPOCH, vec![], user_test_id(1).get()) + } + + pub fn build_pre_upgrade_api() -> ApiType { + ApiType::pre_upgrade(UNIX_EPOCH, user_test_id(1).get()) + } } pub fn get_system_api( diff --git a/rs/system_api/tests/system_api.rs b/rs/system_api/tests/system_api.rs index 5cb55e539c1..8e47ccf54ff 100644 --- a/rs/system_api/tests/system_api.rs +++ b/rs/system_api/tests/system_api.rs @@ -2069,3 +2069,40 @@ fn test_save_log_message_keeps_total_log_size_limited() { assert_eq!(log.records().len(), initial_records_number + 1); assert_le!(log.used_space(), MAX_ALLOWED_CANISTER_LOG_BUFFER_SIZE); } + +#[test] +fn in_replicated_execution_works_correctly() { + // The following should execute in replicated mode. + for api_type in &[ + ApiTypeBuilder::build_update_api(), + ApiTypeBuilder::build_system_task_api(), + ApiTypeBuilder::build_start_api(), + ApiTypeBuilder::build_init_api(), + ApiTypeBuilder::build_pre_upgrade_api(), + ApiTypeBuilder::build_replicated_query_api(), + ApiTypeBuilder::build_reply_api(Cycles::new(0)), + ApiTypeBuilder::build_reject_api(RejectContext::new(RejectCode::CanisterReject, "error")), + ] { + let cycles_account_manager = CyclesAccountManagerBuilder::new().build(); + let system_state = SystemStateBuilder::default().build(); + let api = get_system_api(api_type.clone(), &system_state, cycles_account_manager); + assert_eq!(api.ic0_in_replicated_execution(), Ok(1)); + } + + // The following should execute in non-replicated mode. + for api_type in &[ + ApiTypeBuilder::build_non_replicated_query_api(), + ApiTypeBuilder::build_composite_query_api(), + ApiTypeBuilder::build_composite_reply_api(Cycles::new(0)), + ApiTypeBuilder::build_composite_reject_api(RejectContext::new( + RejectCode::CanisterReject, + "error", + )), + ApiTypeBuilder::build_inspect_message_api(), + ] { + let cycles_account_manager = CyclesAccountManagerBuilder::new().build(); + let system_state = SystemStateBuilder::default().build(); + let api = get_system_api(api_type.clone(), &system_state, cycles_account_manager); + assert_eq!(api.ic0_in_replicated_execution(), Ok(0)); + } +}