diff --git a/rs/consensus/src/consensus/block_maker.rs b/rs/consensus/src/consensus/block_maker.rs index 51fd32b5365a..3cffb4a1fb8d 100755 --- a/rs/consensus/src/consensus/block_maker.rs +++ b/rs/consensus/src/consensus/block_maker.rs @@ -1,12 +1,12 @@ #![deny(missing_docs)] use crate::{ consensus::{ - metrics::{BlockMakerMetrics, EcdsaPayloadMetrics}, + metrics::BlockMakerMetrics, status::{self, Status}, ConsensusCrypto, }, dkg::payload_builder::create_payload as create_dkg_payload, - ecdsa, + ecdsa::{self, metrics::EcdsaPayloadMetrics}, }; use ic_consensus_utils::{ find_lowest_ranked_proposals, get_block_hash_string, get_notarization_delay_settings, diff --git a/rs/consensus/src/consensus/metrics.rs b/rs/consensus/src/consensus/metrics.rs index 55901f9614c8..1417bc8c0e18 100644 --- a/rs/consensus/src/consensus/metrics.rs +++ b/rs/consensus/src/consensus/metrics.rs @@ -1,7 +1,6 @@ use ic_consensus_utils::{get_block_hash_string, pool_reader::PoolReader}; use ic_https_outcalls_consensus::payload_builder::CanisterHttpBatchStats; use ic_interfaces::ingress_manager::IngressSelector; -use ic_management_canister_types::MasterPublicKeyId; use ic_metrics::{ buckets::{decimal_buckets, decimal_buckets_with_zero, linear_buckets}, MetricsRegistry, @@ -9,10 +8,7 @@ use ic_metrics::{ use ic_types::{ batch::BatchPayload, consensus::{ - idkg::{ - CompletedReshareRequest, CompletedSignature, EcdsaPayload, HasMasterPublicKeyId, - KeyTranscriptCreation, - }, + idkg::{CompletedReshareRequest, CompletedSignature, EcdsaPayload, KeyTranscriptCreation}, Block, BlockPayload, BlockProposal, ConsensusMessageHashable, HasHeight, HasRank, }, CountBytes, Height, @@ -20,7 +16,12 @@ use ic_types::{ use prometheus::{ GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, }; -use std::{collections::BTreeMap, sync::RwLock}; +use std::sync::RwLock; + +use crate::ecdsa::metrics::{ + count_by_master_public_key_id, expected_keys, key_id_label, CounterPerMasterPublicKeyId, + ECDSA_KEY_ID_LABEL, +}; // For certain metrics, we record metrics based on block's rank. // Since we can only record limited number of them, the follow is @@ -30,9 +31,6 @@ const RANKS_TO_RECORD: [&str; 6] = ["0", "1", "2", "3", "4", "5"]; pub(crate) const CRITICAL_ERROR_PAYLOAD_TOO_LARGE: &str = "consensus_payload_too_large"; pub(crate) const CRITICAL_ERROR_VALIDATION_NOT_PASSED: &str = "consensus_validation_not_passed"; pub(crate) const CRITICAL_ERROR_SUBNET_RECORD_ISSUE: &str = "consensus_subnet_record_issue"; -pub(crate) const CRITICAL_ERROR_ECDSA_KEY_TRANSCRIPT_MISSING: &str = "ecdsa_key_transcript_missing"; -pub(crate) const CRITICAL_ERROR_ECDSA_RETAIN_ACTIVE_TRANSCRIPTS: &str = - "ecdsa_retain_active_transcripts_error"; pub struct BlockMakerMetrics { pub get_payload_calls: IntCounterVec, @@ -159,8 +157,6 @@ impl BatchStats { } } -type CounterPerMasterPublicKeyId = BTreeMap; - // Ecdsa payload stats pub struct EcdsaStats { pub signature_agreements: usize, @@ -222,25 +218,6 @@ impl From<&EcdsaPayload> for EcdsaStats { } } -fn count_by_master_public_key_id( - collection: impl Iterator, - expected_keys: &[MasterPublicKeyId], -) -> CounterPerMasterPublicKeyId { - let mut counter_per_key_id = CounterPerMasterPublicKeyId::new(); - - // To properly report `0` for ecdsa keys which do not appear in the `collection`, we insert the - // default values for all the ecdsa keys which we expect to see in the payload. - for key in expected_keys { - counter_per_key_id.insert(key.clone(), 0); - } - - for item in collection { - *counter_per_key_id.entry(item.key_id().clone()).or_default() += 1; - } - - counter_per_key_id -} - pub struct FinalizerMetrics { pub batches_delivered: IntCounterVec, pub batch_height: IntGauge, @@ -261,8 +238,6 @@ pub struct FinalizerMetrics { pub canister_http_divergences_delivered: IntCounter, } -const ECDSA_KEY_ID_LABEL: &str = "key_id"; - impl FinalizerMetrics { pub fn new(metrics_registry: MetricsRegistry) -> Self { Self { @@ -405,10 +380,6 @@ impl FinalizerMetrics { } } -fn key_id_label(key_id: Option<&MasterPublicKeyId>) -> String { - key_id.map(ToString::to_string).unwrap_or_default() -} - pub struct NotaryMetrics { pub time_to_notary_sign: HistogramVec, } @@ -669,449 +640,3 @@ impl PurgerMetrics { } } } - -#[derive(Clone)] -pub struct EcdsaClientMetrics { - pub on_state_change_duration: HistogramVec, - pub client_metrics: IntCounterVec, - pub client_errors: IntCounterVec, - /// critical error when retain_active_transcripts fails - pub critical_error_ecdsa_retain_active_transcripts: IntCounter, -} - -impl EcdsaClientMetrics { - pub fn new(metrics_registry: MetricsRegistry) -> Self { - Self { - on_state_change_duration: metrics_registry.histogram_vec( - "ecdsa_on_state_change_duration_seconds", - "The time it took to execute ECDSA on_state_change(), in seconds", - // 0.1ms, 0.2ms, 0.5ms, 1ms, 2ms, 5ms, 10ms, 20ms, 50ms, 100ms, 200ms, 500ms, - // 1s, 2s, 5s, 10s, 20s, 50s, 100s, 200s, 500s - decimal_buckets(-4, 2), - &["sub_component"], - ), - client_metrics: metrics_registry.int_counter_vec( - "ecdsa_client_metrics", - "ECDSA client related metrics", - &["type"], - ), - client_errors: metrics_registry.int_counter_vec( - "ecdsa_client_errors", - "ECDSA client related errors", - &["type"], - ), - critical_error_ecdsa_retain_active_transcripts: metrics_registry - .error_counter(CRITICAL_ERROR_ECDSA_RETAIN_ACTIVE_TRANSCRIPTS), - } - } -} - -#[derive(Clone)] -pub struct EcdsaGossipMetrics { - pub dropped_adverts: IntCounterVec, -} - -impl EcdsaGossipMetrics { - pub fn new(metrics_registry: MetricsRegistry) -> Self { - Self { - dropped_adverts: metrics_registry.int_counter_vec( - "ecdsa_priority_fn_dropped_adverts", - "ECDSA adverts dropped by priority fn", - &["type"], - ), - } - } -} - -#[derive(Clone)] -pub struct EcdsaPreSignerMetrics { - pub on_state_change_duration: HistogramVec, - pub pre_sign_metrics: IntCounterVec, - pub pre_sign_errors: IntCounterVec, -} - -impl EcdsaPreSignerMetrics { - pub fn new(metrics_registry: MetricsRegistry) -> Self { - Self { - on_state_change_duration: metrics_registry.histogram_vec( - "ecdsa_pre_signer_on_state_change_duration_seconds", - "The time it took to execute pre-signer on_state_change(), in seconds", - // 0.1ms, 0.2ms, 0.5ms, 1ms, 2ms, 5ms, 10ms, 20ms, 50ms, 100ms, 200ms, 500ms, - // 1s, 2s, 5s, 10s, 20s, 50s, 100s, 200s, 500s - decimal_buckets(-4, 2), - &["sub_component"], - ), - pre_sign_metrics: metrics_registry.int_counter_vec( - "ecdsa_pre_signer_metrics", - "Pre-signing related metrics", - &["type"], - ), - pre_sign_errors: metrics_registry.int_counter_vec( - "ecdsa_pre_signer_errors", - "Pre-signing related errors", - &["type"], - ), - } - } - - pub fn pre_sign_metrics_inc(&self, label: &str) { - self.pre_sign_metrics.with_label_values(&[label]).inc(); - } - - pub fn pre_sign_errors_inc(&self, label: &str) { - self.pre_sign_errors.with_label_values(&[label]).inc(); - } -} - -#[derive(Clone)] -pub struct EcdsaSignerMetrics { - pub on_state_change_duration: HistogramVec, - pub sign_metrics: IntCounterVec, - pub sign_errors: IntCounterVec, -} - -impl EcdsaSignerMetrics { - pub fn new(metrics_registry: MetricsRegistry) -> Self { - Self { - on_state_change_duration: metrics_registry.histogram_vec( - "ecdsa_signer_on_state_change_duration_seconds", - "The time it took to execute signer on_state_change(), in seconds", - // 0.1ms, 0.2ms, 0.5ms, 1ms, 2ms, 5ms, 10ms, 20ms, 50ms, 100ms, 200ms, 500ms, - // 1s, 2s, 5s, 10s, 20s, 50s, 100s, 200s, 500s - decimal_buckets(-4, 2), - &["sub_component"], - ), - sign_metrics: metrics_registry.int_counter_vec( - "ecdsa_signer_metrics", - "Signing related metrics", - &["type"], - ), - sign_errors: metrics_registry.int_counter_vec( - "ecdsa_signer_errors", - "Signing related errors", - &["type"], - ), - } - } - pub fn sign_metrics_inc(&self, label: &str) { - self.sign_metrics.with_label_values(&[label]).inc(); - } - - pub fn sign_errors_inc(&self, label: &str) { - self.sign_errors.with_label_values(&[label]).inc(); - } -} - -pub(crate) struct EcdsaPayloadMetrics { - payload_metrics: IntGaugeVec, - payload_errors: IntCounterVec, - transcript_builder_metrics: IntCounterVec, - transcript_builder_errors: IntCounterVec, - pub(crate) transcript_builder_duration: HistogramVec, - /// Critical error for failure to create/reshare key transcript - pub(crate) critical_error_ecdsa_key_transcript_missing: IntCounter, -} - -impl EcdsaPayloadMetrics { - pub(crate) fn new(metrics_registry: MetricsRegistry) -> Self { - Self { - payload_metrics: metrics_registry.int_gauge_vec( - "ecdsa_payload_metrics", - "ECDSA payload related metrics", - &["type", ECDSA_KEY_ID_LABEL], - ), - payload_errors: metrics_registry.int_counter_vec( - "ecdsa_payload_errors", - "ECDSA payload related errors", - &["type"], - ), - transcript_builder_metrics: metrics_registry.int_counter_vec( - "ecdsa_transcript_builder_metrics", - "ECDSA transcript builder metrics", - &["type"], - ), - transcript_builder_errors: metrics_registry.int_counter_vec( - "ecdsa_transcript_builder_errors", - "ECDSA transcript builder related errors", - &["type"], - ), - transcript_builder_duration: metrics_registry.histogram_vec( - "ecdsa_transcript_builder_duration_seconds", - "Time taken by transcript builder, in seconds", - // 0.1ms, 0.2ms, 0.5ms, 1ms, 2ms, 5ms, 10ms, 20ms, 50ms, 100ms, 200ms, 500ms, - // 1s, 2s, 5s, 10s, 20s, 50s, 100s, 200s, 500s - decimal_buckets(-4, 2), - &["sub_component"], - ), - critical_error_ecdsa_key_transcript_missing: metrics_registry - .error_counter(CRITICAL_ERROR_ECDSA_KEY_TRANSCRIPT_MISSING), - } - } - - pub(crate) fn report(&self, payload: &EcdsaPayload) { - let expected_keys = expected_keys(payload); - - self.payload_metrics_set_without_key_id_label( - "signature_agreements", - payload.signature_agreements.len(), - ); - self.payload_metrics_set( - "available_quadruples", - count_by_master_public_key_id( - payload.available_pre_signatures.values(), - &expected_keys, - ), - ); - self.payload_metrics_set( - "quadruples_in_creation", - count_by_master_public_key_id( - payload.pre_signatures_in_creation.values(), - &expected_keys, - ), - ); - self.payload_metrics_set( - "ongoing_xnet_reshares", - count_by_master_public_key_id(payload.ongoing_xnet_reshares.keys(), &expected_keys), - ); - self.payload_metrics_set( - "xnet_reshare_agreements", - count_by_master_public_key_id(payload.xnet_reshare_agreements.keys(), &expected_keys), - ); - self.payload_metrics_set_without_key_id_label("payload_layout_multiple_keys", 1); - self.payload_metrics_set_without_key_id_label( - "payload_layout_generalized_pre_signatures", - 1, - ); - self.payload_metrics_set_without_key_id_label( - "key_transcripts", - payload.key_transcripts.len(), - ); - self.payload_metrics_set_without_key_id_label( - "key_transcripts_with_ecdsa_key_id", - payload - .key_transcripts - .values() - .filter(|k| k.deprecated_key_id.is_some()) - .count(), - ); - } - - fn payload_metrics_set_without_key_id_label(&self, label: &str, value: usize) { - self.payload_metrics - .with_label_values(&[label, /*key_id=*/ ""]) - .set(value as i64); - } - - fn payload_metrics_set(&self, label: &str, values: CounterPerMasterPublicKeyId) { - for (key_id, value) in values { - self.payload_metrics - .with_label_values(&[label, &key_id_label(Some(&key_id))]) - .set(value as i64); - } - } - - pub(crate) fn payload_metrics_inc(&self, label: &str, key_id: Option<&MasterPublicKeyId>) { - self.payload_metrics - .with_label_values(&[label, &key_id_label(key_id)]) - .inc(); - } - - pub(crate) fn payload_errors_inc(&self, label: &str) { - self.payload_errors.with_label_values(&[label]).inc(); - } - - pub(crate) fn transcript_builder_metrics_inc(&self, label: &str) { - self.transcript_builder_metrics - .with_label_values(&[label]) - .inc(); - } - - pub(crate) fn transcript_builder_metrics_inc_by(&self, value: u64, label: &str) { - self.transcript_builder_metrics - .with_label_values(&[label]) - .inc_by(value); - } - - pub(crate) fn transcript_builder_errors_inc(&self, label: &str) { - self.transcript_builder_errors - .with_label_values(&[label]) - .inc(); - } -} - -pub fn timed_call(label: &str, call_fn: F, metric: &HistogramVec) -> R -where - F: FnOnce() -> R, -{ - let _timer = metric.with_label_values(&[label]).start_timer(); - (call_fn)() -} - -#[derive(Clone)] -pub struct EcdsaComplaintMetrics { - pub on_state_change_duration: HistogramVec, - pub complaint_metrics: IntCounterVec, - pub complaint_errors: IntCounterVec, -} - -impl EcdsaComplaintMetrics { - pub fn new(metrics_registry: MetricsRegistry) -> Self { - Self { - on_state_change_duration: metrics_registry.histogram_vec( - "ecdsa_complaint_on_state_change_duration_seconds", - "The time it took to execute complaint on_state_change(), in seconds", - // 0.1ms, 0.2ms, 0.5ms, 1ms, 2ms, 5ms, 10ms, 20ms, 50ms, 100ms, 200ms, 500ms, - // 1s, 2s, 5s, 10s, 20s, 50s, 100s, 200s, 500s - decimal_buckets(-4, 2), - &["sub_component"], - ), - complaint_metrics: metrics_registry.int_counter_vec( - "ecdsa_complaint_metrics", - "Complaint related metrics", - &["type"], - ), - complaint_errors: metrics_registry.int_counter_vec( - "ecdsa_complaint_errors", - "Complaint related errors", - &["type"], - ), - } - } - - pub fn complaint_metrics_inc(&self, label: &str) { - self.complaint_metrics.with_label_values(&[label]).inc(); - } - - pub fn complaint_errors_inc(&self, label: &str) { - self.complaint_errors.with_label_values(&[label]).inc(); - } -} - -fn expected_keys(payload: &EcdsaPayload) -> Vec { - payload.key_transcripts.keys().cloned().collect() -} - -#[derive(Clone)] -pub struct EcdsaTranscriptMetrics { - pub active_transcripts: IntGauge, - pub support_validation_duration: HistogramVec, - pub support_validation_total_duration: HistogramVec, - pub support_aggregation_duration: HistogramVec, - pub support_aggregation_total_duration: HistogramVec, - pub create_transcript_duration: HistogramVec, - pub create_transcript_total_duration: HistogramVec, - pub transcript_e2e_latency: HistogramVec, -} - -impl EcdsaTranscriptMetrics { - pub fn new(metrics_registry: MetricsRegistry) -> Self { - Self { - active_transcripts: metrics_registry - .int_gauge("ecdsa_active_transcripts", "Currently active transcripts"), - support_validation_duration: metrics_registry.histogram_vec( - "ecdsa_support_validation_duration", - "Support validation duration, in msec", - decimal_buckets(0, 2), - &["type"], - ), - support_validation_total_duration: metrics_registry.histogram_vec( - "ecdsa_support_validation_total_duration", - "Total support validation duration, in msec", - decimal_buckets(0, 4), - &["type"], - ), - support_aggregation_duration: metrics_registry.histogram_vec( - "ecdsa_support_aggregation_duration", - "Support aggregation duration, in msec", - decimal_buckets(0, 2), - &["type"], - ), - support_aggregation_total_duration: metrics_registry.histogram_vec( - "ecdsa_support_aggregation_total_duration", - "Total support aggregation duration, in msec", - decimal_buckets(0, 4), - &["type"], - ), - create_transcript_duration: metrics_registry.histogram_vec( - "ecdsa_create_transcript_duration", - "Time to create transcript, in msec", - decimal_buckets(0, 5), - &["type"], - ), - create_transcript_total_duration: metrics_registry.histogram_vec( - "ecdsa_create_transcript_total_duration", - "Total time to create transcript, in msec", - decimal_buckets(0, 5), - &["type"], - ), - transcript_e2e_latency: metrics_registry.histogram_vec( - "ecdsa_transcript_e2e_latency", - "End to end latency to build the transcript, in sec", - linear_buckets(0.5, 0.5, 30), - &["type"], - ), - } - } -} - -#[derive(Clone)] -pub struct EcdsaPreSignatureMetrics { - pub pre_signature_e2e_latency: HistogramVec, -} - -impl EcdsaPreSignatureMetrics { - pub fn new(metrics_registry: MetricsRegistry) -> Self { - Self { - pre_signature_e2e_latency: metrics_registry.histogram_vec( - "ecdsa_quadruple_e2e_latency", - "End to end latency to build the pre-signature, in sec", - linear_buckets(1.0, 0.5, 30), - &["key_id"], - ), - } - } -} - -#[derive(Clone)] -pub struct EcdsaSignatureMetrics { - pub active_signatures: IntGauge, - pub sig_share_validation_duration: Histogram, - pub sig_share_validation_total_duration: Histogram, - pub sig_share_aggregation_duration: Histogram, - pub sig_share_aggregation_total_duration: Histogram, - pub signature_e2e_latency: Histogram, -} - -impl EcdsaSignatureMetrics { - pub fn new(metrics_registry: MetricsRegistry) -> Self { - Self { - active_signatures: metrics_registry - .int_gauge("ecdsa_active_signatures", "Currently active signatures"), - sig_share_validation_duration: metrics_registry.histogram( - "ecdsa_sig_share_validation_duration", - "Sig share validation duration, in msec", - decimal_buckets(0, 2), - ), - sig_share_validation_total_duration: metrics_registry.histogram( - "ecdsa_sig_share_validation_total_duration", - "Total sig share validation duration, in msec", - decimal_buckets(0, 4), - ), - sig_share_aggregation_duration: metrics_registry.histogram( - "ecdsa_sig_share_aggregation_duration", - "Sig share aggregation duration, in msec", - decimal_buckets(0, 2), - ), - sig_share_aggregation_total_duration: metrics_registry.histogram( - "ecdsa_sig_share_aggregation_total_duration", - "Total sig share aggregation duration, in msec", - decimal_buckets(0, 4), - ), - signature_e2e_latency: metrics_registry.histogram( - "ecdsa_signature_e2e_latency", - "End to end latency to build the signature, in sec", - linear_buckets(0.5, 0.5, 30), - ), - } - } -} diff --git a/rs/consensus/src/ecdsa.rs b/rs/consensus/src/ecdsa.rs index 3e5509faeb5b..c1267ddb8e6e 100644 --- a/rs/consensus/src/ecdsa.rs +++ b/rs/consensus/src/ecdsa.rs @@ -169,11 +169,11 @@ //! the first 4-tuple from the available 4 tuples and make an entry in ongoing //! signatures with the signing request and the 4-tuple. -use crate::consensus::metrics::{ +use crate::ecdsa::complaints::{EcdsaComplaintHandler, EcdsaComplaintHandlerImpl}; +use crate::ecdsa::metrics::{ timed_call, EcdsaClientMetrics, EcdsaGossipMetrics, CRITICAL_ERROR_ECDSA_RETAIN_ACTIVE_TRANSCRIPTS, }; -use crate::ecdsa::complaints::{EcdsaComplaintHandler, EcdsaComplaintHandlerImpl}; use crate::ecdsa::pre_signer::{EcdsaPreSigner, EcdsaPreSignerImpl}; use crate::ecdsa::signer::{EcdsaSigner, EcdsaSignerImpl}; use crate::ecdsa::utils::EcdsaBlockReaderImpl; @@ -208,6 +208,7 @@ use std::time::{Duration, Instant}; pub(crate) mod complaints; #[cfg(any(feature = "malicious_code", test))] pub mod malicious_pre_signer; +pub(crate) mod metrics; pub(crate) mod payload_builder; pub(crate) mod payload_verifier; pub(crate) mod pre_signer; diff --git a/rs/consensus/src/ecdsa/complaints.rs b/rs/consensus/src/ecdsa/complaints.rs index 0a2c6aafd0f8..c5ed1e31ca95 100644 --- a/rs/consensus/src/ecdsa/complaints.rs +++ b/rs/consensus/src/ecdsa/complaints.rs @@ -1,6 +1,6 @@ //! The complaint handling -use crate::consensus::metrics::{timed_call, EcdsaComplaintMetrics}; +use crate::ecdsa::metrics::{timed_call, EcdsaComplaintMetrics}; use crate::ecdsa::utils::EcdsaBlockReaderImpl; use ic_consensus_utils::crypto::ConsensusCrypto; diff --git a/rs/consensus/src/ecdsa/malicious_pre_signer.rs b/rs/consensus/src/ecdsa/malicious_pre_signer.rs index fa0c34958c14..3d781a2c7162 100644 --- a/rs/consensus/src/ecdsa/malicious_pre_signer.rs +++ b/rs/consensus/src/ecdsa/malicious_pre_signer.rs @@ -1,6 +1,6 @@ //! The malicious pre signature process manager -use crate::consensus::metrics::EcdsaPreSignerMetrics; +use crate::ecdsa::metrics::EcdsaPreSignerMetrics; use crate::ecdsa::{ pre_signer::EcdsaPreSignerImpl, utils::transcript_op_summary, EcdsaBlockReaderImpl, }; diff --git a/rs/consensus/src/ecdsa/metrics.rs b/rs/consensus/src/ecdsa/metrics.rs new file mode 100644 index 000000000000..8bda83983bfe --- /dev/null +++ b/rs/consensus/src/ecdsa/metrics.rs @@ -0,0 +1,488 @@ +//! Metrics for the ecdsa feature + +use ic_management_canister_types::MasterPublicKeyId; +use ic_metrics::{ + buckets::{decimal_buckets, linear_buckets}, + MetricsRegistry, +}; +use ic_types::consensus::idkg::{EcdsaPayload, HasMasterPublicKeyId}; +use prometheus::{Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec}; +use std::collections::BTreeMap; + +pub const ECDSA_KEY_ID_LABEL: &str = "key_id"; + +pub(crate) const CRITICAL_ERROR_ECDSA_KEY_TRANSCRIPT_MISSING: &str = "ecdsa_key_transcript_missing"; +pub(crate) const CRITICAL_ERROR_ECDSA_RETAIN_ACTIVE_TRANSCRIPTS: &str = + "ecdsa_retain_active_transcripts_error"; + +#[derive(Clone)] +pub struct EcdsaClientMetrics { + pub on_state_change_duration: HistogramVec, + pub client_metrics: IntCounterVec, + pub client_errors: IntCounterVec, + /// critical error when retain_active_transcripts fails + pub critical_error_ecdsa_retain_active_transcripts: IntCounter, +} + +impl EcdsaClientMetrics { + pub fn new(metrics_registry: MetricsRegistry) -> Self { + Self { + on_state_change_duration: metrics_registry.histogram_vec( + "ecdsa_on_state_change_duration_seconds", + "The time it took to execute ECDSA on_state_change(), in seconds", + // 0.1ms, 0.2ms, 0.5ms, 1ms, 2ms, 5ms, 10ms, 20ms, 50ms, 100ms, 200ms, 500ms, + // 1s, 2s, 5s, 10s, 20s, 50s, 100s, 200s, 500s + decimal_buckets(-4, 2), + &["sub_component"], + ), + client_metrics: metrics_registry.int_counter_vec( + "ecdsa_client_metrics", + "ECDSA client related metrics", + &["type"], + ), + client_errors: metrics_registry.int_counter_vec( + "ecdsa_client_errors", + "ECDSA client related errors", + &["type"], + ), + critical_error_ecdsa_retain_active_transcripts: metrics_registry + .error_counter(CRITICAL_ERROR_ECDSA_RETAIN_ACTIVE_TRANSCRIPTS), + } + } +} + +#[derive(Clone)] +pub struct EcdsaGossipMetrics { + pub dropped_adverts: IntCounterVec, +} + +impl EcdsaGossipMetrics { + pub fn new(metrics_registry: MetricsRegistry) -> Self { + Self { + dropped_adverts: metrics_registry.int_counter_vec( + "ecdsa_priority_fn_dropped_adverts", + "ECDSA adverts dropped by priority fn", + &["type"], + ), + } + } +} + +#[derive(Clone)] +pub struct EcdsaPreSignerMetrics { + pub on_state_change_duration: HistogramVec, + pub pre_sign_metrics: IntCounterVec, + pub pre_sign_errors: IntCounterVec, +} + +impl EcdsaPreSignerMetrics { + pub fn new(metrics_registry: MetricsRegistry) -> Self { + Self { + on_state_change_duration: metrics_registry.histogram_vec( + "ecdsa_pre_signer_on_state_change_duration_seconds", + "The time it took to execute pre-signer on_state_change(), in seconds", + // 0.1ms, 0.2ms, 0.5ms, 1ms, 2ms, 5ms, 10ms, 20ms, 50ms, 100ms, 200ms, 500ms, + // 1s, 2s, 5s, 10s, 20s, 50s, 100s, 200s, 500s + decimal_buckets(-4, 2), + &["sub_component"], + ), + pre_sign_metrics: metrics_registry.int_counter_vec( + "ecdsa_pre_signer_metrics", + "Pre-signing related metrics", + &["type"], + ), + pre_sign_errors: metrics_registry.int_counter_vec( + "ecdsa_pre_signer_errors", + "Pre-signing related errors", + &["type"], + ), + } + } + + pub fn pre_sign_metrics_inc(&self, label: &str) { + self.pre_sign_metrics.with_label_values(&[label]).inc(); + } + + pub fn pre_sign_errors_inc(&self, label: &str) { + self.pre_sign_errors.with_label_values(&[label]).inc(); + } +} + +#[derive(Clone)] +pub struct EcdsaSignerMetrics { + pub on_state_change_duration: HistogramVec, + pub sign_metrics: IntCounterVec, + pub sign_errors: IntCounterVec, +} + +impl EcdsaSignerMetrics { + pub fn new(metrics_registry: MetricsRegistry) -> Self { + Self { + on_state_change_duration: metrics_registry.histogram_vec( + "ecdsa_signer_on_state_change_duration_seconds", + "The time it took to execute signer on_state_change(), in seconds", + // 0.1ms, 0.2ms, 0.5ms, 1ms, 2ms, 5ms, 10ms, 20ms, 50ms, 100ms, 200ms, 500ms, + // 1s, 2s, 5s, 10s, 20s, 50s, 100s, 200s, 500s + decimal_buckets(-4, 2), + &["sub_component"], + ), + sign_metrics: metrics_registry.int_counter_vec( + "ecdsa_signer_metrics", + "Signing related metrics", + &["type"], + ), + sign_errors: metrics_registry.int_counter_vec( + "ecdsa_signer_errors", + "Signing related errors", + &["type"], + ), + } + } + pub fn sign_metrics_inc(&self, label: &str) { + self.sign_metrics.with_label_values(&[label]).inc(); + } + + pub fn sign_errors_inc(&self, label: &str) { + self.sign_errors.with_label_values(&[label]).inc(); + } +} + +pub(crate) struct EcdsaPayloadMetrics { + payload_metrics: IntGaugeVec, + payload_errors: IntCounterVec, + transcript_builder_metrics: IntCounterVec, + transcript_builder_errors: IntCounterVec, + pub(crate) transcript_builder_duration: HistogramVec, + /// Critical error for failure to create/reshare key transcript + pub(crate) critical_error_ecdsa_key_transcript_missing: IntCounter, +} + +impl EcdsaPayloadMetrics { + pub(crate) fn new(metrics_registry: MetricsRegistry) -> Self { + Self { + payload_metrics: metrics_registry.int_gauge_vec( + "ecdsa_payload_metrics", + "ECDSA payload related metrics", + &["type", ECDSA_KEY_ID_LABEL], + ), + payload_errors: metrics_registry.int_counter_vec( + "ecdsa_payload_errors", + "ECDSA payload related errors", + &["type"], + ), + transcript_builder_metrics: metrics_registry.int_counter_vec( + "ecdsa_transcript_builder_metrics", + "ECDSA transcript builder metrics", + &["type"], + ), + transcript_builder_errors: metrics_registry.int_counter_vec( + "ecdsa_transcript_builder_errors", + "ECDSA transcript builder related errors", + &["type"], + ), + transcript_builder_duration: metrics_registry.histogram_vec( + "ecdsa_transcript_builder_duration_seconds", + "Time taken by transcript builder, in seconds", + // 0.1ms, 0.2ms, 0.5ms, 1ms, 2ms, 5ms, 10ms, 20ms, 50ms, 100ms, 200ms, 500ms, + // 1s, 2s, 5s, 10s, 20s, 50s, 100s, 200s, 500s + decimal_buckets(-4, 2), + &["sub_component"], + ), + critical_error_ecdsa_key_transcript_missing: metrics_registry + .error_counter(CRITICAL_ERROR_ECDSA_KEY_TRANSCRIPT_MISSING), + } + } + + pub(crate) fn report(&self, payload: &EcdsaPayload) { + let expected_keys = expected_keys(payload); + + self.payload_metrics_set_without_key_id_label( + "signature_agreements", + payload.signature_agreements.len(), + ); + self.payload_metrics_set( + "available_quadruples", + count_by_master_public_key_id( + payload.available_pre_signatures.values(), + &expected_keys, + ), + ); + self.payload_metrics_set( + "quadruples_in_creation", + count_by_master_public_key_id( + payload.pre_signatures_in_creation.values(), + &expected_keys, + ), + ); + self.payload_metrics_set( + "ongoing_xnet_reshares", + count_by_master_public_key_id(payload.ongoing_xnet_reshares.keys(), &expected_keys), + ); + self.payload_metrics_set( + "xnet_reshare_agreements", + count_by_master_public_key_id(payload.xnet_reshare_agreements.keys(), &expected_keys), + ); + self.payload_metrics_set_without_key_id_label("payload_layout_multiple_keys", 1); + self.payload_metrics_set_without_key_id_label( + "payload_layout_generalized_pre_signatures", + 1, + ); + self.payload_metrics_set_without_key_id_label( + "key_transcripts", + payload.key_transcripts.len(), + ); + self.payload_metrics_set_without_key_id_label( + "key_transcripts_with_ecdsa_key_id", + payload + .key_transcripts + .values() + .filter(|k| k.deprecated_key_id.is_some()) + .count(), + ); + } + + fn payload_metrics_set_without_key_id_label(&self, label: &str, value: usize) { + self.payload_metrics + .with_label_values(&[label, /*key_id=*/ ""]) + .set(value as i64); + } + + fn payload_metrics_set(&self, label: &str, values: CounterPerMasterPublicKeyId) { + for (key_id, value) in values { + self.payload_metrics + .with_label_values(&[label, &key_id_label(Some(&key_id))]) + .set(value as i64); + } + } + + pub(crate) fn payload_metrics_inc(&self, label: &str, key_id: Option<&MasterPublicKeyId>) { + self.payload_metrics + .with_label_values(&[label, &key_id_label(key_id)]) + .inc(); + } + + pub(crate) fn payload_errors_inc(&self, label: &str) { + self.payload_errors.with_label_values(&[label]).inc(); + } + + pub(crate) fn transcript_builder_metrics_inc(&self, label: &str) { + self.transcript_builder_metrics + .with_label_values(&[label]) + .inc(); + } + + pub(crate) fn transcript_builder_metrics_inc_by(&self, value: u64, label: &str) { + self.transcript_builder_metrics + .with_label_values(&[label]) + .inc_by(value); + } + + pub(crate) fn transcript_builder_errors_inc(&self, label: &str) { + self.transcript_builder_errors + .with_label_values(&[label]) + .inc(); + } +} + +pub fn timed_call(label: &str, call_fn: F, metric: &HistogramVec) -> R +where + F: FnOnce() -> R, +{ + let _timer = metric.with_label_values(&[label]).start_timer(); + (call_fn)() +} + +#[derive(Clone)] +pub struct EcdsaComplaintMetrics { + pub on_state_change_duration: HistogramVec, + pub complaint_metrics: IntCounterVec, + pub complaint_errors: IntCounterVec, +} + +impl EcdsaComplaintMetrics { + pub fn new(metrics_registry: MetricsRegistry) -> Self { + Self { + on_state_change_duration: metrics_registry.histogram_vec( + "ecdsa_complaint_on_state_change_duration_seconds", + "The time it took to execute complaint on_state_change(), in seconds", + // 0.1ms, 0.2ms, 0.5ms, 1ms, 2ms, 5ms, 10ms, 20ms, 50ms, 100ms, 200ms, 500ms, + // 1s, 2s, 5s, 10s, 20s, 50s, 100s, 200s, 500s + decimal_buckets(-4, 2), + &["sub_component"], + ), + complaint_metrics: metrics_registry.int_counter_vec( + "ecdsa_complaint_metrics", + "Complaint related metrics", + &["type"], + ), + complaint_errors: metrics_registry.int_counter_vec( + "ecdsa_complaint_errors", + "Complaint related errors", + &["type"], + ), + } + } + + pub fn complaint_metrics_inc(&self, label: &str) { + self.complaint_metrics.with_label_values(&[label]).inc(); + } + + pub fn complaint_errors_inc(&self, label: &str) { + self.complaint_errors.with_label_values(&[label]).inc(); + } +} + +#[derive(Clone)] +pub struct EcdsaTranscriptMetrics { + pub active_transcripts: IntGauge, + pub support_validation_duration: HistogramVec, + pub support_validation_total_duration: HistogramVec, + pub support_aggregation_duration: HistogramVec, + pub support_aggregation_total_duration: HistogramVec, + pub create_transcript_duration: HistogramVec, + pub create_transcript_total_duration: HistogramVec, + pub transcript_e2e_latency: HistogramVec, +} + +impl EcdsaTranscriptMetrics { + pub fn new(metrics_registry: MetricsRegistry) -> Self { + Self { + active_transcripts: metrics_registry + .int_gauge("ecdsa_active_transcripts", "Currently active transcripts"), + support_validation_duration: metrics_registry.histogram_vec( + "ecdsa_support_validation_duration", + "Support validation duration, in msec", + decimal_buckets(0, 2), + &["type"], + ), + support_validation_total_duration: metrics_registry.histogram_vec( + "ecdsa_support_validation_total_duration", + "Total support validation duration, in msec", + decimal_buckets(0, 4), + &["type"], + ), + support_aggregation_duration: metrics_registry.histogram_vec( + "ecdsa_support_aggregation_duration", + "Support aggregation duration, in msec", + decimal_buckets(0, 2), + &["type"], + ), + support_aggregation_total_duration: metrics_registry.histogram_vec( + "ecdsa_support_aggregation_total_duration", + "Total support aggregation duration, in msec", + decimal_buckets(0, 4), + &["type"], + ), + create_transcript_duration: metrics_registry.histogram_vec( + "ecdsa_create_transcript_duration", + "Time to create transcript, in msec", + decimal_buckets(0, 5), + &["type"], + ), + create_transcript_total_duration: metrics_registry.histogram_vec( + "ecdsa_create_transcript_total_duration", + "Total time to create transcript, in msec", + decimal_buckets(0, 5), + &["type"], + ), + transcript_e2e_latency: metrics_registry.histogram_vec( + "ecdsa_transcript_e2e_latency", + "End to end latency to build the transcript, in sec", + linear_buckets(0.5, 0.5, 30), + &["type"], + ), + } + } +} + +#[derive(Clone)] +pub struct EcdsaPreSignatureMetrics { + pub pre_signature_e2e_latency: HistogramVec, +} + +impl EcdsaPreSignatureMetrics { + pub fn new(metrics_registry: MetricsRegistry) -> Self { + Self { + pre_signature_e2e_latency: metrics_registry.histogram_vec( + "ecdsa_quadruple_e2e_latency", + "End to end latency to build the pre-signature, in sec", + linear_buckets(1.0, 0.5, 30), + &["key_id"], + ), + } + } +} + +#[derive(Clone)] +pub struct EcdsaSignatureMetrics { + pub active_signatures: IntGauge, + pub sig_share_validation_duration: Histogram, + pub sig_share_validation_total_duration: Histogram, + pub sig_share_aggregation_duration: Histogram, + pub sig_share_aggregation_total_duration: Histogram, + pub signature_e2e_latency: Histogram, +} + +impl EcdsaSignatureMetrics { + pub fn new(metrics_registry: MetricsRegistry) -> Self { + Self { + active_signatures: metrics_registry + .int_gauge("ecdsa_active_signatures", "Currently active signatures"), + sig_share_validation_duration: metrics_registry.histogram( + "ecdsa_sig_share_validation_duration", + "Sig share validation duration, in msec", + decimal_buckets(0, 2), + ), + sig_share_validation_total_duration: metrics_registry.histogram( + "ecdsa_sig_share_validation_total_duration", + "Total sig share validation duration, in msec", + decimal_buckets(0, 4), + ), + sig_share_aggregation_duration: metrics_registry.histogram( + "ecdsa_sig_share_aggregation_duration", + "Sig share aggregation duration, in msec", + decimal_buckets(0, 2), + ), + sig_share_aggregation_total_duration: metrics_registry.histogram( + "ecdsa_sig_share_aggregation_total_duration", + "Total sig share aggregation duration, in msec", + decimal_buckets(0, 4), + ), + signature_e2e_latency: metrics_registry.histogram( + "ecdsa_signature_e2e_latency", + "End to end latency to build the signature, in sec", + linear_buckets(0.5, 0.5, 30), + ), + } + } +} + +/// Returns the key id corresponding to the [`MasterPublicKeyId`] +pub fn key_id_label(key_id: Option<&MasterPublicKeyId>) -> String { + key_id.map(ToString::to_string).unwrap_or_default() +} + +pub fn expected_keys(payload: &EcdsaPayload) -> Vec { + payload.key_transcripts.keys().cloned().collect() +} + +pub type CounterPerMasterPublicKeyId = BTreeMap; + +pub fn count_by_master_public_key_id( + collection: impl Iterator, + expected_keys: &[MasterPublicKeyId], +) -> CounterPerMasterPublicKeyId { + let mut counter_per_key_id = CounterPerMasterPublicKeyId::new(); + + // To properly report `0` for ecdsa keys which do not appear in the `collection`, we insert the + // default values for all the ecdsa keys which we expect to see in the payload. + for key in expected_keys { + counter_per_key_id.insert(key.clone(), 0); + } + + for item in collection { + *counter_per_key_id.entry(item.key_id().clone()).or_default() += 1; + } + + counter_per_key_id +} diff --git a/rs/consensus/src/ecdsa/payload_builder.rs b/rs/consensus/src/ecdsa/payload_builder.rs index 9a8d762d2466..b22ff3741326 100644 --- a/rs/consensus/src/ecdsa/payload_builder.rs +++ b/rs/consensus/src/ecdsa/payload_builder.rs @@ -6,7 +6,7 @@ use super::pre_signer::{EcdsaTranscriptBuilder, EcdsaTranscriptBuilderImpl}; use super::signer::{EcdsaSignatureBuilder, EcdsaSignatureBuilderImpl}; use super::utils::{block_chain_reader, get_chain_key_config_if_enabled, InvalidChainCacheError}; -use crate::consensus::metrics::{EcdsaPayloadMetrics, CRITICAL_ERROR_ECDSA_KEY_TRANSCRIPT_MISSING}; +use crate::ecdsa::metrics::{EcdsaPayloadMetrics, CRITICAL_ERROR_ECDSA_KEY_TRANSCRIPT_MISSING}; pub(super) use errors::EcdsaPayloadError; use errors::MembershipError; use ic_consensus_utils::crypto::ConsensusCrypto; diff --git a/rs/consensus/src/ecdsa/payload_builder/signatures.rs b/rs/consensus/src/ecdsa/payload_builder/signatures.rs index 2f5a74345822..f45c5e57d635 100644 --- a/rs/consensus/src/ecdsa/payload_builder/signatures.rs +++ b/rs/consensus/src/ecdsa/payload_builder/signatures.rs @@ -11,7 +11,7 @@ use ic_types::{ Time, }; -use crate::{consensus::metrics::EcdsaPayloadMetrics, ecdsa::signer::EcdsaSignatureBuilder}; +use crate::{ecdsa::metrics::EcdsaPayloadMetrics, ecdsa::signer::EcdsaSignatureBuilder}; /// Helper to create a reject response to the management canister /// with the given code and message diff --git a/rs/consensus/src/ecdsa/payload_verifier.rs b/rs/consensus/src/ecdsa/payload_verifier.rs index 23d3ffdb268f..559d69d76d1d 100644 --- a/rs/consensus/src/ecdsa/payload_verifier.rs +++ b/rs/consensus/src/ecdsa/payload_verifier.rs @@ -28,7 +28,7 @@ use super::utils::{ block_chain_cache, get_chain_key_config_if_enabled, BuildSignatureInputsError, EcdsaBlockReaderImpl, InvalidChainCacheError, }; -use crate::consensus::metrics::timed_call; +use crate::ecdsa::metrics::timed_call; use crate::ecdsa::payload_builder::{create_data_payload_helper, create_summary_payload}; use crate::ecdsa::utils::build_signature_inputs; use ic_consensus_utils::crypto::ConsensusCrypto; diff --git a/rs/consensus/src/ecdsa/pre_signer.rs b/rs/consensus/src/ecdsa/pre_signer.rs index e2b08dc1bea1..9fe48820514a 100644 --- a/rs/consensus/src/ecdsa/pre_signer.rs +++ b/rs/consensus/src/ecdsa/pre_signer.rs @@ -1,7 +1,7 @@ //! The pre signature process manager -use crate::consensus::metrics::{timed_call, EcdsaPayloadMetrics, EcdsaPreSignerMetrics}; use crate::ecdsa::complaints::EcdsaTranscriptLoader; +use crate::ecdsa::metrics::{timed_call, EcdsaPayloadMetrics, EcdsaPreSignerMetrics}; use crate::ecdsa::utils::{load_transcripts, transcript_op_summary, EcdsaBlockReaderImpl}; use ic_consensus_utils::crypto::ConsensusCrypto; use ic_consensus_utils::RoundRobin; diff --git a/rs/consensus/src/ecdsa/signer.rs b/rs/consensus/src/ecdsa/signer.rs index 5e636d476988..da9c23b3f039 100644 --- a/rs/consensus/src/ecdsa/signer.rs +++ b/rs/consensus/src/ecdsa/signer.rs @@ -1,7 +1,7 @@ //! The signature process manager -use crate::consensus::metrics::{timed_call, EcdsaPayloadMetrics, EcdsaSignerMetrics}; use crate::ecdsa::complaints::EcdsaTranscriptLoader; +use crate::ecdsa::metrics::{timed_call, EcdsaPayloadMetrics, EcdsaSignerMetrics}; use crate::ecdsa::utils::{load_transcripts, EcdsaBlockReaderImpl}; use ic_consensus_utils::crypto::ConsensusCrypto; use ic_consensus_utils::RoundRobin; diff --git a/rs/consensus/src/ecdsa/stats.rs b/rs/consensus/src/ecdsa/stats.rs index 725392ab5dc2..0140468bc100 100644 --- a/rs/consensus/src/ecdsa/stats.rs +++ b/rs/consensus/src/ecdsa/stats.rs @@ -1,6 +1,6 @@ //! ECDSA specific stats. -use crate::consensus::metrics::{ +use crate::ecdsa::metrics::{ EcdsaPreSignatureMetrics, EcdsaSignatureMetrics, EcdsaTranscriptMetrics, }; use ic_management_canister_types::MasterPublicKeyId; diff --git a/rs/consensus/src/ecdsa/utils.rs b/rs/consensus/src/ecdsa/utils.rs index bc04e69ab623..8d4ba39fc952 100644 --- a/rs/consensus/src/ecdsa/utils.rs +++ b/rs/consensus/src/ecdsa/utils.rs @@ -1,7 +1,7 @@ //! Common utils for the ECDSA implementation. -use crate::consensus::metrics::EcdsaPayloadMetrics; use crate::ecdsa::complaints::{EcdsaTranscriptLoader, TranscriptLoadStatus}; +use crate::ecdsa::metrics::EcdsaPayloadMetrics; use ic_consensus_utils::pool_reader::PoolReader; use ic_crypto::get_master_public_key_from_transcript; use ic_interfaces::consensus_pool::ConsensusBlockChain;