Skip to content

Commit

Permalink
feat(EXC-1795): Limit cache disk space (#3366)
Browse files Browse the repository at this point in the history
EXC-1795

Add the ability for the LRU cache to limit space used on disk as well as
in memory and use this in the `CompilationCache` so that it doesn't grow
too large.
  • Loading branch information
adambratschikaye authored Jan 13, 2025
1 parent 8d79795 commit c16efb0
Show file tree
Hide file tree
Showing 13 changed files with 450 additions and 148 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 20 additions & 2 deletions rs/embedders/src/compilation_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ use crate::{
};
use ic_interfaces::execution_environment::{HypervisorError, HypervisorResult};
use ic_replicated_state::canister_state::execution_state::WasmMetadata;
use ic_types::{methods::WasmMethod, NumBytes, NumInstructions};
use ic_types::{methods::WasmMethod, MemoryDiskBytes, NumBytes, NumInstructions};
use ic_utils_lru_cache::LruCache;
use ic_wasm_types::{CanisterModule, WasmHash};

const GB: u64 = 1024 * 1024 * 1024;

/// Stores the serialized modules of wasm code that has already been compiled so
/// that it can be used again without recompiling.
pub enum CompilationCache {
Expand All @@ -36,10 +38,26 @@ pub enum CompilationCache {
},
}

impl MemoryDiskBytes for CompilationCache {
    /// Reports the `memory_bytes` of the cache held by the active variant.
    ///
    /// # Panics
    ///
    /// Panics if locking the variant's cache fails (the `lock()` result is
    /// unwrapped).
    fn memory_bytes(&self) -> usize {
        match self {
            Self::Memory { cache } => {
                let guard = cache.lock().unwrap();
                guard.memory_bytes()
            }
            Self::Disk { cache, .. } => {
                let guard = cache.lock().unwrap();
                guard.memory_bytes()
            }
        }
    }

    /// Reports the `disk_bytes` of the cache held by the active variant.
    ///
    /// # Panics
    ///
    /// Panics if locking the variant's cache fails (the `lock()` result is
    /// unwrapped).
    fn disk_bytes(&self) -> usize {
        match self {
            Self::Memory { cache } => {
                let guard = cache.lock().unwrap();
                guard.disk_bytes()
            }
            Self::Disk { cache, .. } => {
                let guard = cache.lock().unwrap();
                guard.disk_bytes()
            }
        }
    }
}

impl CompilationCache {
pub fn new(capacity: NumBytes) -> Self {
Self::Memory {
cache: Mutex::new(LruCache::new(capacity)),
cache: Mutex::new(LruCache::new(capacity, NumBytes::from(GB))),
}
}

Expand Down
19 changes: 14 additions & 5 deletions rs/embedders/src/serialized_module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::{

use ic_interfaces::execution_environment::{HypervisorError, HypervisorResult};
use ic_replicated_state::canister_state::execution_state::WasmMetadata;
use ic_types::{methods::WasmMethod, CountBytes, NumInstructions};
use ic_types::{methods::WasmMethod, MemoryDiskBytes, NumInstructions};
use ic_wasm_types::WasmEngineError;
use nix::sys::mman::{mmap, MapFlags, ProtFlags};
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -82,10 +82,14 @@ pub struct SerializedModule {
pub is_wasm64: bool,
}

impl CountBytes for SerializedModule {
fn count_bytes(&self) -> usize {
impl MemoryDiskBytes for SerializedModule {
    /// Returns the length of `self.bytes.0`.
    fn memory_bytes(&self) -> usize {
        let serialized = &self.bytes.0;
        serialized.len()
    }

    /// Always reports zero bytes on disk for this type.
    fn disk_bytes(&self) -> usize {
        0
    }
}

impl SerializedModule {
Expand Down Expand Up @@ -147,10 +151,15 @@ pub struct OnDiskSerializedModule {
pub is_wasm64: bool,
}

impl CountBytes for OnDiskSerializedModule {
fn count_bytes(&self) -> usize {
impl MemoryDiskBytes for OnDiskSerializedModule {
    /// Returns the in-memory size of the struct itself.
    fn memory_bytes(&self) -> usize {
        std::mem::size_of_val(self)
    }

    /// Returns the sum of the lengths reported by the metadata of `bytes`
    /// and `initial_state_data`, cast to `usize`.
    ///
    /// # Panics
    ///
    /// Panics if either `metadata()` lookup fails (both results are
    /// unwrapped).
    fn disk_bytes(&self) -> usize {
        // `bytes` is queried first, then `initial_state_data`, matching the
        // left-to-right evaluation of the single-expression form.
        let bytes_len = self.bytes.metadata().unwrap().len();
        let initial_state_len = self.initial_state_data.metadata().unwrap().len();
        (bytes_len + initial_state_len) as usize
    }
}

impl OnDiskSerializedModule {
Expand Down
36 changes: 28 additions & 8 deletions rs/execution_environment/src/hypervisor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ use ic_replicated_state::{NetworkTopology, ReplicatedState};
use ic_system_api::ExecutionParameters;
use ic_system_api::{sandbox_safe_system_state::SandboxSafeSystemState, ApiType};
use ic_types::{
messages::RequestMetadata, methods::FuncRef, CanisterId, NumBytes, NumInstructions, SubnetId,
Time,
messages::RequestMetadata, methods::FuncRef, CanisterId, MemoryDiskBytes, NumBytes,
NumInstructions, SubnetId, Time,
};
use ic_wasm_types::CanisterModule;
use prometheus::{Histogram, HistogramVec, IntCounter, IntGauge};
use prometheus::{Histogram, HistogramVec, IntCounter, IntGauge, IntGaugeVec};
use std::{
path::{Path, PathBuf},
sync::Arc,
Expand All @@ -50,6 +50,7 @@ pub struct HypervisorMetrics {
mmap_count: HistogramVec,
mprotect_count: HistogramVec,
copy_page_count: HistogramVec,
compilation_cache_size: IntGaugeVec,
}

impl HypervisorMetrics {
Expand Down Expand Up @@ -124,6 +125,8 @@ impl HypervisorMetrics {
decimal_buckets_with_zero(0,8),
&["api_type", "memory_type"]
),
compilation_cache_size: metrics_registry.int_gauge_vec("hypervisor_compilation_cache_size", "Bytes in memory and on disk used by the compilation cache.", &["location"],
),
}
}

Expand Down Expand Up @@ -184,7 +187,12 @@ impl HypervisorMetrics {
}
}

fn observe_compilation_metrics(&self, compilation_result: &CompilationResult) {
fn observe_compilation_metrics(
&self,
compilation_result: &CompilationResult,
cache_memory_size: usize,
cache_disk_size: usize,
) {
let CompilationResult {
largest_function_instruction_count,
compilation_time,
Expand All @@ -194,6 +202,12 @@ impl HypervisorMetrics {
.observe(largest_function_instruction_count.get() as f64);
self.compile.observe(compilation_time.as_secs_f64());
self.max_complexity.observe(*max_complexity as f64);
self.compilation_cache_size
.with_label_values(&["memory"])
.set(cache_memory_size as i64);
self.compilation_cache_size
.with_label_values(&["disk"])
.set(cache_disk_size as i64);
}
}

Expand Down Expand Up @@ -255,8 +269,11 @@ impl Hypervisor {
match creation_result {
Ok((execution_state, compilation_cost, compilation_result)) => {
if let Some(compilation_result) = compilation_result {
self.metrics
.observe_compilation_metrics(&compilation_result);
self.metrics.observe_compilation_metrics(
&compilation_result,
self.compilation_cache.memory_bytes(),
self.compilation_cache.disk_bytes(),
);
}
round_limits.instructions -= as_round_instructions(
compilation_cost_handling.adjusted_compilation_cost(compilation_cost),
Expand Down Expand Up @@ -489,8 +506,11 @@ impl Hypervisor {
execution_state,
);
if let Some(compilation_result) = compilation_result {
self.metrics
.observe_compilation_metrics(&compilation_result);
self.metrics.observe_compilation_metrics(
&compilation_result,
self.compilation_cache.memory_bytes(),
self.compilation_cache.disk_bytes(),
);
}
self.metrics.observe(&execution_result, api_type_str);

Expand Down
38 changes: 25 additions & 13 deletions rs/execution_environment/src/query_handler/query_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use ic_types::{
batch::QueryStats,
ingress::WasmResult,
messages::{Query, QuerySource},
CountBytes, Cycles, PrincipalId, Time, UserId,
Cycles, MemoryDiskBytes, PrincipalId, Time, UserId,
};
use ic_utils_lru_cache::LruCache;
use prometheus::{Histogram, IntCounter, IntGauge};
Expand Down Expand Up @@ -140,10 +140,14 @@ pub(crate) struct EntryKey {
pub method_payload: Vec<u8>,
}

impl CountBytes for EntryKey {
fn count_bytes(&self) -> usize {
impl MemoryDiskBytes for EntryKey {
    /// Returns the size of the key struct plus the lengths of `method_name`
    /// and `method_payload`.
    fn memory_bytes(&self) -> usize {
        let inline_size = size_of_val(self);
        let name_len = self.method_name.len();
        let payload_len = self.method_payload.len();
        inline_size + name_len + payload_len
    }

    /// Always reports zero bytes on disk for this type.
    fn disk_bytes(&self) -> usize {
        0
    }
}

impl From<&Query> for EntryKey {
Expand Down Expand Up @@ -211,9 +215,13 @@ pub(crate) struct EntryValue {
ignore_canister_balances: bool,
}

impl CountBytes for EntryValue {
fn count_bytes(&self) -> usize {
size_of_val(self) + self.result.count_bytes()
impl MemoryDiskBytes for EntryValue {
    /// Returns the size of the value struct plus the `memory_bytes` reported
    /// by its `result` field.
    fn memory_bytes(&self) -> usize {
        let inline_size = size_of_val(self);
        let result_size = self.result.memory_bytes();
        inline_size + result_size
    }

    /// Always reports zero bytes on disk for this type.
    fn disk_bytes(&self) -> usize {
        0
    }
}

Expand Down Expand Up @@ -377,9 +385,13 @@ pub(crate) struct QueryCache {
pub(crate) metrics: QueryCacheMetrics,
}

impl CountBytes for QueryCache {
fn count_bytes(&self) -> usize {
size_of_val(self) + self.cache.lock().unwrap().count_bytes()
impl MemoryDiskBytes for QueryCache {
    /// Returns the size of the `QueryCache` struct plus the `memory_bytes`
    /// reported by the locked inner cache.
    ///
    /// # Panics
    ///
    /// Panics if locking the inner cache fails (the `lock()` result is
    /// unwrapped).
    fn memory_bytes(&self) -> usize {
        let inline_size = size_of_val(self);
        // The guard is a temporary, so the lock is released as soon as the
        // inner size has been read.
        let cached_size = self.cache.lock().unwrap().memory_bytes();
        inline_size + cached_size
    }

    /// Always reports zero bytes on disk for this type.
    fn disk_bytes(&self) -> usize {
        0
    }
}

Expand All @@ -392,7 +404,7 @@ impl QueryCache {
data_certificate_expiry_time: Duration,
) -> Self {
QueryCache {
cache: Mutex::new(LruCache::new(capacity)),
cache: Mutex::new(LruCache::new(capacity, NumBytes::from(0))),
max_expiry_time,
data_certificate_expiry_time,
metrics: QueryCacheMetrics::new(metrics_registry),
Expand Down Expand Up @@ -422,7 +434,7 @@ impl QueryCache {
// The cache entry is no longer valid, remove it.
cache.pop(key);
// Update the `count_bytes` metric.
self.metrics.count_bytes.set(cache.count_bytes() as i64);
self.metrics.count_bytes.set(cache.memory_bytes() as i64);
}
}
None
Expand Down Expand Up @@ -470,8 +482,8 @@ impl QueryCache {
let d = evicted_value.elapsed_seconds(now);
self.metrics.evicted_entries_duration.observe(d);
}
let count_bytes = cache.count_bytes() as i64;
self.metrics.count_bytes.set(count_bytes);
let memory_bytes = cache.memory_bytes() as i64;
self.metrics.count_bytes.set(memory_bytes);
self.metrics.len.set(cache.len() as i64);
}
}
Loading

0 comments on commit c16efb0

Please sign in to comment.