From 6e847e32f7c9a83ca7fa58b25717551485033271 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 12:24:59 +0000 Subject: [PATCH 01/52] Introduce StringId copies of all functions that deal with Sample This will later allow us to introduce the new StringId code without disturbing existing API clients. This duplication is intended to be only an intermediate step to introducing support for StringId. --- profiling/src/api.rs | 88 +++++++++++++++++++++ profiling/src/internal/profile/mod.rs | 108 ++++++++++++++++++++++++++ 2 files changed, 196 insertions(+) diff --git a/profiling/src/api.rs b/profiling/src/api.rs index 521184ab6..71f9c7a43 100644 --- a/profiling/src/api.rs +++ b/profiling/src/api.rs @@ -46,6 +46,28 @@ pub struct Mapping<'a> { pub build_id: &'a str, } +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +pub struct StringIdMapping<'a> { + /// Address at which the binary (or DLL) is loaded into memory. + pub memory_start: u64, + + /// The limit of the address range occupied by this mapping. + pub memory_limit: u64, + + /// Offset in the binary that corresponds to the first mapped address. + pub file_offset: u64, + + /// The object this entry is loaded from. This can be a filename on + /// disk for the main binary and shared libraries, or virtual + /// abstractions like "[vdso]". + pub filename: &'a str, + + /// A string that uniquely identifies a particular program version + /// with high probability. E.g., for binaries generated by GNU tools, + /// it could be the contents of the .note.gnu.build-id field. + pub build_id: &'a str, +} + #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] pub struct Function<'a> { /// Name of the function, in human-readable form if available. @@ -62,6 +84,22 @@ pub struct Function<'a> { pub start_line: i64, } +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +pub struct StringIdFunction<'a> { + /// Name of the function, in human-readable form if available. + pub name: &'a str, + + /// Name of the function, as identified by the system. + /// For instance, it can be a C++ mangled name. + pub system_name: &'a str, + + /// Source file containing the function. + pub filename: &'a str, + + /// Line number in source file. + pub start_line: i64, +} + #[derive(Clone, Debug, Eq, PartialEq)] pub struct Line<'a> { /// The corresponding profile.Function for this line. @@ -85,6 +123,20 @@ pub struct Location<'a> { pub line: i64, } +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +pub struct StringIdLocation<'a> { + pub mapping: StringIdMapping<'a>, + pub function: StringIdFunction<'a>, + + /// The instruction address for this location, if available. It + /// should be within [Mapping.memory_start...Mapping.memory_limit] + /// for the corresponding mapping. A non-leaf address may be in the + /// middle of a call instruction. It is up to display tools to find + /// the beginning of the instruction if necessary. + pub address: u64, + pub line: i64, +} + #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] pub struct Label<'a> { pub key: &'a str, @@ -103,6 +155,24 @@ pub struct Label<'a> { pub num_unit: Option<&'a str>, } +#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +pub struct StringIdLabel<'a> { + pub key: &'a str, + + /// At most one of the following must be present + pub str: Option<&'a str>, + pub num: i64, + + /// Should only be present when num is present. + /// Specifies the units of num. + /// Use arbitrary string (for example, "requests") as a custom count unit. + /// If no unit is specified, consumer may apply heuristic to deduce the unit. + /// Consumers may also interpret units like "bytes" and "kilobytes" as memory + /// units and units like "seconds" and "nanoseconds" as time units, + /// and apply appropriate unit conversions to these. + pub num_unit: Option<&'a str>, +} + impl Label<'_> { pub fn uses_at_most_one_of_str_and_num(&self) -> bool { self.str.is_none() || (self.num == 0 && self.num_unit.is_none()) @@ -127,6 +197,24 @@ pub struct Sample<'a> { pub labels: Vec>, } +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct StringIdSample<'a> { + /// The leaf is at locations[0]. + pub locations: Vec>, + + /// The type and unit of each value is defined by the corresponding + /// entry in Profile.sample_type. All samples must have the same + /// number of values, the same as the length of Profile.sample_type. + /// When aggregating multiple samples into a single sample, the + /// result has a list of values that is the element-wise sum of the + /// lists of the originals. + pub values: Vec, + + /// label includes additional context for this sample. It can include + /// things like a thread id, allocation size, etc + pub labels: Vec>, +} + #[derive(Debug)] #[cfg_attr(test, derive(bolero_generator::TypeGenerator))] pub enum UpscalingInfo { diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 8363de3a6..b75a63471 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -117,6 +117,50 @@ impl Profile { Ok(()) } + pub fn add_string_id_sample( + &mut self, + sample: api::StringIdSample, + timestamp: Option, + ) -> anyhow::Result<()> { + anyhow::ensure!( + sample.values.len() == self.sample_types.len(), + "expected {} sample types, but sample had {} sample types", + self.sample_types.len(), + sample.values.len(), + ); + + self.validate_string_id_sample_labels(&sample)?; + let labels: Vec<_> = sample + .labels + .iter() + .map(|label| { + let key = self.intern(label.key); + let internal_label = if let Some(s) = label.str { + let str = self.intern(s); + Label::str(key, str) + } else { + let num = label.num; + let num_unit = label.num_unit.map(|s| self.intern(s)); + Label::num(key, num, num_unit) + }; + + self.labels.dedup(internal_label) + }) + .collect(); + let labels = self.label_sets.dedup(LabelSet::new(labels)); + + let locations = sample + .locations + .iter() + .map(|l| self.add_string_id_location(l)) + .collect(); + + let stacktrace = self.add_stacktrace(locations); + self.observations + .add(Sample::new(labels, stacktrace), timestamp, sample.values)?; + Ok(()) + } + pub fn add_upscaling_rule( &mut self, offset_values: &[usize], @@ -306,6 +350,20 @@ impl Profile { }) } + fn add_string_id_function(&mut self, function: &api::StringIdFunction) -> FunctionId { + let name = self.intern(function.name); + let system_name = self.intern(function.system_name); + let filename = self.intern(function.filename); + + let start_line = function.start_line; + self.functions.dedup(Function { + name, + system_name, + filename, + start_line, + }) + } + fn add_location(&mut self, location: &api::Location) -> LocationId { let mapping_id = self.add_mapping(&location.mapping); let function_id = self.add_function(&location.function); @@ -317,6 +375,17 @@ impl Profile { }) } + fn add_string_id_location(&mut self, location: &api::StringIdLocation) -> LocationId { + let mapping_id = self.add_string_id_mapping(&location.mapping); + let function_id = self.add_string_id_function(&location.function); + self.locations.dedup(Location { + mapping_id, + function_id, + address: location.address, + line: location.line, + }) + } + fn add_mapping(&mut self, mapping: &api::Mapping) -> MappingId { let filename = self.intern(mapping.filename); let build_id = self.intern(mapping.build_id); @@ -330,6 +399,19 @@ impl Profile { }) } + fn add_string_id_mapping(&mut self, mapping: &api::StringIdMapping) -> MappingId { + let filename = self.intern(mapping.filename); + let build_id = self.intern(mapping.build_id); + + self.mappings.dedup(Mapping { + memory_start: mapping.memory_start, + memory_limit: mapping.memory_limit, + file_offset: mapping.file_offset, + filename, + build_id, + }) + } + fn add_stacktrace(&mut self, locations: Vec) -> StackTraceId { self.stack_traces.dedup(StackTrace { locations }) } @@ -500,6 +582,32 @@ impl Profile { fn validate_sample_labels(&mut self, sample: &api::Sample) -> anyhow::Result<()> { let mut seen: HashMap<&str, &api::Label> = HashMap::new(); + for label in sample.labels.iter() { + if let Some(duplicate) = seen.insert(label.key, label) { + anyhow::bail!("Duplicate label on sample: {:?} {:?}", duplicate, label); + } + + if label.key == "local root span id" { + anyhow::ensure!( + label.str.is_none() && label.num != 0, + "Invalid \"local root span id\" label: {:?}", + label + ); + } + + anyhow::ensure!( + label.key != "end_timestamp_ns", + "Timestamp should not be passed as a label {:?}", + label + ); + } + Ok(()) + } + + /// Validates labels + fn validate_string_id_sample_labels(&mut self, sample: &api::StringIdSample) -> anyhow::Result<()> { + let mut seen: HashMap<&str, &api::StringIdLabel> = HashMap::new(); + for label in sample.labels.iter() { if let Some(duplicate) = seen.insert(label.key, label) { anyhow::bail!("Duplicate label on sample: {:?} {:?}", duplicate, label); From c8a2e6e4089ebd43a65076fe48f16f4f68ab8c2a Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 12:31:33 +0000 Subject: [PATCH 02/52] Introduce StringStorage implementation Credit goes to @alexjf, this is lifted from his earlier PR https://github.com/DataDog/libdatadog/pull/607 --- profiling/src/collections/mod.rs | 1 + profiling/src/collections/string_storage.rs | 208 ++++++++++++++++++++ 2 files changed, 209 insertions(+) create mode 100644 profiling/src/collections/string_storage.rs diff --git a/profiling/src/collections/mod.rs b/profiling/src/collections/mod.rs index 66281e671..f704281b4 100644 --- a/profiling/src/collections/mod.rs +++ b/profiling/src/collections/mod.rs @@ -2,4 +2,5 @@ // SPDX-License-Identifier: Apache-2.0 pub mod identifiable; +pub mod string_storage; pub mod string_table; diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs new file mode 100644 index 000000000..97b8e700a --- /dev/null +++ b/profiling/src/collections/string_storage.rs @@ -0,0 +1,208 @@ +use std::cell::Cell; +use std::collections::HashMap; +use std::hash::BuildHasherDefault; +use std::ptr; +use std::rc::Rc; + +use super::identifiable::FxIndexSet; +use super::identifiable::Id; +use super::identifiable::StringId; +use super::string_table::StringTable; + +pub trait StringStorage { + /// Interns the `str` as a string, returning the id in the string table. + /// The empty string is guaranteed to have an id of [StringId::ZERO]. + fn intern(&mut self, item: Rc) -> StringId; + fn get_string(&self, id: StringId) -> Rc; + fn len(&self) -> usize; + fn is_empty(&self) -> bool { + self.len() == 0 + } + fn into_iter(self: Box) -> Box>>; + fn clone_empty(&self) -> Box; +} + +pub struct SimpleStringStorage { + set: FxIndexSet>, +} + +impl SimpleStringStorage { + pub fn new() -> Self { + SimpleStringStorage { + set: Default::default(), + } + } +} + +impl Default for SimpleStringStorage { + fn default() -> Self { + Self::new() + } +} + +impl StringStorage for SimpleStringStorage { + fn intern(&mut self, item: Rc) -> StringId { + // For performance, delay converting the [&str] to a [String] until + // after it has been determined to not exist in the set. This avoids + // temporary allocations. + let index = match self.set.get_index_of(&item) { + Some(index) => index, + None => { + let (index, _inserted) = self.set.insert_full(item.clone()); + // This wouldn't make any sense; the item couldn't be found so + // we try to insert it, but suddenly it exists now? + debug_assert!(_inserted); + index + } + }; + StringId::from_offset(index) + } + + fn get_string(&self, id: StringId) -> Rc { + self.set + .get_index(id.to_offset()) + .expect("StringId to have a valid interned index") + .clone() + } + + fn len(&self) -> usize { + self.set.len() + } + + fn into_iter(self: Box) -> Box>> { + Box::new(self.set.into_iter()) + } + + fn clone_empty(&self) -> Box { + Box::new(SimpleStringStorage::new()) + } +} + +#[derive(PartialEq, Debug)] +struct ManagedStringData { + str: Rc, + last_usage_gen: Cell>, + cached_seq_num_for: Cell>, + cached_seq_num: Cell>, + usage_count: Cell, +} + +pub struct ManagedStringStorage { + next_id: u32, + id_to_data: HashMap>, + str_to_id: HashMap, u32, BuildHasherDefault>, + current_gen: u32, +} + +impl ManagedStringStorage { + pub fn new() -> Self { + let mut storage = ManagedStringStorage { + next_id: 0, + id_to_data: Default::default(), + str_to_id: Default::default(), + current_gen: 0, + }; + // Ensure empty string gets id 0 and always has usage > 0 so it's always retained + storage.intern(""); + storage + } + + pub fn advance_gen(&mut self) { + self.id_to_data.retain(|_, data| { + /*let used_in_this_gen = data + .last_usage_gen + .get() + .map_or(false, |last_usage_gen| last_usage_gen == self.current_gen); + if used_in_this_gen || *id == 0 { + // Empty string (id = 0) or anything that was used in the gen + // we are now closing, is kept alive + return true; + }*/ + if data.usage_count.get() > 0 { + return true; + } + + self.str_to_id.remove_entry(&data.str); + false + }); + self.current_gen += 1; + } + + pub fn intern(&mut self, item: &str) -> u32 { + let entry = self.str_to_id.get_key_value(item); + match entry { + Some((_, id)) => { + let usage_count = &self + .id_to_data + .get(id) + .expect("id_to_data and str_to_id should be in sync") + .usage_count; + usage_count.set(usage_count.get() + 1); + *id + } + None => { + let id = self.next_id; + let str: Rc = item.into(); + let data = ManagedStringData { + str: str.clone(), + last_usage_gen: Cell::new(None), + cached_seq_num_for: Cell::new(None), + cached_seq_num: Cell::new(None), + usage_count: Cell::new(1), + }; + self.next_id = self.next_id.checked_add(1).expect("Ran out of string ids!"); + let old_value = self.str_to_id.insert(str.clone(), id); + debug_assert_eq!(old_value, None); + let old_value = self.id_to_data.insert(id, data); + debug_assert_eq!(old_value, None); + id + } + } + } + + pub fn unintern(&self, id: u32) { + let data = self.get_data(id); + let usage_count = &data.usage_count; + usage_count.set(usage_count.get() - 1); + } + + pub fn get_seq_num(&self, id: u32, profile_strings: &mut StringTable) -> StringId { + let data = self.get_data(id); + + let profile_strings_pointer = ptr::addr_of!(*profile_strings); + + match (data.cached_seq_num.get(), data.cached_seq_num_for.get()) { + (Some(seq_num), Some(pointer)) if pointer == profile_strings_pointer => seq_num, + (_, _) => { + let seq_num = profile_strings.intern(data.str.as_ref()); + data.cached_seq_num.set(Some(seq_num)); + data.cached_seq_num_for.set(Some(profile_strings_pointer)); + seq_num + } + } + } + + pub fn get_string(&self, id: u32) -> Rc { + let data = self.get_data(id); + + data.str.clone() + } + + fn get_data(&self, id: u32) -> &ManagedStringData { + let data = match self.id_to_data.get(&id) { + Some(v) => v, + None => { + println!("{:?} {:?}", id, self.id_to_data); + panic!("StringId to have a valid interned id"); + } + }; + data.last_usage_gen.replace(Some(self.current_gen)); + data + } +} + +impl Default for ManagedStringStorage { + fn default() -> Self { + Self::new() + } +} From 795d25f4934568f745927cede4c1758fae0e9e84 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 12:40:26 +0000 Subject: [PATCH 03/52] Introduce StringStorage instance into Profile --- profiling/src/internal/profile/mod.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index b75a63471..8008d118b 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -15,7 +15,10 @@ use crate::serializer::CompressedProtobufSerializer; use anyhow::Context; use std::borrow::Cow; use std::collections::HashMap; +use std::rc::Rc; +use std::sync::RwLock; use std::time::{Duration, SystemTime}; +use crate::collections::string_storage::ManagedStringStorage; pub struct Profile { /// When profiles are reset, the sample-types need to be preserved. This @@ -38,6 +41,7 @@ pub struct Profile { stack_traces: FxIndexSet, start_time: SystemTime, strings: StringTable, + string_storage: Option>>, timestamp_key: StringId, upscaling_rules: UpscalingRules, } @@ -198,6 +202,22 @@ impl Profile { Self::backup_period(period), Self::backup_sample_types(sample_types), start_time, + None, + ) + } + + #[inline] + pub fn with_string_storage( + start_time: SystemTime, + sample_types: &[api::ValueType], + period: Option, + string_storage: Rc>, + ) -> Self { + Self::new_internal( + Self::backup_period(period), + Self::backup_sample_types(sample_types), + start_time, + Some(string_storage), ) } @@ -212,6 +232,7 @@ impl Profile { self.owned_period.take(), self.owned_sample_types.take(), start_time.unwrap_or_else(SystemTime::now), + self.string_storage.clone(), ); std::mem::swap(&mut *self, &mut profile); @@ -505,6 +526,7 @@ impl Profile { owned_period: Option, owned_sample_types: Option>, start_time: SystemTime, + string_storage: Option>>, ) -> Self { let mut profile = Self { owned_period, @@ -521,6 +543,7 @@ impl Profile { stack_traces: Default::default(), start_time, strings: Default::default(), + string_storage, timestamp_key: Default::default(), upscaling_rules: Default::default(), }; From 8d6851389c5b4cdb7e9e18199f6fbdc5510bf5b1 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 12:56:18 +0000 Subject: [PATCH 04/52] Ran `cargo fmt` --- profiling/src/internal/profile/mod.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 8008d118b..478bbc626 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -8,6 +8,7 @@ use self::api::UpscalingInfo; use super::*; use crate::api; use crate::collections::identifiable::*; +use crate::collections::string_storage::ManagedStringStorage; use crate::collections::string_table::StringTable; use crate::iter::{IntoLendingIterator, LendingIterator}; use crate::pprof::sliced_proto::*; @@ -18,7 +19,6 @@ use std::collections::HashMap; use std::rc::Rc; use std::sync::RwLock; use std::time::{Duration, SystemTime}; -use crate::collections::string_storage::ManagedStringStorage; pub struct Profile { /// When profiles are reset, the sample-types need to be preserved. This @@ -121,7 +121,7 @@ impl Profile { Ok(()) } - pub fn add_string_id_sample( + pub fn add_string_id_sample( &mut self, sample: api::StringIdSample, timestamp: Option, @@ -627,8 +627,11 @@ impl Profile { Ok(()) } - /// Validates labels - fn validate_string_id_sample_labels(&mut self, sample: &api::StringIdSample) -> anyhow::Result<()> { + /// Validates labels + fn validate_string_id_sample_labels( + &mut self, + sample: &api::StringIdSample, + ) -> anyhow::Result<()> { let mut seen: HashMap<&str, &api::StringIdLabel> = HashMap::new(); for label in sample.labels.iter() { From d3c7d6d7e9d04906cb9443015f7c6453e086ec92 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 13:32:03 +0000 Subject: [PATCH 05/52] Introduce use of PersistentStringId in StringId* variants I decided to introduce the `PersistentStringId` name to distinguish such ids from the `StringId` type used by things that are in the profile-bound string table. --- profiling/src/api.rs | 40 +++++++++++++++------------ profiling/src/internal/profile/mod.rs | 33 ++++++++++++++-------- 2 files changed, 45 insertions(+), 28 deletions(-) diff --git a/profiling/src/api.rs b/profiling/src/api.rs index 71f9c7a43..d90d1e68a 100644 --- a/profiling/src/api.rs +++ b/profiling/src/api.rs @@ -4,6 +4,7 @@ use crate::pprof; use std::ops::{Add, Sub}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use crate::collections::identifiable::StringId; #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ValueType<'a> { @@ -24,6 +25,11 @@ pub struct Period<'a> { pub value: i64, } +#[derive(Copy, Clone, Default, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] +pub struct PersistentStringId { + pub value: u32, +} + #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] pub struct Mapping<'a> { /// Address at which the binary (or DLL) is loaded into memory. @@ -47,7 +53,7 @@ pub struct Mapping<'a> { } #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] -pub struct StringIdMapping<'a> { +pub struct StringIdMapping { /// Address at which the binary (or DLL) is loaded into memory. pub memory_start: u64, @@ -60,12 +66,12 @@ pub struct StringIdMapping<'a> { /// The object this entry is loaded from. This can be a filename on /// disk for the main binary and shared libraries, or virtual /// abstractions like "[vdso]". - pub filename: &'a str, + pub filename: PersistentStringId, /// A string that uniquely identifies a particular program version /// with high probability. E.g., for binaries generated by GNU tools, /// it could be the contents of the .note.gnu.build-id field. - pub build_id: &'a str, + pub build_id: PersistentStringId, } #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] @@ -85,16 +91,16 @@ pub struct Function<'a> { } #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] -pub struct StringIdFunction<'a> { +pub struct StringIdFunction { /// Name of the function, in human-readable form if available. - pub name: &'a str, + pub name: PersistentStringId, /// Name of the function, as identified by the system. /// For instance, it can be a C++ mangled name. - pub system_name: &'a str, + pub system_name: PersistentStringId, /// Source file containing the function. - pub filename: &'a str, + pub filename: PersistentStringId, /// Line number in source file. pub start_line: i64, @@ -124,9 +130,9 @@ pub struct Location<'a> { } #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] -pub struct StringIdLocation<'a> { - pub mapping: StringIdMapping<'a>, - pub function: StringIdFunction<'a>, +pub struct StringIdLocation { + pub mapping: StringIdMapping, + pub function: StringIdFunction, /// The instruction address for this location, if available. It /// should be within [Mapping.memory_start...Mapping.memory_limit] @@ -156,11 +162,11 @@ pub struct Label<'a> { } #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] -pub struct StringIdLabel<'a> { - pub key: &'a str, +pub struct StringIdLabel { + pub key: PersistentStringId, /// At most one of the following must be present - pub str: Option<&'a str>, + pub str: Option, pub num: i64, /// Should only be present when num is present. @@ -170,7 +176,7 @@ pub struct StringIdLabel<'a> { /// Consumers may also interpret units like "bytes" and "kilobytes" as memory /// units and units like "seconds" and "nanoseconds" as time units, /// and apply appropriate unit conversions to these. - pub num_unit: Option<&'a str>, + pub num_unit: Option, } impl Label<'_> { @@ -198,9 +204,9 @@ pub struct Sample<'a> { } #[derive(Clone, Debug, Eq, PartialEq)] -pub struct StringIdSample<'a> { +pub struct StringIdSample { /// The leaf is at locations[0]. - pub locations: Vec>, + pub locations: Vec, /// The type and unit of each value is defined by the corresponding /// entry in Profile.sample_type. All samples must have the same @@ -212,7 +218,7 @@ pub struct StringIdSample<'a> { /// label includes additional context for this sample. It can include /// things like a thread id, allocation size, etc - pub labels: Vec>, + pub labels: Vec, } #[derive(Debug)] diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 478bbc626..f269c9011 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -19,6 +19,7 @@ use std::collections::HashMap; use std::rc::Rc; use std::sync::RwLock; use std::time::{Duration, SystemTime}; +use crate::api::PersistentStringId; pub struct Profile { /// When profiles are reset, the sample-types need to be preserved. This @@ -138,13 +139,13 @@ impl Profile { .labels .iter() .map(|label| { - let key = self.intern(label.key); + let key = self.resolve(label.key); let internal_label = if let Some(s) = label.str { - let str = self.intern(s); + let str = self.resolve(s); Label::str(key, str) } else { let num = label.num; - let num_unit = label.num_unit.map(|s| self.intern(s)); + let num_unit = label.num_unit.map(|s| self.resolve(s)); Label::num(key, num, num_unit) }; @@ -185,6 +186,15 @@ impl Profile { Ok(()) } + pub fn resolve(&mut self, id: PersistentStringId) -> StringId { + self.string_storage + .as_ref() + .expect("resolution from id requires managed string storage") + .read() + .expect("acquisition of write lock on string storage should succeed") + .get_seq_num(id.value, &mut self.strings) + } + /// Creates a profile with `start_time`. /// Initializes the string table to hold: /// - "" (the empty string) @@ -372,9 +382,9 @@ impl Profile { } fn add_string_id_function(&mut self, function: &api::StringIdFunction) -> FunctionId { - let name = self.intern(function.name); - let system_name = self.intern(function.system_name); - let filename = self.intern(function.filename); + let name = self.resolve(function.name); + let system_name = self.resolve(function.system_name); + let filename = self.resolve(function.filename); let start_line = function.start_line; self.functions.dedup(Function { @@ -421,8 +431,8 @@ impl Profile { } fn add_string_id_mapping(&mut self, mapping: &api::StringIdMapping) -> MappingId { - let filename = self.intern(mapping.filename); - let build_id = self.intern(mapping.build_id); + let filename = self.resolve(mapping.filename); + let build_id = self.resolve(mapping.build_id); self.mappings.dedup(Mapping { memory_start: mapping.memory_start, @@ -632,14 +642,15 @@ impl Profile { &mut self, sample: &api::StringIdSample, ) -> anyhow::Result<()> { - let mut seen: HashMap<&str, &api::StringIdLabel> = HashMap::new(); + let mut seen: HashMap = HashMap::new(); for label in sample.labels.iter() { if let Some(duplicate) = seen.insert(label.key, label) { anyhow::bail!("Duplicate label on sample: {:?} {:?}", duplicate, label); } - if label.key == "local root span id" { + // FIXME +/* if label.key == "local root span id" { anyhow::ensure!( label.str.is_none() && label.num != 0, "Invalid \"local root span id\" label: {:?}", @@ -651,7 +662,7 @@ impl Profile { label.key != "end_timestamp_ns", "Timestamp should not be passed as a label {:?}", label - ); + );*/ } Ok(()) } From 7716d56149053917d48005701651b076c5c8fb48 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 14:01:51 +0000 Subject: [PATCH 06/52] Ran `cargo fmt` --- profiling/src/api.rs | 2 +- profiling/src/internal/profile/mod.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/profiling/src/api.rs b/profiling/src/api.rs index d90d1e68a..7e8e6b8a0 100644 --- a/profiling/src/api.rs +++ b/profiling/src/api.rs @@ -1,10 +1,10 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use crate::collections::identifiable::StringId; use crate::pprof; use std::ops::{Add, Sub}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use crate::collections::identifiable::StringId; #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ValueType<'a> { diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index f269c9011..bd4a31717 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -7,6 +7,7 @@ mod fuzz_tests; use self::api::UpscalingInfo; use super::*; use crate::api; +use crate::api::PersistentStringId; use crate::collections::identifiable::*; use crate::collections::string_storage::ManagedStringStorage; use crate::collections::string_table::StringTable; @@ -19,7 +20,6 @@ use std::collections::HashMap; use std::rc::Rc; use std::sync::RwLock; use std::time::{Duration, SystemTime}; -use crate::api::PersistentStringId; pub struct Profile { /// When profiles are reset, the sample-types need to be preserved. This @@ -650,7 +650,7 @@ impl Profile { } // FIXME -/* if label.key == "local root span id" { + /* if label.key == "local root span id" { anyhow::ensure!( label.str.is_none() && label.num != 0, "Invalid \"local root span id\" label: {:?}", From 05706257ed737c2d921b481347b4cf9f642d5ea1 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 15:20:44 +0000 Subject: [PATCH 07/52] Fix label validation --- profiling/src/internal/profile/mod.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index bd4a31717..3c4b005ad 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -611,7 +611,6 @@ impl Profile { .collect() } - /// Validates labels fn validate_sample_labels(&mut self, sample: &api::Sample) -> anyhow::Result<()> { let mut seen: HashMap<&str, &api::Label> = HashMap::new(); @@ -637,7 +636,6 @@ impl Profile { Ok(()) } - /// Validates labels fn validate_string_id_sample_labels( &mut self, sample: &api::StringIdSample, @@ -649,8 +647,9 @@ impl Profile { anyhow::bail!("Duplicate label on sample: {:?} {:?}", duplicate, label); } - // FIXME - /* if label.key == "local root span id" { + let key_id: StringId = self.resolve(label.key); + + if key_id == self.endpoints.local_root_span_id_label { anyhow::ensure!( label.str.is_none() && label.num != 0, "Invalid \"local root span id\" label: {:?}", @@ -659,10 +658,10 @@ impl Profile { } anyhow::ensure!( - label.key != "end_timestamp_ns", + key_id != self.timestamp_key, "Timestamp should not be passed as a label {:?}", label - );*/ + ); } Ok(()) } From 2887d2fbebcf8956987d6fab364577c4d9713ea5 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 15:24:55 +0000 Subject: [PATCH 08/52] Fix incorrect error message --- profiling/src/internal/profile/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 3c4b005ad..65a72f4d6 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -191,7 +191,7 @@ impl Profile { .as_ref() .expect("resolution from id requires managed string storage") .read() - .expect("acquisition of write lock on string storage should succeed") + .expect("acquisition of read lock on string storage should succeed") .get_seq_num(id.value, &mut self.strings) } From 81fb1ff1e2422245311d12e76ee47ed32c069762 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 15:36:00 +0000 Subject: [PATCH 09/52] Remove duplicate comments from StringId variants --- profiling/src/api.rs | 49 +++++--------------------------------------- 1 file changed, 5 insertions(+), 44 deletions(-) diff --git a/profiling/src/api.rs b/profiling/src/api.rs index 7e8e6b8a0..4dbcbf939 100644 --- a/profiling/src/api.rs +++ b/profiling/src/api.rs @@ -53,24 +53,12 @@ pub struct Mapping<'a> { } #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +// Same as Mapping, but using StringIds pub struct StringIdMapping { - /// Address at which the binary (or DLL) is loaded into memory. pub memory_start: u64, - - /// The limit of the address range occupied by this mapping. pub memory_limit: u64, - - /// Offset in the binary that corresponds to the first mapped address. pub file_offset: u64, - - /// The object this entry is loaded from. This can be a filename on - /// disk for the main binary and shared libraries, or virtual - /// abstractions like "[vdso]". pub filename: PersistentStringId, - - /// A string that uniquely identifies a particular program version - /// with high probability. E.g., for binaries generated by GNU tools, - /// it could be the contents of the .note.gnu.build-id field. pub build_id: PersistentStringId, } @@ -91,18 +79,11 @@ pub struct Function<'a> { } #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +// Same as Function, but using StringIds pub struct StringIdFunction { - /// Name of the function, in human-readable form if available. pub name: PersistentStringId, - - /// Name of the function, as identified by the system. - /// For instance, it can be a C++ mangled name. pub system_name: PersistentStringId, - - /// Source file containing the function. pub filename: PersistentStringId, - - /// Line number in source file. pub start_line: i64, } @@ -130,15 +111,10 @@ pub struct Location<'a> { } #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +// Same as Location, but using StringIds pub struct StringIdLocation { pub mapping: StringIdMapping, pub function: StringIdFunction, - - /// The instruction address for this location, if available. It - /// should be within [Mapping.memory_start...Mapping.memory_limit] - /// for the corresponding mapping. A non-leaf address may be in the - /// middle of a call instruction. It is up to display tools to find - /// the beginning of the instruction if necessary. pub address: u64, pub line: i64, } @@ -162,6 +138,7 @@ pub struct Label<'a> { } #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] +// Same as Label, but using StringIds pub struct StringIdLabel { pub key: PersistentStringId, @@ -170,12 +147,6 @@ pub struct StringIdLabel { pub num: i64, /// Should only be present when num is present. - /// Specifies the units of num. - /// Use arbitrary string (for example, "requests") as a custom count unit. - /// If no unit is specified, consumer may apply heuristic to deduce the unit. - /// Consumers may also interpret units like "bytes" and "kilobytes" as memory - /// units and units like "seconds" and "nanoseconds" as time units, - /// and apply appropriate unit conversions to these. pub num_unit: Option, } @@ -204,20 +175,10 @@ pub struct Sample<'a> { } #[derive(Clone, Debug, Eq, PartialEq)] +// Same as Sample, but using StringIds pub struct StringIdSample { - /// The leaf is at locations[0]. pub locations: Vec, - - /// The type and unit of each value is defined by the corresponding - /// entry in Profile.sample_type. All samples must have the same - /// number of values, the same as the length of Profile.sample_type. - /// When aggregating multiple samples into a single sample, the - /// result has a list of values that is the element-wise sum of the - /// lists of the originals. pub values: Vec, - - /// label includes additional context for this sample. It can include - /// things like a thread id, allocation size, etc pub labels: Vec, } From cc6b6c0395fd31a88dfd16278e17b03098a67496 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 15:44:23 +0000 Subject: [PATCH 10/52] Extract method to avoid redundancy in `add_sample` --- profiling/src/internal/profile/mod.rs | 43 ++++++++++++++------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 65a72f4d6..0f596bc00 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -15,6 +15,7 @@ use crate::iter::{IntoLendingIterator, LendingIterator}; use crate::pprof::sliced_proto::*; use crate::serializer::CompressedProtobufSerializer; use anyhow::Context; +use libc::time; use std::borrow::Cow; use std::collections::HashMap; use std::rc::Rc; @@ -83,13 +84,6 @@ impl Profile { sample: api::Sample, timestamp: Option, ) -> anyhow::Result<()> { - anyhow::ensure!( - sample.values.len() == self.sample_types.len(), - "expected {} sample types, but sample had {} sample types", - self.sample_types.len(), - sample.values.len(), - ); - self.validate_sample_labels(&sample)?; let labels: Vec<_> = sample .labels @@ -108,7 +102,6 @@ impl Profile { self.labels.dedup(internal_label) }) .collect(); - let labels = self.label_sets.dedup(LabelSet::new(labels)); let locations = sample .locations @@ -116,10 +109,7 @@ impl Profile { .map(|l| self.add_location(l)) .collect(); - let stacktrace = self.add_stacktrace(locations); - self.observations - .add(Sample::new(labels, stacktrace), timestamp, sample.values)?; - Ok(()) + self.add_sample_internal(sample.values, labels, locations, timestamp) } pub fn add_string_id_sample( @@ -127,13 +117,6 @@ impl Profile { sample: api::StringIdSample, timestamp: Option, ) -> anyhow::Result<()> { - anyhow::ensure!( - sample.values.len() == self.sample_types.len(), - "expected {} sample types, but sample had {} sample types", - self.sample_types.len(), - sample.values.len(), - ); - self.validate_string_id_sample_labels(&sample)?; let labels: Vec<_> = sample .labels @@ -152,7 +135,6 @@ impl Profile { self.labels.dedup(internal_label) }) .collect(); - let labels = self.label_sets.dedup(LabelSet::new(labels)); let locations = sample .locations @@ -160,9 +142,28 @@ impl Profile { .map(|l| self.add_string_id_location(l)) .collect(); + self.add_sample_internal(sample.values, labels, locations, timestamp) + } + + fn add_sample_internal( + &mut self, + values: Vec, + labels: Vec, + locations: Vec, + timestamp: Option, + ) -> anyhow::Result<()> { + anyhow::ensure!( + values.len() == self.sample_types.len(), + "expected {} sample types, but sample had {} sample types", + self.sample_types.len(), + values.len(), + ); + + let labels = self.label_sets.dedup(LabelSet::new(labels)); + let stacktrace = self.add_stacktrace(locations); self.observations - .add(Sample::new(labels, stacktrace), timestamp, sample.values)?; + .add(Sample::new(labels, stacktrace), timestamp, values)?; Ok(()) } From 8fa065389658ed7db0f03287129ec5079a4d3431 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Nov 2024 17:15:00 +0000 Subject: [PATCH 11/52] Introduce `PersistentStringId::new` to make it easier to create structure --- profiling/src/api.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/profiling/src/api.rs b/profiling/src/api.rs index 4dbcbf939..1a619c3d2 100644 --- a/profiling/src/api.rs +++ b/profiling/src/api.rs @@ -30,6 +30,12 @@ pub struct PersistentStringId { pub value: u32, } +impl PersistentStringId { + pub fn new(value: u32) -> Self { + PersistentStringId { value } + } +} + #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] pub struct Mapping<'a> { /// Address at which the binary (or DLL) is loaded into memory. From 9413b95bd8675cf928d4e36e16fe7070154149d1 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 6 Nov 2024 09:49:49 +0000 Subject: [PATCH 12/52] Import + wire up ffi changes for string storage --- profiling-ffi/cbindgen.toml | 1 + profiling-ffi/src/lib.rs | 1 + profiling-ffi/src/profiles.rs | 152 ++++++++++++++++++++++- profiling-ffi/src/string_storage.rs | 183 ++++++++++++++++++++++++++++ 4 files changed, 331 insertions(+), 6 deletions(-) create mode 100644 profiling-ffi/src/string_storage.rs diff --git a/profiling-ffi/cbindgen.toml b/profiling-ffi/cbindgen.toml index 4c3c485dd..4b285b1d9 100644 --- a/profiling-ffi/cbindgen.toml +++ b/profiling-ffi/cbindgen.toml @@ -45,6 +45,7 @@ renaming_overrides_prefixing = true "SendResult" = "ddog_prof_Exporter_SendResult" "SerializeResult" = "ddog_prof_Profile_SerializeResult" "Slice_File" = "ddog_prof_Exporter_Slice_File" +"ManagedStringStorage" = "ddog_prof_ManagedStringStorage" [export.mangle] rename_types = "PascalCase" diff --git a/profiling-ffi/src/lib.rs b/profiling-ffi/src/lib.rs index f25481447..e7f29e4a2 100644 --- a/profiling-ffi/src/lib.rs +++ b/profiling-ffi/src/lib.rs @@ -6,6 +6,7 @@ pub use symbolizer_ffi::*; mod exporter; mod profiles; +mod string_storage; // re-export crashtracker ffi #[cfg(feature = "crashtracker-ffi")] diff --git a/profiling-ffi/src/profiles.rs b/profiling-ffi/src/profiles.rs index 6f3b4716b..4816eba04 100644 --- a/profiling-ffi/src/profiles.rs +++ b/profiling-ffi/src/profiles.rs @@ -1,8 +1,11 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use crate::string_storage::get_inner_string_storage; +use crate::string_storage::ManagedStringStorage; use anyhow::Context; use datadog_profiling::api; +use datadog_profiling::api::PersistentStringId; use datadog_profiling::internal; use datadog_profiling::internal::ProfiledEndpointsStats; use ddcommon_ffi::slice::{AsBytes, CharSlice, Slice}; @@ -128,9 +131,11 @@ pub struct Period<'a> { #[derive(Copy, Clone, Default)] pub struct Label<'a> { pub key: CharSlice<'a>, + pub key_id: u32, /// At most one of the following must be present pub str: CharSlice<'a>, + pub str_id: u32, pub num: i64, /// Should only be present when num is present. @@ -141,6 +146,7 @@ pub struct Label<'a> { /// units and units like "seconds" and "nanoseconds" as time units, /// and apply appropriate unit conversions to these. pub num_unit: CharSlice<'a>, + pub num_unit_id: u32, } #[repr(C)] @@ -148,13 +154,16 @@ pub struct Label<'a> { pub struct Function<'a> { /// Name of the function, in human-readable form if available. pub name: CharSlice<'a>, + pub name_id: u32, /// Name of the function, as identified by the system. /// For instance, it can be a C++ mangled name. pub system_name: CharSlice<'a>, + pub system_name_id: u32, /// Source file containing the function. pub filename: CharSlice<'a>, + pub filename_id: u32, /// Line number in source file. pub start_line: i64, @@ -202,11 +211,13 @@ pub struct Mapping<'a> { /// disk for the main binary and shared libraries, or virtual /// abstractions like "[vdso]". pub filename: CharSlice<'a>, + pub filename_id: u32, /// A string that uniquely identifies a particular program version /// with high probability. E.g., for binaries generated by GNU tools, /// it could be the contents of the .note.gnu.build-id field. pub build_id: CharSlice<'a>, + pub build_id_id: u32, } #[repr(C)] @@ -244,6 +255,22 @@ impl<'a> TryFrom<&'a Mapping<'a>> for api::Mapping<'a> { } } +impl<'a> TryFrom<&'a Mapping<'a>> for api::StringIdMapping { + type Error = Utf8Error; + + fn try_from(mapping: &'a Mapping<'a>) -> Result { + let filename = PersistentStringId::new(mapping.filename_id); + let build_id = PersistentStringId::new(mapping.build_id_id); + Ok(Self { + memory_start: mapping.memory_start, + memory_limit: mapping.memory_limit, + file_offset: mapping.file_offset, + filename, + build_id, + }) + } +} + impl<'a> From<&'a ValueType<'a>> for api::ValueType<'a> { fn from(vt: &'a ValueType<'a>) -> Self { Self::new( @@ -278,13 +305,18 @@ impl<'a> TryFrom<&'a Function<'a>> for api::Function<'a> { } } -impl<'a> TryFrom<&'a Line<'a>> for api::Line<'a> { +impl<'a> TryFrom<&'a Function<'a>> for api::StringIdFunction { type Error = Utf8Error; - fn try_from(line: &'a Line<'a>) -> Result { + fn try_from(function: &'a Function<'a>) -> Result { + let name = PersistentStringId::new(function.name_id); + let system_name = PersistentStringId::new(function.system_name_id); + let filename = PersistentStringId::new(function.filename_id); Ok(Self { - function: api::Function::try_from(&line.function)?, - line: line.line, + name, + system_name, + filename, + start_line: function.start_line, }) } } @@ -304,6 +336,21 @@ impl<'a> TryFrom<&'a Location<'a>> for api::Location<'a> { } } +impl<'a> TryFrom<&'a Location<'a>> for api::StringIdLocation { + type Error = Utf8Error; + + fn try_from(location: &'a Location<'a>) -> Result { + let mapping = api::StringIdMapping::try_from(&location.mapping)?; + let function = api::StringIdFunction::try_from(&location.function)?; + Ok(Self { + mapping, + function, + address: location.address, + line: location.line, + }) + } +} + impl<'a> TryFrom<&'a Label<'a>> for api::Label<'a> { type Error = Utf8Error; @@ -327,6 +374,33 @@ impl<'a> TryFrom<&'a Label<'a>> for api::Label<'a> { } } +impl<'a> TryFrom<&'a Label<'a>> for api::StringIdLabel { + type Error = Utf8Error; + + fn try_from(label: &'a Label<'a>) -> Result { + let key = PersistentStringId::new(label.key_id); + let str = label.str_id; + let str = if str == 0 { + None + } else { + Some(PersistentStringId::new(str)) + }; + let num_unit = label.num_unit_id; + let num_unit = if num_unit == 0 { + None + } else { + Some(PersistentStringId::new(num_unit)) + }; + + Ok(Self { + key, + str, + num: label.num, + num_unit, + }) + } +} + impl<'a> TryFrom> for api::Sample<'a> { type Error = Utf8Error; @@ -352,6 +426,31 @@ impl<'a> TryFrom> for api::Sample<'a> { } } +impl TryFrom> for api::StringIdSample { + type Error = Utf8Error; + + fn try_from(sample: Sample<'_>) -> Result { + let mut locations: Vec = Vec::with_capacity(sample.locations.len()); + + for location in sample.locations.as_slice().iter() { + locations.push(location.try_into()?) + } + + let values: Vec = sample.values.into_slice().to_vec(); + + let mut labels: Vec = Vec::with_capacity(sample.labels.len()); + for label in sample.labels.as_slice().iter() { + labels.push(label.try_into()?); + } + + Ok(Self { + locations, + values, + labels, + }) + } +} + /// Create a new profile with the given sample types. Must call /// `ddog_prof_Profile_drop` when you are done with the profile. /// @@ -380,6 +479,27 @@ pub unsafe extern "C" fn ddog_prof_Profile_new( ProfileNewResult::Ok(ffi_profile) } +/// Same as `ddog_profile_new` but also configures a `string_storage` for the profile. +pub unsafe extern "C" fn ddog_prof_Profile_with_string_storage( + sample_types: Slice, + period: Option<&Period>, + start_time: Option<&Timespec>, + string_storage: ManagedStringStorage, +) -> ProfileNewResult { + let types: Vec = sample_types.into_slice().iter().map(Into::into).collect(); + let start_time = start_time.map_or_else(SystemTime::now, SystemTime::from); + let period = period.map(Into::into); + let string_storage = match get_inner_string_storage(string_storage, true) { + Ok(string_storage) => string_storage, + Err(err) => return ProfileNewResult::Err(err.into()), + }; + + let internal_profile = + internal::Profile::with_string_storage(start_time, &types, period, string_storage); + let ffi_profile = Profile::new(internal_profile); + ProfileNewResult::Ok(ffi_profile) +} + /// # Safety /// The `profile` can be null, but if non-null it must point to a Profile /// made by this module, which has not previously been dropped. @@ -433,8 +553,16 @@ pub unsafe extern "C" fn ddog_prof_Profile_add( ) -> ProfileResult { (|| { let profile = profile_ptr_to_inner(profile)?; - let sample = sample.try_into()?; - profile.add_sample(sample, timestamp) + let uses_string_ids = sample + .labels + .first() + .map_or(false, |label| label.key.is_empty() && label.key_id > 0); + + if uses_string_ids { + profile.add_string_id_sample(sample.try_into()?, timestamp) + } else { + profile.add_sample(sample.try_into()?, timestamp) + } })() .context("ddog_prof_Profile_add failed") .into() @@ -808,8 +936,11 @@ mod tests { mapping, function: Function { name: "{main}".into(), + name_id: 0, system_name: "{main}".into(), + system_name_id: 0, filename: "index.php".into(), + filename_id: 0, start_line: 0, }, ..Default::default() @@ -870,8 +1001,11 @@ mod tests { mapping, function: Function { name: "{main}".into(), + name_id: 0, system_name: "{main}".into(), + system_name_id: 0, filename: "index.php".into(), + filename_id: 0, start_line: 0, }, ..Default::default() @@ -880,8 +1014,11 @@ mod tests { mapping, function: Function { name: "test".into(), + name_id: 0, system_name: "test".into(), + system_name_id: 0, filename: "index.php".into(), + filename_id: 0, start_line: 3, }, line: 4, @@ -890,9 +1027,12 @@ mod tests { let values: Vec = vec![1]; let labels = vec![Label { key: Slice::from("pid"), + key_id: 0, str: Slice::from(""), + str_id: 0, num: 101, num_unit: Slice::from(""), + num_unit_id: 0, }]; let main_sample = Sample { diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs new file mode 100644 index 000000000..9cd499bce --- /dev/null +++ b/profiling-ffi/src/string_storage.rs @@ -0,0 +1,183 @@ +use anyhow::Context; +use libc::c_void; +use std::{ffi::CStr, rc::Rc, sync::RwLock}; + +use datadog_profiling::collections::string_storage::ManagedStringStorage as InternalManagedStringStorage; +use ddcommon_ffi::{CharSlice, Error, StringWrapper}; + +#[repr(C)] +pub struct ManagedStringStorage { + // This may be null, but if not it will point to a valid Profile. + inner: *const c_void, /* Actually *RwLock but cbindgen doesn't + * opaque RwLock */ +} + +#[no_mangle] +#[must_use] +pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_new() -> ManagedStringStorage { + let storage = InternalManagedStringStorage::new(); + + ManagedStringStorage { + inner: Rc::into_raw(Rc::new(RwLock::new(storage))) as *const c_void, + } +} + +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_drop(storage: ManagedStringStorage) { + if let Ok(storage) = get_inner_string_storage(storage, false) { + drop(storage); + } +} + +#[repr(C)] +#[allow(dead_code)] +pub enum ManagedStringStorageInternResult { + Ok(u32), + Err(Error), +} + +#[must_use] +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( + storage: ManagedStringStorage, + string: Option<&CharSlice>, +) -> ManagedStringStorageInternResult { + (|| { + let storage = get_inner_string_storage(storage, true)?; + + let string: &CharSlice = string.expect("non null string"); + let string: &str = CStr::from_ptr(string.as_ptr()) + .to_str() + .expect("valid utf8 string"); + + let string_id = storage + .write() + .expect("acquisition of write lock on string storage should succeed") + .intern(string); + + anyhow::Ok(string_id) + })() + .context("ddog_prof_Profile_serialize failed") + .into() +} + +#[must_use] +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( + storage: ManagedStringStorage, + id: u32, +) -> ManagedStringStorageResult { + (|| { + let storage = get_inner_string_storage(storage, true)?; + storage + .read() + .expect("acquisition of read lock on string storage should succeed") + .unintern(id); + anyhow::Ok(()) + })() + .context("ddog_prof_Profile_serialize failed") + .into() +} + +#[repr(C)] +#[allow(dead_code)] +pub enum StringWrapperResult { + Ok(StringWrapper), + Err(Error), +} + +#[must_use] +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( + storage: ManagedStringStorage, + id: u32, +) -> StringWrapperResult { + (|| { + let storage = get_inner_string_storage(storage, true)?; + let string: String = (*storage + .read() + .expect("acquisition of read lock on string storage should succeed") + .get_string(id)) + .to_owned(); + + anyhow::Ok(string) + })() + .context("ddog_prof_Profile_serialize failed") + .into() +} + +#[repr(C)] +#[allow(dead_code)] +pub enum ManagedStringStorageResult { + Ok(()), + Err(Error), +} + +#[must_use] +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_advance_gen( + storage: ManagedStringStorage, +) -> ManagedStringStorageResult { + (|| { + let storage = get_inner_string_storage(storage, true)?; + + storage + .write() + .expect("acquisition of write lock on string storage should succeed") + .advance_gen(); + + anyhow::Ok(()) + })() + .context("ddog_prof_Profile_serialize failed") + .into() +} + +pub unsafe fn get_inner_string_storage( + storage: ManagedStringStorage, + cloned: bool, +) -> anyhow::Result>> { + if storage.inner.is_null() { + anyhow::bail!("storage inner pointer is null"); + } + + let storage_ptr = storage.inner; + + if cloned { + // By incrementing strong count here we ensure that the returned Rc represents a "clone" of + // the original and will thus not trigger a drop of the underlying data when out of + // scope. NOTE: We can't simply do Rc::from_raw(storage_ptr).clone() because when we + // return, the Rc created through `Rc::from_raw` would go out of scope and decrement + // strong count. + Rc::increment_strong_count(storage_ptr); + } + Ok(Rc::from_raw( + storage_ptr as *const RwLock, + )) +} + +impl From> for ManagedStringStorageInternResult { + fn from(value: anyhow::Result) -> Self { + match value { + Ok(v) => Self::Ok(v), + Err(err) => Self::Err(err.into()), + } + } +} + +impl From> for StringWrapperResult { + fn from(value: anyhow::Result) -> Self { + match value { + Ok(v) => Self::Ok(v.into()), + Err(err) => Self::Err(err.into()), + } + } +} + +impl From> for ManagedStringStorageResult { + fn from(value: anyhow::Result<()>) -> Self { + match value { + Ok(v) => Self::Ok(v), + Err(err) => Self::Err(err.into()), + } + } +} From 82b15934a8cef577003677d16d31819186fdd28f Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 6 Nov 2024 10:08:34 +0000 Subject: [PATCH 13/52] Fix incorrect operation names on errors --- profiling-ffi/src/string_storage.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index 9cd499bce..6d484c8c1 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -57,7 +57,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( anyhow::Ok(string_id) })() - .context("ddog_prof_Profile_serialize failed") + .context("ddog_prof_ManagedStringStorage_intern failed") .into() } @@ -75,7 +75,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( .unintern(id); anyhow::Ok(()) })() - .context("ddog_prof_Profile_serialize failed") + .context("ddog_prof_ManagedStringStorage_unintern failed") .into() } @@ -102,7 +102,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( anyhow::Ok(string) })() - .context("ddog_prof_Profile_serialize failed") + .context("ddog_prof_ManagedStringStorage_get_string failed") .into() } @@ -128,7 +128,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_advance_gen( anyhow::Ok(()) })() - .context("ddog_prof_Profile_serialize failed") + .context("ddog_prof_ManagedStringStorage_advance_gen failed") .into() } From a65c31dc482f5bd0b3732f1afb9b4ac03bc1aa9a Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 6 Nov 2024 10:18:57 +0000 Subject: [PATCH 14/52] Isolate `SimpleStringStorage` into seprate module This makes it clear it's only kept around for experiments. --- profiling/src/collections/string_storage.rs | 92 +++++++++++---------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index 97b8e700a..f67926d1a 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -22,59 +22,65 @@ pub trait StringStorage { fn clone_empty(&self) -> Box; } -pub struct SimpleStringStorage { - set: FxIndexSet>, -} +mod experimental { + use crate::collections::identifiable::{FxIndexSet, Id, StringId}; + use crate::collections::string_storage::StringStorage; + use std::rc::Rc; -impl SimpleStringStorage { - pub fn new() -> Self { - SimpleStringStorage { - set: Default::default(), - } + pub struct SimpleStringStorage { + set: FxIndexSet>, } -} -impl Default for SimpleStringStorage { - fn default() -> Self { - Self::new() - } -} - -impl StringStorage for SimpleStringStorage { - fn intern(&mut self, item: Rc) -> StringId { - // For performance, delay converting the [&str] to a [String] until - // after it has been determined to not exist in the set. This avoids - // temporary allocations. - let index = match self.set.get_index_of(&item) { - Some(index) => index, - None => { - let (index, _inserted) = self.set.insert_full(item.clone()); - // This wouldn't make any sense; the item couldn't be found so - // we try to insert it, but suddenly it exists now? - debug_assert!(_inserted); - index + impl SimpleStringStorage { + pub fn new() -> Self { + SimpleStringStorage { + set: Default::default(), } - }; - StringId::from_offset(index) + } } - fn get_string(&self, id: StringId) -> Rc { - self.set - .get_index(id.to_offset()) - .expect("StringId to have a valid interned index") - .clone() + impl Default for SimpleStringStorage { + fn default() -> Self { + Self::new() + } } - fn len(&self) -> usize { - self.set.len() - } + impl StringStorage for SimpleStringStorage { + fn intern(&mut self, item: Rc) -> StringId { + // For performance, delay converting the [&str] to a [String] until + // after it has been determined to not exist in the set. This avoids + // temporary allocations. + let index = match self.set.get_index_of(&item) { + Some(index) => index, + None => { + let (index, _inserted) = self.set.insert_full(item.clone()); + // This wouldn't make any sense; the item couldn't be found so + // we try to insert it, but suddenly it exists now? + debug_assert!(_inserted); + index + } + }; + StringId::from_offset(index) + } - fn into_iter(self: Box) -> Box>> { - Box::new(self.set.into_iter()) - } + fn get_string(&self, id: StringId) -> Rc { + self.set + .get_index(id.to_offset()) + .expect("StringId to have a valid interned index") + .clone() + } - fn clone_empty(&self) -> Box { - Box::new(SimpleStringStorage::new()) + fn len(&self) -> usize { + self.set.len() + } + + fn into_iter(self: Box) -> Box>> { + Box::new(self.set.into_iter()) + } + + fn clone_empty(&self) -> Box { + Box::new(SimpleStringStorage::new()) + } } } From 36d5d3220be9cc35a65c429173d0e6e73e4defc6 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 6 Nov 2024 10:21:02 +0000 Subject: [PATCH 15/52] Mark `last_usage_gen` as not in use --- profiling/src/collections/string_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index f67926d1a..bfa9df259 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -87,7 +87,7 @@ mod experimental { #[derive(PartialEq, Debug)] struct ManagedStringData { str: Rc, - last_usage_gen: Cell>, + last_usage_gen: Cell>, // TODO: This is not actually being used; maybe remove? cached_seq_num_for: Cell>, cached_seq_num: Cell>, usage_count: Cell, From 64d6277a5f6b69a66ed04bccda714b66a170b7de Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 6 Nov 2024 11:03:28 +0000 Subject: [PATCH 16/52] Add TODO to ManagedStringStorage_intern --- profiling-ffi/src/string_storage.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index 6d484c8c1..b1f01a71c 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -38,6 +38,8 @@ pub enum ManagedStringStorageInternResult { #[must_use] #[no_mangle] +// TODO: Consider having a variant of intern (and unintern?) that takes an array as input, instead of +// just a single string at a time. pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( storage: ManagedStringStorage, string: Option<&CharSlice>, From 624ebd0d427ca7b0d1789959baae70f6ac0d823e Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 6 Nov 2024 11:26:26 +0000 Subject: [PATCH 17/52] Make sure header gets refreshed when things change --- profiling-ffi/build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/profiling-ffi/build.rs b/profiling-ffi/build.rs index ddf5cbfcb..aac058679 100644 --- a/profiling-ffi/build.rs +++ b/profiling-ffi/build.rs @@ -5,6 +5,7 @@ extern crate build_common; use build_common::generate_and_configure_header; fn main() { + println!("cargo:rerun-if-changed=src"); let header_name = "profiling.h"; generate_and_configure_header(header_name); } From 30d9a65832ab625ec8f5e8b681526b66d3e09f06 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 6 Nov 2024 11:27:31 +0000 Subject: [PATCH 18/52] Fix `ddog_prof_Profile_with_string_storage` not being correctly published in header --- profiling-ffi/src/profiles.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/profiling-ffi/src/profiles.rs b/profiling-ffi/src/profiles.rs index 4816eba04..5e4f04436 100644 --- a/profiling-ffi/src/profiles.rs +++ b/profiling-ffi/src/profiles.rs @@ -480,6 +480,8 @@ pub unsafe extern "C" fn ddog_prof_Profile_new( } /// Same as `ddog_profile_new` but also configures a `string_storage` for the profile. +#[no_mangle] +#[must_use] pub unsafe extern "C" fn ddog_prof_Profile_with_string_storage( sample_types: Slice, period: Option<&Period>, From f9f37038367d66e911c3f32a8d0091e0ff145355 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 6 Nov 2024 11:30:45 +0000 Subject: [PATCH 19/52] Remove unused includes --- profiling/src/api.rs | 1 - profiling/src/collections/string_storage.rs | 2 -- profiling/src/internal/profile/mod.rs | 1 - 3 files changed, 4 deletions(-) diff --git a/profiling/src/api.rs b/profiling/src/api.rs index 1a619c3d2..9bf10521a 100644 --- a/profiling/src/api.rs +++ b/profiling/src/api.rs @@ -1,7 +1,6 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use crate::collections::identifiable::StringId; use crate::pprof; use std::ops::{Add, Sub}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index bfa9df259..2cd4fde57 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -4,8 +4,6 @@ use std::hash::BuildHasherDefault; use std::ptr; use std::rc::Rc; -use super::identifiable::FxIndexSet; -use super::identifiable::Id; use super::identifiable::StringId; use super::string_table::StringTable; diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 0f596bc00..f791a487c 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -15,7 +15,6 @@ use crate::iter::{IntoLendingIterator, LendingIterator}; use crate::pprof::sliced_proto::*; use crate::serializer::CompressedProtobufSerializer; use anyhow::Context; -use libc::time; use std::borrow::Cow; use std::collections::HashMap; use std::rc::Rc; From 4aa5f44c064793a90bc0422b5882f9182c5a707f Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 6 Nov 2024 11:30:55 +0000 Subject: [PATCH 20/52] Remove unused struct --- profiling-ffi/src/profiles.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/profiling-ffi/src/profiles.rs b/profiling-ffi/src/profiles.rs index 5e4f04436..9685d64b4 100644 --- a/profiling-ffi/src/profiles.rs +++ b/profiling-ffi/src/profiles.rs @@ -169,16 +169,6 @@ pub struct Function<'a> { pub start_line: i64, } -#[repr(C)] -#[derive(Copy, Clone)] -pub struct Line<'a> { - /// The corresponding profile.Function for this line. - pub function: Function<'a>, - - /// Line number in source code. - pub line: i64, -} - #[repr(C)] #[derive(Copy, Clone, Default)] pub struct Location<'a> { From 0a9920809beb0fe41c15a321d62b32711166dde3 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 8 Nov 2024 12:25:35 +0000 Subject: [PATCH 21/52] Add short-circuit for looking up empty strings --- profiling-ffi/src/string_storage.rs | 4 ++-- profiling/src/internal/profile/mod.rs | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index b1f01a71c..319ca04bc 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -38,8 +38,8 @@ pub enum ManagedStringStorageInternResult { #[must_use] #[no_mangle] -// TODO: Consider having a variant of intern (and unintern?) that takes an array as input, instead of -// just a single string at a time. +// TODO: Consider having a variant of intern (and unintern?) that takes an array as input, instead +// of just a single string at a time. pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( storage: ManagedStringStorage, string: Option<&CharSlice>, diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index f791a487c..69c7be174 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -187,6 +187,10 @@ impl Profile { } pub fn resolve(&mut self, id: PersistentStringId) -> StringId { + if id.value == 0 { + return StringId::ZERO; + } + self.string_storage .as_ref() .expect("resolution from id requires managed string storage") From e105dd174c523c2e9646d0fb19640b25948cc215 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Nov 2024 10:33:09 +0000 Subject: [PATCH 22/52] Replace `ManagedStringStorageResult` with equivalent `MaybeError` --- profiling-ffi/src/string_storage.rs | 42 ++++++++++++----------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index 319ca04bc..e87514395 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -3,7 +3,7 @@ use libc::c_void; use std::{ffi::CStr, rc::Rc, sync::RwLock}; use datadog_profiling::collections::string_storage::ManagedStringStorage as InternalManagedStringStorage; -use ddcommon_ffi::{CharSlice, Error, StringWrapper}; +use ddcommon_ffi::{CharSlice, Error, MaybeError, StringWrapper}; #[repr(C)] pub struct ManagedStringStorage { @@ -68,8 +68,8 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( storage: ManagedStringStorage, id: u32, -) -> ManagedStringStorageResult { - (|| { +) -> MaybeError { + let result = (|| { let storage = get_inner_string_storage(storage, true)?; storage .read() @@ -77,8 +77,12 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( .unintern(id); anyhow::Ok(()) })() - .context("ddog_prof_ManagedStringStorage_unintern failed") - .into() + .context("ddog_prof_ManagedStringStorage_unintern failed"); + + match result { + Ok(_) => MaybeError::None, + Err(e) => MaybeError::Some(e.into()), + } } #[repr(C)] @@ -108,19 +112,12 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( .into() } -#[repr(C)] -#[allow(dead_code)] -pub enum ManagedStringStorageResult { - Ok(()), - Err(Error), -} - #[must_use] #[no_mangle] pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_advance_gen( storage: ManagedStringStorage, -) -> ManagedStringStorageResult { - (|| { +) -> MaybeError { + let result = (|| { let storage = get_inner_string_storage(storage, true)?; storage @@ -130,8 +127,12 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_advance_gen( anyhow::Ok(()) })() - .context("ddog_prof_ManagedStringStorage_advance_gen failed") - .into() + .context("ddog_prof_ManagedStringStorage_advance_gen failed"); + + match result { + Ok(_) => MaybeError::None, + Err(e) => MaybeError::Some(e.into()), + } } pub unsafe fn get_inner_string_storage( @@ -174,12 +175,3 @@ impl From> for StringWrapperResult { } } } - -impl From> for ManagedStringStorageResult { - fn from(value: anyhow::Result<()>) -> Self { - match value { - Ok(v) => Self::Ok(v), - Err(err) => Self::Err(err.into()), - } - } -} From 04d76d21b3a0d81bf516a7421468fc9267d7a73d Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Nov 2024 10:35:47 +0000 Subject: [PATCH 23/52] Add TODO to `get_string` API --- profiling-ffi/src/string_storage.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index e87514395..b05a590ba 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -94,6 +94,9 @@ pub enum StringWrapperResult { #[must_use] #[no_mangle] +/// TODO: @ivoanjo It's not clear to me if the string pointer we return here is the exact one from +/// the string storage (still managed via string storage), or if we're allocating a copy (would +/// need a manual drop?). pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( storage: ManagedStringStorage, id: u32, From b273e472aa7f5331a2cb2d6fc01866eebffd7dcd Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Nov 2024 10:40:58 +0000 Subject: [PATCH 24/52] Have intern receive a CharSlice unconditionally It doesn't really make sense to make it optional to have a CharSlice here. --- profiling-ffi/src/string_storage.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index b05a590ba..bfa1e4530 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -38,16 +38,15 @@ pub enum ManagedStringStorageInternResult { #[must_use] #[no_mangle] -// TODO: Consider having a variant of intern (and unintern?) that takes an array as input, instead -// of just a single string at a time. +/// TODO: Consider having a variant of intern (and unintern?) that takes an array as input, instead +/// of just a single string at a time. pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( storage: ManagedStringStorage, - string: Option<&CharSlice>, + string: CharSlice, ) -> ManagedStringStorageInternResult { (|| { let storage = get_inner_string_storage(storage, true)?; - let string: &CharSlice = string.expect("non null string"); let string: &str = CStr::from_ptr(string.as_ptr()) .to_str() .expect("valid utf8 string"); From 560316f7d39c8ef70223faaa6d4e68c1919b1bd9 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Nov 2024 10:47:18 +0000 Subject: [PATCH 25/52] Introduce `ManagedStringStorageNewResult` This makes this API a bit more future-proof, as well as a bit more consistent with other such APIs in libdatadog. --- profiling-ffi/src/string_storage.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index bfa1e4530..62be3a17e 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -12,14 +12,22 @@ pub struct ManagedStringStorage { * opaque RwLock */ } +#[allow(dead_code)] +#[repr(C)] +pub enum ManagedStringStorageNewResult { + Ok(ManagedStringStorage), + #[allow(dead_code)] + Err(Error), +} + #[no_mangle] #[must_use] -pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_new() -> ManagedStringStorage { +pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_new() -> ManagedStringStorageNewResult { let storage = InternalManagedStringStorage::new(); - ManagedStringStorage { + ManagedStringStorageNewResult::Ok(ManagedStringStorage { inner: Rc::into_raw(Rc::new(RwLock::new(storage))) as *const c_void, - } + }) } #[no_mangle] From 66cf6fb9007b26d0d2af7dc71f91927a461a9196 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Nov 2024 10:58:59 +0000 Subject: [PATCH 26/52] Add TODO about shape of ManagedStringStorage argument --- profiling-ffi/src/profiles.rs | 1 + profiling-ffi/src/string_storage.rs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/profiling-ffi/src/profiles.rs b/profiling-ffi/src/profiles.rs index 9685d64b4..adba7a3db 100644 --- a/profiling-ffi/src/profiles.rs +++ b/profiling-ffi/src/profiles.rs @@ -472,6 +472,7 @@ pub unsafe extern "C" fn ddog_prof_Profile_new( /// Same as `ddog_profile_new` but also configures a `string_storage` for the profile. #[no_mangle] #[must_use] +/// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_Profile_with_string_storage( sample_types: Slice, period: Option<&Period>, diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index 62be3a17e..ca78db607 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -31,6 +31,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_new() -> ManagedStringSt } #[no_mangle] +/// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_drop(storage: ManagedStringStorage) { if let Ok(storage) = get_inner_string_storage(storage, false) { drop(storage); @@ -48,6 +49,7 @@ pub enum ManagedStringStorageInternResult { #[no_mangle] /// TODO: Consider having a variant of intern (and unintern?) that takes an array as input, instead /// of just a single string at a time. +/// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( storage: ManagedStringStorage, string: CharSlice, @@ -72,6 +74,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( #[must_use] #[no_mangle] +/// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( storage: ManagedStringStorage, id: u32, @@ -104,6 +107,7 @@ pub enum StringWrapperResult { /// TODO: @ivoanjo It's not clear to me if the string pointer we return here is the exact one from /// the string storage (still managed via string storage), or if we're allocating a copy (would /// need a manual drop?). +/// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( storage: ManagedStringStorage, id: u32, @@ -124,6 +128,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( #[must_use] #[no_mangle] +/// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_advance_gen( storage: ManagedStringStorage, ) -> MaybeError { From 08f5760e0f630d0d8d1730b56676e3e3d18d7b18 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Nov 2024 11:10:10 +0000 Subject: [PATCH 27/52] Rename `PersistentStringId` -> `ManagedStringId` --- profiling-ffi/src/profiles.rs | 18 +++++++++--------- profiling/src/api.rs | 22 +++++++++++----------- profiling/src/internal/profile/mod.rs | 6 +++--- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/profiling-ffi/src/profiles.rs b/profiling-ffi/src/profiles.rs index adba7a3db..0b97384e2 100644 --- a/profiling-ffi/src/profiles.rs +++ b/profiling-ffi/src/profiles.rs @@ -5,7 +5,7 @@ use crate::string_storage::get_inner_string_storage; use crate::string_storage::ManagedStringStorage; use anyhow::Context; use datadog_profiling::api; -use datadog_profiling::api::PersistentStringId; +use datadog_profiling::api::ManagedStringId; use datadog_profiling::internal; use datadog_profiling::internal::ProfiledEndpointsStats; use ddcommon_ffi::slice::{AsBytes, CharSlice, Slice}; @@ -249,8 +249,8 @@ impl<'a> TryFrom<&'a Mapping<'a>> for api::StringIdMapping { type Error = Utf8Error; fn try_from(mapping: &'a Mapping<'a>) -> Result { - let filename = PersistentStringId::new(mapping.filename_id); - let build_id = PersistentStringId::new(mapping.build_id_id); + let filename = ManagedStringId::new(mapping.filename_id); + let build_id = ManagedStringId::new(mapping.build_id_id); Ok(Self { memory_start: mapping.memory_start, memory_limit: mapping.memory_limit, @@ -299,9 +299,9 @@ impl<'a> TryFrom<&'a Function<'a>> for api::StringIdFunction { type Error = Utf8Error; fn try_from(function: &'a Function<'a>) -> Result { - let name = PersistentStringId::new(function.name_id); - let system_name = PersistentStringId::new(function.system_name_id); - let filename = PersistentStringId::new(function.filename_id); + let name = ManagedStringId::new(function.name_id); + let system_name = ManagedStringId::new(function.system_name_id); + let filename = ManagedStringId::new(function.filename_id); Ok(Self { name, system_name, @@ -368,18 +368,18 @@ impl<'a> TryFrom<&'a Label<'a>> for api::StringIdLabel { type Error = Utf8Error; fn try_from(label: &'a Label<'a>) -> Result { - let key = PersistentStringId::new(label.key_id); + let key = ManagedStringId::new(label.key_id); let str = label.str_id; let str = if str == 0 { None } else { - Some(PersistentStringId::new(str)) + Some(ManagedStringId::new(str)) }; let num_unit = label.num_unit_id; let num_unit = if num_unit == 0 { None } else { - Some(PersistentStringId::new(num_unit)) + Some(ManagedStringId::new(num_unit)) }; Ok(Self { diff --git a/profiling/src/api.rs b/profiling/src/api.rs index 9bf10521a..e90c5039f 100644 --- a/profiling/src/api.rs +++ b/profiling/src/api.rs @@ -25,13 +25,13 @@ pub struct Period<'a> { } #[derive(Copy, Clone, Default, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] -pub struct PersistentStringId { +pub struct ManagedStringId { pub value: u32, } -impl PersistentStringId { +impl ManagedStringId { pub fn new(value: u32) -> Self { - PersistentStringId { value } + ManagedStringId { value } } } @@ -63,8 +63,8 @@ pub struct StringIdMapping { pub memory_start: u64, pub memory_limit: u64, pub file_offset: u64, - pub filename: PersistentStringId, - pub build_id: PersistentStringId, + pub filename: ManagedStringId, + pub build_id: ManagedStringId, } #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] @@ -86,9 +86,9 @@ pub struct Function<'a> { #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] // Same as Function, but using StringIds pub struct StringIdFunction { - pub name: PersistentStringId, - pub system_name: PersistentStringId, - pub filename: PersistentStringId, + pub name: ManagedStringId, + pub system_name: ManagedStringId, + pub filename: ManagedStringId, pub start_line: i64, } @@ -145,14 +145,14 @@ pub struct Label<'a> { #[derive(Copy, Clone, Debug, Default, Eq, PartialEq)] // Same as Label, but using StringIds pub struct StringIdLabel { - pub key: PersistentStringId, + pub key: ManagedStringId, /// At most one of the following must be present - pub str: Option, + pub str: Option, pub num: i64, /// Should only be present when num is present. - pub num_unit: Option, + pub num_unit: Option, } impl Label<'_> { diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 69c7be174..816976740 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -7,7 +7,7 @@ mod fuzz_tests; use self::api::UpscalingInfo; use super::*; use crate::api; -use crate::api::PersistentStringId; +use crate::api::ManagedStringId; use crate::collections::identifiable::*; use crate::collections::string_storage::ManagedStringStorage; use crate::collections::string_table::StringTable; @@ -186,7 +186,7 @@ impl Profile { Ok(()) } - pub fn resolve(&mut self, id: PersistentStringId) -> StringId { + pub fn resolve(&mut self, id: ManagedStringId) -> StringId { if id.value == 0 { return StringId::ZERO; } @@ -644,7 +644,7 @@ impl Profile { &mut self, sample: &api::StringIdSample, ) -> anyhow::Result<()> { - let mut seen: HashMap = HashMap::new(); + let mut seen: HashMap = HashMap::new(); for label in sample.labels.iter() { if let Some(duplicate) = seen.insert(label.key, label) { From 621ed3abedf9228dcf0fc6607b4da2ca9838160b Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Nov 2024 13:40:47 +0000 Subject: [PATCH 28/52] Also expose `ManagedStringId` to ffi instead of u32 --- profiling-ffi/cbindgen.toml | 1 + profiling-ffi/src/profiles.rs | 63 ++++++++++++++--------------- profiling-ffi/src/string_storage.rs | 24 ++++++----- 3 files changed, 46 insertions(+), 42 deletions(-) diff --git a/profiling-ffi/cbindgen.toml b/profiling-ffi/cbindgen.toml index 4b285b1d9..337486cb2 100644 --- a/profiling-ffi/cbindgen.toml +++ b/profiling-ffi/cbindgen.toml @@ -46,6 +46,7 @@ renaming_overrides_prefixing = true "SerializeResult" = "ddog_prof_Profile_SerializeResult" "Slice_File" = "ddog_prof_Exporter_Slice_File" "ManagedStringStorage" = "ddog_prof_ManagedStringStorage" +"ManagedStringId" = "ddog_prof_ManagedStringId" [export.mangle] rename_types = "PascalCase" diff --git a/profiling-ffi/src/profiles.rs b/profiling-ffi/src/profiles.rs index 0b97384e2..538aa149c 100644 --- a/profiling-ffi/src/profiles.rs +++ b/profiling-ffi/src/profiles.rs @@ -131,11 +131,11 @@ pub struct Period<'a> { #[derive(Copy, Clone, Default)] pub struct Label<'a> { pub key: CharSlice<'a>, - pub key_id: u32, + pub key_id: ManagedStringId, /// At most one of the following must be present pub str: CharSlice<'a>, - pub str_id: u32, + pub str_id: ManagedStringId, pub num: i64, /// Should only be present when num is present. @@ -146,7 +146,7 @@ pub struct Label<'a> { /// units and units like "seconds" and "nanoseconds" as time units, /// and apply appropriate unit conversions to these. pub num_unit: CharSlice<'a>, - pub num_unit_id: u32, + pub num_unit_id: ManagedStringId, } #[repr(C)] @@ -154,16 +154,16 @@ pub struct Label<'a> { pub struct Function<'a> { /// Name of the function, in human-readable form if available. pub name: CharSlice<'a>, - pub name_id: u32, + pub name_id: ManagedStringId, /// Name of the function, as identified by the system. /// For instance, it can be a C++ mangled name. pub system_name: CharSlice<'a>, - pub system_name_id: u32, + pub system_name_id: ManagedStringId, /// Source file containing the function. pub filename: CharSlice<'a>, - pub filename_id: u32, + pub filename_id: ManagedStringId, /// Line number in source file. pub start_line: i64, @@ -201,13 +201,13 @@ pub struct Mapping<'a> { /// disk for the main binary and shared libraries, or virtual /// abstractions like "[vdso]". pub filename: CharSlice<'a>, - pub filename_id: u32, + pub filename_id: ManagedStringId, /// A string that uniquely identifies a particular program version /// with high probability. E.g., for binaries generated by GNU tools, /// it could be the contents of the .note.gnu.build-id field. pub build_id: CharSlice<'a>, - pub build_id_id: u32, + pub build_id_id: ManagedStringId, } #[repr(C)] @@ -249,8 +249,8 @@ impl<'a> TryFrom<&'a Mapping<'a>> for api::StringIdMapping { type Error = Utf8Error; fn try_from(mapping: &'a Mapping<'a>) -> Result { - let filename = ManagedStringId::new(mapping.filename_id); - let build_id = ManagedStringId::new(mapping.build_id_id); + let filename = ManagedStringId::new(mapping.filename_id.value); + let build_id = ManagedStringId::new(mapping.build_id_id.value); Ok(Self { memory_start: mapping.memory_start, memory_limit: mapping.memory_limit, @@ -299,9 +299,9 @@ impl<'a> TryFrom<&'a Function<'a>> for api::StringIdFunction { type Error = Utf8Error; fn try_from(function: &'a Function<'a>) -> Result { - let name = ManagedStringId::new(function.name_id); - let system_name = ManagedStringId::new(function.system_name_id); - let filename = ManagedStringId::new(function.filename_id); + let name = ManagedStringId::new(function.name_id.value); + let system_name = ManagedStringId::new(function.system_name_id.value); + let filename = ManagedStringId::new(function.filename_id.value); Ok(Self { name, system_name, @@ -368,14 +368,14 @@ impl<'a> TryFrom<&'a Label<'a>> for api::StringIdLabel { type Error = Utf8Error; fn try_from(label: &'a Label<'a>) -> Result { - let key = ManagedStringId::new(label.key_id); - let str = label.str_id; + let key = ManagedStringId::new(label.key_id.value); + let str = label.str_id.value; let str = if str == 0 { None } else { Some(ManagedStringId::new(str)) }; - let num_unit = label.num_unit_id; + let num_unit = label.num_unit_id.value; let num_unit = if num_unit == 0 { None } else { @@ -546,10 +546,9 @@ pub unsafe extern "C" fn ddog_prof_Profile_add( ) -> ProfileResult { (|| { let profile = profile_ptr_to_inner(profile)?; - let uses_string_ids = sample - .labels - .first() - .map_or(false, |label| label.key.is_empty() && label.key_id > 0); + let uses_string_ids = sample.labels.first().map_or(false, |label| { + label.key.is_empty() && label.key_id.value > 0 + }); if uses_string_ids { profile.add_string_id_sample(sample.try_into()?, timestamp) @@ -929,11 +928,11 @@ mod tests { mapping, function: Function { name: "{main}".into(), - name_id: 0, + name_id: ManagedStringId { value: 0 }, system_name: "{main}".into(), - system_name_id: 0, + system_name_id: ManagedStringId { value: 0 }, filename: "index.php".into(), - filename_id: 0, + filename_id: ManagedStringId { value: 0 }, start_line: 0, }, ..Default::default() @@ -994,11 +993,11 @@ mod tests { mapping, function: Function { name: "{main}".into(), - name_id: 0, + name_id: ManagedStringId { value: 0 }, system_name: "{main}".into(), - system_name_id: 0, + system_name_id: ManagedStringId { value: 0 }, filename: "index.php".into(), - filename_id: 0, + filename_id: ManagedStringId { value: 0 }, start_line: 0, }, ..Default::default() @@ -1007,11 +1006,11 @@ mod tests { mapping, function: Function { name: "test".into(), - name_id: 0, + name_id: ManagedStringId { value: 0 }, system_name: "test".into(), - system_name_id: 0, + system_name_id: ManagedStringId { value: 0 }, filename: "index.php".into(), - filename_id: 0, + filename_id: ManagedStringId { value: 0 }, start_line: 3, }, line: 4, @@ -1020,12 +1019,12 @@ mod tests { let values: Vec = vec![1]; let labels = vec![Label { key: Slice::from("pid"), - key_id: 0, + key_id: ManagedStringId { value: 0 }, str: Slice::from(""), - str_id: 0, + str_id: ManagedStringId { value: 0 }, num: 101, num_unit: Slice::from(""), - num_unit_id: 0, + num_unit_id: ManagedStringId { value: 0 }, }]; let main_sample = Sample { diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index ca78db607..57c216fa7 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -1,9 +1,13 @@ use anyhow::Context; +use datadog_profiling::collections::string_storage::ManagedStringStorage as InternalManagedStringStorage; +use ddcommon_ffi::{CharSlice, Error, MaybeError, StringWrapper}; use libc::c_void; use std::{ffi::CStr, rc::Rc, sync::RwLock}; -use datadog_profiling::collections::string_storage::ManagedStringStorage as InternalManagedStringStorage; -use ddcommon_ffi::{CharSlice, Error, MaybeError, StringWrapper}; +#[repr(C)] +pub struct ManagedStringId { + pub value: u32, +} #[repr(C)] pub struct ManagedStringStorage { @@ -41,7 +45,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_drop(storage: ManagedStr #[repr(C)] #[allow(dead_code)] pub enum ManagedStringStorageInternResult { - Ok(u32), + Ok(ManagedStringId), Err(Error), } @@ -66,7 +70,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( .expect("acquisition of write lock on string storage should succeed") .intern(string); - anyhow::Ok(string_id) + anyhow::Ok(ManagedStringId { value: string_id }) })() .context("ddog_prof_ManagedStringStorage_intern failed") .into() @@ -77,14 +81,14 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( /// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( storage: ManagedStringStorage, - id: u32, + id: ManagedStringId, ) -> MaybeError { let result = (|| { let storage = get_inner_string_storage(storage, true)?; storage .read() .expect("acquisition of read lock on string storage should succeed") - .unintern(id); + .unintern(id.value); anyhow::Ok(()) })() .context("ddog_prof_ManagedStringStorage_unintern failed"); @@ -110,14 +114,14 @@ pub enum StringWrapperResult { /// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( storage: ManagedStringStorage, - id: u32, + id: ManagedStringId, ) -> StringWrapperResult { (|| { let storage = get_inner_string_storage(storage, true)?; let string: String = (*storage .read() .expect("acquisition of read lock on string storage should succeed") - .get_string(id)) + .get_string(id.value)) .to_owned(); anyhow::Ok(string) @@ -173,8 +177,8 @@ pub unsafe fn get_inner_string_storage( )) } -impl From> for ManagedStringStorageInternResult { - fn from(value: anyhow::Result) -> Self { +impl From> for ManagedStringStorageInternResult { + fn from(value: anyhow::Result) -> Self { match value { Ok(v) => Self::Ok(v), Err(err) => Self::Err(err.into()), From 4be030b3414f01f5b9b3ab44349f2cd72ff9a8a4 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 11 Nov 2024 15:11:53 +0000 Subject: [PATCH 29/52] Avoid using `expect` in string storage ffi APIs --- profiling-ffi/src/string_storage.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index 57c216fa7..cb7149bbd 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -63,11 +63,13 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( let string: &str = CStr::from_ptr(string.as_ptr()) .to_str() - .expect("valid utf8 string"); + .map_err(|_| anyhow::anyhow!("invalid utf8 string"))?; let string_id = storage .write() - .expect("acquisition of write lock on string storage should succeed") + .map_err(|_| { + anyhow::anyhow!("acquisition of write lock on string storage should succeed") + })? .intern(string); anyhow::Ok(ManagedStringId { value: string_id }) @@ -87,7 +89,9 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( let storage = get_inner_string_storage(storage, true)?; storage .read() - .expect("acquisition of read lock on string storage should succeed") + .map_err(|_| { + anyhow::anyhow!("acquisition of read lock on string storage should succeed") + })? .unintern(id.value); anyhow::Ok(()) })() @@ -120,7 +124,9 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( let storage = get_inner_string_storage(storage, true)?; let string: String = (*storage .read() - .expect("acquisition of read lock on string storage should succeed") + .map_err(|_| { + anyhow::anyhow!("acquisition of read lock on string storage should succeed") + })? .get_string(id.value)) .to_owned(); @@ -141,7 +147,9 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_advance_gen( storage .write() - .expect("acquisition of write lock on string storage should succeed") + .map_err(|_| { + anyhow::anyhow!("acquisition of write lock on string storage should succeed") + })? .advance_gen(); anyhow::Ok(()) From 95555be6a94ac0a59bb25061889b65056d5c8e5d Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 26 Nov 2024 13:23:50 +0000 Subject: [PATCH 30/52] Panic on `get_seq_num` == 0 to make sure callers never do it --- profiling/src/collections/string_storage.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index 2cd4fde57..edba47980 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -171,6 +171,10 @@ impl ManagedStringStorage { } pub fn get_seq_num(&self, id: u32, profile_strings: &mut StringTable) -> StringId { + if id == 0 { + panic!("For performance reasons, get_set_num should not be called with id == 0. Please hardcode a fast path check in the caller to avoid this call") + } + let data = self.get_data(id); let profile_strings_pointer = ptr::addr_of!(*profile_strings); From 6a658b4ea76407a3ff74a765ccd67b82634e48a6 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 26 Nov 2024 15:10:14 +0000 Subject: [PATCH 31/52] Disallow attempts to unintern with id == 0 To avoid callers needing to pay the concurrency cost for such a trivial case, I've added a panic to allow us to find such usages, and added a check in the FFI directly. --- profiling-ffi/src/string_storage.rs | 4 ++++ profiling/src/collections/string_storage.rs | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index cb7149bbd..d06053d7b 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -85,6 +85,10 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( storage: ManagedStringStorage, id: ManagedStringId, ) -> MaybeError { + if id.value == 0 { + return MaybeError::None; + } + let result = (|| { let storage = get_inner_string_storage(storage, true)?; storage diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index edba47980..384bf6b93 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -165,6 +165,10 @@ impl ManagedStringStorage { } pub fn unintern(&self, id: u32) { + if id == 0 { + panic!("For performance reasons, unintern should not be called with id == 0. Please hardcode a fast path check in the caller to avoid this call") + } + let data = self.get_data(id); let usage_count = &data.usage_count; usage_count.set(usage_count.get() - 1); From 00a76f2e042e101292725e64f598f884bf1c7621 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 26 Nov 2024 15:13:01 +0000 Subject: [PATCH 32/52] Fix incorrect comment --- profiling-ffi/src/string_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index d06053d7b..3dfaac402 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -11,7 +11,7 @@ pub struct ManagedStringId { #[repr(C)] pub struct ManagedStringStorage { - // This may be null, but if not it will point to a valid Profile. + // This may be null, but if not it will point to a valid InternalManagedStringStorage. inner: *const c_void, /* Actually *RwLock but cbindgen doesn't * opaque RwLock */ } From 0b3f57dcf4b9e69c39a5b0e88fc67c1195d5217a Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 26 Nov 2024 15:55:59 +0000 Subject: [PATCH 33/52] Add missing license headers to new files --- profiling-ffi/src/string_storage.rs | 3 +++ profiling/src/collections/string_storage.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index 3dfaac402..e9bd0e437 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -1,3 +1,6 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + use anyhow::Context; use datadog_profiling::collections::string_storage::ManagedStringStorage as InternalManagedStringStorage; use ddcommon_ffi::{CharSlice, Error, MaybeError, StringWrapper}; diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index 384bf6b93..79833d94d 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -1,3 +1,6 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + use std::cell::Cell; use std::collections::HashMap; use std::hash::BuildHasherDefault; From 172a0a20655f5a1e871a09614575f792b8c1ac65 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 3 Jan 2025 11:03:15 +0000 Subject: [PATCH 34/52] Revert "Make sure header gets refreshed when things change" This reverts commit 624ebd0d427ca7b0d1789959baae70f6ac0d823e. During PR review, I didn't quite remember why and when this was needed, so let's remove until we figure out exactly why/if this is needed. --- profiling-ffi/build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/profiling-ffi/build.rs b/profiling-ffi/build.rs index aac058679..ddf5cbfcb 100644 --- a/profiling-ffi/build.rs +++ b/profiling-ffi/build.rs @@ -5,7 +5,6 @@ extern crate build_common; use build_common::generate_and_configure_header; fn main() { - println!("cargo:rerun-if-changed=src"); let header_name = "profiling.h"; generate_and_configure_header(header_name); } From d41499978d1e3015f794b1c175dffc179272db96 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 3 Jan 2025 11:12:43 +0000 Subject: [PATCH 35/52] Remove redundant must_use as pointed out by clippy A `MaybeError` is apparently already must_use so this is redundant. TIL. --- profiling-ffi/src/string_storage.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index e9bd0e437..270a2e495 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -81,7 +81,6 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( .into() } -#[must_use] #[no_mangle] /// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( @@ -143,7 +142,6 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( .into() } -#[must_use] #[no_mangle] /// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_advance_gen( From fdfaa1f675bf240abeb8a4f431a3f31a3326624a Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 3 Jan 2025 11:45:01 +0000 Subject: [PATCH 36/52] Apply another clippy suggestion --- profiling-ffi/src/profiles.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling-ffi/src/profiles.rs b/profiling-ffi/src/profiles.rs index 538aa149c..bc3897c5d 100644 --- a/profiling-ffi/src/profiles.rs +++ b/profiling-ffi/src/profiles.rs @@ -546,7 +546,7 @@ pub unsafe extern "C" fn ddog_prof_Profile_add( ) -> ProfileResult { (|| { let profile = profile_ptr_to_inner(profile)?; - let uses_string_ids = sample.labels.first().map_or(false, |label| { + let uses_string_ids = sample.labels.first().is_some_and(|label| { label.key.is_empty() && label.key_id.value > 0 }); From c634964ef47678101f63d17676ecac41666af933 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 3 Jan 2025 11:49:32 +0000 Subject: [PATCH 37/52] More linting... Why can't all of this be applied in one step... :/ --- profiling-ffi/src/profiles.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/profiling-ffi/src/profiles.rs b/profiling-ffi/src/profiles.rs index bc3897c5d..c79a6b2de 100644 --- a/profiling-ffi/src/profiles.rs +++ b/profiling-ffi/src/profiles.rs @@ -546,9 +546,10 @@ pub unsafe extern "C" fn ddog_prof_Profile_add( ) -> ProfileResult { (|| { let profile = profile_ptr_to_inner(profile)?; - let uses_string_ids = sample.labels.first().is_some_and(|label| { - label.key.is_empty() && label.key_id.value > 0 - }); + let uses_string_ids = sample + .labels + .first() + .is_some_and(|label| label.key.is_empty() && label.key_id.value > 0); if uses_string_ids { profile.add_string_id_sample(sample.try_into()?, timestamp) From d4194a2e78e47bae68d565d54fdb641a4b46d2cd Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 6 Jan 2025 10:02:26 +0000 Subject: [PATCH 38/52] Improve API description when reading string from managed string storage --- profiling-ffi/src/string_storage.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index 270a2e495..82877b5b2 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -118,9 +118,10 @@ pub enum StringWrapperResult { #[must_use] #[no_mangle] -/// TODO: @ivoanjo It's not clear to me if the string pointer we return here is the exact one from -/// the string storage (still managed via string storage), or if we're allocating a copy (would -/// need a manual drop?). +/// Returns a string given its id. +/// This API is mostly for testing, overall you should avoid reading back strings from libdatadog once they've been +/// interned and you should always operate on the id. +/// Remember to `ddog_StringWrapper_drop` the string once you're done with it. /// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( storage: ManagedStringStorage, From 28782b5d2d6ec3508e234792ec94fd1ab8fa4b10 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 6 Jan 2025 10:09:12 +0000 Subject: [PATCH 39/52] Remove `StringStorage` / `SimpleStringStorage` experimental trait/implementation These are not needed currently, so let's simplify rather than having a lot of unused appendages in this PR. --- profiling/src/collections/string_storage.rs | 75 --------------------- 1 file changed, 75 deletions(-) diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index 79833d94d..a335132a8 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -10,81 +10,6 @@ use std::rc::Rc; use super::identifiable::StringId; use super::string_table::StringTable; -pub trait StringStorage { - /// Interns the `str` as a string, returning the id in the string table. - /// The empty string is guaranteed to have an id of [StringId::ZERO]. - fn intern(&mut self, item: Rc) -> StringId; - fn get_string(&self, id: StringId) -> Rc; - fn len(&self) -> usize; - fn is_empty(&self) -> bool { - self.len() == 0 - } - fn into_iter(self: Box) -> Box>>; - fn clone_empty(&self) -> Box; -} - -mod experimental { - use crate::collections::identifiable::{FxIndexSet, Id, StringId}; - use crate::collections::string_storage::StringStorage; - use std::rc::Rc; - - pub struct SimpleStringStorage { - set: FxIndexSet>, - } - - impl SimpleStringStorage { - pub fn new() -> Self { - SimpleStringStorage { - set: Default::default(), - } - } - } - - impl Default for SimpleStringStorage { - fn default() -> Self { - Self::new() - } - } - - impl StringStorage for SimpleStringStorage { - fn intern(&mut self, item: Rc) -> StringId { - // For performance, delay converting the [&str] to a [String] until - // after it has been determined to not exist in the set. This avoids - // temporary allocations. - let index = match self.set.get_index_of(&item) { - Some(index) => index, - None => { - let (index, _inserted) = self.set.insert_full(item.clone()); - // This wouldn't make any sense; the item couldn't be found so - // we try to insert it, but suddenly it exists now? - debug_assert!(_inserted); - index - } - }; - StringId::from_offset(index) - } - - fn get_string(&self, id: StringId) -> Rc { - self.set - .get_index(id.to_offset()) - .expect("StringId to have a valid interned index") - .clone() - } - - fn len(&self) -> usize { - self.set.len() - } - - fn into_iter(self: Box) -> Box>> { - Box::new(self.set.into_iter()) - } - - fn clone_empty(&self) -> Box { - Box::new(SimpleStringStorage::new()) - } - } -} - #[derive(PartialEq, Debug)] struct ManagedStringData { str: Rc, From fe29cf9bc3862c3c213e51e98c767ce42cd724be Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 6 Jan 2025 10:10:30 +0000 Subject: [PATCH 40/52] Cargo fmt --- profiling-ffi/src/string_storage.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index 82877b5b2..57ddc7671 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -119,8 +119,8 @@ pub enum StringWrapperResult { #[must_use] #[no_mangle] /// Returns a string given its id. -/// This API is mostly for testing, overall you should avoid reading back strings from libdatadog once they've been -/// interned and you should always operate on the id. +/// This API is mostly for testing, overall you should avoid reading back strings from libdatadog +/// once they've been interned and you should always operate on the id. /// Remember to `ddog_StringWrapper_drop` the string once you're done with it. /// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( From 03e7649222346f850fac13c66dc3a150dfacad4a Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 7 Jan 2025 09:41:36 +0000 Subject: [PATCH 41/52] Use `NonZeroU32` in managed string table functions that shouldn't be called with id 0 This is much nicer than having a weird panic in there. --- profiling-ffi/src/string_storage.rs | 11 +++++++---- profiling/src/collections/string_storage.rs | 21 +++++++++------------ profiling/src/internal/profile/mod.rs | 10 ++++++---- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index 57ddc7671..2c59424be 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -5,6 +5,7 @@ use anyhow::Context; use datadog_profiling::collections::string_storage::ManagedStringStorage as InternalManagedStringStorage; use ddcommon_ffi::{CharSlice, Error, MaybeError, StringWrapper}; use libc::c_void; +use std::num::NonZeroU32; use std::{ffi::CStr, rc::Rc, sync::RwLock}; #[repr(C)] @@ -87,9 +88,11 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( storage: ManagedStringStorage, id: ManagedStringId, ) -> MaybeError { - if id.value == 0 { - return MaybeError::None; - } + let non_empty_string_id = if let Some(valid_id) = NonZeroU32::new(id.value) { + valid_id + } else { + return MaybeError::None; // Empty string, nothing to do + }; let result = (|| { let storage = get_inner_string_storage(storage, true)?; @@ -98,7 +101,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( .map_err(|_| { anyhow::anyhow!("acquisition of read lock on string storage should succeed") })? - .unintern(id.value); + .unintern(non_empty_string_id); anyhow::Ok(()) })() .context("ddog_prof_ManagedStringStorage_unintern failed"); diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index a335132a8..028c3cd1d 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -4,6 +4,7 @@ use std::cell::Cell; use std::collections::HashMap; use std::hash::BuildHasherDefault; +use std::num::NonZeroU32; use std::ptr; use std::rc::Rc; @@ -92,22 +93,18 @@ impl ManagedStringStorage { } } - pub fn unintern(&self, id: u32) { - if id == 0 { - panic!("For performance reasons, unintern should not be called with id == 0. Please hardcode a fast path check in the caller to avoid this call") - } - - let data = self.get_data(id); + // Here id is a NonZeroU32 because an id of 0 is the empty string and that can never be + // uninterned (and it should be skipped instead in the caller) + pub fn unintern(&self, id: NonZeroU32) { + let data = self.get_data(id.into()); let usage_count = &data.usage_count; usage_count.set(usage_count.get() - 1); } - pub fn get_seq_num(&self, id: u32, profile_strings: &mut StringTable) -> StringId { - if id == 0 { - panic!("For performance reasons, get_set_num should not be called with id == 0. Please hardcode a fast path check in the caller to avoid this call") - } - - let data = self.get_data(id); + // Here id is a NonZeroU32 because an id of 0 which StringTable always maps to 0 as well so this + // entire call can be skipped + pub fn get_seq_num(&self, id: NonZeroU32, profile_strings: &mut StringTable) -> StringId { + let data = self.get_data(id.into()); let profile_strings_pointer = ptr::addr_of!(*profile_strings); diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 816976740..eb7b4fe08 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -187,16 +187,18 @@ impl Profile { } pub fn resolve(&mut self, id: ManagedStringId) -> StringId { - if id.value == 0 { - return StringId::ZERO; - } + let non_empty_string_id = if let Some(valid_id) = NonZeroU32::new(id.value) { + valid_id + } else { + return StringId::ZERO; // Both string tables use zero for the empty string + }; self.string_storage .as_ref() .expect("resolution from id requires managed string storage") .read() .expect("acquisition of read lock on string storage should succeed") - .get_seq_num(id.value, &mut self.strings) + .get_seq_num(non_empty_string_id, &mut self.strings) } /// Creates a profile with `start_time`. From 79901ebc14563ebd6ac49fdd4bdda04761cedb1a Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 7 Jan 2025 10:17:50 +0000 Subject: [PATCH 42/52] Minor tweak to comment --- profiling-ffi/src/string_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index 2c59424be..e9017f425 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -123,7 +123,7 @@ pub enum StringWrapperResult { #[no_mangle] /// Returns a string given its id. /// This API is mostly for testing, overall you should avoid reading back strings from libdatadog -/// once they've been interned and you should always operate on the id. +/// once they've been interned and should instead always operate on the id. /// Remember to `ddog_StringWrapper_drop` the string once you're done with it. /// TODO: @ivoanjo Should this take a `*mut ManagedStringStorage` like Profile APIs do? pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( From 1f2b953cd3fd90df72f6f98a460a682420947b22 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 7 Jan 2025 10:56:15 +0000 Subject: [PATCH 43/52] Simplify cached field into tuple, as both are related --- profiling/src/collections/string_storage.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index 028c3cd1d..eb20bf460 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -15,8 +15,7 @@ use super::string_table::StringTable; struct ManagedStringData { str: Rc, last_usage_gen: Cell>, // TODO: This is not actually being used; maybe remove? - cached_seq_num_for: Cell>, - cached_seq_num: Cell>, + cached_seq_num: Cell>, usage_count: Cell, } @@ -79,7 +78,6 @@ impl ManagedStringStorage { let data = ManagedStringData { str: str.clone(), last_usage_gen: Cell::new(None), - cached_seq_num_for: Cell::new(None), cached_seq_num: Cell::new(None), usage_count: Cell::new(1), }; @@ -108,12 +106,12 @@ impl ManagedStringStorage { let profile_strings_pointer = ptr::addr_of!(*profile_strings); - match (data.cached_seq_num.get(), data.cached_seq_num_for.get()) { - (Some(seq_num), Some(pointer)) if pointer == profile_strings_pointer => seq_num, - (_, _) => { + match data.cached_seq_num.get() { + Some((pointer, seq_num)) if pointer == profile_strings_pointer => seq_num, + _ => { let seq_num = profile_strings.intern(data.str.as_ref()); - data.cached_seq_num.set(Some(seq_num)); - data.cached_seq_num_for.set(Some(profile_strings_pointer)); + data.cached_seq_num + .set(Some((profile_strings_pointer, seq_num))); seq_num } } From 2f1dc924dead0aad93f8eabbf9a1be5a2e47d2fc Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 7 Jan 2025 11:05:31 +0000 Subject: [PATCH 44/52] Remove `last_usage_gen` from string storage since we're not using it for now --- profiling/src/collections/string_storage.rs | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index eb20bf460..e79cb2b17 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -14,7 +14,6 @@ use super::string_table::StringTable; #[derive(PartialEq, Debug)] struct ManagedStringData { str: Rc, - last_usage_gen: Cell>, // TODO: This is not actually being used; maybe remove? cached_seq_num: Cell>, usage_count: Cell, } @@ -41,21 +40,11 @@ impl ManagedStringStorage { pub fn advance_gen(&mut self) { self.id_to_data.retain(|_, data| { - /*let used_in_this_gen = data - .last_usage_gen - .get() - .map_or(false, |last_usage_gen| last_usage_gen == self.current_gen); - if used_in_this_gen || *id == 0 { - // Empty string (id = 0) or anything that was used in the gen - // we are now closing, is kept alive - return true; - }*/ - if data.usage_count.get() > 0 { - return true; + let retain = data.usage_count.get() > 0; + if !retain { + self.str_to_id.remove_entry(&data.str); } - - self.str_to_id.remove_entry(&data.str); - false + retain }); self.current_gen += 1; } @@ -77,7 +66,6 @@ impl ManagedStringStorage { let str: Rc = item.into(); let data = ManagedStringData { str: str.clone(), - last_usage_gen: Cell::new(None), cached_seq_num: Cell::new(None), usage_count: Cell::new(1), }; @@ -131,7 +119,6 @@ impl ManagedStringStorage { panic!("StringId to have a valid interned id"); } }; - data.last_usage_gen.replace(Some(self.current_gen)); data } } From 3e94997acd4d1e3f56a6725a210fd2bcbb23cbf5 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 7 Jan 2025 11:30:08 +0000 Subject: [PATCH 45/52] Check for string storage initialization to make check inside resolve safer With the current structure of the code, the `expect` inside `resolve` should never fail; hopefully we don't introduce a bug in the future that changes this. (I know that ideally in Rust we would represent such constraints in the type system, but I don't think I could do so without a lot of other changes, so I decided to go for the more self-contained solution for now.) --- profiling/src/internal/profile/mod.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index eb7b4fe08..468f7afce 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -116,6 +116,11 @@ impl Profile { sample: api::StringIdSample, timestamp: Option, ) -> anyhow::Result<()> { + anyhow::ensure!( + self.string_storage.is_some(), + "Current sample makes use of ManagedStringIds but profile was not created using a managed string table" + ); + self.validate_string_id_sample_labels(&sample)?; let labels: Vec<_> = sample .labels @@ -195,6 +200,8 @@ impl Profile { self.string_storage .as_ref() + // Safety: We always get here through a direct or indirect call to add_string_id_sample, + // which already ensured that the string storage exists. .expect("resolution from id requires managed string storage") .read() .expect("acquisition of read lock on string storage should succeed") From 8a1ebccc2738bbaa44741947370cf377b2e414aa Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 7 Jan 2025 11:32:56 +0000 Subject: [PATCH 46/52] Document that failure to acquire lock is unlikely --- profiling/src/internal/profile/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 468f7afce..90698ef46 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -204,6 +204,9 @@ impl Profile { // which already ensured that the string storage exists. .expect("resolution from id requires managed string storage") .read() + // Safety: This failure is unlikely as it only happens if the lock is poisoned (and for + // the lock to become poisoned, another unlikely failure already happened + // before) .expect("acquisition of read lock on string storage should succeed") .get_seq_num(non_empty_string_id, &mut self.strings) } From 800d5ee3421c81e61653c927a7ab8b6fda7e48c5 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 7 Jan 2025 11:44:31 +0000 Subject: [PATCH 47/52] Allow intern to fail cleanly rather than blowing up In particular, in the unlikely event that we would overflow the id, we signal an error back to the caller rather than impacting the application. The caller is expected to stop using this string table and create a new one instead. In the future we may make it easy to do so, by e.g. having an API to create a new table from the existing strings or something like that. --- profiling-ffi/src/string_storage.rs | 2 +- profiling/src/collections/string_storage.rs | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index e9017f425..b39046908 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -74,7 +74,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( .map_err(|_| { anyhow::anyhow!("acquisition of write lock on string storage should succeed") })? - .intern(string); + .intern(string)?; anyhow::Ok(ManagedStringId { value: string_id }) })() diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index e79cb2b17..be805a2f2 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -34,7 +34,8 @@ impl ManagedStringStorage { current_gen: 0, }; // Ensure empty string gets id 0 and always has usage > 0 so it's always retained - storage.intern(""); + // Safety: On an empty managed string table intern should never fail. + storage.intern("").expect("Initialization to succeed"); storage } @@ -49,17 +50,19 @@ impl ManagedStringStorage { self.current_gen += 1; } - pub fn intern(&mut self, item: &str) -> u32 { + pub fn intern(&mut self, item: &str) -> anyhow::Result { let entry = self.str_to_id.get_key_value(item); match entry { Some((_, id)) => { let usage_count = &self .id_to_data .get(id) - .expect("id_to_data and str_to_id should be in sync") + .ok_or_else(|| { + anyhow::anyhow!("BUG: id_to_data and str_to_id should be in sync") + })? .usage_count; usage_count.set(usage_count.get() + 1); - *id + Ok(*id) } None => { let id = self.next_id; @@ -69,12 +72,15 @@ impl ManagedStringStorage { cached_seq_num: Cell::new(None), usage_count: Cell::new(1), }; - self.next_id = self.next_id.checked_add(1).expect("Ran out of string ids!"); + self.next_id = self + .next_id + .checked_add(1) + .ok_or_else(|| anyhow::anyhow!("Ran out of string ids!"))?; let old_value = self.str_to_id.insert(str.clone(), id); debug_assert_eq!(old_value, None); let old_value = self.id_to_data.insert(id, data); debug_assert_eq!(old_value, None); - id + Ok(id) } } } From 921151170863f903c0a6f49a041ca38aeadafac0 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 7 Jan 2025 12:19:37 +0000 Subject: [PATCH 48/52] Refactor: Allow Profile::resolve to fail This will enable us to propagate failures when a ManagedStringId is not known, which will improve debugability and code quality by allowing us to signal the error. --- profiling/src/internal/profile/mod.rs | 96 +++++++++++++++------------ 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 90698ef46..2c6e63cbb 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -118,33 +118,34 @@ impl Profile { ) -> anyhow::Result<()> { anyhow::ensure!( self.string_storage.is_some(), - "Current sample makes use of ManagedStringIds but profile was not created using a managed string table" + "Current sample makes use of ManagedStringIds but profile was not created using a string table" ); self.validate_string_id_sample_labels(&sample)?; - let labels: Vec<_> = sample - .labels - .iter() - .map(|label| { - let key = self.resolve(label.key); - let internal_label = if let Some(s) = label.str { - let str = self.resolve(s); - Label::str(key, str) + + let mut labels = Vec::with_capacity(sample.labels.len()); + for label in &sample.labels { + let key = self.resolve(label.key)?; + let internal_label = if let Some(s) = label.str { + let str = self.resolve(s)?; + Label::str(key, str) + } else { + let num = label.num; + let num_unit = if let Some(s) = label.num_unit { + Some(self.resolve(s)?) } else { - let num = label.num; - let num_unit = label.num_unit.map(|s| self.resolve(s)); - Label::num(key, num, num_unit) + None }; + Label::num(key, num, num_unit) + }; - self.labels.dedup(internal_label) - }) - .collect(); + labels.push(self.labels.dedup(internal_label)); + } - let locations = sample - .locations - .iter() - .map(|l| self.add_string_id_location(l)) - .collect(); + let mut locations = Vec::with_capacity(sample.locations.len()); + for location in &sample.locations { + locations.push(self.add_string_id_location(location)?); + } self.add_sample_internal(sample.values, labels, locations, timestamp) } @@ -191,24 +192,26 @@ impl Profile { Ok(()) } - pub fn resolve(&mut self, id: ManagedStringId) -> StringId { + pub fn resolve(&mut self, id: ManagedStringId) -> anyhow::Result { let non_empty_string_id = if let Some(valid_id) = NonZeroU32::new(id.value) { valid_id } else { - return StringId::ZERO; // Both string tables use zero for the empty string + return Ok(StringId::ZERO); // Both string tables use zero for the empty string }; - self.string_storage + let string_id = self.string_storage .as_ref() // Safety: We always get here through a direct or indirect call to add_string_id_sample, // which already ensured that the string storage exists. - .expect("resolution from id requires managed string storage") + .ok_or_else(|| anyhow::anyhow!("Current sample makes use of ManagedStringIds but profile was not created using a string table"))? .read() // Safety: This failure is unlikely as it only happens if the lock is poisoned (and for // the lock to become poisoned, another unlikely failure already happened // before) .expect("acquisition of read lock on string storage should succeed") - .get_seq_num(non_empty_string_id, &mut self.strings) + .get_seq_num(non_empty_string_id, &mut self.strings); + + Ok(string_id) } /// Creates a profile with `start_time`. @@ -397,18 +400,21 @@ impl Profile { }) } - fn add_string_id_function(&mut self, function: &api::StringIdFunction) -> FunctionId { - let name = self.resolve(function.name); - let system_name = self.resolve(function.system_name); - let filename = self.resolve(function.filename); + fn add_string_id_function( + &mut self, + function: &api::StringIdFunction, + ) -> anyhow::Result { + let name = self.resolve(function.name)?; + let system_name = self.resolve(function.system_name)?; + let filename = self.resolve(function.filename)?; let start_line = function.start_line; - self.functions.dedup(Function { + Ok(self.functions.dedup(Function { name, system_name, filename, start_line, - }) + })) } fn add_location(&mut self, location: &api::Location) -> LocationId { @@ -422,15 +428,18 @@ impl Profile { }) } - fn add_string_id_location(&mut self, location: &api::StringIdLocation) -> LocationId { - let mapping_id = self.add_string_id_mapping(&location.mapping); - let function_id = self.add_string_id_function(&location.function); - self.locations.dedup(Location { + fn add_string_id_location( + &mut self, + location: &api::StringIdLocation, + ) -> anyhow::Result { + let mapping_id = self.add_string_id_mapping(&location.mapping)?; + let function_id = self.add_string_id_function(&location.function)?; + Ok(self.locations.dedup(Location { mapping_id, function_id, address: location.address, line: location.line, - }) + })) } fn add_mapping(&mut self, mapping: &api::Mapping) -> MappingId { @@ -446,17 +455,20 @@ impl Profile { }) } - fn add_string_id_mapping(&mut self, mapping: &api::StringIdMapping) -> MappingId { - let filename = self.resolve(mapping.filename); - let build_id = self.resolve(mapping.build_id); + fn add_string_id_mapping( + &mut self, + mapping: &api::StringIdMapping, + ) -> anyhow::Result { + let filename = self.resolve(mapping.filename)?; + let build_id = self.resolve(mapping.build_id)?; - self.mappings.dedup(Mapping { + Ok(self.mappings.dedup(Mapping { memory_start: mapping.memory_start, memory_limit: mapping.memory_limit, file_offset: mapping.file_offset, filename, build_id, - }) + })) } fn add_stacktrace(&mut self, locations: Vec) -> StackTraceId { @@ -663,7 +675,7 @@ impl Profile { anyhow::bail!("Duplicate label on sample: {:?} {:?}", duplicate, label); } - let key_id: StringId = self.resolve(label.key); + let key_id: StringId = self.resolve(label.key)?; if key_id == self.endpoints.local_root_span_id_label { anyhow::ensure!( From 8387b85a51699243afc51cfd3aff0cd30cc40a34 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 7 Jan 2025 12:32:38 +0000 Subject: [PATCH 49/52] Properly handle invalid ManagedStringIds by returning errors --- profiling-ffi/src/string_storage.rs | 13 +++----- profiling/src/collections/string_storage.rs | 37 +++++++++++---------- profiling/src/internal/profile/mod.rs | 2 +- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index b39046908..ef07b1b80 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -96,13 +96,10 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_unintern( let result = (|| { let storage = get_inner_string_storage(storage, true)?; - storage - .read() - .map_err(|_| { - anyhow::anyhow!("acquisition of read lock on string storage should succeed") - })? - .unintern(non_empty_string_id); - anyhow::Ok(()) + let read_locked_storage = storage.read().map_err(|_| { + anyhow::anyhow!("acquisition of read lock on string storage should succeed") + })?; + read_locked_storage.unintern(non_empty_string_id) })() .context("ddog_prof_ManagedStringStorage_unintern failed"); @@ -137,7 +134,7 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_get_string( .map_err(|_| { anyhow::anyhow!("acquisition of read lock on string storage should succeed") })? - .get_string(id.value)) + .get_string(id.value)?) .to_owned(); anyhow::Ok(string) diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index be805a2f2..d934744c5 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -87,45 +87,46 @@ impl ManagedStringStorage { // Here id is a NonZeroU32 because an id of 0 is the empty string and that can never be // uninterned (and it should be skipped instead in the caller) - pub fn unintern(&self, id: NonZeroU32) { - let data = self.get_data(id.into()); + pub fn unintern(&self, id: NonZeroU32) -> anyhow::Result<()> { + let data = self.get_data(id.into())?; let usage_count = &data.usage_count; usage_count.set(usage_count.get() - 1); + Ok(()) } // Here id is a NonZeroU32 because an id of 0 which StringTable always maps to 0 as well so this // entire call can be skipped - pub fn get_seq_num(&self, id: NonZeroU32, profile_strings: &mut StringTable) -> StringId { - let data = self.get_data(id.into()); + pub fn get_seq_num( + &self, + id: NonZeroU32, + profile_strings: &mut StringTable, + ) -> anyhow::Result { + let data = self.get_data(id.into())?; let profile_strings_pointer = ptr::addr_of!(*profile_strings); match data.cached_seq_num.get() { - Some((pointer, seq_num)) if pointer == profile_strings_pointer => seq_num, + Some((pointer, seq_num)) if pointer == profile_strings_pointer => Ok(seq_num), _ => { let seq_num = profile_strings.intern(data.str.as_ref()); data.cached_seq_num .set(Some((profile_strings_pointer, seq_num))); - seq_num + Ok(seq_num) } } } - pub fn get_string(&self, id: u32) -> Rc { - let data = self.get_data(id); + pub fn get_string(&self, id: u32) -> anyhow::Result> { + let data = self.get_data(id)?; - data.str.clone() + Ok(data.str.clone()) } - fn get_data(&self, id: u32) -> &ManagedStringData { - let data = match self.id_to_data.get(&id) { - Some(v) => v, - None => { - println!("{:?} {:?}", id, self.id_to_data); - panic!("StringId to have a valid interned id"); - } - }; - data + fn get_data(&self, id: u32) -> anyhow::Result<&ManagedStringData> { + match self.id_to_data.get(&id) { + Some(v) => Ok(v), + None => Err(anyhow::anyhow!("ManagedStringId {} is not valid", id)), + } } } diff --git a/profiling/src/internal/profile/mod.rs b/profiling/src/internal/profile/mod.rs index 2c6e63cbb..22953e7d9 100644 --- a/profiling/src/internal/profile/mod.rs +++ b/profiling/src/internal/profile/mod.rs @@ -209,7 +209,7 @@ impl Profile { // the lock to become poisoned, another unlikely failure already happened // before) .expect("acquisition of read lock on string storage should succeed") - .get_seq_num(non_empty_string_id, &mut self.strings); + .get_seq_num(non_empty_string_id, &mut self.strings)?; Ok(string_id) } From c2be33605d2e371b26270c3644f1b1b7afb0c197 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 8 Jan 2025 17:54:46 +0000 Subject: [PATCH 50/52] Avoid updating reference counts on the empty string This string is supposed to live for as long as the managed string storage does. Treating it specially in intern matches what we do in other functions and ensures that we can never overflow the reference count (or something weird like that). --- profiling/src/collections/string_storage.rs | 45 ++++++++++++--------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index d934744c5..a5161c12d 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -35,7 +35,7 @@ impl ManagedStringStorage { }; // Ensure empty string gets id 0 and always has usage > 0 so it's always retained // Safety: On an empty managed string table intern should never fail. - storage.intern("").expect("Initialization to succeed"); + storage.intern_new("").expect("Initialization to succeed"); storage } @@ -51,6 +51,11 @@ impl ManagedStringStorage { } pub fn intern(&mut self, item: &str) -> anyhow::Result { + if item.is_empty() { + // We don't increase ref-counts on the empty string + return Ok(0); + } + let entry = self.str_to_id.get_key_value(item); match entry { Some((_, id)) => { @@ -64,27 +69,29 @@ impl ManagedStringStorage { usage_count.set(usage_count.get() + 1); Ok(*id) } - None => { - let id = self.next_id; - let str: Rc = item.into(); - let data = ManagedStringData { - str: str.clone(), - cached_seq_num: Cell::new(None), - usage_count: Cell::new(1), - }; - self.next_id = self - .next_id - .checked_add(1) - .ok_or_else(|| anyhow::anyhow!("Ran out of string ids!"))?; - let old_value = self.str_to_id.insert(str.clone(), id); - debug_assert_eq!(old_value, None); - let old_value = self.id_to_data.insert(id, data); - debug_assert_eq!(old_value, None); - Ok(id) - } + None => self.intern_new(item), } } + pub fn intern_new(&mut self, item: &str) -> anyhow::Result { + let id = self.next_id; + let str: Rc = item.into(); + let data = ManagedStringData { + str: str.clone(), + cached_seq_num: Cell::new(None), + usage_count: Cell::new(1), + }; + self.next_id = self + .next_id + .checked_add(1) + .ok_or_else(|| anyhow::anyhow!("Ran out of string ids!"))?; + let old_value = self.str_to_id.insert(str.clone(), id); + debug_assert_eq!(old_value, None); + let old_value = self.id_to_data.insert(id, data); + debug_assert_eq!(old_value, None); + Ok(id) + } + // Here id is a NonZeroU32 because an id of 0 is the empty string and that can never be // uninterned (and it should be skipped instead in the caller) pub fn unintern(&self, id: NonZeroU32) -> anyhow::Result<()> { From ce63b4ef311548a800807540a8131b104f29b7c8 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 8 Jan 2025 17:59:45 +0000 Subject: [PATCH 51/52] Add fast-path for empty string interning --- profiling-ffi/src/string_storage.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/profiling-ffi/src/string_storage.rs b/profiling-ffi/src/string_storage.rs index ef07b1b80..a08436850 100644 --- a/profiling-ffi/src/string_storage.rs +++ b/profiling-ffi/src/string_storage.rs @@ -62,6 +62,11 @@ pub unsafe extern "C" fn ddog_prof_ManagedStringStorage_intern( storage: ManagedStringStorage, string: CharSlice, ) -> ManagedStringStorageInternResult { + // Empty strings always get assigned id 0, no need to check. + if string.len() == 0 { + return anyhow::Ok(ManagedStringId { value: 0 }).into(); + } + (|| { let storage = get_inner_string_storage(storage, true)?; From 20b946994fa2047d415429d8d65122d1b64a22f5 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 13 Jan 2025 16:53:08 +0000 Subject: [PATCH 52/52] Make `intern_new` private Adding it as `pub` was an oversight, since the intent is for this to be an inner helper that's only used by `intern` and by `new`. Having this as `pub` is quite dangerous as this method can easily be used to break a lot of the assumptions for the string storage. --- profiling/src/collections/string_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/src/collections/string_storage.rs b/profiling/src/collections/string_storage.rs index a5161c12d..50eabe3b1 100644 --- a/profiling/src/collections/string_storage.rs +++ b/profiling/src/collections/string_storage.rs @@ -73,7 +73,7 @@ impl ManagedStringStorage { } } - pub fn intern_new(&mut self, item: &str) -> anyhow::Result { + fn intern_new(&mut self, item: &str) -> anyhow::Result { let id = self.next_id; let str: Rc = item.into(); let data = ManagedStringData {