From 5eea3ca0ef621c341bfcb6561a66fd3d4040231b Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Sat, 4 Jan 2025 12:49:03 +0100 Subject: [PATCH] Add raw-node feature and RawNode type to capture subtrees from the source. --- .github/workflows/ci.yml | 5 +- Cargo.toml | 10 ++- src/lib.rs | 26 +++++-- src/raw_node.rs | 155 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 8 deletions(-) create mode 100644 src/raw_node.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e0ad0aa..2843aed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,8 @@ jobs: components: rustfmt, clippy - uses: Swatinem/rust-cache@v2 - run: cargo fmt -- --check - - run: cargo clippy --all-targets -- --deny warnings + - run: cargo clippy --all-targets --no-default-features -- --deny warnings + - run: cargo clippy --all-targets --all-features -- --deny warnings test: runs-on: ubuntu-latest @@ -19,4 +20,4 @@ jobs: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable - uses: Swatinem/rust-cache@v2 - - run: cargo test + - run: cargo test --all-features diff --git a/Cargo.toml b/Cargo.toml index 30fce51..c5fe699 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0" repository = "https://github.com/adamreichold/serde-roxmltree" documentation = "https://docs.rs/serde-roxmltree" readme = "README.md" -version = "0.8.3" +version = "0.8.4" edition = "2021" [dependencies] @@ -18,3 +18,11 @@ serde = "1.0" [dev-dependencies] serde = { version = "1.0", features = ["derive"] } + +[features] +default = [] +# Capture subtrees from the source +raw-node = [] + +[package.metadata.docs.rs] +all-features = true diff --git a/src/lib.rs b/src/lib.rs index 37e8181..3ce3eaf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,6 +37,8 @@ //! # Ok::<(), Box>(()) //! ``` //! +//! Subtrees can be captured from the source by enabling the `raw-node` feature and using the [`RawNode`] type. +//! //! 
Fields of structures map to child elements and attributes: //! //! ``` @@ -175,12 +177,16 @@ //! ``` //! //! [namespaces]: https://www.w3.org/TR/REC-xml-names/ -#![forbid(unsafe_code)] #![deny( + unsafe_code, missing_docs, missing_copy_implementations, missing_debug_implementations )] + +#[cfg(feature = "raw-node")] +mod raw_node; + use std::char::ParseCharError; use std::error::Error as StdError; use std::fmt; @@ -195,6 +201,9 @@ use serde::de; pub use roxmltree; +#[cfg(feature = "raw-node")] +pub use raw_node::RawNode; + /// Deserialize an instance of type `T` directly from XML text pub fn from_str(text: &str) -> Result where @@ -704,14 +713,21 @@ where fn deserialize_struct( self, - _name: &'static str, + #[allow(unused_variables)] name: &'static str, _fields: &'static [&'static str], visitor: V, ) -> Result where V: de::Visitor<'de>, { - self.deserialize_map(visitor) + #[cfg(feature = "raw-node")] + let res = + raw_node::deserialize_struct(self, name, move |this| this.deserialize_map(visitor)); + + #[cfg(not(feature = "raw-node"))] + let res = self.deserialize_map(visitor); + + res } fn deserialize_enum( @@ -1160,14 +1176,14 @@ mod tests { #[test] fn borrowed_str() { - let document = Document::parse("foobar").unwrap(); + let doc = Document::parse("foobar").unwrap(); #[derive(Deserialize)] struct Root<'a> { child: &'a str, } - let val = from_doc::(&document).unwrap(); + let val = from_doc::(&doc).unwrap(); assert_eq!(val.child, "foobar"); } diff --git a/src/raw_node.rs b/src/raw_node.rs new file mode 100644 index 0000000..6062d58 --- /dev/null +++ b/src/raw_node.rs @@ -0,0 +1,155 @@ +use std::cell::Cell; +use std::fmt; +use std::marker::PhantomData; +use std::mem::transmute; +use std::ops::Deref; +use std::ptr; + +use roxmltree::Node; +use serde::de; + +use crate::{Deserializer, Source}; + +/// Captures subtrees from the source +/// +/// This type must borrow from the source during deserialization and therefore requires the use of the [`from_doc`][crate::from_doc] 
or [`from_node`][crate::from_node] entry points. +/// It will however recover only the source `document` or `node` lifetime and not the full `input` lifetime. +/// +/// ``` +/// use roxmltree::Document; +/// use serde::Deserialize; +/// use serde_roxmltree::{from_doc, RawNode}; +/// +/// #[derive(Deserialize)] +/// struct Record<'a> { +/// #[serde(borrow)] +/// subtree: RawNode<'a>, +/// } +/// +/// let document = Document::parse(r#"foo"#)?; +/// +/// let record = from_doc::(&document)?; +/// assert!(record.subtree.has_tag_name("subtree")); +/// # +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct RawNode<'a>(pub Node<'a, 'a>); + +impl<'a> Deref for RawNode<'a> { + type Target = Node<'a, 'a>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'de, 'a> de::Deserialize<'de> for RawNode<'a> +where + 'de: 'a, +{ + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + struct Visitor<'a>(PhantomData<&'a ()>); + + impl<'de, 'a> de::Visitor<'de> for Visitor<'a> + where + 'de: 'a, + { + type Value = RawNode<'a>; + + fn expecting(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str("struct RawNode") + } + + fn visit_map(self, _map: M) -> Result + where + M: de::MapAccess<'de>, + { + match CURR_NODE.get() { + #[allow(unsafe_code)] + // SAFETY: This is set only while `deserialize_struct` is active. 
+ Some(curr_node) => Ok(RawNode(unsafe { + transmute::, Node<'a, 'a>>(curr_node) + })), + None => Err(de::Error::custom("no current node")), + } + } + } + + deserializer.deserialize_struct(RAW_NODE_NAME, &[], Visitor(PhantomData)) + } +} + +pub fn deserialize_struct<'de, 'input, 'temp, O, F, R>( + this: Deserializer<'de, 'input, 'temp, O>, + name: &'static str, + f: F, +) -> R +where + F: FnOnce(Deserializer<'de, 'input, 'temp, O>) -> R, +{ + let _reset_curr_node = match &this.source { + Source::Node(node) if ptr::eq(name, RAW_NODE_NAME) => { + #[allow(unsafe_code)] + // SAFETY: The guard will reset this before `deserialize_struct` returns. + CURR_NODE.set(Some(unsafe { + transmute::, Node<'static, 'static>>(*node) + })); + + Some(ResetCurrNode) + } + _ => None, + }; + + f(this) +} + +static RAW_NODE_NAME: &str = "RawNode"; + +thread_local! { + static CURR_NODE: Cell>> = const { Cell::new(None) }; +} + +struct ResetCurrNode; + +impl Drop for ResetCurrNode { + fn drop(&mut self) { + CURR_NODE.set(None); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use roxmltree::Document; + use serde::Deserialize; + + use crate::from_doc; + + #[test] + fn raw_node_captures_subtree() { + #[derive(Debug, Deserialize)] + struct Root<'a> { + #[serde(borrow)] + foo: RawNode<'a>, + } + + let doc = Document::parse(r#"23baz"#).unwrap(); + let val = from_doc::(&doc).unwrap(); + + assert!(val.foo.0.is_element()); + assert!(val.foo.0.has_tag_name("foo")); + + let children = val.foo.0.children().collect::>(); + assert_eq!(children.len(), 2); + assert!(children[0].is_element()); + assert!(children[0].has_tag_name("bar")); + assert_eq!(children[0].attribute("qux").unwrap(), "42"); + assert!(children[1].is_text()); + assert_eq!(children[1].text().unwrap(), "baz"); + } +}