diff --git a/examples/21-simple-mono-vertex.yaml b/examples/21-simple-mono-vertex.yaml index 2a437b44b..9ca99cf1b 100644 --- a/examples/21-simple-mono-vertex.yaml +++ b/examples/21-simple-mono-vertex.yaml @@ -14,4 +14,4 @@ spec: sink: udsink: container: - image: quay.io/numaio/numaflow-rs/sink-log:stable + image: quay.io/numaio/numaflow-rs/sink-log:stable \ No newline at end of file diff --git a/rust/.rustfmt.toml b/rust/.rustfmt.toml index 3a26366d4..36c419bb3 100644 --- a/rust/.rustfmt.toml +++ b/rust/.rustfmt.toml @@ -1 +1 @@ -edition = "2021" +edition = "2021" \ No newline at end of file diff --git a/rust/Cargo.lock b/rust/Cargo.lock index a6493870a..a5d40a88e 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -17,6 +17,18 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -26,6 +38,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -43,9 +61,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "arc-swap" @@ -53,6 +71,12 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +[[package]] +name = "arraydeque" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d902e3d592a523def97af8f317b08ce16b7ab854c1985a0c671e6f15cebc236" + [[package]] name = "async-nats" version = "0.35.1" @@ -77,7 +101,7 @@ dependencies = [ "serde_json", "serde_nanos", "serde_repr", - "thiserror 1.0.64", + "thiserror 1.0.69", "time", "tokio", "tokio-rustls 0.26.0", @@ -88,9 +112,9 @@ dependencies = [ [[package]] name = "async-nats" -version = "0.37.0" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd3bdd6ea595b2ea504500a3566071beb81125fc15d40a6f6bffa43575f64152" +checksum = "76433c4de73442daedb3a59e991d94e85c14ebfc33db53dfcd347a21cd6ef4f8" dependencies = [ "base64 0.22.1", "bytes", @@ -99,6 +123,7 @@ dependencies = [ "nkeys", "nuid", "once_cell", + "pin-project", "portable-atomic", "rand", "regex", @@ -110,11 +135,12 @@ dependencies = [ "serde_json", "serde_nanos", "serde_repr", - "thiserror 1.0.64", + "thiserror 1.0.69", "time", "tokio", "tokio-rustls 0.26.0", "tokio-util", + "tokio-websockets", "tracing", "tryhard", "url", @@ -139,7 +165,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -150,7 +176,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", 
+ "syn 2.0.90", ] [[package]] @@ -167,21 +193,20 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-lc-rs" -version = "1.10.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd82dba44d209fddb11c190e0a94b78651f95299598e472215667417a03ff1d" +checksum = "f47bb8cc16b669d267eeccf585aea077d0882f4777b1c1f740217885d6e6e5a3" dependencies = [ "aws-lc-sys", - "mirai-annotations", "paste", "zeroize", ] [[package]] name = "aws-lc-sys" -version = "0.22.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7a4168111d7eb622a31b214057b8509c0a7e1794f44c546d742330dc793972" +checksum = "a2101df3813227bbaaaa0b04cd61c534c7954b22bd68d399b440be937dc63ff7" dependencies = [ "bindgen", "cc", @@ -194,9 +219,9 @@ dependencies = [ [[package]] name = "axum" -version = "0.7.7" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "504e3947307ac8326a5437504c517c4b56716c9d98fac0028c2acc7ca47d70ae" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ "async-trait", "axum-core", @@ -205,7 +230,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.1", "http-body-util", - "hyper 1.4.1", + "hyper 1.5.1", "hyper-util", "itoa", "matchit", @@ -218,7 +243,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sync_wrapper 1.0.1", + "sync_wrapper 1.0.2", "tokio", "tower 0.5.1", "tower-layer", @@ -241,7 +266,7 @@ dependencies = [ "mime", "pin-project-lite", "rustversion", - "sync_wrapper 1.0.1", + "sync_wrapper 1.0.2", "tower-layer", "tower-service", "tracing", @@ -255,7 +280,7 @@ checksum = "57d123550fa8d071b7255cb0cc04dc302baa6c8c4a79f55701552684d8399bce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -270,10 +295,10 @@ dependencies = [ "http 1.1.0", "http-body 1.0.1", "http-body-util", - "hyper 1.4.1", + "hyper 1.5.1", "hyper-util", "pin-project-lite", - "rustls 0.23.14", + "rustls 0.23.19", "rustls-pemfile 2.2.0", "rustls-pki-types", "tokio", @@ -334,9 +359,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.69.4" +version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ "bitflags 2.6.0", "cexpr", @@ -345,13 +370,13 @@ dependencies = [ "lazy_static", "lazycell", "log", - "prettyplease 0.2.22", + "prettyplease 0.2.25", "proc-macro2", "quote", "regex", "rustc-hash 1.1.0", "shlex", - "syn 2.0.89", + "syn 2.0.90", "which", ] @@ -399,18 +424,18 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" dependencies = [ "serde", ] [[package]] name = "cc" -version = "1.1.26" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cbd4ab9fef358caa9c599eae3105af638ead5fb47a718315d8e03c852b9f0d" +checksum = "f34d93e62b03caf570cccc334cbc6c2fceca82f39211051345108adcba3eebdc" dependencies = [ "jobserver", "libc", @@ -432,6 +457,12 @@ version = "1.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.38" @@ -460,9 +491,9 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.51" +version = "0.1.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" +checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" dependencies = [ "cc", ] @@ -483,14 +514,13 @@ dependencies = [ [[package]] name = "config" -version = "0.14.0" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7328b20597b53c2454f0b1919720c25c7339051c02b72b7e05409e00b14132be" +checksum = "68578f196d2a33ff61b27fae256c3164f65e36382648e30666dde05b8cc9dfdf" dependencies = [ "async-trait", "convert_case", "json5", - "lazy_static", "nom", "pathdiff", "ron", @@ -498,7 +528,7 @@ dependencies = [ "serde", "serde_json", "toml", - "yaml-rust", + "yaml-rust2", ] [[package]] @@ -546,6 +576,16 @@ dependencies = [ "libc", ] +[[package]] +name = "core-foundation" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -554,9 +594,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" dependencies = [ "libc", ] @@ -615,7 +655,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -655,6 +695,17 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "dlv-list" version = "0.5.2" @@ -706,9 +757,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encoding_rs" -version = "0.8.34" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] @@ -721,19 +772,19 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "fastrand" -version = 
"2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "fiat-crypto" @@ -824,7 +875,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -880,8 +931,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -908,7 +961,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.6.0", + "indexmap 2.7.0", "slab", "tokio", "tokio-util", @@ -917,9 +970,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" dependencies = [ "atomic-waker", "bytes", @@ -927,7 +980,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.6.0", + "indexmap 2.7.0", "slab", "tokio", "tokio-util", @@ -942,15 +995,28 @@ checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hashbrown" -version = "0.13.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + +[[package]] +name = "hashlink" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown 0.14.5", +] [[package]] name = "headers" @@ -988,12 +1054,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "home" version = "0.5.9" @@ -1073,9 +1133,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "0.14.30" +version = "0.14.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9" +checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85" dependencies = [ "bytes", "futures-channel", @@ -1097,14 +1157,14 @@ dependencies = [ [[package]] name = "hyper" -version = "1.4.1" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" +checksum = 
"97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.6", + "h2 0.4.7", "http 1.1.0", "http-body 1.0.1", "httparse", @@ -1126,7 +1186,7 @@ dependencies = [ "futures-util", "headers", "http 1.1.0", - "hyper 1.4.1", + "hyper 1.5.1", "hyper-rustls 0.27.3", "hyper-util", "pin-project-lite", @@ -1144,7 +1204,7 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.12", - "hyper 0.14.30", + "hyper 0.14.31", "rustls 0.21.12", "tokio", "tokio-rustls 0.24.1", @@ -1158,25 +1218,25 @@ checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", "http 1.1.0", - "hyper 1.4.1", + "hyper 1.5.1", "hyper-util", "log", - "rustls 0.23.14", - "rustls-native-certs 0.8.0", + "rustls 0.23.19", + "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", "tower-service", - "webpki-roots 0.26.6", + "webpki-roots 0.26.7", ] [[package]] name = "hyper-timeout" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.4.1", + "hyper 1.5.1", "hyper-util", "pin-project-lite", "tokio", @@ -1185,16 +1245,16 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", "futures-util", "http 1.1.0", "http-body 1.0.1", - "hyper 1.4.1", + "hyper 1.5.1", "pin-project-lite", "socket2", "tokio", @@ -1225,14 +1285,143 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", ] [[package]] @@ -1247,12 +1436,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", - "hashbrown 0.15.0", + "hashbrown 0.15.2", ] [[package]] @@ -1290,9 +1479,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "jobserver" @@ -1305,10 +1494,11 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.70" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" +checksum = "a865e038f7f6ed956f788f0d7d60c541fff74c7bd74272c5d4cf15c63743e705" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -1335,7 +1525,7 @@ dependencies = [ "pest_derive", "regex", "serde_json", - "thiserror 1.0.64", + "thiserror 1.0.69", ] [[package]] @@ -1377,7 +1567,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.1", "http-body-util", - "hyper 1.4.1", + "hyper 1.5.1", "hyper-http-proxy", "hyper-rustls 0.27.3", "hyper-timeout", @@ -1386,13 +1576,13 @@ dependencies = [ 
"k8s-openapi", "kube-core", "pem", - "rustls 0.23.14", + "rustls 0.23.19", "rustls-pemfile 2.2.0", "secrecy", "serde", "serde_json", "serde_yaml", - "thiserror 1.0.64", + "thiserror 1.0.69", "tokio", "tokio-util", "tower 0.4.13", @@ -1413,7 +1603,7 @@ dependencies = [ "serde", "serde-value", "serde_json", - "thiserror 1.0.64", + "thiserror 1.0.69", ] [[package]] @@ -1430,32 +1620,32 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.159" +version = "0.2.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" [[package]] name = "libloading" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", "windows-targets 0.52.6", ] -[[package]] -name = "linked-hash-map" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" - [[package]] name = "linux-raw-sys" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + [[package]] name = "lock_api" version = "0.4.12" @@ -1526,22 +1716,15 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ - "hermit-abi", "libc", "wasi", "windows-sys 0.52.0", ] -[[package]] -name = "mirai-annotations" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" - [[package]] name = "multimap" version = "0.8.3" @@ -1636,9 +1819,7 @@ dependencies = [ name = "numaflow" version = "0.1.0" dependencies = [ - "backoff", "numaflow-core", - "numaflow-pb", "servesink", "serving", "tokio", @@ -1658,7 +1839,7 @@ dependencies = [ "prost-types 0.13.3", "serde", "serde_json", - "thiserror 1.0.64", + "thiserror 1.0.69", "tokio", "tokio-stream", "tokio-util", @@ -1672,7 +1853,7 @@ dependencies = [ name = "numaflow-core" version = "0.1.0" dependencies = [ - "async-nats 0.37.0", + "async-nats 0.38.0", "axum", "axum-server", "backoff", @@ -1696,12 +1877,12 @@ dependencies = [ "pulsar", "rand", "rcgen", - "rustls 0.23.14", + "rustls 0.23.19", "semver", "serde", "serde_json", "tempfile", - "thiserror 1.0.64", + "thiserror 2.0.3", "tokio", "tokio-stream", "tokio-util", @@ -1784,12 +1965,12 @@ dependencies = [ [[package]] name = "ordered-multimap" -version = "0.6.0" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ed8acf08e98e744e5384c8bc63ceb0364e68a6854187221c18df61c4797690e" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" dependencies = [ "dlv-list", - "hashbrown 0.13.2", + 
"hashbrown 0.14.5", ] [[package]] @@ -1829,9 +2010,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pathdiff" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" [[package]] name = "pem" @@ -1871,20 +2052,20 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.13" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdbef9d1d47087a895abd220ed25eb4ad973a5e26f6a4367b038c25e28dfc2d9" +checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" dependencies = [ "memchr", - "thiserror 1.0.64", + "thiserror 1.0.69", "ucd-trie", ] [[package]] name = "pest_derive" -version = "2.7.13" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d3a6e3394ec80feb3b6393c725571754c6188490265c61aaf260810d6b95aa0" +checksum = "d214365f632b123a47fd913301e14c946c61d1c183ee245fa76eb752e59a02dd" dependencies = [ "pest", "pest_generator", @@ -1892,22 +2073,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.13" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94429506bde1ca69d1b5601962c73f4172ab4726571a59ea95931218cb0e930e" +checksum = "eb55586734301717aea2ac313f50b2eb8f60d2fc3dc01d190eefa2e625f60c4e" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "pest_meta" -version = "2.7.13" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8a071862e93690b6e34e9a5fb8e33ff3734473ac0245b27232222c4906a33f" +checksum = "b75da2a70cf4d9cb76833c990ac9cd3923c9a8905a8929789ce347c84564d03d" dependencies = [ "once_cell", "pest", @@ -1921,34 +2102,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.6.0", + "indexmap 2.7.0", ] [[package]] name = "pin-project" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf123a161dde1e524adf36f90bc5d8d3462824a9c43553ad07a8183161189ec" +checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4502d8515ca9f32f1fb543d987f63d95a14934883db45bdb48060b6b69257f8" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -1968,9 +2149,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" [[package]] name = "powerfmt" @@ -1999,12 +2180,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.22" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -2036,7 +2217,7 @@ checksum = "440f724eba9f6996b75d63681b0a92b06947f1457076d503a4d2e2c8f56442b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -2094,11 +2275,11 @@ dependencies = [ "multimap 0.10.0", "once_cell", "petgraph", - "prettyplease 0.2.22", + "prettyplease 0.2.25", "prost 0.13.3", "prost-types 0.13.3", "regex", - "syn 2.0.89", + "syn 2.0.90", "tempfile", ] @@ -2125,7 +2306,7 @@ dependencies = [ "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -2179,45 +2360,49 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" dependencies = [ "bytes", "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.0.0", - "rustls 0.23.14", + "rustc-hash 2.1.0", + "rustls 0.23.19", "socket2", - "thiserror 1.0.64", + "thiserror 2.0.3", "tokio", "tracing", ] [[package]] name = "quinn-proto" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" dependencies = [ "bytes", + "getrandom", "rand", "ring", - "rustc-hash 2.0.0", - "rustls 0.23.14", + "rustc-hash 2.1.0", + "rustls 0.23.19", + "rustls-pki-types", "slab", - "thiserror 1.0.64", + "thiserror 2.0.3", "tinyvec", "tracing", + "web-time", ] [[package]] name = "quinn-udp" -version = "0.5.5" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" +checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" dependencies = [ + "cfg_aliases", "libc", "once_cell", "socket2", @@ -2313,13 +2498,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.8", + "regex-automata 0.4.9", "regex-syntax 0.8.5", ] @@ -2334,9 +2519,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -2369,7 +2554,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 
0.14.31", "hyper-rustls 0.24.2", "ipnet", "js-sys", @@ -2399,9 +2584,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -2410,7 +2595,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.1", "http-body-util", - "hyper 1.4.1", + "hyper 1.5.1", "hyper-rustls 0.27.3", "hyper-util", "ipnet", @@ -2421,13 +2606,13 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.14", + "rustls 0.23.19", "rustls-pemfile 2.2.0", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper 1.0.1", + "sync_wrapper 1.0.2", "tokio", "tokio-rustls 0.26.0", "tower-service", @@ -2435,7 +2620,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots 0.26.6", + "webpki-roots 0.26.7", "windows-registry", ] @@ -2468,9 +2653,9 @@ dependencies = [ [[package]] name = "rust-ini" -version = "0.19.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e2a3bcec1f113553ef1c88aae6c020a369d03d55b58de9869a0908930385091" +checksum = "3e0698206bcb8882bf2a9ecb4c1e7785db57ff052297085a6efd4fe42302068a" dependencies = [ "cfg-if", "ordered-multimap", @@ -2490,9 +2675,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" [[package]] name = "rustc_version" @@ -2505,9 +2690,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" dependencies = [ "bitflags 2.6.0", "errno", @@ -2530,9 +2715,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.14" +version = "0.23.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415d9944693cb90382053259f89fbb077ea730ad7273047ec63b19bc9b160ba8" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" dependencies = [ "aws-lc-rs", "log", @@ -2554,20 +2739,19 @@ dependencies = [ "rustls-pemfile 2.2.0", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 2.11.1", ] [[package]] name = "rustls-native-certs" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a" +checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" dependencies = [ "openssl-probe", - "rustls-pemfile 2.2.0", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 3.0.1", ] [[package]] @@ -2590,9 +2774,12 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" +checksum = 
"16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +dependencies = [ + "web-time", +] [[package]] name = "rustls-webpki" @@ -2618,9 +2805,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "ryu" @@ -2630,9 +2817,9 @@ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "schannel" -version = "0.1.24" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9aaafd5a2b6e3d657ff009d82fbd630b6bd54dd4eb06f21693925cdf80f9b8b" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ "windows-sys 0.59.0", ] @@ -2670,7 +2857,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ "bitflags 2.6.0", - "core-foundation", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1415a607e92bec364ea2cf9264646dcce0f91e6d65281bd6f2819cca3bf39c8" +dependencies = [ + "bitflags 2.6.0", + "core-foundation 0.10.0", "core-foundation-sys", "libc", "security-framework-sys", @@ -2678,9 +2878,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" dependencies = [ "core-foundation-sys", "libc", @@ -2719,14 +2919,14 @@ checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "itoa", "memchr", @@ -2761,7 +2961,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -2791,7 +2991,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.0", "itoa", "ryu", "serde", @@ -2803,7 +3003,7 @@ name = "servesink" version = "0.1.0" dependencies = [ "numaflow 0.1.1", - "reqwest 0.12.8", + "reqwest 0.12.9", "tokio", "tonic", "tracing", @@ -2831,7 +3031,7 @@ dependencies = [ "serde", "serde_json", "tempfile", - "thiserror 1.0.64", + "thiserror 1.0.69", "tokio", "tower 0.4.13", "tower-http", @@ -2932,9 +3132,9 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", @@ -2956,6 +3156,12 @@ dependencies = [ "der", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "subtle" version = "2.6.1" @@ -2975,9 +3181,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.89" +version = "2.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" dependencies = [ "proc-macro2", "quote", @@ -2992,13 +3198,24 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" [[package]] name = "sync_wrapper" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -3006,7 +3223,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" dependencies = [ "bitflags 1.3.2", - "core-foundation", + "core-foundation 0.9.4", "system-configuration-sys", ] @@ -3022,9 +3239,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", "fastrand", @@ -3035,11 +3252,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.64" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl 1.0.64", + "thiserror-impl 1.0.69", ] [[package]] @@ -3053,13 +3270,13 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "1.0.64" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3070,7 +3287,7 @@ checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3123,6 +3340,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" 
+dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinyvec" version = "1.8.0" @@ -3164,7 +3391,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3194,7 +3421,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.14", + "rustls 0.23.19", "rustls-pki-types", "tokio", ] @@ -3223,6 +3450,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-websockets" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f591660438b3038dd04d16c938271c79e7e06260ad2ea2885a4861bfb238605d" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-core", + "futures-sink", + "http 1.1.0", + "httparse", + "rand", + "ring", + "rustls-native-certs 0.8.1", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.0", + "tokio-util", +] + [[package]] name = "toml" version = "0.8.19" @@ -3250,7 +3498,7 @@ version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.0", "serde", "serde_spanned", "toml_datetime", @@ -3268,11 +3516,11 @@ dependencies = [ "axum", "base64 0.22.1", "bytes", - "h2 0.4.6", + "h2 0.4.7", "http 1.1.0", "http-body 1.0.1", "http-body-util", - "hyper 1.4.1", + "hyper 1.5.1", "hyper-timeout", "hyper-util", "percent-encoding", @@ -3293,12 +3541,12 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" dependencies = [ - "prettyplease 0.2.22", + "prettyplease 0.2.25", "proc-macro2", "prost-build 0.13.3", "prost-types 0.13.3", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3371,9 +3619,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "log", "pin-project-lite", @@ -3383,20 +3631,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", "valuable", @@ -3415,9 +3663,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ 
"matchers", "nu-ansi-term", @@ -3439,7 +3687,7 @@ checksum = "70977707304198400eb4835a78f6a9f928bf41bba420deb8fdb175cd965d77a7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3473,33 +3721,15 @@ checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" [[package]] name = "unicase" -version = "2.7.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] - -[[package]] -name = "unicode-bidi" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" +checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" - -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unicode-segmentation" @@ -3533,20 +3763,32 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", "percent-encoding", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "uuid" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "getrandom", "rand", @@ -3582,9 +3824,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.93" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" +checksum = "d15e63b4482863c109d70a7b8706c1e364eb6ea449b201a76c5b89cedcec2d5c" dependencies = [ "cfg-if", "once_cell", @@ -3593,36 +3835,37 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.93" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" +checksum = "8d36ef12e3aaca16ddd3f67922bc63e48e953f126de60bd33ccc0101ef9998cd" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "wasm-bindgen-shared", ] [[package]] name = 
"wasm-bindgen-futures" -version = "0.4.43" +version = "0.4.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" +checksum = "9dfaf8f50e5f293737ee323940c7d8b08a66a95a419223d9f41610ca08b0833d" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.93" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" +checksum = "705440e08b42d3e4b36de7d66c944be628d579796b8090bfa3471478a2260051" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3630,28 +3873,38 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.93" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" +checksum = "98c9ae5a76e46f4deecd0f0255cc223cfa18dc9b261213b8aa0c7b36f61b3f1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.93" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" +checksum = "6ee99da9c5ba11bd675621338ef6fa52296b76b83305e9b6e5c77d4c286d6d49" [[package]] name = "web-sys" -version = "0.3.70" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +checksum = "a98bc3c33f0fe7e59ad7cd041b89034fa82a7c2d4365ca538dda6cdaf513863c" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" dependencies = [ "js-sys", "wasm-bindgen", @@ -3665,9 +3918,9 @@ checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" [[package]] name = "webpki-roots" -version = "0.26.6" +version = "0.26.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841c67bff177718f1d4dfefde8d8f0e78f9b6589319ba88312f567fc5841a958" +checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e" dependencies = [ "rustls-pki-types", ] @@ -3913,12 +4166,26 @@ dependencies = [ ] [[package]] -name = "yaml-rust" -version = "0.4.5" +name = "write16" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yaml-rust2" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8902160c4e6f2fb145dbe9d6760a75e3c9522d8bf796ed7047c85919ac7115f8" dependencies = [ - "linked-hash-map", + "arraydeque", + "encoding_rs", + "hashlink", ] [[package]] @@ -3930,6 +4197,30 @@ dependencies = [ "time", ] +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -3948,7 +4239,28 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", +] + +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", + "synstructure", ] [[package]] @@ -3956,3 +4268,25 @@ name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index b9a11c653..3cbd68ef2 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -18,11 +18,9 @@ version = "0.1.0" edition = "2021" [dependencies] -tokio = "1.39.2" -backoff = { path = "backoff" } +tokio = "1.41.1" servesink = { path = "servesink" } serving = { path = "serving" } numaflow-core = { path = "numaflow-core" } -numaflow-pb = { path = "numaflow-pb" } tracing = "0.1.40" tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } diff --git a/rust/Dockerfile b/rust/Dockerfile index 3fcd606fa..ce50094ca 100644 --- a/rust/Dockerfile +++ b/rust/Dockerfile @@ -20,6 +20,9 @@ COPY ./backoff/Cargo.toml ./backoff/Cargo.toml RUN cargo new numaflow-models COPY ./numaflow-models/Cargo.toml ./numaflow-models/Cargo.toml +RUN cargo new numaflow-pb +COPY ./numaflow-pb/Cargo.toml ./numaflow-pb/Cargo.toml + RUN cargo new numaflow-core COPY numaflow-core/Cargo.toml ./numaflow-core/Cargo.toml @@ -40,9 +43,7 @@ COPY ./backoff/src ./backoff/src COPY ./numaflow-models/src ./numaflow-models/src COPY ./serving/src ./serving/src COPY numaflow-core/src ./numaflow-core/src -COPY numaflow-core/build.rs ./numaflow-core/build.rs -COPY numaflow-core/proto ./numaflow-core/proto - +COPY ./numaflow-pb/src ./numaflow-pb/src # Build the real binaries RUN touch src/bin/main.rs && \ cargo build --workspace --all --release diff --git a/rust/backoff/Cargo.toml b/rust/backoff/Cargo.toml index 9c2904925..c82508001 100644 --- a/rust/backoff/Cargo.toml +++ b/rust/backoff/Cargo.toml @@ -5,4 +5,4 @@ edition = "2021" [dependencies] pin-project = "1.1.5" -tokio = { 
version = "1.38.0", features = ["full"] } +tokio = { version = "1.41.1", features = ["full"] } diff --git a/rust/numaflow-core/Cargo.toml b/rust/numaflow-core/Cargo.toml index 6df96fb14..c72b20b5d 100644 --- a/rust/numaflow-core/Cargo.toml +++ b/rust/numaflow-core/Cargo.toml @@ -13,8 +13,8 @@ axum = "0.7.5" axum-server = { version = "0.7.1", features = ["tls-rustls"] } tonic = "0.12.3" bytes = "1.7.1" -thiserror = "1.0.63" -tokio = { version = "1.39.3", features = ["full"] } +thiserror = "2.0.3" +tokio = { version = "1.41.1", features = ["full"] } tracing = "0.1.40" tokio-util = "0.7.11" tokio-stream = "0.1.15" @@ -41,7 +41,7 @@ log = "0.4.22" futures = "0.3.30" pin-project = "1.1.5" rand = "0.8.5" -async-nats = "0.37.0" +async-nats = "0.38.0" numaflow-pulsar = {path = "../numaflow-extns/pulsar"} [dev-dependencies] @@ -50,4 +50,3 @@ numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", rev = "ddd8795 pulsar = {version = "6.3.0", default-features = false, features = ["tokio-rustls-runtime"]} [build-dependencies] - diff --git a/rust/numaflow-core/src/config.rs b/rust/numaflow-core/src/config.rs index e36ab4dc2..167c2f1cd 100644 --- a/rust/numaflow-core/src/config.rs +++ b/rust/numaflow-core/src/config.rs @@ -18,6 +18,58 @@ pub(crate) mod monovertex; /// Pipeline specific configs. pub(crate) mod pipeline; +pub const NUMAFLOW_MONO_VERTEX_NAME: &str = "NUMAFLOW_MONO_VERTEX_NAME"; +const NUMAFLOW_VERTEX_NAME: &str = "NUMAFLOW_VERTEX_NAME"; +const NUMAFLOW_REPLICA: &str = "NUMAFLOW_REPLICA"; +static VERTEX_NAME: OnceLock = OnceLock::new(); + +/// fetch the vertex name from the environment variable +pub(crate) fn get_vertex_name() -> &'static str { + VERTEX_NAME.get_or_init(|| { + env::var(NUMAFLOW_MONO_VERTEX_NAME) + .or_else(|_| env::var(NUMAFLOW_VERTEX_NAME)) + .unwrap_or_default() + }) +} + +static IS_MONO_VERTEX: OnceLock = OnceLock::new(); + +/// returns true if the vertex is a mono vertex +pub(crate) fn is_mono_vertex() -> bool { + *IS_MONO_VERTEX.get_or_init(|| env::var(NUMAFLOW_MONO_VERTEX_NAME).is_ok()) +} + +static COMPONENT_TYPE: OnceLock = OnceLock::new(); + +/// fetch the component type from the environment variable +pub(crate) fn get_component_type() -> &'static str { + COMPONENT_TYPE.get_or_init(|| { + if is_mono_vertex() { + "mono-vertex".to_string() + } else { + "pipeline".to_string() + } + }) +} + +static PIPELINE_NAME: OnceLock = OnceLock::new(); + +pub(crate) fn get_pipeline_name() -> &'static str { + PIPELINE_NAME.get_or_init(|| env::var("NUMAFLOW_PIPELINE_NAME").unwrap_or_default()) +} + +static VERTEX_REPLICA: OnceLock = OnceLock::new(); + +/// fetch the vertex replica information from the environment variable +pub(crate) fn get_vertex_replica() -> &'static u16 { + VERTEX_REPLICA.get_or_init(|| { + env::var(NUMAFLOW_REPLICA) + .unwrap_or_default() + .parse() + .unwrap_or_default() + }) +} + /// Exposes the [Settings] via lazy loading. 
pub fn config() -> &'static Settings { static CONF: OnceLock<Settings> = OnceLock::new(); diff --git a/rust/numaflow-core/src/config/components.rs b/rust/numaflow-core/src/config/components.rs index 840ad39e5..adb2784e2 100644 --- a/rust/numaflow-core/src/config/components.rs +++ b/rust/numaflow-core/src/config/components.rs @@ -5,13 +5,14 @@ pub(crate) mod source { use std::{fmt::Debug, time::Duration}; - use crate::error::Error; - use crate::Result; use bytes::Bytes; use numaflow_models::models::{GeneratorSource, PulsarSource, Source}; use numaflow_pulsar::source::{PulsarAuth, PulsarSourceConfig}; use tracing::warn; + use crate::error::Error; + use crate::Result; + #[derive(Debug, Clone, PartialEq)] pub(crate) struct SourceConfig { pub(crate) source_type: SourceType, @@ -74,9 +75,11 @@ pub(crate) mod source { tracing::warn!("JWT Token authentication is specified, but token is empty"); break 'out None; }; - let secret = - crate::shared::utils::get_secret_from_volume(&token.name, &token.key) - .unwrap(); + let secret = crate::shared::create_components::get_secret_from_volume( + &token.name, + &token.key, + ) + .unwrap(); Some(PulsarAuth::JWT(secret)) } None => None, @@ -359,6 +362,7 @@ pub(crate) mod transformer { #[derive(Debug, Clone, PartialEq)] pub(crate) struct TransformerConfig { + pub(crate) concurrency: usize, pub(crate) transformer_type: TransformerType, } @@ -609,6 +613,7 @@ mod transformer_tests { fn test_transformer_config_user_defined() { let user_defined_config = UserDefinedConfig::default(); let transformer_config = TransformerConfig { + concurrency: 1, transformer_type: TransformerType::UserDefined(user_defined_config.clone()), }; if let TransformerType::UserDefined(config) = transformer_config.transformer_type { diff --git a/rust/numaflow-core/src/config/monovertex.rs b/rust/numaflow-core/src/config/monovertex.rs index 0d1b0c1a9..356e97d82 100644 --- a/rust/numaflow-core/src/config/monovertex.rs +++ b/rust/numaflow-core/src/config/monovertex.rs @@ -12,8 +12,8 @@ use crate::config::components::transformer::{ TransformerConfig, TransformerType, UserDefinedConfig, }; use crate::config::components::{sink, source}; +use crate::config::get_vertex_replica; use crate::error::Error; -use crate::message::get_vertex_replica; use crate::Result; const DEFAULT_BATCH_SIZE: u64 = 500; @@ -94,6 +94,7 @@ impl MonovertexConfig { .as_ref() .and_then(|source| source.transformer.as_ref()) .map(|_| TransformerConfig { + concurrency: batch_size as usize, // FIXME: introduce a new config called udf concurrency in the spec transformer_type: TransformerType::UserDefined(UserDefinedConfig::default()), }); diff --git a/rust/numaflow-core/src/config/pipeline.rs b/rust/numaflow-core/src/config/pipeline.rs index 4767c0aa7..c05ca73d3 100644 --- a/rust/numaflow-core/src/config/pipeline.rs +++ b/rust/numaflow-core/src/config/pipeline.rs @@ -11,9 +11,9 @@ use crate::config::components::metrics::MetricsConfig; use crate::config::components::sink::SinkConfig; use crate::config::components::source::SourceConfig; use crate::config::components::transformer::{TransformerConfig, TransformerType}; +use crate::config::get_vertex_replica; use crate::config::pipeline::isb::{BufferReaderConfig, BufferWriterConfig}; use crate::error::Error; -use crate::message::get_vertex_replica; use crate::Result; const DEFAULT_BATCH_SIZE: u64 = 500; @@ -31,7 +31,7 @@ pub(crate) struct PipelineConfig { pub(crate) replica: u16, pub(crate) batch_size: usize, // FIXME(cr): we cannot leak this as a paf, we need to use a different terminology.
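Worth calling out in the config.rs hunk above: every environment-derived value is resolved once and then cached in a `std::sync::OnceLock`, so later callers get a cheap `&'static` handle and every component observes the same value even if the process environment changes afterwards. A minimal, self-contained sketch of that pattern (the variable and function names are illustrative, not the crate's API):

```rust
use std::env;
use std::sync::OnceLock;

/// Cached vertex name, resolved from the environment exactly once.
static VERTEX_NAME: OnceLock<String> = OnceLock::new();

/// Prefer the mono-vertex variable, fall back to the pipeline-vertex one,
/// and default to "" when neither is set.
fn vertex_name() -> &'static str {
    VERTEX_NAME.get_or_init(|| {
        env::var("NUMAFLOW_MONO_VERTEX_NAME")
            .or_else(|_| env::var("NUMAFLOW_VERTEX_NAME"))
            .unwrap_or_default()
    })
}

fn main() {
    env::set_var("NUMAFLOW_VERTEX_NAME", "my-vertex"); // safe on edition 2021
    assert_eq!(vertex_name(), "my-vertex"); // first call computes and caches
    env::set_var("NUMAFLOW_VERTEX_NAME", "changed");
    assert_eq!(vertex_name(), "my-vertex"); // later calls return the cached value
}
```

Since the workspace is already on edition 2021, `OnceLock` covers this without pulling in `once_cell` or `lazy_static`.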
- pub(crate) paf_batch_size: usize, + pub(crate) paf_concurrency: usize, pub(crate) read_timeout: Duration, pub(crate) js_client_config: isb::jetstream::ClientConfig, // TODO: make it enum, since we can have different ISB implementations pub(crate) from_vertex_config: Vec, @@ -47,7 +47,7 @@ impl Default for PipelineConfig { vertex_name: "default-vtx".to_string(), replica: 0, batch_size: DEFAULT_BATCH_SIZE as usize, - paf_batch_size: (DEFAULT_BATCH_SIZE * 2) as usize, + paf_concurrency: (DEFAULT_BATCH_SIZE * 2) as usize, read_timeout: Duration::from_secs(DEFAULT_TIMEOUT_IN_MS as u64), js_client_config: isb::jetstream::ClientConfig::default(), from_vertex_config: vec![], @@ -150,6 +150,7 @@ impl PipelineConfig { let vertex: VertexType = if let Some(source) = vertex_obj.spec.source { let transformer_config = source.transformer.as_ref().map(|_| TransformerConfig { + concurrency: batch_size as usize, // FIXME: introduce a separate field in the spec transformer_type: TransformerType::UserDefined(Default::default()), }); @@ -211,8 +212,12 @@ impl PipelineConfig { let partition_count = edge.to_vertex_partition_count.unwrap_or_default() as u16; let buffer_name = format!("{}-{}-{}", namespace, pipeline_name, edge.to); - let streams: Vec<(String, u16)> = (0..partition_count) - .map(|i| (format!("{}-{}", buffer_name, i), i)) + let streams: Vec<(&'static str, u16)> = (0..partition_count) + .map(|i| { + let stream: &'static str = + Box::leak(Box::new(format!("{}-{}", buffer_name, i))); + (stream, i) + }) .collect(); from_vertex_config.push(FromVertexConfig { @@ -265,7 +270,7 @@ impl PipelineConfig { Ok(PipelineConfig { batch_size: batch_size as usize, - paf_batch_size: env::var("PAF_BATCH_SIZE") + paf_concurrency: env::var("PAF_BATCH_SIZE") .unwrap_or("30000".to_string()) .parse() .unwrap(), @@ -297,7 +302,7 @@ mod tests { vertex_name: "default-vtx".to_string(), replica: 0, batch_size: DEFAULT_BATCH_SIZE as usize, - paf_batch_size: (DEFAULT_BATCH_SIZE * 2) as usize, + paf_concurrency: (DEFAULT_BATCH_SIZE * 2) as usize, read_timeout: Duration::from_secs(DEFAULT_TIMEOUT_IN_MS as u64), js_client_config: isb::jetstream::ClientConfig::default(), from_vertex_config: vec![], @@ -343,7 +348,7 @@ mod tests { vertex_name: "out".to_string(), replica: 0, batch_size: 500, - paf_batch_size: 30000, + paf_concurrency: 30000, read_timeout: Duration::from_secs(1), js_client_config: isb::jetstream::ClientConfig { url: "localhost:4222".to_string(), @@ -389,7 +394,7 @@ mod tests { vertex_name: "in".to_string(), replica: 0, batch_size: 1000, - paf_batch_size: 30000, + paf_concurrency: 30000, read_timeout: Duration::from_secs(1), js_client_config: isb::jetstream::ClientConfig { url: "localhost:4222".to_string(), @@ -442,7 +447,7 @@ mod tests { vertex_name: "in".to_string(), replica: 0, batch_size: 50, - paf_batch_size: 30000, + paf_concurrency: 30000, read_timeout: Duration::from_secs(1), js_client_config: isb::jetstream::ClientConfig { url: "localhost:4222".to_string(), diff --git a/rust/numaflow-core/src/config/pipeline/isb.rs b/rust/numaflow-core/src/config/pipeline/isb.rs index c010f9a15..30c72351c 100644 --- a/rust/numaflow-core/src/config/pipeline/isb.rs +++ b/rust/numaflow-core/src/config/pipeline/isb.rs @@ -75,7 +75,7 @@ impl fmt::Display for BufferFullStrategy { #[derive(Debug, Clone, PartialEq)] pub(crate) struct BufferReaderConfig { pub(crate) partitions: u16, - pub(crate) streams: Vec<(String, u16)>, + pub(crate) streams: Vec<(&'static str, u16)>, pub(crate) wip_ack_interval: Duration, } @@ -83,7 +83,7 @@ impl 
Default for BufferReaderConfig { fn default() -> Self { BufferReaderConfig { partitions: DEFAULT_PARTITIONS, - streams: vec![("default-0".to_string(), DEFAULT_PARTITION_IDX)], + streams: vec![("default-0", DEFAULT_PARTITION_IDX)], wip_ack_interval: Duration::from_millis(DEFAULT_WIP_ACK_INTERVAL_MILLIS), } } @@ -138,7 +138,7 @@ mod tests { fn test_default_buffer_reader_config() { let expected = BufferReaderConfig { partitions: DEFAULT_PARTITIONS, - streams: vec![("default-0".to_string(), DEFAULT_PARTITION_IDX)], + streams: vec![("default-0", DEFAULT_PARTITION_IDX)], wip_ack_interval: Duration::from_millis(DEFAULT_WIP_ACK_INTERVAL_MILLIS), }; let config = BufferReaderConfig::default(); diff --git a/rust/numaflow-core/src/lib.rs b/rust/numaflow-core/src/lib.rs index e324c0ff3..f90633d06 100644 --- a/rust/numaflow-core/src/lib.rs +++ b/rust/numaflow-core/src/lib.rs @@ -9,9 +9,10 @@ use crate::config::{config, CustomResourceType}; mod error; pub(crate) use crate::error::{Error, Result}; -/// MonoVertex is a simplified version of the [Pipeline] spec which is ideal for high TPS, low latency +/// [MonoVertex] is a simplified version of the [Pipeline] spec which is ideal for high TPS, low latency /// use-cases which do not require [ISB]. /// +/// [MonoVertex]: https://numaflow.numaproj.io/core-concepts/monovertex/ /// [Pipeline]: https://numaflow.numaproj.io/core-concepts/pipeline/ /// [ISB]: https://numaflow.numaproj.io/core-concepts/inter-step-buffer/ pub mod monovertex; diff --git a/rust/numaflow-core/src/message.rs b/rust/numaflow-core/src/message.rs index 3d8407ec5..10cb3063a 100644 --- a/rust/numaflow-core/src/message.rs +++ b/rust/numaflow-core/src/message.rs @@ -1,7 +1,6 @@ use std::cmp::PartialEq; use std::collections::HashMap; -use std::sync::OnceLock; -use std::{env, fmt}; +use std::fmt; use async_nats::HeaderValue; use base64::engine::general_purpose::STANDARD as BASE64_STANDARD; @@ -17,35 +16,12 @@ use prost::Message as ProtoMessage; use serde::{Deserialize, Serialize}; use tokio::sync::oneshot; -use crate::shared::utils::{prost_timestamp_from_utc, utc_from_timestamp}; -use crate::Error; +use crate::shared::grpc::prost_timestamp_from_utc; +use crate::shared::grpc::utc_from_timestamp; use crate::Result; +use crate::{config, Error}; -const NUMAFLOW_MONO_VERTEX_NAME: &str = "NUMAFLOW_MONO_VERTEX_NAME"; -const NUMAFLOW_VERTEX_NAME: &str = "NUMAFLOW_VERTEX_NAME"; -const NUMAFLOW_REPLICA: &str = "NUMAFLOW_REPLICA"; - -static VERTEX_NAME: OnceLock = OnceLock::new(); - -pub(crate) fn get_vertex_name() -> &'static str { - VERTEX_NAME.get_or_init(|| { - env::var(NUMAFLOW_MONO_VERTEX_NAME) - .or_else(|_| env::var(NUMAFLOW_VERTEX_NAME)) - .unwrap_or_default() - }) -} - -static VERTEX_REPLICA: OnceLock = OnceLock::new(); - -// fetch the vertex replica information from the environment variable -pub(crate) fn get_vertex_replica() -> &'static u16 { - VERTEX_REPLICA.get_or_init(|| { - env::var(NUMAFLOW_REPLICA) - .unwrap_or_default() - .parse() - .unwrap_or_default() - }) -} +const DROP: &str = "U+005C__DROP__"; /// A message that is sent from the source to the sink. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -103,7 +79,7 @@ impl TryFrom for Message { let event_time = Utc::now(); let offset = None; let id = MessageID { - vertex_name: get_vertex_name().to_string(), + vertex_name: config::get_vertex_name().to_string(), offset: "0".to_string(), index: 0, }; @@ -119,6 +95,13 @@ impl TryFrom for Message { } } +impl Message { + // Check if the message should be dropped. 
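A note on the pipeline.rs and isb.rs hunks above: the per-partition stream names move from `Vec<(String, u16)>` to `Vec<(&'static str, u16)>` by leaking the one-time allocation. Because the names are built exactly once while the config is parsed and must live for the rest of the process anyway, the leak is bounded, and downstream readers copy a thin `&str` instead of cloning heap `String`s. A sketch of the idea under those assumptions (`into_boxed_str` here is equivalent to the hunk's `Box::leak(Box::new(...))`, minus one level of indirection):

```rust
/// Build partition stream names once at startup. Leaking the boxed str
/// yields `&'static str`, so hot paths can copy the reference freely.
/// The leak happens once per stream at config time, never per message.
fn stream_names(buffer_name: &str, partitions: u16) -> Vec<(&'static str, u16)> {
    (0..partitions)
        .map(|i| {
            let leaked: &'static str =
                Box::leak(format!("{}-{}", buffer_name, i).into_boxed_str());
            (leaked, i)
        })
        .collect()
}

fn main() {
    let streams = stream_names("ns-pl-out", 2);
    assert_eq!(streams, vec![("ns-pl-out-0", 0), ("ns-pl-out-1", 1)]);
}
```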
+ pub(crate) fn dropped(&self) -> bool { + self.keys.len() == 1 && self.keys[0] == DROP + } +} + /// IntOffset is integer based offset enum type. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct IntOffset { @@ -163,6 +146,7 @@ impl fmt::Display for StringOffset { } } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub(crate) enum ReadAck { /// Message was successfully processed. Ack, @@ -319,7 +303,7 @@ impl TryFrom for Message { offset: Some(source_offset.clone()), event_time: utc_from_timestamp(result.event_time), id: MessageID { - vertex_name: get_vertex_name().to_string(), + vertex_name: config::get_vertex_name().to_string(), offset: source_offset.to_string(), index: 0, }, diff --git a/rust/numaflow-core/src/metrics.rs b/rust/numaflow-core/src/metrics.rs index 0cce19f95..81c8a3cca 100644 --- a/rust/numaflow-core/src/metrics.rs +++ b/rust/numaflow-core/src/metrics.rs @@ -27,11 +27,10 @@ use tonic::transport::Channel; use tonic::Request; use tracing::{debug, error, info}; -use crate::source::SourceHandle; +use crate::config::{get_pipeline_name, get_vertex_name, get_vertex_replica}; +use crate::source::Source; use crate::Error; -pub const COMPONENT_MVTX: &str = "mono-vertex"; - // SDK information const SDK_INFO: &str = "sdk_info"; const COMPONENT: &str = "component"; @@ -74,7 +73,7 @@ const DROPPED_TOTAL: &str = "dropped"; const FALLBACK_SINK_WRITE_TOTAL: &str = "write"; // pending as gauge -const SOURCE_PENDING: &str = "pending"; +const PENDING: &str = "pending"; // processing times as timers const E2E_TIME: &str = "processing_time"; @@ -185,7 +184,7 @@ pub(crate) struct MonoVtxMetrics { pub(crate) dropped_total: Family<Vec<(String, String)>, Counter>, // gauge - pub(crate) source_pending: Family<Vec<(String, String)>, Gauge>, + pub(crate) pending: Family<Vec<(String, String)>, Gauge>, // timers pub(crate) e2e_time: Family<Vec<(String, String)>, Histogram>, @@ -201,6 +200,7 @@ pub(crate) struct MonoVtxMetrics { // TODO: Add the metrics for the pipeline pub(crate) struct PipelineMetrics { pub(crate) forwarder: PipelineForwarderMetrics, + pub(crate) isb: PipelineISBMetrics, } /// Family of metrics for the sink @@ -221,7 +221,18 @@ pub(crate) struct TransformerMetrics { } pub(crate) struct PipelineForwarderMetrics { - pub(crate) data_read: Family<Vec<(String, String)>, Counter>, + pub(crate) read_total: Family<Vec<(String, String)>, Counter>, + pub(crate) read_time: Family<Vec<(String, String)>, Histogram>, + pub(crate) ack_total: Family<Vec<(String, String)>, Counter>, + pub(crate) ack_time: Family<Vec<(String, String)>, Histogram>, + pub(crate) write_total: Family<Vec<(String, String)>, Counter>, + pub(crate) read_bytes_total: Family<Vec<(String, String)>, Counter>, + pub(crate) processed_time: Family<Vec<(String, String)>, Histogram>, + pub(crate) pending: Family<Vec<(String, String)>, Gauge>, +} + +pub(crate) struct PipelineISBMetrics { + pub(crate) paf_resolution_time: Family<Vec<(String, String)>, Histogram>, } /// Exponential bucket distribution with range.
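For readers not familiar with the metric types above: `prometheus_client`'s `Family` keeps one time-series per distinct label vector, which is why every field is keyed by `Vec<(String, String)>`. A standalone register/update/encode sketch, assuming roughly the prometheus-client 0.22 API this module builds on (metric and label names are illustrative):

```rust
use prometheus_client::encoding::text::encode;
use prometheus_client::metrics::counter::Counter;
use prometheus_client::metrics::family::Family;
use prometheus_client::registry::Registry;

fn main() {
    let mut registry = Registry::default();

    // One counter per distinct label vector, as in the structs above.
    let read_total = Family::<Vec<(String, String)>, Counter>::default();
    registry.register("read", "Total number of messages read", read_total.clone());

    let labels = vec![
        ("mvtx_name".to_string(), "simple-mvtx".to_string()),
        ("mvtx_replica".to_string(), "0".to_string()),
    ];
    read_total.get_or_create(&labels).inc_by(500);

    // Renders in exposition format, e.g. read_total{mvtx_name="simple-mvtx",...} 500
    let mut out = String::new();
    encode(&mut out, &registry).unwrap();
    print!("{out}");
}
```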
@@ -254,7 +265,7 @@ impl MonoVtxMetrics { ack_total: Family::<Vec<(String, String)>, Counter>::default(), dropped_total: Family::<Vec<(String, String)>, Counter>::default(), // gauge - source_pending: Family::<Vec<(String, String)>, Gauge>::default(), + pending: Family::<Vec<(String, String)>, Gauge>::default(), // timers // exponential buckets in the range 100 microseconds to 15 minutes e2e_time: Family::<Vec<(String, String)>, Histogram>::new_with_constructor(|| { @@ -312,9 +323,9 @@ impl MonoVtxMetrics { // gauges registry.register( - SOURCE_PENDING, + PENDING, "A Gauge to keep track of the total number of pending messages for the monovtx", - metrics.source_pending.clone(), + metrics.pending.clone(), ); // timers registry.register( @@ -370,7 +381,26 @@ impl PipelineMetrics { fn new() -> Self { let metrics = Self { forwarder: PipelineForwarderMetrics { - data_read: Default::default(), + read_total: Family::<Vec<(String, String)>, Counter>::default(), + processed_time: Family::<Vec<(String, String)>, Histogram>::new_with_constructor( + || Histogram::new(exponential_buckets_range(100.0, 60000000.0 * 15.0, 10)), + ), + read_time: Family::<Vec<(String, String)>, Histogram>::new_with_constructor(|| { + Histogram::new(exponential_buckets_range(100.0, 60000000.0 * 15.0, 10)) + }), + read_bytes_total: Family::<Vec<(String, String)>, Counter>::default(), + ack_total: Family::<Vec<(String, String)>, Counter>::default(), + ack_time: Family::<Vec<(String, String)>, Histogram>::new_with_constructor(|| { + Histogram::new(exponential_buckets_range(100.0, 60000000.0 * 15.0, 10)) + }), + pending: Family::<Vec<(String, String)>, Gauge>::default(), + write_total: Family::<Vec<(String, String)>, Counter>::default(), }, + isb: PipelineISBMetrics { + paf_resolution_time: + Family::<Vec<(String, String)>, Histogram>::new_with_constructor(|| { + Histogram::new(exponential_buckets_range(100.0, 60000000.0 * 15.0, 10)) + }), }, }; let mut registry = global_registry().registry.lock(); @@ -380,7 +410,37 @@ impl PipelineMetrics { forwarder_registry.register( PIPELINE_FORWARDER_READ_TOTAL, "Total number of Data Messages Read", - metrics.forwarder.data_read.clone(), + metrics.forwarder.read_total.clone(), + ); + forwarder_registry.register( + READ_TIME, + "Time taken to read data", + metrics.forwarder.read_time.clone(), + ); + forwarder_registry.register( + READ_BYTES_TOTAL, + "Total number of bytes read", + metrics.forwarder.read_bytes_total.clone(), + ); + forwarder_registry.register( + E2E_TIME, + "Time taken to process data", + metrics.forwarder.processed_time.clone(), + ); + forwarder_registry.register( + ACK_TOTAL, + "Total number of Ack Messages", + metrics.forwarder.ack_total.clone(), + ); + forwarder_registry.register( + ACK_TIME, + "Time taken to ack data", + metrics.forwarder.ack_time.clone(), + ); + forwarder_registry.register( + PENDING, + "Number of pending messages", + metrics.forwarder.pending.clone(), ); metrics } @@ -391,16 +451,16 @@ static MONOVTX_METRICS: OnceLock<MonoVtxMetrics> = OnceLock::new(); // forward_metrics is a helper function used to fetch the // MonoVtxMetrics object -pub(crate) fn forward_mvtx_metrics() -> &'static MonoVtxMetrics { +pub(crate) fn monovertex_metrics() -> &'static MonoVtxMetrics { MONOVTX_METRICS.get_or_init(MonoVtxMetrics::new) } /// PIPELINE_METRICS is the PipelineMetrics object which stores the metrics static PIPELINE_METRICS: OnceLock<PipelineMetrics> = OnceLock::new(); -// forward_pipeline_metrics is a helper function used to fetch the +// pipeline_metrics is a helper function used to fetch the // PipelineMetrics object -pub(crate) fn forward_pipeline_metrics() -> &'static PipelineMetrics { +pub(crate) fn pipeline_metrics() -> &'static PipelineMetrics { PIPELINE_METRICS.get_or_init(PipelineMetrics::new) } @@ -427,14 +487,11 @@ static MONOVTX_METRICS_LABELS: OnceLock<Vec<(String, String)>> = OnceLock::new() //
forward_metrics_labels is a helper function used to fetch the // MONOVTX_METRICS_LABELS object -pub(crate) fn mvtx_forward_metric_labels( - mvtx_name: String, - replica: u16, -) -> &'static Vec<(String, String)> { +pub(crate) fn mvtx_forward_metric_labels() -> &'static Vec<(String, String)> { MONOVTX_METRICS_LABELS.get_or_init(|| { let common_labels = vec![ - (MVTX_NAME_LABEL.to_string(), mvtx_name), - (REPLICA_LABEL.to_string(), replica.to_string()), + (MVTX_NAME_LABEL.to_string(), get_vertex_name().to_string()), + (REPLICA_LABEL.to_string(), get_vertex_replica().to_string()), ]; common_labels }) @@ -442,26 +499,58 @@ pub(crate) fn mvtx_forward_metric_labels( static PIPELINE_READ_METRICS_LABELS: OnceLock<Vec<(String, String)>> = OnceLock::new(); -pub(crate) fn pipeline_forward_read_metric_labels( - pipeline_name: &str, - partition_name: &str, - vertex_name: &str, +pub(crate) fn pipeline_forward_metric_labels( vertex_type: &str, - replica: u16, + partition_name: Option<&str>, ) -> &'static Vec<(String, String)> { PIPELINE_READ_METRICS_LABELS.get_or_init(|| { - vec![ - (PIPELINE_NAME_LABEL.to_string(), pipeline_name.to_string()), - (PIPELINE_REPLICA_LABEL.to_string(), replica.to_string()), + let mut labels = vec![ ( - PIPELINE_PARTITION_NAME_LABEL.to_string(), - partition_name.to_string(), + PIPELINE_NAME_LABEL.to_string(), + get_pipeline_name().to_string(), + ), + ( + PIPELINE_REPLICA_LABEL.to_string(), + get_vertex_replica().to_string(), ), ( PIPELINE_VERTEX_TYPE_LABEL.to_string(), vertex_type.to_string(), ), - (PIPELINE_VERTEX_LABEL.to_string(), vertex_name.to_string()), + ( + PIPELINE_VERTEX_LABEL.to_string(), + get_vertex_name().to_string(), + ), + ]; + + if let Some(partition) = partition_name { + labels.push(( + PIPELINE_PARTITION_NAME_LABEL.to_string(), + partition.to_string(), + )); + } + + labels }) } + +static PIPELINE_ISB_METRICS_LABELS: OnceLock<Vec<(String, String)>> = OnceLock::new(); + +pub(crate) fn pipeline_isb_metric_labels() -> &'static Vec<(String, String)> { + PIPELINE_ISB_METRICS_LABELS.get_or_init(|| { + vec![ + ( + PIPELINE_NAME_LABEL.to_string(), + get_pipeline_name().to_string(), + ), + ( + PIPELINE_REPLICA_LABEL.to_string(), + get_vertex_replica().to_string(), + ), + ( + PIPELINE_VERTEX_LABEL.to_string(), + get_vertex_name().to_string(), + ), ] }) } @@ -595,9 +684,7 @@ struct TimestampedPending { /// and exposing the metrics. It maintains a list of pending stats and ensures that /// only the most recent entries are kept. pub(crate) struct PendingReader { - mvtx_name: String, - replica: u16, - lag_reader: SourceHandle, + lag_reader: Source, lag_checking_interval: Duration, refresh_interval: Duration, pending_stats: Arc<Mutex<Vec<TimestampedPending>>>, } @@ -610,18 +697,14 @@ pub(crate) struct PendingReaderTasks { /// PendingReaderBuilder is used to build a [LagReader] instance.
pub(crate) struct PendingReaderBuilder { - mvtx_name: String, - replica: u16, - lag_reader: SourceHandle, + lag_reader: Source, lag_checking_interval: Option<Duration>, refresh_interval: Option<Duration>, } impl PendingReaderBuilder { - pub(crate) fn new(mvtx_name: String, replica: u16, lag_reader: SourceHandle) -> Self { + pub(crate) fn new(lag_reader: Source) -> Self { Self { - mvtx_name, - replica, lag_reader, lag_checking_interval: None, refresh_interval: None, @@ -640,8 +723,6 @@ impl PendingReaderBuilder { pub(crate) fn build(self) -> PendingReader { PendingReader { - mvtx_name: self.mvtx_name, - replica: self.replica, lag_reader: self.lag_reader, lag_checking_interval: self .lag_checking_interval @@ -662,7 +743,7 @@ impl PendingReader { /// - Another to periodically expose the pending metrics. /// /// Dropping the PendingReaderTasks will abort the background tasks. - pub async fn start(&self) -> PendingReaderTasks { + pub async fn start(&self, is_mono_vertex: bool) -> PendingReaderTasks { let pending_reader = self.lag_reader.clone(); let lag_checking_interval = self.lag_checking_interval; let refresh_interval = self.refresh_interval; @@ -673,10 +754,8 @@ impl PendingReader { }); let pending_stats = self.pending_stats.clone(); - let mvtx_name = self.mvtx_name.clone(); - let replica = self.replica; let expose_handle = tokio::spawn(async move { - expose_pending_metrics(mvtx_name, replica, refresh_interval, pending_stats).await; + expose_pending_metrics(is_mono_vertex, refresh_interval, pending_stats).await; }); PendingReaderTasks { buildup_handle, @@ -696,7 +775,7 @@ impl Drop for PendingReaderTasks { /// Periodically checks the pending messages from the source client and build the pending stats. async fn build_pending_info( - source: SourceHandle, + source: Source, lag_checking_interval: Duration, pending_stats: Arc<Mutex<Vec<TimestampedPending>>>, ) { @@ -725,7 +804,7 @@ async fn build_pending_info( } } -async fn fetch_pending(lag_reader: &SourceHandle) -> crate::error::Result<i64> { +async fn fetch_pending(lag_reader: &Source) -> crate::error::Result<i64> { let response: i64 = lag_reader.pending().await?.map_or(-1, |p| p as i64); // default to -1(unavailable) Ok(response) } @@ -735,8 +814,7 @@ const LOOKBACK_SECONDS_MAP: [(&str, i64); 4] = // Periodically exposes the pending metrics by calculating the average pending messages over different intervals.
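The comment above describes the averaging that feeds the pending gauge: samples are buffered with timestamps, and for each lookback window in `LOOKBACK_SECONDS_MAP` the exposed value is the mean of the samples inside that window, with -1 as the "unavailable" sentinel (matching `fetch_pending`). A standalone sketch of just that windowed average; the real code keeps the samples behind an `Arc<Mutex<Vec<TimestampedPending>>>` and uses wall-clock time, which this omits:

```rust
/// A pending-count sample taken at `timestamp` (seconds).
struct TimestampedPending {
    pending: i64,
    timestamp: i64,
}

/// Average of the samples observed within the last `seconds` before `now`;
/// -1 means no sample fell inside the window.
fn calculate_pending(seconds: i64, now: i64, samples: &[TimestampedPending]) -> i64 {
    let cutoff = now - seconds;
    let (mut sum, mut n) = (0i64, 0i64);
    // Samples are appended in time order, so scan newest to oldest and stop early.
    for s in samples.iter().rev() {
        if s.timestamp < cutoff {
            break;
        }
        sum += s.pending;
        n += 1;
    }
    if n == 0 { -1 } else { sum / n }
}

fn main() {
    let samples = vec![
        TimestampedPending { pending: 10, timestamp: 100 },
        TimestampedPending { pending: 20, timestamp: 150 },
        TimestampedPending { pending: 30, timestamp: 170 },
    ];
    // A 60s window ending at t=180 covers the samples at t=150 and t=170.
    assert_eq!(calculate_pending(60, 180, &samples), 25);
    assert_eq!(calculate_pending(5, 180, &samples), -1); // nothing in window
}
```

The diff of `expose_pending_metrics` itself follows.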
async fn expose_pending_metrics( - mvtx_name: String, - replica: u16, + is_mono_vertex: bool, refresh_interval: Duration, pending_stats: Arc<Mutex<Vec<TimestampedPending>>>, ) { @@ -751,14 +829,21 @@ async fn expose_pending_metrics( for (label, seconds) in LOOKBACK_SECONDS_MAP { let pending = calculate_pending(seconds, &pending_stats).await; if pending != -1 { - let mut metric_labels = - mvtx_forward_metric_labels(mvtx_name.clone(), replica).clone(); + let mut metric_labels = mvtx_forward_metric_labels().clone(); metric_labels.push((PENDING_PERIOD_LABEL.to_string(), label.to_string())); pending_info.insert(label, pending); - forward_mvtx_metrics() - .source_pending - .get_or_create(&metric_labels) - .set(pending); + if is_mono_vertex { + monovertex_metrics() + .pending + .get_or_create(&metric_labels) + .set(pending); + } else { + pipeline_metrics() + .forwarder + .pending + .get_or_create(&metric_labels) + .set(pending); + } } } // skip for those the pending is not implemented @@ -806,7 +891,7 @@ mod tests { use tokio::sync::mpsc::Sender; use super::*; - use crate::shared::utils::create_rpc_channel; + use crate::shared::grpc::create_rpc_channel; struct SimpleSource; #[tonic::async_trait] @@ -913,7 +998,7 @@ mod tests { // wait for the servers to start // FIXME: we need to have a better way, this is flaky - tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; + tokio::time::sleep(Duration::from_millis(100)).await; let metrics_state = UserDefinedContainerState::Monovertex(MonovertexContainerState { source_client: Some(SourceClient::new( create_rpc_channel(src_sock_file).await.unwrap(), @@ -991,8 +1076,7 @@ mod tests { tokio::spawn({ let pending_stats = pending_stats.clone(); async move { - expose_pending_metrics("test".to_string(), 0, refresh_interval, pending_stats) - .await; + expose_pending_metrics(true, refresh_interval, pending_stats).await; } }); // We use tokio::time::interval() as the ticker in the expose_pending_metrics() function. @@ -1004,10 +1088,10 @@ mod tests { let mut stored_values: [i64; 4] = [0; 4]; { for (i, (label, _)) in LOOKBACK_SECONDS_MAP.iter().enumerate() { - let mut metric_labels = mvtx_forward_metric_labels("test".to_string(), 0).clone(); + let mut metric_labels = mvtx_forward_metric_labels().clone(); metric_labels.push((PENDING_PERIOD_LABEL.to_string(), label.to_string())); - let guage = forward_mvtx_metrics() - .source_pending + let guage = monovertex_metrics() + .pending .get_or_create(&metric_labels) .get(); stored_values[i] = guage; @@ -1077,7 +1161,7 @@ mod tests { ); global_metrics.sdk_info.get_or_create(&sdk_labels).set(1); - let metrics = forward_mvtx_metrics(); + let metrics = monovertex_metrics(); // Use a fixed set of labels instead of the ones from mvtx_forward_metric_labels() since other test functions may also set it.
let common_labels = vec![ ( @@ -1091,7 +1175,7 @@ mod tests { metrics.read_bytes_total.get_or_create(&common_labels).inc(); metrics.ack_total.get_or_create(&common_labels).inc(); metrics.dropped_total.get_or_create(&common_labels).inc(); - metrics.source_pending.get_or_create(&common_labels).set(10); + metrics.pending.get_or_create(&common_labels).set(10); metrics.e2e_time.get_or_create(&common_labels).observe(10.0); metrics.read_time.get_or_create(&common_labels).observe(3.0); metrics.ack_time.get_or_create(&common_labels).observe(2.0); diff --git a/rust/numaflow-core/src/monovertex.rs b/rust/numaflow-core/src/monovertex.rs index 8e6d9cad1..b8016624f 100644 --- a/rust/numaflow-core/src/monovertex.rs +++ b/rust/numaflow-core/src/monovertex.rs @@ -1,27 +1,15 @@ use forwarder::ForwarderBuilder; -use numaflow_pb::clients::sink::sink_client::SinkClient; -use numaflow_pb::clients::source::source_client::SourceClient; -use numaflow_pb::clients::sourcetransformer::source_transform_client::SourceTransformClient; use tokio_util::sync::CancellationToken; -use tonic::transport::Channel; use tracing::info; -use crate::config::components::{sink, source, transformer}; +use crate::config::is_mono_vertex; use crate::config::monovertex::MonovertexConfig; -use crate::error::{self, Error}; -use crate::metrics; -use crate::shared::server_info::{sdk_server_info, ContainerType}; -use crate::shared::utils; -use crate::shared::utils::{ - create_rpc_channel, wait_until_sink_ready, wait_until_source_ready, - wait_until_transformer_ready, -}; -use crate::sink::{SinkClientType, SinkHandle}; -use crate::source::generator::new_generator; -use crate::source::pulsar::new_pulsar_source; -use crate::source::user_defined::new_source; -use crate::source::{SourceHandle, SourceType}; -use crate::transformer::user_defined::SourceTransformHandle; +use crate::error::{self}; +use crate::shared::create_components; +use crate::sink::SinkWriter; +use crate::source::Source; +use crate::transformer::Transformer; +use crate::{metrics, shared}; /// [forwarder] orchestrates data movement from the Source to the Sink via the optional SourceTransformer. /// The forward-a-chunk executes the following in an infinite loop till a shutdown signal is received: @@ -35,155 +23,31 @@ pub(crate) async fn start_forwarder( cln_token: CancellationToken, config: &MonovertexConfig, ) -> error::Result<()> { - let mut source_grpc_client = if let source::SourceType::UserDefined(source_config) = - &config.source_config.source_type - { - // do server compatibility check - let server_info = sdk_server_info( - source_config.server_info_path.clone().into(), - cln_token.clone(), - ) - .await?; - - let metric_labels = metrics::sdk_info_labels( - metrics::COMPONENT_MVTX.to_string(), - config.name.clone(), - server_info.language, - server_info.version, - ContainerType::Sourcer.to_string(), - ); - metrics::global_metrics() - .sdk_info - .get_or_create(&metric_labels) - .set(1); - - let mut source_grpc_client = - SourceClient::new(create_rpc_channel(source_config.socket_path.clone().into()).await?) 
- .max_encoding_message_size(source_config.grpc_max_message_size) - .max_encoding_message_size(source_config.grpc_max_message_size); - - wait_until_source_ready(&cln_token, &mut source_grpc_client).await?; - Some(source_grpc_client) - } else { - None - }; - - let sink_grpc_client = if let sink::SinkType::UserDefined(udsink_config) = - &config.sink_config.sink_type - { - // do server compatibility check - let server_info = sdk_server_info( - udsink_config.server_info_path.clone().into(), - cln_token.clone(), - ) - .await?; + let (source, source_grpc_client) = create_components::create_source( + config.batch_size, + config.read_timeout, + &config.source_config, + cln_token.clone(), + ) + .await?; - let metric_labels = metrics::sdk_info_labels( - metrics::COMPONENT_MVTX.to_string(), - config.name.clone(), - server_info.language, - server_info.version, - ContainerType::Sinker.to_string(), - ); - metrics::global_metrics() - .sdk_info - .get_or_create(&metric_labels) - .set(1); - - let mut sink_grpc_client = - SinkClient::new(create_rpc_channel(udsink_config.socket_path.clone().into()).await?) - .max_encoding_message_size(udsink_config.grpc_max_message_size) - .max_encoding_message_size(udsink_config.grpc_max_message_size); - - wait_until_sink_ready(&cln_token, &mut sink_grpc_client).await?; - Some(sink_grpc_client) - } else { - None - }; - - let fb_sink_grpc_client = if let Some(fb_sink) = &config.fb_sink_config { - if let sink::SinkType::UserDefined(fb_sink_config) = &fb_sink.sink_type { - // do server compatibility check - let server_info = sdk_server_info( - fb_sink_config.server_info_path.clone().into(), - cln_token.clone(), - ) - .await?; - - let metric_labels = metrics::sdk_info_labels( - metrics::COMPONENT_MVTX.to_string(), - config.name.clone(), - server_info.language, - server_info.version, - ContainerType::FbSinker.to_string(), - ); - metrics::global_metrics() - .sdk_info - .get_or_create(&metric_labels) - .set(1); - - let mut fb_sink_grpc_client = SinkClient::new( - create_rpc_channel(fb_sink_config.socket_path.clone().into()).await?, - ) - .max_encoding_message_size(fb_sink_config.grpc_max_message_size) - .max_encoding_message_size(fb_sink_config.grpc_max_message_size); - - wait_until_sink_ready(&cln_token, &mut fb_sink_grpc_client).await?; - Some(fb_sink_grpc_client) - } else { - None - } - } else { - None - }; - - let transformer_grpc_client = if let Some(transformer) = &config.transformer_config { - if let transformer::TransformerType::UserDefined(transformer_config) = - &transformer.transformer_type - { - // do server compatibility check - let server_info = sdk_server_info( - transformer_config.server_info_path.clone().into(), - cln_token.clone(), - ) - .await?; - - let metric_labels = metrics::sdk_info_labels( - metrics::COMPONENT_MVTX.to_string(), - config.name.clone(), - server_info.language, - server_info.version, - ContainerType::SourceTransformer.to_string(), - ); - - metrics::global_metrics() - .sdk_info - .get_or_create(&metric_labels) - .set(1); - - let mut transformer_grpc_client = SourceTransformClient::new( - create_rpc_channel(transformer_config.socket_path.clone().into()).await?, - ) - .max_encoding_message_size(transformer_config.grpc_max_message_size) - .max_encoding_message_size(transformer_config.grpc_max_message_size); - - wait_until_transformer_ready(&cln_token, &mut transformer_grpc_client).await?; - Some(transformer_grpc_client.clone()) - } else { - None - } - } else { - None - }; - - let source_type = fetch_source(config, &mut source_grpc_client).await?; - 
let (sink, fb_sink) = fetch_sink( - config, - sink_grpc_client.clone(), - fb_sink_grpc_client.clone(), + let (transformer, transformer_grpc_client) = create_components::create_transformer( + config.batch_size, + config.transformer_config.clone(), + cln_token.clone(), ) .await?; + let (sink_writer, sink_grpc_client, fb_sink_grpc_client) = + create_components::create_sink_writer( + config.batch_size, + config.read_timeout, + config.sink_config.clone(), + config.fb_sink_config.clone(), + &cln_token, + ) + .await?; + // Start the metrics server in a separate background async spawn, // This should be running throughout the lifetime of the application, hence the handle is not // joined. @@ -197,145 +61,41 @@ pub(crate) async fn start_forwarder( // start the metrics server // FIXME: what to do with the handle - utils::start_metrics_server(config.metrics_config.clone(), metrics_state).await; - - let source = SourceHandle::new(source_type, config.batch_size); - start_forwarder_with_source( - config.clone(), - source, - sink, - transformer_grpc_client, - fb_sink, - cln_token, - ) - .await?; - - info!("Forwarder stopped gracefully"); - Ok(()) -} - -// fetch right the source. -// source_grpc_client can be optional because it is valid only for user-defined source. -async fn fetch_source( - config: &MonovertexConfig, - source_grpc_client: &mut Option>, -) -> crate::Result { - match &config.source_config.source_type { - source::SourceType::Generator(generator_config) => { - let (source_read, source_ack, lag_reader) = - new_generator(generator_config.clone(), config.batch_size)?; - Ok(SourceType::Generator(source_read, source_ack, lag_reader)) - } - source::SourceType::UserDefined(_) => { - let Some(source_grpc_client) = source_grpc_client.clone() else { - return Err(Error::Config( - "Configuration type is user-defined, however no grpc client is provided".into(), - )); - }; - let (source_read, source_ack, lag_reader) = - new_source(source_grpc_client, config.batch_size, config.read_timeout).await?; - Ok(SourceType::UserDefinedSource( - source_read, - source_ack, - lag_reader, - )) - } - source::SourceType::Pulsar(pulsar_config) => { - let pulsar = new_pulsar_source( - pulsar_config.clone(), - config.batch_size, - config.read_timeout, - ) - .await?; - Ok(SourceType::Pulsar(pulsar)) - } - } -} - -// fetch the actor handle for the sink. -// sink_grpc_client can be optional because it is valid only for user-defined sink. 
-async fn fetch_sink( - config: &MonovertexConfig, - sink_grpc_client: Option>, - fallback_sink_grpc_client: Option>, -) -> crate::Result<(SinkHandle, Option)> { - let fb_sink = match fallback_sink_grpc_client { - Some(fallback_sink) => Some( - SinkHandle::new( - SinkClientType::UserDefined(fallback_sink), - config.batch_size, - ) - .await?, - ), - None => { - if let Some(fb_sink_config) = &config.fb_sink_config { - if let sink::SinkType::Log(_) = &fb_sink_config.sink_type { - let log = SinkHandle::new(SinkClientType::Log, config.batch_size).await?; - return Ok((log, None)); - } - if let sink::SinkType::Blackhole(_) = &fb_sink_config.sink_type { - let blackhole = - SinkHandle::new(SinkClientType::Blackhole, config.batch_size).await?; - return Ok((blackhole, None)); - } - return Err(Error::Config( - "No valid Fallback Sink configuration found".to_string(), - )); - } + shared::metrics::start_metrics_server(config.metrics_config.clone(), metrics_state).await; - None - } - }; + start(config.clone(), source, sink_writer, transformer, cln_token).await?; - if let Some(sink_client) = sink_grpc_client { - let sink = - SinkHandle::new(SinkClientType::UserDefined(sink_client), config.batch_size).await?; - return Ok((sink, fb_sink)); - } - if let sink::SinkType::Log(_) = &config.sink_config.sink_type { - let log = SinkHandle::new(SinkClientType::Log, config.batch_size).await?; - return Ok((log, fb_sink)); - } - if let sink::SinkType::Blackhole(_) = &config.sink_config.sink_type { - let blackhole = SinkHandle::new(SinkClientType::Blackhole, config.batch_size).await?; - return Ok((blackhole, fb_sink)); - } - Err(Error::Config( - "No valid Sink configuration found".to_string(), - )) + Ok(()) } -async fn start_forwarder_with_source( +async fn start( mvtx_config: MonovertexConfig, - source: SourceHandle, - sink: SinkHandle, - transformer_client: Option>, - fallback_sink: Option, + source: Source, + sink: SinkWriter, + transformer: Option, cln_token: CancellationToken, ) -> error::Result<()> { // start the pending reader to publish pending metrics - let pending_reader = utils::create_pending_reader(&mvtx_config, source.clone()).await; - let _pending_reader_handle = pending_reader.start().await; + let pending_reader = + shared::metrics::create_pending_reader(&mvtx_config.metrics_config, source.clone()).await; + let _pending_reader_handle = pending_reader.start(is_mono_vertex()).await; - let mut forwarder_builder = ForwarderBuilder::new(source, sink, mvtx_config, cln_token); + let mut forwarder_builder = ForwarderBuilder::new(source, sink, cln_token); // add transformer if exists - if let Some(transformer_client) = transformer_client { - let transformer = SourceTransformHandle::new(transformer_client).await?; - forwarder_builder = forwarder_builder.source_transformer(transformer); + if let Some(transformer_client) = transformer { + forwarder_builder = forwarder_builder.transformer(transformer_client); } - // add fallback sink if exists - if let Some(fallback_sink) = fallback_sink { - forwarder_builder = forwarder_builder.fallback_sink_writer(fallback_sink); - } // build the final forwarder - let mut forwarder = forwarder_builder.build(); + let forwarder = forwarder_builder.build(); + + info!("Forwarder is starting..."); // start the forwarder, it will return only on Signal forwarder.start().await?; - info!("Forwarder stopped gracefully"); + info!("Forwarder stopped gracefully."); Ok(()) } diff --git a/rust/numaflow-core/src/monovertex/forwarder.rs b/rust/numaflow-core/src/monovertex/forwarder.rs index 
f84cade17..dc77154c6 100644 --- a/rust/numaflow-core/src/monovertex/forwarder.rs +++ b/rust/numaflow-core/src/monovertex/forwarder.rs @@ -1,598 +1,162 @@ -use std::collections::HashMap; +//! The forwarder for [MonoVertex] at its core orchestrates message movement asynchronously using +//! [Stream] over channels between the components. The messages are sent over these channels using the +//! [Actor Pattern]. +//! +//! ```text +//! (source) --[c]--> (transformer)* --[c]--> (sink) +//! +//! [c] - channel +//! * - optional +//! ``` +//! +//! Most of the data moves forward, except for the ack, which can happen only after the Write. +//! ```text +//! (Read) +-------> (UDF) -------> (Write) + +//! | | +//! | | +//! +-------> {Ack} <----------------+ +//! +//! {} -> Listens on a OneShot +//! () -> Streaming Interface +//! ``` +//! +//! [MonoVertex]: https://numaflow.numaproj.io/core-concepts/monovertex/ +//! [Stream]: https://docs.rs/tokio-stream/latest/tokio_stream/wrappers/struct.ReceiverStream.html +//! [Actor Pattern]: https://ryhl.io/blog/actors-with-tokio/ -use chrono::Utc; -use log::warn; -use tokio::time::sleep; use tokio_util::sync::CancellationToken; -use tracing::{debug, info}; -use crate::config::components::sink::{OnFailureStrategy, RetryConfig}; -use crate::config::monovertex::MonovertexConfig; use crate::error; -use crate::message::{Message, Offset, ResponseStatusFromSink}; -use crate::metrics; -use crate::metrics::forward_mvtx_metrics; -use crate::sink::SinkHandle; +use crate::sink::SinkWriter; +use crate::source::Source; +use crate::transformer::Transformer; use crate::Error; -use crate::{source::SourceHandle, transformer::user_defined::SourceTransformHandle}; /// Forwarder is responsible for reading messages from the source, applying transformation if /// transformer is present, writing the messages to the sink, and then acknowledging the messages /// back to the source. pub(crate) struct Forwarder { - source_reader: SourceHandle, - sink_writer: SinkHandle, - source_transformer: Option<SourceTransformHandle>, - fb_sink_writer: Option<SinkHandle>, + source: Source, + transformer: Option<Transformer>, + sink_writer: SinkWriter, cln_token: CancellationToken, - common_labels: Vec<(String, String)>, - mvtx_config: MonovertexConfig, } -/// ForwarderBuilder is used to build a Forwarder instance with optional fields.
pub(crate) struct ForwarderBuilder { - source_reader: SourceHandle, - sink_writer: SinkHandle, + source: Source, + sink_writer: SinkWriter, cln_token: CancellationToken, - source_transformer: Option<SourceTransformHandle>, - fb_sink_writer: Option<SinkHandle>, - mvtx_config: MonovertexConfig, + transformer: Option<Transformer>, } impl ForwarderBuilder { /// Create a new builder with mandatory fields pub(crate) fn new( - source_reader: SourceHandle, - sink_writer: SinkHandle, - mvtx_config: MonovertexConfig, + streaming_source: Source, + streaming_sink: SinkWriter, cln_token: CancellationToken, ) -> Self { Self { - source_reader, - sink_writer, + source: streaming_source, + sink_writer: streaming_sink, cln_token, - source_transformer: None, - fb_sink_writer: None, - mvtx_config, + transformer: None, } } /// Set the optional transformer client - pub(crate) fn source_transformer(mut self, transformer_client: SourceTransformHandle) -> Self { - self.source_transformer = Some(transformer_client); + pub(crate) fn transformer(mut self, transformer: Transformer) -> Self { + self.transformer = Some(transformer); self } - /// Set the optional fallback client - pub(crate) fn fallback_sink_writer(mut self, fallback_client: SinkHandle) -> Self { - self.fb_sink_writer = Some(fallback_client); - self - } - - /// Build the Forwarder instance + /// Build the StreamingForwarder instance #[must_use] pub(crate) fn build(self) -> Forwarder { - let common_labels = metrics::mvtx_forward_metric_labels( - self.mvtx_config.name.clone(), - self.mvtx_config.replica, - ) - .clone(); Forwarder { - source_reader: self.source_reader, + source: self.source, sink_writer: self.sink_writer, - source_transformer: self.source_transformer, - fb_sink_writer: self.fb_sink_writer, + transformer: self.transformer, cln_token: self.cln_token, - mvtx_config: self.mvtx_config, - common_labels, } } } impl Forwarder { - /// start starts the forward-a-chunk loop and exits only after a chunk has been forwarded and ack'ed. - /// this means that, in the happy path scenario a block is always completely processed. - /// this function will return on any error and will cause end up in a non-0 exit code. - pub(crate) async fn start(&mut self) -> error::Result<()> { - let mut processed_msgs_count: usize = 0; - let mut last_forwarded_at = std::time::Instant::now(); - info!("Forwarder has started"); - loop { - let start_time = tokio::time::Instant::now(); - if self.cln_token.is_cancelled() { - break; - } - - processed_msgs_count += self.read_and_process_messages().await?; - - // if the last forward was more than 1 second ago, forward a chunk print the number of messages forwarded - // TODO: add histogram details (p99, etc.)
- if last_forwarded_at.elapsed().as_millis() >= 1000 { - info!( - "Forwarded {} messages at time {}", - processed_msgs_count, - Utc::now() - ); - processed_msgs_count = 0; - last_forwarded_at = std::time::Instant::now(); + pub(crate) async fn start(&self) -> error::Result<()> { + let (messages_stream, reader_handle) = + self.source.streaming_read(self.cln_token.clone())?; + + let (transformed_messages_stream, transformer_handle) = + if let Some(transformer) = &self.transformer { + let (transformed_messages_rx, transformer_handle) = + transformer.transform_stream(messages_stream)?; + (transformed_messages_rx, Some(transformer_handle)) + } else { + (messages_stream, None) + }; + + let sink_writer_handle = self + .sink_writer + .streaming_write(transformed_messages_stream, self.cln_token.clone()) + .await?; + + match tokio::try_join!( + reader_handle, + transformer_handle.unwrap_or_else(|| tokio::spawn(async { Ok(()) })), + sink_writer_handle, + ) { + Ok((reader_result, transformer_result, sink_writer_result)) => { + reader_result?; + transformer_result?; + sink_writer_result?; + Ok(()) } - - forward_mvtx_metrics() - .e2e_time - .get_or_create(&self.common_labels) - .observe(start_time.elapsed().as_micros() as f64); - } - Ok(()) - } - - /// Read messages from the source, apply transformation if transformer is present, - /// write the messages to the sink, if fallback messages are present write them to the fallback sink, - /// and then acknowledge the messages back to the source. - async fn read_and_process_messages(&mut self) -> error::Result { - let start_time = tokio::time::Instant::now(); - let messages = self.source_reader.read().await.map_err(|e| { - Error::Forwarder(format!("Failed to read messages from source {:?}", e)) - })?; - - debug!( - "Read batch size: {} and latency - {}ms", - messages.len(), - start_time.elapsed().as_millis() - ); - - forward_mvtx_metrics() - .read_time - .get_or_create(&self.common_labels) - .observe(start_time.elapsed().as_micros() as f64); - - // read returned 0 messages, nothing more to be done. 
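The new `start()` above is the whole forwarder: each stage owns a task, stages hand streams to the next stage, and `tokio::try_join!` waits on all the `JoinHandle`s so the first failure surfaces immediately. A toy version of that shape using plain `mpsc` channels (stage names and types here are illustrative, not the crate's):

```rust
use tokio::sync::mpsc;
use tokio::task::JoinHandle;

type Result<T> = std::result::Result<T, String>;

// Source stage: produces values and returns the stream plus its task handle.
fn source_stage() -> (mpsc::Receiver<u64>, JoinHandle<Result<()>>) {
    let (tx, rx) = mpsc::channel(16);
    let h = tokio::spawn(async move {
        for i in 0..5 {
            tx.send(i).await.map_err(|e| e.to_string())?;
        }
        Ok(())
    });
    (rx, h)
}

// Optional middle stage: consumes one stream, yields another.
fn transform_stage(mut input: mpsc::Receiver<u64>) -> (mpsc::Receiver<u64>, JoinHandle<Result<()>>) {
    let (tx, rx) = mpsc::channel(16);
    let h = tokio::spawn(async move {
        while let Some(v) = input.recv().await {
            tx.send(v * 2).await.map_err(|e| e.to_string())?;
        }
        Ok(())
    });
    (rx, h)
}

// Terminal stage: drains the stream.
fn sink_stage(mut input: mpsc::Receiver<u64>) -> JoinHandle<Result<()>> {
    tokio::spawn(async move {
        while let Some(v) = input.recv().await {
            println!("sink got {v}");
        }
        Ok(())
    })
}

#[tokio::main]
async fn main() -> Result<()> {
    let (s, sh) = source_stage();
    let (t, th) = transform_stage(s);
    let wh = sink_stage(t);
    // try_join! fails fast on the first JoinError; the tasks' own Results are
    // then surfaced individually, mirroring the match in start() above.
    let (sr, tr, wr) = tokio::try_join!(sh, th, wh).map_err(|e| e.to_string())?;
    sr?;
    tr?;
    wr?;
    Ok(())
}
```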
- if messages.is_empty() { - return Ok(0); - } - - let msg_count = messages.len() as u64; - forward_mvtx_metrics() - .read_total - .get_or_create(&self.common_labels) - .inc_by(msg_count); - - let (offsets, bytes_count): (Vec, u64) = messages.iter().try_fold( - (Vec::with_capacity(messages.len()), 0), - |(mut offsets, mut bytes_count), msg| { - if let Some(offset) = &msg.offset { - offsets.push(offset.clone()); - bytes_count += msg.value.len() as u64; - Ok((offsets, bytes_count)) - } else { - Err(Error::Forwarder("Message offset is missing".to_string())) - } - }, - )?; - - forward_mvtx_metrics() - .read_bytes_total - .get_or_create(&self.common_labels) - .inc_by(bytes_count); - - // Apply transformation if transformer is present - let transformed_messages = self.apply_transformer(messages).await.map_err(|e| { - Error::Forwarder(format!( - "Failed to apply transformation to messages {:?}", - e - )) - })?; - - // Write the messages to the sink - self.write_to_sink(transformed_messages) - .await - .map_err(|e| Error::Forwarder(format!("Failed to write messages to sink {:?}", e)))?; - - // Acknowledge the messages back to the source - self.acknowledge_messages(offsets).await.map_err(|e| { - Error::Forwarder(format!( - "Failed to acknowledge messages back to source {:?}", + Err(e) => Err(Error::Forwarder(format!( + "Error while joining reader, transformer, and sink writer: {:?}", e - )) - })?; - - Ok(msg_count as usize) - } - - // Applies transformation to the messages if transformer is present - // we concurrently apply transformation to all the messages. - async fn apply_transformer(&mut self, messages: Vec) -> error::Result> { - let Some(client) = &mut self.source_transformer else { - // return early if there is no transformer - return Ok(messages); - }; - - let start_time = tokio::time::Instant::now(); - let results = client.transform(messages).await?; - - debug!( - "Transformer latency - {}ms", - start_time.elapsed().as_millis() - ); - forward_mvtx_metrics() - .transformer - .time - .get_or_create(&self.common_labels) - .observe(start_time.elapsed().as_micros() as f64); - - Ok(results) - } - - // Writes the messages to the sink and handles fallback messages if present - async fn write_to_sink(&mut self, messages: Vec) -> error::Result<()> { - let msg_count = messages.len() as u64; - - if messages.is_empty() { - return Ok(()); - } - - // this start time is for tracking the total time taken - let start_time_e2e = tokio::time::Instant::now(); - - let mut attempts = 0; - let mut error_map = HashMap::new(); - let mut fallback_msgs = Vec::new(); - // start with the original set of message to be sent. - // we will overwrite this vec with failed messages and will keep retrying. - let mut messages_to_send = messages; - - // only breaks out of this loop based on the retry strategy unless all the messages have been written to sink - // successfully. - let retry_config = &self - .mvtx_config - .sink_config - .retry_config - .clone() - .unwrap_or_default(); - - loop { - while attempts < retry_config.sink_max_retry_attempts { - let status = self - .write_to_sink_once( - &mut error_map, - &mut fallback_msgs, - &mut messages_to_send, - retry_config, - ) - .await; - match status { - Ok(true) => break, - Ok(false) => { - attempts += 1; - warn!( - "Retry attempt {} due to retryable error. 
Errors: {:?}", - attempts, error_map - ); - } - Err(e) => Err(e)?, - } - - // if we are shutting down, stop the retry - if self.cln_token.is_cancelled() { - return Err(Error::Sink( - "Cancellation token triggered during retry".to_string(), - )); - } - } - - // If after the retries we still have messages to process, handle the post retry failures - let need_retry = self.handle_sink_post_retry( - &mut attempts, - &mut error_map, - &mut fallback_msgs, - &mut messages_to_send, - retry_config, - ); - - match need_retry { - // if we are done with the messages, break the loop - Ok(false) => break, - // if we need to retry, reset the attempts and error_map - Ok(true) => { - attempts = 0; - error_map.clear(); - } - Err(e) => Err(e)?, - } - } - - // If there are fallback messages, write them to the fallback sink - if !fallback_msgs.is_empty() { - self.handle_fallback_messages(fallback_msgs, retry_config) - .await?; - } - - forward_mvtx_metrics() - .sink - .time - .get_or_create(&self.common_labels) - .observe(start_time_e2e.elapsed().as_micros() as f64); - - // update the metric for number of messages written to the sink - // this included primary and fallback sink - forward_mvtx_metrics() - .sink - .write_total - .get_or_create(&self.common_labels) - .inc_by(msg_count); - Ok(()) - } - - /// Handles the post retry failures based on the configured strategy, - /// returns true if we need to retry, else false. - fn handle_sink_post_retry( - &mut self, - attempts: &mut u16, - error_map: &mut HashMap, - fallback_msgs: &mut Vec, - messages_to_send: &mut Vec, - retry_config: &RetryConfig, - ) -> error::Result { - // if we are done with the messages, break the loop - if messages_to_send.is_empty() { - return Ok(false); - } - // check what is the failure strategy in the config - let strategy = retry_config.sink_retry_on_fail_strategy.clone(); - match strategy { - // if we need to retry, return true - OnFailureStrategy::Retry => { - warn!( - "Using onFailure Retry, Retry attempts {} completed", - attempts - ); - return Ok(true); - } - // if we need to drop the messages, log and return false - OnFailureStrategy::Drop => { - // log that we are dropping the messages as requested - warn!( - "Dropping messages after {} attempts. Errors: {:?}", - attempts, error_map - ); - // update the metrics - forward_mvtx_metrics() - .dropped_total - .get_or_create(&self.common_labels) - .inc_by(messages_to_send.len() as u64); - } - // if we need to move the messages to the fallback, return false - OnFailureStrategy::Fallback => { - // log that we are moving the messages to the fallback as requested - warn!( - "Moving messages to fallback after {} attempts. Errors: {:?}", - attempts, error_map - ); - // move the messages to the fallback messages - fallback_msgs.append(messages_to_send); - } - } - // if we are done with the messages, break the loop - Ok(false) - } - - /// Writes to sink once and will return true if successful, else false. Please note that it - /// mutates is incoming fields. 
- async fn write_to_sink_once( - &mut self, - error_map: &mut HashMap, - fallback_msgs: &mut Vec, - messages_to_send: &mut Vec, - retry_config: &RetryConfig, - ) -> error::Result { - let start_time = tokio::time::Instant::now(); - match self.sink_writer.sink(messages_to_send.clone()).await { - Ok(response) => { - debug!("Sink latency - {}ms", start_time.elapsed().as_millis()); - - // create a map of id to result, since there is no strict requirement - // for the udsink to return the results in the same order as the requests - let result_map = response - .into_iter() - .map(|resp| (resp.id, resp.status)) - .collect::>(); - - error_map.clear(); - // drain all the messages that were successfully written - // and keep only the failed messages to send again - // construct the error map for the failed messages - messages_to_send.retain(|msg| { - if let Some(result) = result_map.get(&msg.id.to_string()) { - return match result { - ResponseStatusFromSink::Success => false, - ResponseStatusFromSink::Failed(err_msg) => { - *error_map.entry(err_msg.clone()).or_insert(0) += 1; - true - } - ResponseStatusFromSink::Fallback => { - fallback_msgs.push(msg.clone()); - false - } - }; - } - false - }); - - // if all messages are successfully written, break the loop - if messages_to_send.is_empty() { - return Ok(true); - } - - sleep(tokio::time::Duration::from_millis( - retry_config.sink_retry_interval_in_ms as u64, - )) - .await; - - // we need to retry - Ok(false) - } - Err(e) => Err(e), - } - } - - // Writes the fallback messages to the fallback sink - async fn handle_fallback_messages( - &mut self, - fallback_msgs: Vec, - retry_config: &RetryConfig, - ) -> error::Result<()> { - if self.fb_sink_writer.is_none() { - return Err(Error::Sink( - "Response contains fallback messages but no fallback sink is configured" - .to_string(), - )); - } - - let fallback_client = self.fb_sink_writer.as_mut().unwrap(); - let mut attempts = 0; - let mut fallback_error_map = HashMap::new(); - // start with the original set of message to be sent. - // we will overwrite this vec with failed messages and will keep retrying. 
- let mut messages_to_send = fallback_msgs; - let fb_msg_count = messages_to_send.len() as u64; - - let default_retry = retry_config - .sink_default_retry_strategy - .clone() - .backoff - .unwrap(); - let max_attempts = default_retry.steps.unwrap(); - let sleep_interval = default_retry.interval.unwrap(); - - while attempts < max_attempts { - let start_time = tokio::time::Instant::now(); - match fallback_client.sink(messages_to_send.clone()).await { - Ok(fb_response) => { - debug!( - "Fallback sink latency - {}ms", - start_time.elapsed().as_millis() - ); - - // create a map of id to result, since there is no strict requirement - // for the udsink to return the results in the same order as the requests - let result_map = fb_response - .into_iter() - .map(|resp| (resp.id, resp.status)) - .collect::>(); - - let mut contains_fallback_status = false; - - fallback_error_map.clear(); - // drain all the messages that were successfully written - // and keep only the failed messages to send again - // construct the error map for the failed messages - messages_to_send.retain(|msg| { - if let Some(result) = result_map.get(&msg.id.to_string()) { - return match result { - ResponseStatusFromSink::Success => false, - ResponseStatusFromSink::Failed(err_msg) => { - *fallback_error_map.entry(err_msg.clone()).or_insert(0) += 1; - true - } - ResponseStatusFromSink::Fallback => { - contains_fallback_status = true; - false - } - }; - } else { - false - } - }); - - // specifying fallback status in fallback response is not allowed - if contains_fallback_status { - return Err(Error::Sink( - "Fallback response contains fallback status".to_string(), - )); - } - - attempts += 1; - - if messages_to_send.is_empty() { - break; - } - - warn!( - "Retry attempt {} due to retryable error. Errors: {:?}", - attempts, fallback_error_map - ); - sleep(tokio::time::Duration::from(sleep_interval)).await; - } - Err(e) => return Err(e), - } - } - if !messages_to_send.is_empty() { - return Err(Error::Sink(format!( - "Failed to write messages to fallback sink after {} attempts. 
Errors: {:?}", - attempts, fallback_error_map - ))); + ))), } - // increment the metric for the fallback sink write - forward_mvtx_metrics() - .fb_sink - .write_total - .get_or_create(&self.common_labels) - .inc_by(fb_msg_count); - Ok(()) - } - - // Acknowledge the messages back to the source - async fn acknowledge_messages(&mut self, offsets: Vec) -> error::Result<()> { - let n = offsets.len(); - let start_time = tokio::time::Instant::now(); - - self.source_reader.ack(offsets).await?; - - debug!("Ack latency - {}ms", start_time.elapsed().as_millis()); - - forward_mvtx_metrics() - .ack_time - .get_or_create(&self.common_labels) - .observe(start_time.elapsed().as_micros() as f64); - - forward_mvtx_metrics() - .ack_total - .get_or_create(&self.common_labels) - .inc_by(n as u64); - Ok(()) } } #[cfg(test)] mod tests { use std::collections::HashSet; + use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; use chrono::Utc; use numaflow::source::{Message, Offset, SourceReadRequest}; - use numaflow::{sink, source, sourcetransform}; - use numaflow_pb::clients::sink::sink_client::SinkClient; + use numaflow::{source, sourcetransform}; use numaflow_pb::clients::source::source_client::SourceClient; use numaflow_pb::clients::sourcetransformer::source_transform_client::SourceTransformClient; - use tokio::sync::mpsc; + use tempfile::TempDir; use tokio::sync::mpsc::Sender; + use tokio::sync::oneshot; + use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use crate::monovertex::forwarder::ForwarderBuilder; - use crate::shared::utils::create_rpc_channel; - use crate::sink::{SinkClientType, SinkHandle}; + use crate::shared::grpc::create_rpc_channel; + use crate::sink::{SinkClientType, SinkWriterBuilder}; use crate::source::user_defined::new_source; - use crate::source::SourceHandle; - use crate::source::SourceType; - use crate::transformer::user_defined::SourceTransformHandle; + use crate::source::{Source, SourceType}; + use crate::transformer::Transformer; + use crate::Result; struct SimpleSource { - yet_to_be_acked: std::sync::RwLock>, + num: usize, + sent_count: AtomicUsize, + yet_to_ack: std::sync::RwLock>, } impl SimpleSource { - fn new() -> Self { + fn new(num: usize) -> Self { Self { - yet_to_be_acked: std::sync::RwLock::new(HashSet::new()), + num, + sent_count: AtomicUsize::new(0), + yet_to_ack: std::sync::RwLock::new(HashSet::new()), } } } @@ -602,32 +166,35 @@ mod tests { async fn read(&self, request: SourceReadRequest, transmitter: Sender) { let event_time = Utc::now(); let mut message_offsets = Vec::with_capacity(request.count); - for i in 0..2 { + + for i in 0..request.count { + if self.sent_count.load(Ordering::SeqCst) >= self.num { + return; + } + let offset = format!("{}-{}", event_time.timestamp_nanos_opt().unwrap(), i); transmitter .send(Message { - value: "test-message".as_bytes().to_vec(), + value: b"hello".to_vec(), event_time, offset: Offset { offset: offset.clone().into_bytes(), partition_id: 0, }, - keys: vec!["test-key".to_string()], + keys: vec![], headers: Default::default(), }) .await .unwrap(); - message_offsets.push(offset) + message_offsets.push(offset); + self.sent_count.fetch_add(1, Ordering::SeqCst); } - self.yet_to_be_acked - .write() - .unwrap() - .extend(message_offsets) + self.yet_to_ack.write().unwrap().extend(message_offsets); } async fn ack(&self, offsets: Vec) { for offset in offsets { - self.yet_to_be_acked + self.yet_to_ack .write() .unwrap() .remove(&String::from_utf8(offset.offset).unwrap()); @@ -635,464 +202,127 @@ mod tests { } 
async fn pending(&self) -> usize { - self.yet_to_be_acked.read().unwrap().len() + self.num - self.sent_count.load(Ordering::SeqCst) + + self.yet_to_ack.read().unwrap().len() } async fn partitions(&self) -> Option> { - Some(vec![0]) + Some(vec![1, 2]) } } struct SimpleTransformer; + #[tonic::async_trait] impl sourcetransform::SourceTransformer for SimpleTransformer { async fn transform( &self, input: sourcetransform::SourceTransformRequest, ) -> Vec { - let keys = input - .keys - .iter() - .map(|k| k.clone() + "-transformed") - .collect(); - let message = sourcetransform::Message::new(input.value, Utc::now()) - .keys(keys) - .tags(vec![]); + let message = sourcetransform::Message::new(input.value, Utc::now()).keys(input.keys); vec![message] } } - struct InMemorySink { - sender: Sender, - } - - impl InMemorySink { - fn new(sender: Sender) -> Self { - Self { sender } - } - } - - #[tonic::async_trait] - impl sink::Sinker for InMemorySink { - async fn sink(&self, mut input: mpsc::Receiver) -> Vec { - let mut responses: Vec = Vec::new(); - while let Some(datum) = input.recv().await { - let response = match std::str::from_utf8(&datum.value) { - Ok(_) => { - self.sender - .send(Message { - value: datum.value.clone(), - event_time: datum.event_time, - offset: Offset { - offset: "test-offset".to_string().into_bytes(), - partition_id: 0, - }, - keys: datum.keys.clone(), - headers: Default::default(), - }) - .await - .unwrap(); - sink::Response::ok(datum.id) - } - Err(e) => { - sink::Response::failure(datum.id, format!("Invalid UTF-8 sequence: {}", e)) - } - }; - responses.push(response); - } - responses - } - } - #[tokio::test] - async fn test_forwarder_source_sink() { - let batch_size = 100; - let timeout_in_ms = 1000; - - let (sink_tx, mut sink_rx) = mpsc::channel(10); - - // Start the source server - let (source_shutdown_tx, source_shutdown_rx) = tokio::sync::oneshot::channel(); - let tmp_dir = tempfile::TempDir::new().unwrap(); - let source_sock_file = tmp_dir.path().join("source.sock"); - let server_info_file = tmp_dir.path().join("source-server-info"); - - let server_info = server_info_file.clone(); - let source_socket = source_sock_file.clone(); - let source_server_handle = tokio::spawn(async move { - source::Server::new(SimpleSource::new()) - .with_socket_file(source_socket) - .with_server_info_file(server_info) - .start_with_shutdown(source_shutdown_rx) - .await - .unwrap(); - }); - - // Start the sink server - let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); - let sink_tmp_dir = tempfile::TempDir::new().unwrap(); - let sink_sock_file = sink_tmp_dir.path().join("sink.sock"); - let server_info_file = sink_tmp_dir.path().join("sink-server-info"); - - let server_info = server_info_file.clone(); - let sink_socket = sink_sock_file.clone(); - let sink_server_handle = tokio::spawn(async move { - sink::Server::new(InMemorySink::new(sink_tx)) - .with_socket_file(sink_socket) - .with_server_info_file(server_info) - .start_with_shutdown(sink_shutdown_rx) - .await - .unwrap(); - }); - - // Start the transformer server - let (transformer_shutdown_tx, transformer_shutdown_rx) = tokio::sync::oneshot::channel(); - let tmp_dir = tempfile::TempDir::new().unwrap(); - let transformer_sock_file = tmp_dir.path().join("transformer.sock"); - let server_info_file = tmp_dir.path().join("transformer-server-info"); - - let server_info = server_info_file.clone(); - let transformer_socket = transformer_sock_file.clone(); - let transformer_server_handle = tokio::spawn(async move { - 
sourcetransform::Server::new(SimpleTransformer) - .with_socket_file(transformer_socket) - .with_server_info_file(server_info) - .start_with_shutdown(transformer_shutdown_rx) - .await - .unwrap(); - }); - - // Wait for the servers to start - tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + async fn test_forwarder() { + // create the source which produces x number of messages let cln_token = CancellationToken::new(); - let (source_read, source_ack, source_lag_reader) = new_source( - SourceClient::new(create_rpc_channel(source_sock_file.clone()).await.unwrap()), - batch_size, - Duration::from_millis(timeout_in_ms), - ) - .await - .expect("failed to connect to source server"); - - let src_reader = SourceHandle::new( - SourceType::UserDefinedSource(source_read, source_ack, source_lag_reader), - batch_size, - ); - - let sink_grpc_client = SinkClient::new(create_rpc_channel(sink_sock_file).await.unwrap()); - let sink_writer = - SinkHandle::new(SinkClientType::UserDefined(sink_grpc_client), batch_size) - .await - .expect("failed to connect to sink server"); - - let transformer_client = SourceTransformHandle::new(SourceTransformClient::new( - create_rpc_channel(transformer_sock_file).await.unwrap(), - )) - .await - .expect("failed to connect to transformer server"); - - let mut forwarder = ForwarderBuilder::new( - src_reader, - sink_writer, - Default::default(), - cln_token.clone(), - ) - .source_transformer(transformer_client) - .build(); - - // Assert the received message in a different task - let assert_handle = tokio::spawn(async move { - let received_message = sink_rx.recv().await.unwrap(); - assert_eq!(received_message.value, "test-message".as_bytes()); - assert_eq!( - received_message.keys, - vec!["test-key-transformed".to_string()] - ); - cln_token.cancel(); - }); - - forwarder.start().await.unwrap(); - - // Wait for the assertion task to complete - assert_handle.await.unwrap(); - - drop(forwarder); - // stop the servers - source_shutdown_tx - .send(()) - .expect("failed to send shutdown signal"); - source_server_handle - .await - .expect("failed to join source server task"); - - transformer_shutdown_tx - .send(()) - .expect("failed to send shutdown signal"); - transformer_server_handle - .await - .expect("failed to join transformer server task"); - - sink_shutdown_tx - .send(()) - .expect("failed to send shutdown signal"); - sink_server_handle - .await - .expect("failed to join sink server task"); - } - - struct ErrorSink {} - - #[tonic::async_trait] - impl sink::Sinker for ErrorSink { - async fn sink( - &self, - mut input: tokio::sync::mpsc::Receiver, - ) -> Vec { - let mut responses = vec![]; - while let Some(datum) = input.recv().await { - responses.append(&mut vec![sink::Response::failure( - datum.id, - "error".to_string(), - )]); - } - responses - } - } - - #[tokio::test] - async fn test_forwarder_sink_error() { - let batch_size = 100; - let timeout_in_ms = 1000; - - // Start the source server - let (source_shutdown_tx, source_shutdown_rx) = tokio::sync::oneshot::channel(); - let tmp_dir = tempfile::TempDir::new().unwrap(); - let source_sock_file = tmp_dir.path().join("source.sock"); + let (src_shutdown_tx, src_shutdown_rx) = oneshot::channel(); + let tmp_dir = TempDir::new().unwrap(); + let sock_file = tmp_dir.path().join("source.sock"); let server_info_file = tmp_dir.path().join("source-server-info"); let server_info = server_info_file.clone(); - let source_socket = source_sock_file.clone(); - let source_server_handle = tokio::spawn(async move { - 
source::Server::new(SimpleSource::new()) - .with_socket_file(source_socket) - .with_server_info_file(server_info) - .start_with_shutdown(source_shutdown_rx) - .await - .unwrap(); - }); - - // Start the sink server - let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); - let sink_tmp_dir = tempfile::TempDir::new().unwrap(); - let sink_sock_file = sink_tmp_dir.path().join("sink.sock"); - let server_info_file = sink_tmp_dir.path().join("sink-server-info"); - - let server_info = server_info_file.clone(); - let sink_socket = sink_sock_file.clone(); - let sink_server_handle = tokio::spawn(async move { - sink::Server::new(ErrorSink {}) - .with_socket_file(sink_socket) + let server_socket = sock_file.clone(); + let source_handle = tokio::spawn(async move { + // a simple source which generates total of 100 messages + source::Server::new(SimpleSource::new(100)) + .with_socket_file(server_socket) .with_server_info_file(server_info) - .start_with_shutdown(sink_shutdown_rx) + .start_with_shutdown(src_shutdown_rx) .await - .unwrap(); - }); - - // Wait for the servers to start - tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - - let cln_token = CancellationToken::new(); - - let (source_read, source_ack, lag_reader) = new_source( - SourceClient::new(create_rpc_channel(source_sock_file.clone()).await.unwrap()), - batch_size, - Duration::from_millis(timeout_in_ms), - ) - .await - .expect("failed to connect to source server"); - - let source_reader = SourceHandle::new( - SourceType::UserDefinedSource(source_read, source_ack, lag_reader), - batch_size, - ); - - let sink_client = SinkClient::new(create_rpc_channel(sink_sock_file).await.unwrap()); - let sink_writer = SinkHandle::new(SinkClientType::UserDefined(sink_client), batch_size) - .await - .expect("failed to connect to sink server"); - - let mut forwarder = ForwarderBuilder::new( - source_reader, - sink_writer, - Default::default(), - cln_token.clone(), - ) - .build(); - - let cancel_handle = tokio::spawn(async move { - tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - cln_token.cancel(); + .unwrap() }); - let forwarder_result = forwarder.start().await; - assert!(forwarder_result.is_err()); - cancel_handle.await.unwrap(); + // wait for the server to start + // TODO: flaky + tokio::time::sleep(Duration::from_millis(100)).await; - // stop the servers - drop(forwarder); - source_shutdown_tx - .send(()) - .expect("failed to send shutdown signal"); - source_server_handle - .await - .expect("failed to join source server task"); + let client = SourceClient::new(create_rpc_channel(sock_file).await.unwrap()); - sink_shutdown_tx - .send(()) - .expect("failed to send sink shutdown signal"); - sink_server_handle + let (src_read, src_ack, lag_reader) = new_source(client, 5, Duration::from_millis(1000)) .await - .expect("failed to join sink server task"); - } - - // Sink that returns status fallback - struct FallbackSender {} - - #[tonic::async_trait] - impl sink::Sinker for FallbackSender { - async fn sink(&self, mut input: mpsc::Receiver) -> Vec { - let mut responses = vec![]; - while let Some(datum) = input.recv().await { - responses.append(&mut vec![sink::Response::fallback(datum.id)]); - } - responses - } - } - - #[tokio::test] - async fn test_fb_sink() { - let batch_size = 100; - - let (sink_tx, mut sink_rx) = mpsc::channel(10); - - // Start the source server - let (source_shutdown_tx, source_shutdown_rx) = tokio::sync::oneshot::channel(); - let tmp_dir = tempfile::TempDir::new().unwrap(); - let 
source_sock_file = tmp_dir.path().join("source.sock"); - let server_info_file = tmp_dir.path().join("source-server-info"); - - let server_info = server_info_file.clone(); - let source_socket = source_sock_file.clone(); - let source_server_handle = tokio::spawn(async move { - source::Server::new(SimpleSource::new()) - .with_socket_file(source_socket) - .with_server_info_file(server_info) - .start_with_shutdown(source_shutdown_rx) - .await - .unwrap(); - }); - - // Start the primary sink server (which returns status fallback) - let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); - let sink_tmp_dir = tempfile::TempDir::new().unwrap(); - let sink_sock_file = sink_tmp_dir.path().join("sink.sock"); - let server_info_file = sink_tmp_dir.path().join("sink-server-info"); + .map_err(|e| panic!("failed to create source reader: {:?}", e)) + .unwrap(); - let server_info = server_info_file.clone(); - let sink_socket = sink_sock_file.clone(); - let sink_server_handle = tokio::spawn(async move { - sink::Server::new(FallbackSender {}) - .with_socket_file(sink_socket) - .with_server_info_file(server_info) - .start_with_shutdown(sink_shutdown_rx) - .await - .unwrap(); - }); + let source = Source::new( + 5, + SourceType::UserDefinedSource(src_read, src_ack, lag_reader), + ); - // Start the fb sink server - let (fb_sink_shutdown_tx, fb_sink_shutdown_rx) = tokio::sync::oneshot::channel(); - let fb_sink_tmp_dir = tempfile::TempDir::new().unwrap(); - let fb_sink_sock_file = fb_sink_tmp_dir.path().join("fb-sink.sock"); - let server_info_file = fb_sink_tmp_dir.path().join("fb-sinker-server-info"); + // create a transformer + let (st_shutdown_tx, st_shutdown_rx) = oneshot::channel(); + let tmp_dir = TempDir::new().unwrap(); + let sock_file = tmp_dir.path().join("sourcetransform.sock"); + let server_info_file = tmp_dir.path().join("sourcetransformer-server-info"); let server_info = server_info_file.clone(); - let fb_sink_socket = fb_sink_sock_file.clone(); - let fb_sink_server_handle = tokio::spawn(async move { - sink::Server::new(InMemorySink::new(sink_tx)) - .with_socket_file(fb_sink_socket) + let server_socket = sock_file.clone(); + let transformer_handle = tokio::spawn(async move { + sourcetransform::Server::new(SimpleTransformer) + .with_socket_file(server_socket) .with_server_info_file(server_info) - .start_with_shutdown(fb_sink_shutdown_rx) + .start_with_shutdown(st_shutdown_rx) .await - .unwrap(); + .expect("server failed"); }); - // Wait for the servers to start + // wait for the server to start tokio::time::sleep(Duration::from_millis(100)).await; - let cln_token = CancellationToken::new(); - - let (source_read, source_ack, source_lag_reader) = new_source( - SourceClient::new(create_rpc_channel(source_sock_file.clone()).await.unwrap()), - 500, - Duration::from_millis(100), - ) - .await - .expect("failed to connect to source server"); - - let source = SourceHandle::new( - SourceType::UserDefinedSource(source_read, source_ack, source_lag_reader), - batch_size, - ); - - let sink_client = SinkClient::new(create_rpc_channel(sink_sock_file).await.unwrap()); - let sink_writer = SinkHandle::new(SinkClientType::UserDefined(sink_client), batch_size) - .await - .expect("failed to connect to sink server"); + let client = SourceTransformClient::new(create_rpc_channel(sock_file).await.unwrap()); + let transformer = Transformer::new(10, 10, client).await.unwrap(); - let fb_sink_writer = SinkClient::new(create_rpc_channel(fb_sink_sock_file).await.unwrap()); - let fb_sink_writer = - 
SinkHandle::new(SinkClientType::UserDefined(fb_sink_writer), batch_size) + let sink_writer = + SinkWriterBuilder::new(10, Duration::from_millis(100), SinkClientType::Log) + .build() .await - .expect("failed to connect to fb sink server"); + .unwrap(); - let mut forwarder = - ForwarderBuilder::new(source, sink_writer, Default::default(), cln_token.clone()) - .fallback_sink_writer(fb_sink_writer) - .build(); + // create the forwarder with the source, transformer, and writer + let forwarder = ForwarderBuilder::new(source.clone(), sink_writer, cln_token.clone()) + .transformer(transformer) + .build(); - let assert_handle = tokio::spawn(async move { - let received_message = sink_rx.recv().await.unwrap(); - assert_eq!(received_message.value, "test-message".as_bytes()); - assert_eq!(received_message.keys, vec!["test-key".to_string()]); - cln_token.cancel(); + let forwarder_handle: JoinHandle> = tokio::spawn(async move { + forwarder.start().await?; + Ok(()) }); - forwarder.start().await.unwrap(); - - assert_handle.await.unwrap(); - - drop(forwarder); - // stop the servers - source_shutdown_tx - .send(()) - .expect("failed to send shutdown signal"); - source_server_handle - .await - .expect("failed to join source server task"); - - sink_shutdown_tx - .send(()) - .expect("failed to send shutdown signal"); - sink_server_handle - .await - .expect("failed to join sink server task"); + // wait for one sec to check if the pending becomes zero, because all the messages + // should be read and acked; if it doesn't, then fail the test + let tokio_result = tokio::time::timeout(Duration::from_secs(1), async move { + loop { + let pending = source.pending().await.unwrap(); + if pending == Some(0) { + break; + } + tokio::time::sleep(Duration::from_millis(10)).await; + } + }) + .await; - fb_sink_shutdown_tx - .send(()) - .expect("failed to send shutdown signal"); - fb_sink_server_handle - .await - .expect("failed to join fb sink server task"); + assert!( + tokio_result.is_ok(), + "Timeout occurred before pending became zero" + ); + cln_token.cancel(); + forwarder_handle.await.unwrap().unwrap(); + st_shutdown_tx.send(()).unwrap(); + src_shutdown_tx.send(()).unwrap(); + source_handle.await.unwrap(); + transformer_handle.await.unwrap(); } } diff --git a/rust/numaflow-core/src/pipeline.rs b/rust/numaflow-core/src/pipeline.rs index ef49bdc75..e29ebe504 100644 --- a/rust/numaflow-core/src/pipeline.rs +++ b/rust/numaflow-core/src/pipeline.rs @@ -1,30 +1,21 @@ -use crate::config::components::source::SourceType; -use crate::config::pipeline; -use crate::config::pipeline::PipelineConfig; -use crate::metrics::{PipelineContainerState, UserDefinedContainerState}; -use crate::pipeline::isb::jetstream::reader::JetstreamReader; -use crate::pipeline::isb::jetstream::WriterHandle; -use crate::shared::server_info::sdk_server_info; -use crate::shared::utils; -use crate::shared::utils::{ - create_rpc_channel, start_metrics_server, wait_until_source_ready, wait_until_transformer_ready, -}; -use crate::sink::SinkWriter; -use crate::source::generator::new_generator; -use crate::source::pulsar::new_pulsar_source; -use crate::source::user_defined::new_source; -use crate::transformer::user_defined::SourceTransformHandle; -use crate::{config, error, source, Result}; +use std::time::Duration; + use async_nats::jetstream::Context; use async_nats::{jetstream, ConnectOptions}; use futures::future::try_join_all; -use numaflow_pb::clients::sink::sink_client::SinkClient; -use numaflow_pb::clients::source::source_client::SourceClient; -use 
numaflow_pb::clients::sourcetransformer::source_transform_client::SourceTransformClient; -use std::collections::HashMap; -use std::time::Duration; use tokio_util::sync::CancellationToken; -use tonic::transport::Channel; +use tracing::info; + +use crate::config::pipeline; +use crate::config::pipeline::{PipelineConfig, SinkVtxConfig, SourceVtxConfig}; +use crate::metrics::{PipelineContainerState, UserDefinedContainerState}; +use crate::pipeline::forwarder::source_forwarder; +use crate::pipeline::isb::jetstream::reader::JetstreamReader; +use crate::pipeline::isb::jetstream::ISBWriter; +use crate::shared::create_components; +use crate::shared::create_components::create_sink_writer; +use crate::shared::metrics::start_metrics_server; +use crate::{error, Result}; mod forwarder; mod isb; @@ -34,117 +25,142 @@ pub(crate) async fn start_forwarder( cln_token: CancellationToken, config: PipelineConfig, ) -> Result<()> { - let js_context = create_js_context(config.js_client_config.clone()).await?; - match &config.vertex_config { pipeline::VertexType::Source(source) => { - let buffer_writers = - create_buffer_writers(&config, js_context.clone(), cln_token.clone()).await?; - - let (source_type, source_grpc_client) = - create_source_type(source, &config, cln_token.clone()).await?; - let (transformer, transformer_grpc_client) = - create_transformer(source, cln_token.clone()).await?; - - start_metrics_server( - config.metrics_config.clone(), - UserDefinedContainerState::Pipeline(PipelineContainerState::Source(( - source_grpc_client.clone(), - transformer_grpc_client.clone(), - ))), - ) - .await; - - let source_handle = source::SourceHandle::new(source_type, config.batch_size); - let mut forwarder = forwarder::source_forwarder::ForwarderBuilder::new( - source_handle, - transformer, - buffer_writers, - cln_token.clone(), - config.clone(), - ) - .build(); - forwarder.start().await?; + info!("Starting source forwarder"); + start_source_forwarder(cln_token, config.clone(), source.clone()).await?; } pipeline::VertexType::Sink(sink) => { - // Create buffer readers for each partition - let buffer_readers = create_buffer_readers(&config, js_context.clone()).await?; - - // Create sink writers and clients - let mut sink_writers = Vec::new(); - for _ in &buffer_readers { - let (sink_writer, sink_grpc_client, fb_sink_grpc_client) = - create_sink_writer(&config, sink, cln_token.clone()).await?; - sink_writers.push((sink_writer, sink_grpc_client, fb_sink_grpc_client)); - } + info!("Starting sink forwarder"); + start_sink_forwarder(cln_token, config.clone(), sink.clone()).await?; + } + } + Ok(()) +} - // Start the metrics server with one of the clients - if let Some((_, sink, fb_sink)) = sink_writers.first() { - start_metrics_server( - config.metrics_config.clone(), - UserDefinedContainerState::Pipeline(PipelineContainerState::Sink(( - sink.clone(), - fb_sink.clone(), - ))), - ) - .await; - } +async fn start_source_forwarder( + cln_token: CancellationToken, + config: PipelineConfig, + source_config: SourceVtxConfig, +) -> Result<()> { + let js_context = create_js_context(config.js_client_config.clone()).await?; - // Start a new forwarder for each buffer reader - let mut forwarder_tasks = Vec::new(); - for (buffer_reader, (sink_writer, _, _)) in buffer_readers.into_iter().zip(sink_writers) - { - let forwarder = forwarder::sink_forwarder::SinkForwarder::new( - buffer_reader, - sink_writer, - cln_token.clone(), - ) - .await; + let buffer_writer = create_buffer_writer(&config, js_context.clone(), cln_token.clone()).await; - 
let task = tokio::spawn({ - let config = config.clone(); - async move { forwarder.start(config.clone()).await } - }); + let (source, source_grpc_client) = create_components::create_source( + config.batch_size, + config.read_timeout, + &source_config.source_config, + cln_token.clone(), + ) + .await?; + let (transformer, transformer_grpc_client) = create_components::create_transformer( + config.batch_size, + source_config.transformer_config.clone(), + cln_token.clone(), + ) + .await?; - forwarder_tasks.push(task); - } + start_metrics_server( + config.metrics_config.clone(), + UserDefinedContainerState::Pipeline(PipelineContainerState::Source(( + source_grpc_client.clone(), + transformer_grpc_client.clone(), + ))), + ) + .await; - try_join_all(forwarder_tasks) - .await - .map_err(|e| error::Error::Forwarder(e.to_string()))?; - } + let forwarder = + source_forwarder::SourceForwarderBuilder::new(source, buffer_writer, cln_token.clone()); + + let forwarder = if let Some(transformer) = transformer { + forwarder.with_transformer(transformer).build() + } else { + forwarder.build() + }; + + forwarder.start().await?; + Ok(()) +} + +async fn start_sink_forwarder( + cln_token: CancellationToken, + config: PipelineConfig, + sink: SinkVtxConfig, +) -> Result<()> { + let js_context = create_js_context(config.js_client_config.clone()).await?; + + // Create buffer readers for each partition + let buffer_readers = create_buffer_readers(&config, js_context.clone()).await?; + + // Create sink writers and clients + let mut sink_writers = Vec::new(); + for _ in &buffer_readers { + let (sink_writer, sink_grpc_client, fb_sink_grpc_client) = create_sink_writer( + config.batch_size, + config.read_timeout, + sink.sink_config.clone(), + sink.fb_sink_config.clone(), + &cln_token, + ) + .await?; + sink_writers.push((sink_writer, sink_grpc_client, fb_sink_grpc_client)); + } + + // Start the metrics server with one of the clients + if let Some((_, sink, fb_sink)) = sink_writers.first() { + start_metrics_server( + config.metrics_config.clone(), + UserDefinedContainerState::Pipeline(PipelineContainerState::Sink(( + sink.clone(), + fb_sink.clone(), + ))), + ) + .await; + } + + // Start a new forwarder for each buffer reader + let mut forwarder_tasks = Vec::new(); + for (buffer_reader, (sink_writer, _, _)) in buffer_readers.into_iter().zip(sink_writers) { + info!(%buffer_reader, "Starting forwarder for buffer reader"); + let forwarder = forwarder::sink_forwarder::SinkForwarder::new( + buffer_reader, + sink_writer, + cln_token.clone(), + ) + .await; + + let task = tokio::spawn({ + let config = config.clone(); + async move { forwarder.start(config.clone()).await } + }); + + forwarder_tasks.push(task); } + + try_join_all(forwarder_tasks) + .await + .map_err(|e| error::Error::Forwarder(e.to_string()))?; + info!("All forwarders have stopped successfully"); Ok(()) } -/// Creates the required buffer writers based on the pipeline configuration, it creates a map -/// of vertex name to a list of writer handles. 
-async fn create_buffer_writers( +async fn create_buffer_writer( config: &PipelineConfig, js_context: Context, cln_token: CancellationToken, -) -> Result>> { - let mut buffer_writers = HashMap::new(); - for to_vertex in &config.to_vertex_config { - let writers = to_vertex - .writer_config - .streams +) -> ISBWriter { + ISBWriter::new( + config.paf_concurrency, + config + .to_vertex_config .iter() - .map(|stream| { - WriterHandle::new( - stream.0.clone(), - stream.1, - to_vertex.writer_config.clone(), - js_context.clone(), - config.batch_size, - config.paf_batch_size, - cln_token.clone(), - ) - }) - .collect(); - buffer_writers.insert(to_vertex.name.clone(), writers); - } - Ok(buffer_writers) + .map(|tv| tv.writer_config.clone()) + .collect(), + js_context, + cln_token, + ) + .await } async fn create_buffer_readers( @@ -153,17 +169,16 @@ async fn create_buffer_readers( ) -> Result> { // Only the reader config of the first "from" vertex is needed, as all "from" vertices currently write // to a common buffer, in the case of a join. - let reader_config = config + let reader_config = &config .from_vertex_config .first() .ok_or_else(|| error::Error::Config("No from vertex config found".to_string()))? - .reader_config - .clone(); + .reader_config; let mut readers = Vec::new(); for stream in &reader_config.streams { let reader = JetstreamReader::new( - stream.0.clone(), + stream.0, stream.1, js_context.clone(), reader_config.clone(), @@ -175,129 +190,6 @@ async fn create_buffer_readers( Ok(readers) } -// Creates a sink writer based on the pipeline configuration -async fn create_sink_writer( - config: &PipelineConfig, - sink_vtx_config: &pipeline::SinkVtxConfig, - cln_token: CancellationToken, -) -> Result<( - SinkWriter, - Option>, - Option>, -)> { - let (sink_handle, sink_grpc_client) = utils::create_sink_handle( - config.batch_size, - &sink_vtx_config.sink_config.sink_type, - &cln_token, - ) - .await?; - let (fb_sink_handle, fb_sink_grpc_client) = match &sink_vtx_config.fb_sink_config { - None => (None, None), - Some(fb_sink_config) => { - let (handle, client) = - utils::create_sink_handle(config.batch_size, &fb_sink_config.sink_type, &cln_token) - .await?; - (Some(handle), client) - } - }; - - Ok(( - SinkWriter::new( - config.batch_size, - config.read_timeout, - sink_vtx_config.clone(), - sink_handle, - fb_sink_handle, - ) - .await?, - sink_grpc_client, - fb_sink_grpc_client, - )) -} - -/// Creates a source type based on the pipeline configuration -async fn create_source_type( - source: &pipeline::SourceVtxConfig, - config: &PipelineConfig, - cln_token: CancellationToken, -) -> Result<(source::SourceType, Option>)> { - match &source.source_config.source_type { - SourceType::Generator(generator_config) => { - let (generator_read, generator_ack, generator_lag) = - new_generator(generator_config.clone(), config.batch_size)?; - Ok(( - source::SourceType::Generator(generator_read, generator_ack, generator_lag), - None, - )) - } - SourceType::UserDefined(udsource_config) => { - _ = sdk_server_info( - udsource_config.server_info_path.clone().into(), - cln_token.clone(), - ) - .await?; - - // TODO: Add sdk info metric - - let mut source_grpc_client = SourceClient::new( - create_rpc_channel(udsource_config.socket_path.clone().into()).await?, - ) - .max_encoding_message_size(udsource_config.grpc_max_message_size) - .max_encoding_message_size(udsource_config.grpc_max_message_size); - wait_until_source_ready(&cln_token, &mut source_grpc_client).await?; - let (ud_read, ud_ack, ud_lag) = new_source( - 
source_grpc_client.clone(), - config.batch_size, - config.read_timeout, - ) - .await?; - Ok(( - source::SourceType::UserDefinedSource(ud_read, ud_ack, ud_lag), - Some(source_grpc_client), - )) - } - SourceType::Pulsar(pulsar_config) => { - let pulsar_source = new_pulsar_source( - pulsar_config.clone(), - config.batch_size, - config.read_timeout, - ) - .await?; - Ok((source::SourceType::Pulsar(pulsar_source), None)) - } - } -} -/// Creates a transformer if it is configured in the pipeline -async fn create_transformer( - source: &pipeline::SourceVtxConfig, - cln_token: CancellationToken, -) -> Result<( - Option, - Option>, -)> { - if let Some(transformer_config) = &source.transformer_config { - if let config::components::transformer::TransformerType::UserDefined(ud_transformer) = - &transformer_config.transformer_type - { - _ = sdk_server_info(ud_transformer.socket_path.clone().into(), cln_token.clone()) - .await?; - // TODO: Add sdk info metric - - let mut transformer_grpc_client = SourceTransformClient::new( - create_rpc_channel(ud_transformer.socket_path.clone().into()).await?, - ) - .max_encoding_message_size(ud_transformer.grpc_max_message_size) - .max_encoding_message_size(ud_transformer.grpc_max_message_size); - wait_until_transformer_ready(&cln_token, &mut transformer_grpc_client).await?; - return Ok(( - Some(SourceTransformHandle::new(transformer_grpc_client.clone()).await?), - Some(transformer_grpc_client), - )); - } - } - Ok((None, None)) -} - /// Creates a jetstream context based on the provided configuration async fn create_js_context(config: pipeline::isb::jetstream::ClientConfig) -> Result { // TODO: make these configurable. today this is hardcoded on Golang code too. @@ -305,6 +197,7 @@ async fn create_js_context(config: pipeline::isb::jetstream::ClientConfig) -> Re .max_reconnects(None) // -1 for unlimited reconnects .ping_interval(Duration::from_secs(3)) .max_reconnects(None) + .ping_interval(Duration::from_secs(3)) .retry_on_initial_connect(); if let (Some(user), Some(password)) = (config.user, config.password) { @@ -320,6 +213,7 @@ async fn create_js_context(config: pipeline::isb::jetstream::ClientConfig) -> Re #[cfg(test)] mod tests { + use std::collections::HashMap; use std::time::Duration; use async_nats::jetstream; @@ -327,7 +221,6 @@ mod tests { use futures::StreamExt; use super::*; - use crate::config::components::metrics::MetricsConfig; use crate::config::components::sink::{BlackholeConfig, SinkConfig, SinkType}; use crate::config::components::source::GeneratorConfig; @@ -393,7 +286,7 @@ mod tests { vertex_name: "in".to_string(), replica: 0, batch_size: 1000, - paf_batch_size: 30000, + paf_concurrency: 30000, read_timeout: Duration::from_secs(1), js_client_config: isb::jetstream::ClientConfig { url: "localhost:4222".to_string(), @@ -516,8 +409,9 @@ mod tests { .unwrap(); // Publish some messages into the stream - use crate::message::{Message, MessageID, Offset, StringOffset}; use chrono::{TimeZone, Utc}; + + use crate::message::{Message, MessageID, Offset, StringOffset}; let message = Message { keys: vec!["key1".to_string()], value: vec![1, 2, 3].into(), @@ -560,7 +454,7 @@ mod tests { vertex_name: "in".to_string(), replica: 0, batch_size: 1000, - paf_batch_size: 30000, + paf_concurrency: 30000, read_timeout: Duration::from_secs(1), js_client_config: isb::jetstream::ClientConfig { url: "localhost:4222".to_string(), @@ -575,7 +469,7 @@ mod tests { streams: streams .iter() .enumerate() - .map(|(i, key)| (key.to_string(), i as u16)) + .map(|(i, key)| (*key, i as u16)) 
                .collect(),
            wip_ack_interval: Duration::from_secs(1),
        },
diff --git a/rust/numaflow-core/src/pipeline/forwarder.rs b/rust/numaflow-core/src/pipeline/forwarder.rs
index 6e8774c32..9c1f8deef 100644
--- a/rust/numaflow-core/src/pipeline/forwarder.rs
+++ b/rust/numaflow-core/src/pipeline/forwarder.rs
@@ -1,12 +1,29 @@
-/// Forwarder consists
-/// (Read) +-------> (UDF) -------> (Write) +
-///          |                              |
-///          |                              |
-///          +-------> {Ack} <--------------+
-///
-/// {} -> Listens on a OneShot
-/// () -> Streaming Interface
-///
+//! The forwarder for [Pipeline] at its core orchestrates message movement asynchronously using
+//! [Stream] over channels between the components. Messages are sent over these channels using the
+//! [Actor Pattern].
+//!
+//! ```text
+//! (source) --[c]--> (transformer)* --[c]--> ==> (map)* --[c]--> ===> (reducer)* --[c]--> ===> --[c]--> (sink)
+//!
+//! ==> - ISB
+//! [c] - channel
+//! * - optional
+//! ```
+//!
+//! Most of the data moves forward, except for the ack, which can happen only after the write.
+//! ```text
+//! (Read) +-------> (UDF) -------> (Write) +
+//!          |                              |
+//!          |                              |
+//!          +-------> {Ack} <--------------+
+//!
+//! {} -> Listens on a OneShot
+//! () -> Streaming Interface
+//! ```
+//!
+//! [Pipeline]: https://numaflow.numaproj.io/core-concepts/pipeline/
+//! [Stream]: https://docs.rs/tokio-stream/latest/tokio_stream/wrappers/struct.ReceiverStream.html
+//! [Actor Pattern]: https://ryhl.io/blog/actors-with-tokio/
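+//!
+//! To make the ack contract concrete, here is a minimal sketch of the oneshot
+//! handshake (simplified stand-ins for this crate's `ReadMessage`/`ReadAck`
+//! types, not the exact definitions): the reader attaches a oneshot sender to
+//! every message, and the writer fires it only once the write has succeeded.
+//!
+//! ```ignore
+//! use tokio::sync::oneshot;
+//!
+//! enum ReadAck { Ack, Nak }
+//!
+//! struct ReadMessage {
+//!     payload: Vec<u8>,
+//!     ack: oneshot::Sender<ReadAck>,
+//! }
+//!
+//! async fn write_one(msg: ReadMessage) {
+//!     // ... write msg.payload to the ISB, send ReadAck::Nak on failure ...
+//!     // firing the oneshot unblocks the reader so it can ack upstream
+//!     let _ = msg.ack.send(ReadAck::Ack);
+//! }
+//! ```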
 /// Forwarder specific to Sink where reader is ISB, UDF is not present, while
 /// the Write is User-defined Sink or builtin.
diff --git a/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs b/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs
index 74846e931..7153a4ff1 100644
--- a/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs
+++ b/rust/numaflow-core/src/pipeline/forwarder/sink_forwarder.rs
@@ -6,7 +6,8 @@ use crate::pipeline::isb::jetstream::reader::JetstreamReader;
 use crate::sink::SinkWriter;
 use crate::Result;
 
-/// Sink forwarder reads messages from the jetstream and writes to the sink.
+/// Sink forwarder is a component which starts a streaming reader and a sink writer
+/// and manages the lifecycle of these components.
 pub(crate) struct SinkForwarder {
     jetstream_reader: JetstreamReader,
     sink_writer: SinkWriter,
@@ -29,14 +30,14 @@ impl SinkForwarder {
     pub(crate) async fn start(&self, pipeline_config: PipelineConfig) -> Result<()> {
         // Create a child cancellation token only for the reader so that we can stop the reader first
         let reader_cancellation_token = self.cln_token.child_token();
-        let (read_messages_rx, reader_handle) = self
+        let (read_messages_stream, reader_handle) = self
             .jetstream_reader
-            .start(reader_cancellation_token.clone(), &pipeline_config)
+            .streaming_read(reader_cancellation_token.clone(), &pipeline_config)
             .await?;
 
         let sink_writer_handle = self
             .sink_writer
-            .start(read_messages_rx, self.cln_token.clone())
+            .streaming_write(read_messages_stream, self.cln_token.clone())
             .await?;
 
         // Join the reader and sink writer
diff --git a/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs b/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs
index 9ba2ba94f..624633444 100644
--- a/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs
+++ b/rust/numaflow-core/src/pipeline/forwarder/source_forwarder.rs
@@ -1,197 +1,340 @@
-use std::collections::HashMap;
-
-use chrono::Utc;
 use tokio_util::sync::CancellationToken;
-use tracing::{debug, info};
 
-use crate::config::pipeline::PipelineConfig;
 use crate::error;
 use crate::error::Error;
-use crate::message::{Message, Offset};
-use crate::metrics::{forward_pipeline_metrics, pipeline_forward_read_metric_labels};
-use crate::pipeline::isb::jetstream::WriterHandle;
-use crate::source::SourceHandle;
-use crate::transformer::user_defined::SourceTransformHandle;
-
-/// Simple source forwarder that reads messages from the source, applies transformation if present
-/// and writes to the messages to ISB.
-pub(crate) struct Forwarder {
-    source_reader: SourceHandle,
-    transformer: Option<SourceTransformHandle>,
-    buffer_writers: HashMap<String, Vec<WriterHandle>>,
+use crate::pipeline::isb::jetstream::ISBWriter;
+use crate::source::Source;
+use crate::transformer::Transformer;
+
+/// Source forwarder is the orchestrator which starts the streaming source, a transformer, and an
+/// ISB writer, and manages the lifecycle of these components.
+pub(crate) struct SourceForwarder {
+    source: Source,
+    transformer: Option<Transformer>,
+    writer: ISBWriter,
     cln_token: CancellationToken,
-    config: PipelineConfig,
 }
 
-pub(crate) struct ForwarderBuilder {
-    source_reader: SourceHandle,
-    transformer: Option<SourceTransformHandle>,
-    buffer_writers: HashMap<String, Vec<WriterHandle>>,
+/// SourceForwarderBuilder is a builder for SourceForwarder.
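+///
+/// A minimal usage sketch (hypothetical wiring; the tests in this module show
+/// the full setup with real gRPC servers):
+///
+/// ```ignore
+/// let forwarder = SourceForwarderBuilder::new(source, writer, cln_token)
+///     .with_transformer(transformer) // optional
+///     .build();
+/// forwarder.start().await?;
+/// ```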
+pub(crate) struct SourceForwarderBuilder { + streaming_source: Source, + transformer: Option, + writer: ISBWriter, cln_token: CancellationToken, - config: PipelineConfig, } -impl ForwarderBuilder { +impl SourceForwarderBuilder { pub(crate) fn new( - source_reader: SourceHandle, - transformer: Option, - buffer_writers: HashMap>, + streaming_source: Source, + writer: ISBWriter, cln_token: CancellationToken, - config: PipelineConfig, ) -> Self { Self { - source_reader, - transformer, - buffer_writers, + streaming_source, + transformer: None, + writer, cln_token, - config, } } - pub(crate) fn build(self) -> Forwarder { - Forwarder { - source_reader: self.source_reader, + pub(crate) fn with_transformer(mut self, transformer: Transformer) -> Self { + self.transformer = Some(transformer); + self + } + + pub(crate) fn build(self) -> SourceForwarder { + SourceForwarder { + source: self.streaming_source, transformer: self.transformer, - buffer_writers: self.buffer_writers, + writer: self.writer, cln_token: self.cln_token, - config: self.config, } } } -impl Forwarder { - pub(crate) async fn start(&mut self) -> Result<(), Error> { - let mut processed_msgs_count: usize = 0; - let mut last_forwarded_at = std::time::Instant::now(); - info!("Forwarder has started"); - loop { - tokio::time::Instant::now(); - if self.cln_token.is_cancelled() { - break; +impl SourceForwarder { + /// Start the forwarder by starting the streaming source, transformer, and writer. + pub(crate) async fn start(&self) -> error::Result<()> { + // RETHINK: only source should stop when the token is cancelled, transformer and writer should drain the streams + // and then stop. + let (read_messages_stream, reader_handle) = + self.source.streaming_read(self.cln_token.clone())?; + + // start the transformer if it is present + let (transformed_messages_stream, transformer_handle) = + if let Some(transformer) = &self.transformer { + let (transformed_messages_stream, transformer_handle) = + transformer.transform_stream(read_messages_stream)?; + (transformed_messages_stream, Some(transformer_handle)) + } else { + (read_messages_stream, None) + }; + + let writer_handle = self + .writer + .streaming_write(transformed_messages_stream) + .await?; + + match tokio::try_join!( + reader_handle, + transformer_handle.unwrap_or_else(|| tokio::spawn(async { Ok(()) })), + writer_handle, + ) { + Ok((reader_result, transformer_result, sink_writer_result)) => { + reader_result?; + transformer_result?; + sink_writer_result?; + Ok(()) } - processed_msgs_count += self.read_and_process_messages().await?; - - if last_forwarded_at.elapsed().as_millis() >= 1000 { - info!( - "Forwarded {} messages at time in the pipeline {}", - processed_msgs_count, - Utc::now() - ); - processed_msgs_count = 0; - last_forwarded_at = std::time::Instant::now(); + Err(e) => Err(Error::Forwarder(format!( + "Error while joining reader, transformer, and sink writer: {:?}", + e + ))), + } + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::time::Duration; + + use async_nats::jetstream; + use async_nats::jetstream::{consumer, stream}; + use chrono::Utc; + use numaflow::source::{Message, Offset, SourceReadRequest}; + use numaflow::{source, sourcetransform}; + use numaflow_pb::clients::source::source_client::SourceClient; + use numaflow_pb::clients::sourcetransformer::source_transform_client::SourceTransformClient; + use tempfile::TempDir; + use tokio::sync::mpsc::Sender; + use tokio::sync::oneshot; + use 
tokio::task::JoinHandle; + use tokio_util::sync::CancellationToken; + + use crate::config::pipeline::isb::BufferWriterConfig; + use crate::pipeline::isb::jetstream::ISBWriter; + use crate::pipeline::source_forwarder::SourceForwarderBuilder; + use crate::shared::grpc::create_rpc_channel; + use crate::source::user_defined::new_source; + use crate::source::{Source, SourceType}; + use crate::transformer::Transformer; + use crate::Result; + + struct SimpleSource { + num: usize, + sent_count: AtomicUsize, + yet_to_ack: std::sync::RwLock>, + } + + impl SimpleSource { + fn new(num: usize) -> Self { + Self { + num, + sent_count: AtomicUsize::new(0), + yet_to_ack: std::sync::RwLock::new(HashSet::new()), } } - Ok(()) } - async fn read_and_process_messages(&mut self) -> Result { - let start_time = tokio::time::Instant::now(); - let messages = self.source_reader.read().await.map_err(|e| { - Error::Forwarder(format!("Failed to read messages from source {:?}", e)) - })?; + #[tonic::async_trait] + impl source::Sourcer for SimpleSource { + async fn read(&self, request: SourceReadRequest, transmitter: Sender) { + let event_time = Utc::now(); + let mut message_offsets = Vec::with_capacity(request.count); - debug!( - "Read batch size: {} and latency - {}ms", - messages.len(), - start_time.elapsed().as_millis() - ); + for i in 0..request.count { + if self.sent_count.load(Ordering::SeqCst) >= self.num { + return; + } - let labels = pipeline_forward_read_metric_labels( - self.config.pipeline_name.as_ref(), - self.config.vertex_name.as_ref(), - self.config.vertex_name.as_ref(), - "Source", - self.config.replica, - ); - forward_pipeline_metrics() - .forwarder - .data_read - .get_or_create(labels) - .inc_by(messages.len() as u64); - - if messages.is_empty() { - return Ok(0); + let offset = format!("{}-{}", event_time.timestamp_nanos_opt().unwrap(), i); + transmitter + .send(Message { + value: b"hello".to_vec(), + event_time, + offset: Offset { + offset: offset.clone().into_bytes(), + partition_id: 0, + }, + keys: vec![], + headers: Default::default(), + }) + .await + .unwrap(); + message_offsets.push(offset); + self.sent_count.fetch_add(1, Ordering::SeqCst); + } + self.yet_to_ack.write().unwrap().extend(message_offsets); } - let msg_count = messages.len() as u64; - let offsets: Vec = - messages - .iter() - .try_fold(Vec::with_capacity(messages.len()), |mut offsets, msg| { - if let Some(offset) = &msg.offset { - offsets.push(offset.clone()); - Ok(offsets) - } else { - Err(Error::Forwarder("Message offset is missing".to_string())) - } - })?; - - // Apply transformation if transformer is present - // FIXME: we should stream the responses back and write it to the jetstream writer - let transformed_messages = self.apply_transformer(messages).await.map_err(|e| { - Error::Forwarder(format!( - "Failed to apply transformation to messages {:?}", - e - )) - })?; + async fn ack(&self, offsets: Vec) { + for offset in offsets { + self.yet_to_ack + .write() + .unwrap() + .remove(&String::from_utf8(offset.offset).unwrap()); + } + } - self.write_to_jetstream(transformed_messages).await?; + async fn pending(&self) -> usize { + self.num - self.sent_count.load(Ordering::SeqCst) + + self.yet_to_ack.read().unwrap().len() + } + + async fn partitions(&self) -> Option> { + Some(vec![1, 2]) + } + } - self.source_reader.ack(offsets).await?; + struct SimpleTransformer; - Ok(msg_count as usize) + #[tonic::async_trait] + impl sourcetransform::SourceTransformer for SimpleTransformer { + async fn transform( + &self, + input: 
sourcetransform::SourceTransformRequest, + ) -> Vec { + let message = sourcetransform::Message::new(input.value, Utc::now()).keys(input.keys); + vec![message] + } } - /// Applies the transformer to the messages. - async fn apply_transformer(&mut self, messages: Vec) -> error::Result> { - let Some(client) = &mut self.transformer else { - // return early if there is no transformer - return Ok(messages); - }; + #[cfg(feature = "nats-tests")] + #[tokio::test] + async fn test_source_forwarder() { + // create the source which produces x number of messages + let cln_token = CancellationToken::new(); + + let (src_shutdown_tx, src_shutdown_rx) = oneshot::channel(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let sock_file = tmp_dir.path().join("source.sock"); + let server_info_file = tmp_dir.path().join("source-server-info"); - let start_time = tokio::time::Instant::now(); - let results = client.transform(messages).await?; + let server_info = server_info_file.clone(); + let server_socket = sock_file.clone(); + let source_handle = tokio::spawn(async move { + // a simple source which generates total of 100 messages + source::Server::new(SimpleSource::new(100)) + .with_socket_file(server_socket) + .with_server_info_file(server_info) + .start_with_shutdown(src_shutdown_rx) + .await + .unwrap() + }); + + // wait for the server to start + // TODO: flaky + tokio::time::sleep(Duration::from_millis(100)).await; + + let client = SourceClient::new(create_rpc_channel(sock_file).await.unwrap()); - debug!( - "Transformer latency - {}ms", - start_time.elapsed().as_millis() + let (src_read, src_ack, lag_reader) = new_source(client, 5, Duration::from_millis(1000)) + .await + .map_err(|e| panic!("failed to create source reader: {:?}", e)) + .unwrap(); + + let source = Source::new( + 5, + SourceType::UserDefinedSource(src_read, src_ack, lag_reader), ); - Ok(results) - } + // create a js writer + let js_url = "localhost:4222"; + // Create JetStream context + let client = async_nats::connect(js_url).await.unwrap(); + let context = jetstream::new(client); - /// Writes messages to the jetstream, it writes to all the downstream buffers. 
- async fn write_to_jetstream(&mut self, messages: Vec) -> Result<(), Error> { - let start_time = tokio::time::Instant::now(); - if messages.is_empty() { - return Ok(()); - } + let stream_name = "test_source_forwarder"; + let _stream = context + .get_or_create_stream(stream::Config { + name: stream_name.into(), + subjects: vec![stream_name.into()], + max_message_size: 1024, + ..Default::default() + }) + .await + .unwrap(); - let mut results = Vec::new(); + let _consumer = context + .create_consumer_on_stream( + consumer::Config { + name: Some(stream_name.to_string()), + ack_policy: consumer::AckPolicy::Explicit, + ..Default::default() + }, + stream_name, + ) + .await + .unwrap(); - // write to all the buffers - for i in 0..messages.len() { - for writers in self.buffer_writers.values() { - // write to the stream writers in round-robin fashion - let writer = &writers[i % writers.len()]; // FIXME: we need to shuffle based on the message id hash - let result = writer.write(messages[i].clone()).await?; - results.push(result); - } - } + let writer = ISBWriter::new( + 10, + vec![BufferWriterConfig { + streams: vec![(stream_name.to_string(), 0)], + ..Default::default() + }], + context.clone(), + cln_token.clone(), + ) + .await; + + // create a transformer + let (st_shutdown_tx, st_shutdown_rx) = oneshot::channel(); + let tmp_dir = TempDir::new().unwrap(); + let sock_file = tmp_dir.path().join("sourcetransform.sock"); + let server_info_file = tmp_dir.path().join("sourcetransformer-server-info"); - // await for all the result futures to complete - // FIXME: we should not await for the results to complete, that will make it sequential - for result in results { - // we can use the ack to publish watermark etc - result + let server_info = server_info_file.clone(); + let server_socket = sock_file.clone(); + let transformer_handle = tokio::spawn(async move { + sourcetransform::Server::new(SimpleTransformer) + .with_socket_file(server_socket) + .with_server_info_file(server_info) + .start_with_shutdown(st_shutdown_rx) .await - .map_err(|e| Error::Forwarder(format!("Failed to write to jetstream {:?}", e)))??; - } - debug!( - len = messages.len(), - elapsed_ms = start_time.elapsed().as_millis(), - "Wrote messages to jetstream", + .expect("server failed"); + }); + + // wait for the server to start + tokio::time::sleep(Duration::from_millis(100)).await; + + let client = SourceTransformClient::new(create_rpc_channel(sock_file).await.unwrap()); + let transformer = Transformer::new(10, 10, client).await.unwrap(); + + // create the forwarder with the source, transformer, and writer + let forwarder = SourceForwarderBuilder::new(source.clone(), writer, cln_token.clone()) + .with_transformer(transformer) + .build(); + + let forwarder_handle: JoinHandle> = tokio::spawn(async move { + forwarder.start().await?; + Ok(()) + }); + + // wait for one sec to check if the pending becomes zero, because all the messages + // should be read and acked; if it doesn't, then fail the test + let tokio_result = tokio::time::timeout(Duration::from_secs(1), async move { + loop { + let pending = source.pending().await.unwrap(); + if pending == Some(0) { + break; + } + tokio::time::sleep(Duration::from_millis(10)).await; + } + }) + .await; + + assert!( + tokio_result.is_ok(), + "Timeout occurred before pending became zero" ); - Ok(()) + cln_token.cancel(); + forwarder_handle.await.unwrap().unwrap(); + st_shutdown_tx.send(()).unwrap(); + src_shutdown_tx.send(()).unwrap(); + source_handle.await.unwrap(); + transformer_handle.await.unwrap(); 
} } diff --git a/rust/numaflow-core/src/pipeline/isb.rs b/rust/numaflow-core/src/pipeline/isb.rs index 53ab02707..e59d0b983 100644 --- a/rust/numaflow-core/src/pipeline/isb.rs +++ b/rust/numaflow-core/src/pipeline/isb.rs @@ -1 +1,3 @@ +// TODO: implement a simple ISB and a trait for ISB + pub(crate) mod jetstream; diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream.rs b/rust/numaflow-core/src/pipeline/isb/jetstream.rs index ccba63a8d..9f3635861 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream.rs @@ -1,13 +1,18 @@ use async_nats::jetstream::Context; use bytes::BytesMut; -use tokio::sync::mpsc::Receiver; -use tokio::sync::{mpsc, oneshot}; +use tokio::task::JoinHandle; +use tokio_stream::wrappers::ReceiverStream; +use tokio_stream::StreamExt; use tokio_util::sync::CancellationToken; +use tracing::info; use crate::config::pipeline::isb::BufferWriterConfig; use crate::error::Error; -use crate::message::{Message, Offset}; -use crate::pipeline::isb::jetstream::writer::JetstreamWriter; +use crate::message::{ReadAck, ReadMessage}; +use crate::metrics::{pipeline_isb_metric_labels, pipeline_metrics}; +use crate::pipeline::isb::jetstream::writer::{ + JetstreamWriter, PafResolver, ResolveAndPublishResult, +}; use crate::Result; /// JetStream Writer is responsible for writing messages to JetStream ISB. @@ -19,115 +24,114 @@ pub(super) mod writer; pub(crate) mod reader; -/// ISB Writer accepts an Actor pattern based messages. -#[derive(Debug)] -struct ActorMessage { - /// Write the messages to ISB - message: Message, - /// once the message has been successfully written, we can let the sender know. - /// This can be used to trigger Acknowledgement of the message from the Reader. - // FIXME: concrete type and better name - callee_tx: oneshot::Sender>, -} - -impl ActorMessage { - fn new(message: Message, callee_tx: oneshot::Sender>) -> Self { - Self { message, callee_tx } - } -} +/// Stream is a combination of stream name and partition id. +type Stream = (String, u16); -/// WriterActor will handle the messages and write them to the Jetstream ISB. -struct WriterActor { - js_writer: JetstreamWriter, - receiver: Receiver, +/// StreamingJetstreamWriter is a streaming version of JetstreamWriter. It accepts a stream of messages +/// and writes them to Jetstream ISB. It also has a PAF resolver actor to resolve the PAFs. +#[derive(Clone)] +pub(crate) struct ISBWriter { + paf_concurrency: usize, + config: Vec, + writer: JetstreamWriter, } -impl WriterActor { - fn new(js_writer: JetstreamWriter, receiver: Receiver) -> Self { - Self { - js_writer, - receiver, - } - } - - async fn handle_message(&mut self, msg: ActorMessage) { - let payload: BytesMut = msg - .message - .try_into() - .expect("message serialization should not fail"); - self.js_writer.write(payload.into(), msg.callee_tx).await - } - - async fn run(&mut self) { - while let Some(msg) = self.receiver.recv().await { - self.handle_message(msg).await; - } - } -} - -/// WriterHandle is the handle to the WriterActor. It exposes a method to send messages to the Actor. 
-pub(crate) struct WriterHandle { - sender: mpsc::Sender, -} - -impl WriterHandle { - pub(crate) fn new( - stream_name: String, - partition_idx: u16, - config: BufferWriterConfig, +impl ISBWriter { + pub(crate) async fn new( + paf_concurrency: usize, + config: Vec, js_ctx: Context, - batch_size: usize, - paf_batch_size: usize, cancel_token: CancellationToken, ) -> Self { - let (sender, receiver) = mpsc::channel::(batch_size); + info!(?config, paf_concurrency, "Streaming JetstreamWriter",); let js_writer = JetstreamWriter::new( - stream_name, - partition_idx, - config, + // flatten the streams across the config + config.iter().flat_map(|c| c.streams.clone()).collect(), + config.first().unwrap().clone(), js_ctx, - paf_batch_size, cancel_token.clone(), ); - let mut actor = WriterActor::new(js_writer.clone(), receiver); - - tokio::spawn(async move { - actor.run().await; - }); - Self { sender } + Self { + config, + writer: js_writer, + paf_concurrency, + } } - pub(crate) async fn write( + /// Starts reading messages from the stream and writes them to Jetstream ISB. + pub(crate) async fn streaming_write( &self, - message: Message, - ) -> Result>> { - let (sender, receiver) = oneshot::channel(); - let msg = ActorMessage::new(message, sender); - self.sender - .send(msg) - .await - .map_err(|e| Error::ISB(format!("Failed to write message to actor channel: {}", e)))?; - - Ok(receiver) + messages_stream: ReceiverStream, + ) -> Result>> { + let handle: JoinHandle> = tokio::spawn({ + let writer = self.writer.clone(); + let paf_concurrency = self.paf_concurrency; + let config = self.config.clone(); + let mut messages_stream = messages_stream; + let mut index = 0; + + async move { + let paf_resolver = PafResolver::new(paf_concurrency, writer.clone()); + while let Some(read_message) = messages_stream.next().await { + // if message needs to be dropped, ack and continue + // TODO: add metric for dropped count + if read_message.message.dropped() { + read_message + .ack + .send(ReadAck::Ack) + .map_err(|e| Error::ISB(format!("Failed to send ack: {:?}", e)))?; + continue; + } + let mut pafs = vec![]; + + // FIXME(CF): This is a temporary solution to round-robin the streams + for buffer in &config { + let payload: BytesMut = read_message + .message + .clone() + .try_into() + .expect("message serialization should not fail"); + let stream = buffer.streams.get(index).unwrap(); + index = (index + 1) % buffer.streams.len(); + + let paf = writer.write(stream.clone(), payload.into()).await; + pafs.push((stream.clone(), paf)); + } + + pipeline_metrics() + .forwarder + .write_total + .get_or_create(pipeline_isb_metric_labels()) + .inc(); + + paf_resolver + .resolve_pafs(ResolveAndPublishResult { + pafs, + payload: read_message.message.value.clone().into(), + ack_tx: read_message.ack, + }) + .await?; + } + Ok(()) + } + }); + Ok(handle) } } #[cfg(test)] mod tests { use std::collections::HashMap; - use std::time::Duration; use async_nats::jetstream; - use async_nats::jetstream::stream; + use async_nats::jetstream::{consumer, stream}; use chrono::Utc; use tokio::sync::oneshot; - use tokio::time::Instant; - use tracing::info; use super::*; - use crate::message::{Message, MessageID}; + use crate::message::{Message, MessageID, ReadAck}; #[cfg(feature = "nats-tests")] #[tokio::test] @@ -138,29 +142,43 @@ mod tests { let client = async_nats::connect(js_url).await.unwrap(); let context = jetstream::new(client); - let stream_name = "default"; + let stream_name = "test_publish_messages"; let _stream = context 
.get_or_create_stream(stream::Config { name: stream_name.into(), subjects: vec![stream_name.into()], + max_messages: 1000, ..Default::default() }) .await .unwrap(); - // Create ISBMessageHandler - let batch_size = 500; - let handler = WriterHandle::new( - stream_name.to_string(), - 0, - Default::default(), + let _consumer = context + .create_consumer_on_stream( + consumer::Config { + name: Some(stream_name.to_string()), + ack_policy: consumer::AckPolicy::Explicit, + ..Default::default() + }, + stream_name, + ) + .await + .unwrap(); + + let writer = ISBWriter::new( + 10, + vec![BufferWriterConfig { + streams: vec![(stream_name.to_string(), 0)], + max_length: 1000, + ..Default::default() + }], context.clone(), - batch_size, - 1000, cln_token.clone(), - ); + ) + .await; - let mut result_receivers = Vec::new(); + let mut ack_receivers = Vec::new(); + let (messages_tx, messages_rx) = tokio::sync::mpsc::channel(500); // Publish 500 messages for i in 0..500 { let message = Message { @@ -176,20 +194,22 @@ mod tests { headers: HashMap::new(), }; let (sender, receiver) = oneshot::channel(); - let msg = ActorMessage { + let read_message = ReadMessage { message, - callee_tx: sender, + ack: sender, }; - handler.sender.send(msg).await.unwrap(); - result_receivers.push(receiver); + messages_tx.send(read_message).await.unwrap(); + ack_receivers.push(receiver); } + drop(messages_tx); - // FIXME: Uncomment after we start awaiting for PAFs - //for receiver in result_receivers { - // let result = receiver.await.unwrap(); - // assert!(result.is_ok()); - //} + let receiver_stream = ReceiverStream::new(messages_rx); + let _handle = writer.streaming_write(receiver_stream).await.unwrap(); + for receiver in ack_receivers { + let result = receiver.await.unwrap(); + assert_eq!(result, ReadAck::Ack); + } context.delete_stream(stream_name).await.unwrap(); } @@ -212,18 +232,32 @@ mod tests { .await .unwrap(); + let _consumer = context + .create_consumer_on_stream( + consumer::Config { + name: Some(stream_name.to_string()), + ack_policy: consumer::AckPolicy::Explicit, + ..Default::default() + }, + stream_name, + ) + .await + .unwrap(); + let cancel_token = CancellationToken::new(); - let handler = WriterHandle::new( - stream_name.to_string(), - 0, - Default::default(), + let writer = ISBWriter::new( + 10, + vec![BufferWriterConfig { + streams: vec![(stream_name.to_string(), 0)], + ..Default::default() + }], context.clone(), - 500, - 1000, cancel_token.clone(), - ); + ) + .await; - let mut receivers = Vec::new(); + let mut ack_receivers = Vec::new(); + let (tx, rx) = tokio::sync::mpsc::channel(500); // Publish 100 messages successfully for i in 0..100 { let message = Message { @@ -238,14 +272,23 @@ mod tests { }, headers: HashMap::new(), }; - receivers.push(handler.write(message).await.unwrap()); + let (sender, receiver) = oneshot::channel(); + let read_message = ReadMessage { + message, + ack: sender, + }; + tx.send(read_message).await.unwrap(); + ack_receivers.push(receiver); } + let receiver_stream = ReceiverStream::new(rx); + let _handle = writer.streaming_write(receiver_stream).await.unwrap(); + // Attempt to publish the 101th message, which should get stuck in the retry loop // because the max message size is set to 1024 let message = Message { keys: vec!["key_101".to_string()], - value: vec![0; 1024].into(), + value: vec![0; 1025].into(), offset: None, event_time: Utc::now(), id: MessageID { @@ -255,111 +298,27 @@ mod tests { }, headers: HashMap::new(), }; - let receiver = handler.write(message).await.unwrap(); - 
receivers.push(receiver); + let (sender, receiver) = oneshot::channel(); + let read_message = ReadMessage { + message, + ack: sender, + }; + tx.send(read_message).await.unwrap(); + ack_receivers.push(receiver); + drop(tx); // Cancel the token to exit the retry loop cancel_token.cancel(); // Check the results - // FIXME: Uncomment after we start awaiting for PAFs - //for (i, receiver) in receivers.into_iter().enumerate() { - // let result = receiver.await.unwrap(); - // if i < 100 { - // assert!(result.is_ok()); - // } else { - // assert!(result.is_err()); - // } - //} - - context.delete_stream(stream_name).await.unwrap(); - } - - #[cfg(feature = "nats-tests")] - #[ignore] - #[tokio::test] - async fn benchmark_publish_messages() { - let js_url = "localhost:4222"; - // Create JetStream context - let client = async_nats::connect(js_url).await.unwrap(); - let context = jetstream::new(client); - - let stream_name = "benchmark_publish"; - let _stream = context - .get_or_create_stream(stream::Config { - name: stream_name.into(), - subjects: vec![stream_name.into()], - ..Default::default() - }) - .await - .unwrap(); - - let cancel_token = CancellationToken::new(); - let handler = WriterHandle::new( - stream_name.to_string(), - 0, - Default::default(), - context.clone(), - 500, - 1000, - cancel_token.clone(), - ); - - let (tx, mut rx) = mpsc::channel(100); - let test_start_time = Instant::now(); - let duration = Duration::from_secs(10); - - // Task to publish messages - let publish_task = tokio::spawn(async move { - let mut i = 0; - let mut sent_count = 0; - let mut start_time = Instant::now(); - while Instant::now().duration_since(test_start_time) < duration { - let message = Message { - keys: vec![format!("key_{}", i)], - value: format!("message {}", i).as_bytes().to_vec().into(), - offset: None, - event_time: Utc::now(), - id: MessageID { - vertex_name: "".to_string(), - offset: format!("offset_{}", i), - index: i, - }, - headers: HashMap::new(), - }; - tx.send(handler.write(message).await.unwrap()) - .await - .unwrap(); - sent_count += 1; - i += 1; - - if start_time.elapsed().as_secs() >= 1 { - info!("Messages sent: {}", sent_count); - sent_count = 0; - start_time = Instant::now(); - } + for (i, receiver) in ack_receivers.into_iter().enumerate() { + let result = receiver.await.unwrap(); + if i < 100 { + assert_eq!(result, ReadAck::Ack); + } else { + assert_eq!(result, ReadAck::Nak); } - }); - - // Task to await responses - let await_task = tokio::spawn(async move { - let mut start_time = Instant::now(); - let mut count = 0; - while let Some(receiver) = rx.recv().await { - if receiver.await.unwrap().is_ok() { - count += 1; - } - - if start_time.elapsed().as_secs() >= 1 { - info!("Messages received: {}", count); - count = 0; - start_time = Instant::now(); - } - } - }); - - let _ = tokio::join!(publish_task, await_task); - + } context.delete_stream(stream_name).await.unwrap(); } } diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs b/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs index 46faf2e95..6e0aff77b 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/reader.rs @@ -1,30 +1,33 @@ +use std::fmt; use std::time::Duration; use async_nats::jetstream::{ consumer::PullConsumer, AckKind, Context, Message as JetstreamMessage, }; - -use tokio::sync::mpsc::Receiver; use tokio::sync::{mpsc, oneshot}; use tokio::task::JoinHandle; use tokio::time::{self, Instant}; +use tokio_stream::wrappers::ReceiverStream; use 
tokio_stream::StreamExt; use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, warn}; +use tracing::{error, info}; use crate::config::pipeline::isb::BufferReaderConfig; use crate::config::pipeline::PipelineConfig; use crate::error::Error; use crate::message::{IntOffset, Message, Offset, ReadAck, ReadMessage}; -use crate::metrics::{forward_pipeline_metrics, pipeline_forward_read_metric_labels}; +use crate::metrics::{ + pipeline_forward_metric_labels, pipeline_isb_metric_labels, pipeline_metrics, +}; use crate::Result; -// The JetstreamReader is a handle to the background actor that continuously fetches messages from Jetstream. -// It can be used to cancel the background task and stop reading from Jetstream. -// The sender end of the channel is not stored in this struct, since the struct is clone-able and the mpsc channel is only closed when all the senders are dropped. -// Storing the Sender end of channel in this struct would make it difficult to close the channel with `cancel` method. +/// The JetstreamReader is a handle to the background actor that continuously fetches messages from Jetstream. +/// It can be used to cancel the background task and stop reading from Jetstream. +/// The sender end of the channel is not stored in this struct, since the struct is clone-able and the mpsc channel is only closed when all the senders are dropped. +/// Storing the Sender end of channel in this struct would make it difficult to close the channel with `cancel` method. #[derive(Clone)] pub(crate) struct JetstreamReader { + stream_name: &'static str, partition_idx: u16, config: BufferReaderConfig, consumer: PullConsumer, @@ -32,7 +35,7 @@ pub(crate) struct JetstreamReader { impl JetstreamReader { pub(crate) async fn new( - stream_name: String, + stream_name: &'static str, partition_idx: u16, js_ctx: Context, config: BufferReaderConfig, @@ -58,141 +61,127 @@ impl JetstreamReader { config.wip_ack_interval = wip_ack_interval; Ok(Self { + stream_name, partition_idx, config: config.clone(), consumer, }) } - // When we encounter an error, we log the error and return from the function. This drops the sender end of the channel. - // The closing of the channel should propagate to the receiver end and the receiver should exit gracefully. - // Within the loop, we only consider cancellationToken cancellation during the permit reservation and fetching messages, - // since rest of the operations should finish immediately. - pub(crate) async fn start( + /// streaming_read is a background task that continuously fetches messages from Jetstream and + /// emits them on a channel. When we encounter an error, we log the error and return from the + /// function. This drops the sender end of the channel. The closing of the channel should propagate + /// to the receiver end and the receiver should exit gracefully. Within the loop, we only consider + /// cancellationToken cancellation during the permit reservation and fetching messages, + /// since rest of the operations should finish immediately. 
+ pub(crate) async fn streaming_read( &self, cancel_token: CancellationToken, pipeline_config: &PipelineConfig, - ) -> Result<(Receiver, JoinHandle>)> { - // FIXME: factor of 2 should be configurable, at the least a const + ) -> Result<(ReceiverStream, JoinHandle>)> { let (messages_tx, messages_rx) = mpsc::channel(2 * pipeline_config.batch_size); let handle: JoinHandle> = tokio::spawn({ - let this = self.clone(); - let pipeline_config = pipeline_config.clone(); + let consumer = self.consumer.clone(); + let partition_idx = self.partition_idx; + let config = self.config.clone(); + let cancel_token = cancel_token.clone(); + let stream_name = self.stream_name; async move { - // FIXME: - let partition: &str = pipeline_config - .from_vertex_config - .first() - .unwrap() - .reader_config - .streams - .first() - .unwrap() - .0 - .as_ref(); - - let labels = pipeline_forward_read_metric_labels( - pipeline_config.pipeline_name.as_ref(), - partition, - pipeline_config.vertex_name.as_ref(), - pipeline_config.vertex_config.to_string().as_ref(), - pipeline_config.replica, - ); - - let chunk_stream = this - .consumer - .messages() - .await - .unwrap() - .chunks_timeout(pipeline_config.batch_size, pipeline_config.read_timeout); - - tokio::pin!(chunk_stream); - - // The .next() call will not return if there is no data even if read_timeout is - // reached. - let mut total_messages = 0; - let mut chunk_time = Instant::now(); + let labels = pipeline_forward_metric_labels("Sink", Some(stream_name)); + + let mut message_stream = consumer.messages().await.map_err(|e| { + Error::ISB(format!( + "Failed to get message stream from Jetstream: {:?}", + e + )) + })?; + let mut start_time = Instant::now(); - while let Some(messages) = chunk_stream.next().await { - debug!( - len = messages.len(), - elapsed_ms = chunk_time.elapsed().as_millis(), - "Received messages from Jetstream", - ); - total_messages += messages.len(); - for message in messages { - let jetstream_message = message.map_err(|e| { - Error::ISB(format!( - "Error while fetching message from Jetstream: {:?}", - e - )) - })?; - - let msg_info = jetstream_message.info().map_err(|e| { - Error::ISB(format!( - "Error while fetching message info from Jetstream: {:?}", - e - )) - })?; - - let mut message: Message = - jetstream_message.payload.clone().try_into().map_err(|e| { - Error::ISB(format!( - "Error while converting Jetstream message to Message: {:?}", - e - )) + let mut total_messages = 0; + loop { + tokio::select! { + _ = cancel_token.cancelled() => { // should we drain from the stream when token is cancelled? 
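                        // Exiting without draining should be safe here: anything already
                        // fetched but not yet acked stays pending on the consumer, and
                        // JetStream redelivers it once the ack wait expires.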
+ info!(?stream_name, "Cancellation token received, stopping the reader."); + break; + } + message = message_stream.next() => { + let Some(message) = message else { + // stream has been closed because we got none + info!(?stream_name, "Stream has been closed"); + break; + }; + + let jetstream_message = match message { + Ok(message) => message, + Err(e) => { + error!(?e, ?stream_name, "Failed to fetch messages from the Jetstream"); + continue; + } + }; + let msg_info = match jetstream_message.info() { + Ok(info) => info, + Err(e) => { + error!(?e, ?stream_name, "Failed to get message info from Jetstream"); + continue; + } + }; + + let mut message: Message = match jetstream_message.payload.clone().try_into() { + Ok(message) => message, + Err(e) => { + error!( + ?e, ?stream_name, ?jetstream_message, + "Failed to parse message payload received from Jetstream", + ); + continue; + } + }; + + message.offset = Some(Offset::Int(IntOffset::new( + msg_info.stream_sequence, + partition_idx, + ))); + + let (ack_tx, ack_rx) = oneshot::channel(); + tokio::spawn(Self::start_work_in_progress( + jetstream_message, + ack_rx, + config.wip_ack_interval, + )); + + let read_message = ReadMessage { + message, + ack: ack_tx, + }; + + messages_tx.send(read_message).await.map_err(|e| { + Error::ISB(format!("Error while sending message to channel: {:?}", e)) })?; - message.offset = Some(Offset::Int(IntOffset::new( - msg_info.stream_sequence, - this.partition_idx, - ))); - - let (ack_tx, ack_rx) = oneshot::channel(); - - tokio::spawn(Self::start_work_in_progress( - jetstream_message, - ack_rx, - this.config.wip_ack_interval, - )); - - let read_message = ReadMessage { - message, - ack: ack_tx, - }; - - messages_tx.send(read_message).await.map_err(|e| { - Error::ISB(format!("Error while sending message to channel: {:?}", e)) - })?; - - forward_pipeline_metrics() - .forwarder - .data_read - .get_or_create(labels) - .inc(); - - if start_time.elapsed() >= Duration::from_millis(1000) { - info!( - len = total_messages, - elapsed_ms = start_time.elapsed().as_millis(), - "Total messages read from Jetstream" - ); - start_time = Instant::now(); - total_messages = 0; + pipeline_metrics() + .forwarder + .read_total + .get_or_create(labels) + .inc(); + + if start_time.elapsed() >= Duration::from_millis(1000) { + info!( + "Total messages read from Jetstream in {:?} seconds: {}", + start_time.elapsed(), + total_messages + ); + start_time = Instant::now(); + total_messages = 0; + } } } - if cancel_token.is_cancelled() { - warn!("Cancellation token is cancelled. Exiting JetstreamReader"); - break; - } - chunk_time = Instant::now(); } Ok(()) } }); - Ok((messages_rx, handle)) + Ok((ReceiverStream::new(messages_rx), handle)) } // Intended to be run as background task which will continuously send InProgress acks to Jetstream. 
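The hunk below only adds ack metrics inside that task; for orientation, this is roughly the shape of the loop it instruments (a sketch under assumed types, not the verbatim body):

    loop {
        tokio::select! {
            _ = interval.tick() => {
                // keep extending the ack deadline while the message is in flight
                let _ = msg.ack_with(AckKind::Progress).await;
            }
            ack = &mut ack_rx => {
                match ack {
                    Ok(ReadAck::Ack) => { let _ = msg.double_ack().await; }
                    _ => { let _ = msg.ack_with(AckKind::Nak(None)).await; }
                }
                return;
            }
        }
    }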
@@ -204,6 +193,7 @@ impl JetstreamReader { tick: Duration, ) { let mut interval = time::interval_at(Instant::now() + tick, tick); + let start = Instant::now(); loop { let wip = async { @@ -232,6 +222,17 @@ impl JetstreamReader { if let Err(e) = ack_result { error!(?e, "Failed to send Ack to Jetstream for message"); } + pipeline_metrics() + .forwarder + .ack_time + .get_or_create(pipeline_isb_metric_labels()) + .observe(start.elapsed().as_micros() as f64); + + pipeline_metrics() + .forwarder + .ack_total + .get_or_create(pipeline_isb_metric_labels()) + .inc(); return; } ReadAck::Nak => { @@ -246,6 +247,16 @@ impl JetstreamReader { } } +impl fmt::Display for JetstreamReader { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "JetstreamReader {{ stream_name: {}, partition_idx: {}, config: {:?} }}", + self.stream_name, self.partition_idx, self.config + ) + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; @@ -254,10 +265,10 @@ mod tests { use async_nats::jetstream::{consumer, stream}; use bytes::BytesMut; use chrono::Utc; - use tracing::info; use super::*; - use crate::message::{Message, MessageID, Offset}; + use crate::message::ReadAck::Ack; + use crate::message::{Message, MessageID}; use crate::pipeline::isb::jetstream::writer::JetstreamWriter; #[cfg(feature = "nats-tests")] @@ -268,7 +279,7 @@ mod tests { let client = async_nats::connect(js_url).await.unwrap(); let context = jetstream::new(client); - let stream_name = "test_cancellation-2"; + let stream_name = "test_jetstream_read"; context .get_or_create_stream(stream::Config { name: stream_name.into(), @@ -296,14 +307,9 @@ mod tests { streams: vec![], wip_ack_interval: Duration::from_millis(5), }; - let js_reader = JetstreamReader::new( - stream_name.to_string(), - 0, - context.clone(), - buf_reader_config, - ) - .await - .unwrap(); + let js_reader = JetstreamReader::new(stream_name, 0, context.clone(), buf_reader_config) + .await + .unwrap(); let pipeline_cfg_base64 = "eyJtZXRhZGF0YSI6eyJuYW1lIjoic2ltcGxlLXBpcGVsaW5lLW91dCIsIm5hbWVzcGFjZSI6ImRlZmF1bHQiLCJjcmVhdGlvblRpbWVzdGFtcCI6bnVsbH0sInNwZWMiOnsibmFtZSI6Im91dCIsInNpbmsiOnsiYmxhY2tob2xlIjp7fSwicmV0cnlTdHJhdGVneSI6eyJvbkZhaWx1cmUiOiJyZXRyeSJ9fSwibGltaXRzIjp7InJlYWRCYXRjaFNpemUiOjUwMCwicmVhZFRpbWVvdXQiOiIxcyIsImJ1ZmZlck1heExlbmd0aCI6MzAwMDAsImJ1ZmZlclVzYWdlTGltaXQiOjgwfSwic2NhbGUiOnsibWluIjoxfSwidXBkYXRlU3RyYXRlZ3kiOnsidHlwZSI6IlJvbGxpbmdVcGRhdGUiLCJyb2xsaW5nVXBkYXRlIjp7Im1heFVuYXZhaWxhYmxlIjoiMjUlIn19LCJwaXBlbGluZU5hbWUiOiJzaW1wbGUtcGlwZWxpbmUiLCJpbnRlclN0ZXBCdWZmZXJTZXJ2aWNlTmFtZSI6IiIsInJlcGxpY2FzIjowLCJmcm9tRWRnZXMiOlt7ImZyb20iOiJpbiIsInRvIjoib3V0IiwiY29uZGl0aW9ucyI6bnVsbCwiZnJvbVZlcnRleFR5cGUiOiJTb3VyY2UiLCJmcm9tVmVydGV4UGFydGl0aW9uQ291bnQiOjEsImZyb21WZXJ0ZXhMaW1pdHMiOnsicmVhZEJhdGNoU2l6ZSI6NTAwLCJyZWFkVGltZW91dCI6IjFzIiwiYnVmZmVyTWF4TGVuZ3RoIjozMDAwMCwiYnVmZmVyVXNhZ2VMaW1pdCI6ODB9LCJ0b1ZlcnRleFR5cGUiOiJTaW5rIiwidG9WZXJ0ZXhQYXJ0aXRpb25Db3VudCI6MSwidG9WZXJ0ZXhMaW1pdHMiOnsicmVhZEJhdGNoU2l6ZSI6NTAwLCJyZWFkVGltZW91dCI6IjFzIiwiYnVmZmVyTWF4TGVuZ3RoIjozMDAwMCwiYnVmZmVyVXNhZ2VMaW1pdCI6ODB9fV0sIndhdGVybWFyayI6eyJtYXhEZWxheSI6IjBzIn19LCJzdGF0dXMiOnsicGhhc2UiOiIiLCJyZXBsaWNhcyI6MCwiZGVzaXJlZFJlcGxpY2FzIjowLCJsYXN0U2NhbGVkQXQiOm51bGx9fQ==".to_string(); @@ -311,17 +317,15 @@ mod tests { let pipeline_config = PipelineConfig::load(pipeline_cfg_base64, env_vars).unwrap(); let reader_cancel_token = CancellationToken::new(); let (mut js_reader_rx, js_reader_task) = js_reader - .start(reader_cancel_token.clone(), &pipeline_config) + 
.streaming_read(reader_cancel_token.clone(), &pipeline_config) .await .unwrap(); let writer_cancel_token = CancellationToken::new(); let writer = JetstreamWriter::new( - stream_name.to_string(), - 0, + vec![(stream_name.to_string(), 0)], Default::default(), context.clone(), - 5000, writer_cancel_token.clone(), ); @@ -338,18 +342,20 @@ mod tests { }, headers: HashMap::new(), }; - let (success_tx, success_rx) = oneshot::channel::>(); let message_bytes: BytesMut = message.try_into().unwrap(); - writer.write(message_bytes.into(), success_tx).await; - success_rx.await.unwrap().unwrap(); + writer + .write((stream_name.to_string(), 0), message_bytes.into()) + .await + .await + .unwrap(); } - info!("Sent 10 messages"); + // Cancel the token to exit the retry loop writer_cancel_token.cancel(); let mut buffer = vec![]; for _ in 0..10 { - let Some(val) = js_reader_rx.recv().await else { + let Some(val) = js_reader_rx.next().await else { break; }; buffer.push(val); @@ -362,12 +368,112 @@ mod tests { ); reader_cancel_token.cancel(); - // The token cancellation won't abort the task since we are using chunks_timeout in - // Jetstream reader. - // js_reader_task.await.unwrap().unwrap(); - js_reader_task.abort(); - let _ = js_reader_task.await; - assert!(js_reader_rx.is_closed()); + js_reader_task.await.unwrap().unwrap(); + + context.delete_stream(stream_name).await.unwrap(); + } + + #[cfg(feature = "nats-tests")] + #[tokio::test] + async fn test_jetstream_ack() { + let js_url = "localhost:4222"; + // Create JetStream context + let client = async_nats::connect(js_url).await.unwrap(); + let context = jetstream::new(client); + + let stream_name = "test_ack"; + context + .get_or_create_stream(stream::Config { + name: stream_name.into(), + subjects: vec![stream_name.into()], + max_message_size: 1024, + ..Default::default() + }) + .await + .unwrap(); + + let _consumer = context + .create_consumer_on_stream( + consumer::Config { + name: Some(stream_name.to_string()), + ack_policy: consumer::AckPolicy::Explicit, + ..Default::default() + }, + stream_name, + ) + .await + .unwrap(); + + let buf_reader_config = BufferReaderConfig { + partitions: 0, + streams: vec![], + wip_ack_interval: Duration::from_millis(5), + }; + let js_reader = JetstreamReader::new(stream_name, 0, context.clone(), buf_reader_config) + .await + .unwrap(); + + let pipeline_cfg_base64 = 
"eyJtZXRhZGF0YSI6eyJuYW1lIjoic2ltcGxlLXBpcGVsaW5lLW91dCIsIm5hbWVzcGFjZSI6ImRlZmF1bHQiLCJjcmVhdGlvblRpbWVzdGFtcCI6bnVsbH0sInNwZWMiOnsibmFtZSI6Im91dCIsInNpbmsiOnsiYmxhY2tob2xlIjp7fSwicmV0cnlTdHJhdGVneSI6eyJvbkZhaWx1cmUiOiJyZXRyeSJ9fSwibGltaXRzIjp7InJlYWRCYXRjaFNpemUiOjUwMCwicmVhZFRpbWVvdXQiOiIxcyIsImJ1ZmZlck1heExlbmd0aCI6MzAwMDAsImJ1ZmZlclVzYWdlTGltaXQiOjgwfSwic2NhbGUiOnsibWluIjoxfSwidXBkYXRlU3RyYXRlZ3kiOnsidHlwZSI6IlJvbGxpbmdVcGRhdGUiLCJyb2xsaW5nVXBkYXRlIjp7Im1heFVuYXZhaWxhYmxlIjoiMjUlIn19LCJwaXBlbGluZU5hbWUiOiJzaW1wbGUtcGlwZWxpbmUiLCJpbnRlclN0ZXBCdWZmZXJTZXJ2aWNlTmFtZSI6IiIsInJlcGxpY2FzIjowLCJmcm9tRWRnZXMiOlt7ImZyb20iOiJpbiIsInRvIjoib3V0IiwiY29uZGl0aW9ucyI6bnVsbCwiZnJvbVZlcnRleFR5cGUiOiJTb3VyY2UiLCJmcm9tVmVydGV4UGFydGl0aW9uQ291bnQiOjEsImZyb21WZXJ0ZXhMaW1pdHMiOnsicmVhZEJhdGNoU2l6ZSI6NTAwLCJyZWFkVGltZW91dCI6IjFzIiwiYnVmZmVyTWF4TGVuZ3RoIjozMDAwMCwiYnVmZmVyVXNhZ2VMaW1pdCI6ODB9LCJ0b1ZlcnRleFR5cGUiOiJTaW5rIiwidG9WZXJ0ZXhQYXJ0aXRpb25Db3VudCI6MSwidG9WZXJ0ZXhMaW1pdHMiOnsicmVhZEJhdGNoU2l6ZSI6NTAwLCJyZWFkVGltZW91dCI6IjFzIiwiYnVmZmVyTWF4TGVuZ3RoIjozMDAwMCwiYnVmZmVyVXNhZ2VMaW1pdCI6ODB9fV0sIndhdGVybWFyayI6eyJtYXhEZWxheSI6IjBzIn19LCJzdGF0dXMiOnsicGhhc2UiOiIiLCJyZXBsaWNhcyI6MCwiZGVzaXJlZFJlcGxpY2FzIjowLCJsYXN0U2NhbGVkQXQiOm51bGx9fQ==".to_string(); + + let env_vars = [("NUMAFLOW_ISBSVC_JETSTREAM_URL", "localhost:4222")]; + let pipeline_config = PipelineConfig::load(pipeline_cfg_base64, env_vars).unwrap(); + let reader_cancel_token = CancellationToken::new(); + let (mut js_reader_rx, js_reader_task) = js_reader + .streaming_read(reader_cancel_token.clone(), &pipeline_config) + .await + .unwrap(); + + let writer_cancel_token = CancellationToken::new(); + let writer = JetstreamWriter::new( + vec![(stream_name.to_string(), 0)], + Default::default(), + context.clone(), + writer_cancel_token.clone(), + ); + + // write 5 messages + for i in 0..5 { + let message = Message { + keys: vec![format!("key_{}", i)], + value: format!("message {}", i).as_bytes().to_vec().into(), + offset: None, + event_time: Utc::now(), + id: MessageID { + vertex_name: "vertex".to_string(), + offset: format!("offset_{}", i), + index: i, + }, + headers: HashMap::new(), + }; + let message_bytes: BytesMut = message.try_into().unwrap(); + writer + .write((stream_name.to_string(), 0), message_bytes.into()) + .await + .await + .unwrap(); + } + // Cancel the token to exit the retry loop + writer_cancel_token.cancel(); + + for _ in 0..5 { + let Some(val) = js_reader_rx.next().await else { + break; + }; + val.ack.send(Ack).unwrap() + } + + let mut consumer: PullConsumer = context + .get_consumer_from_stream(stream_name, stream_name) + .await + .unwrap(); + + let consumer_info = consumer.info().await.unwrap(); + + assert_eq!(consumer_info.num_pending, 0); + assert_eq!(consumer_info.num_ack_pending, 0); + + reader_cancel_token.cancel(); + js_reader_task.await.unwrap().unwrap(); context.delete_stream(stream_name).await.unwrap(); } diff --git a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs index 9fbc7603a..28a8ca6ec 100644 --- a/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs +++ b/rust/numaflow-core/src/pipeline/isb/jetstream/writer.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use std::time::Duration; @@ -8,51 +9,50 @@ use async_nats::jetstream::publish::PublishAck; use async_nats::jetstream::stream::RetentionPolicy::Limits; use async_nats::jetstream::Context; use bytes::Bytes; -use 
tokio::sync::mpsc::Receiver;
-use tokio::sync::{mpsc, oneshot};
-use tokio::time::sleep;
+use tokio::sync::{oneshot, Semaphore};
+use tokio::time::{sleep, Instant};
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, warn};

 use crate::config::pipeline::isb::BufferWriterConfig;
 use crate::error::Error;
-use crate::message::{IntOffset, Offset};
+use crate::message::{IntOffset, Offset, ReadAck};
+use crate::metrics::{pipeline_isb_metric_labels, pipeline_metrics};
+use crate::pipeline::isb::jetstream::Stream;
 use crate::Result;

 #[derive(Clone, Debug)]
 /// Writes to JetStream ISB. Exposes both write and blocking methods to write messages.
 /// It accepts a cancellation token to stop infinite retries during shutdown.
-pub(super) struct JetstreamWriter {
-    stream_name: String,
-    partition_idx: u16,
+/// JetstreamWriter is a one-to-many mapping of streams to write messages to. It also
+/// maintains the buffer usage metrics for each stream.
+pub(crate) struct JetstreamWriter {
+    streams: Vec<Stream>,
     config: BufferWriterConfig,
     js_ctx: Context,
-    is_full: Arc<AtomicBool>,
-    paf_resolver_tx: mpsc::Sender<ResolveAndPublishResult>,
+    is_full: HashMap<String, Arc<AtomicBool>>,
     cancel_token: CancellationToken,
 }

 impl JetstreamWriter {
     /// Creates a JetStream Writer and a background task to make sure the Write futures (PAFs) are
     /// successful. Batch Size determines the maximum pending futures.
-    pub(super) fn new(
-        stream_name: String,
-        partition_idx: u16,
+    pub(crate) fn new(
+        streams: Vec<Stream>,
         config: BufferWriterConfig,
         js_ctx: Context,
-        paf_batch_size: usize,
         cancel_token: CancellationToken,
     ) -> Self {
-        let (paf_resolver_tx, paf_resolver_rx) =
-            mpsc::channel::<ResolveAndPublishResult>(paf_batch_size);
+        let is_full = streams
+            .iter()
+            .map(|stream| (stream.0.clone(), Arc::new(AtomicBool::new(false))))
+            .collect::<HashMap<String, Arc<AtomicBool>>>();

         let this = Self {
-            stream_name,
-            partition_idx,
+            streams,
             config,
             js_ctx,
-            is_full: Arc::new(AtomicBool::new(false)),
-            paf_resolver_tx,
+            is_full,
             cancel_token,
         };
@@ -64,33 +64,33 @@ impl JetstreamWriter {
             }
         });

-        // spawn a task for resolving PAFs
-        let mut resolver_actor = PafResolverActor::new(this.clone(), paf_resolver_rx);
-        tokio::spawn(async move {
-            resolver_actor.run().await;
-        });
-
         this
     }

-    /// Checks the buffer usage metrics (soft and solid usage) for a given stream.
+    /// Checks the buffer usage metrics (soft and solid usage) for each stream in the streams vector.
     /// If the usage is greater than the bufferUsageLimit, it sets the is_full flag to true.
     async fn check_stream_status(&mut self) {
         let mut interval = tokio::time::interval(self.config.refresh_interval);
         loop {
             tokio::select!
            {
                _ = interval.tick() => {
-                    match Self::fetch_buffer_usage(self.js_ctx.clone(), self.stream_name.as_str(), self.config.max_length).await {
-                        Ok((soft_usage, solid_usage)) => {
-                            if solid_usage >= self.config.usage_limit && soft_usage >= self.config.usage_limit {
-                                self.is_full.store(true, Ordering::Relaxed);
-                            } else {
-                                self.is_full.store(false, Ordering::Relaxed);
+                    for stream in &self.streams {
+                        match Self::fetch_buffer_usage(self.js_ctx.clone(), stream.0.as_str(), self.config.max_length).await {
+                            Ok((soft_usage, solid_usage)) => {
+                                if solid_usage >= self.config.usage_limit && soft_usage >= self.config.usage_limit {
+                                    if let Some(is_full) = self.is_full.get(stream.0.as_str()) {
+                                        is_full.store(true, Ordering::Relaxed);
+                                    }
+                                } else if let Some(is_full) = self.is_full.get(stream.0.as_str()) {
+                                    is_full.store(false, Ordering::Relaxed);
+                                }
+                            }
+                            Err(e) => {
+                                error!(?e, "Failed to fetch buffer usage for stream {}, updating isFull to true", stream.0.as_str());
+                                if let Some(is_full) = self.is_full.get(stream.0.as_str()) {
+                                    is_full.store(true, Ordering::Relaxed);
+                                }
+                            }
-                            }
-                        }
-                        Err(e) => {
-                            error!(?e, "Failed to fetch buffer usage, updating isFull to true");
-                            self.is_full.store(true, Ordering::Relaxed);
                         }
                     }
                 }
@@ -101,7 +101,7 @@ impl JetstreamWriter {
             }
         }
     }

-    /// Fetches the buffer usage metrics (soft and solid usage) for a given stream.
+    /// Fetches the buffer usage metrics (soft and solid usage) for the given stream.
     ///
     /// Soft Usage:
    /// Formula: (NumPending + NumAckPending) / maxLength
@@ -154,20 +154,31 @@ impl JetstreamWriter {
     /// Writes the message to the JetStream ISB and returns a future which can be
     /// awaited to get the PublishAck. It will do infinite retries until the message
     /// gets published successfully. If it returns an error it means it is fatal error
-    pub(super) async fn write(&self, payload: Vec<u8>, callee_tx: oneshot::Sender<Result<Offset>>) {
+    pub(super) async fn write(&self, stream: Stream, payload: Vec<u8>) -> PublishAckFuture {
         let js_ctx = self.js_ctx.clone();
+        let mut counter = 500u64;
+        // loop till we get a PAF, there could be other reasons why PAFs cannot be created.
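        // Note on the throttling in the loop below: `counter` starts at 500, so the very
        // first full-buffer hit logs immediately; afterwards the warning fires only once
        // per 500 iterations, i.e. roughly every 500 * retry_interval while the stream
        // stays full.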
        let paf = loop {
-            // let's write only if the buffer is not full
-            match self.is_full.load(Ordering::Relaxed) {
-                true => {
+            // let's write only if the buffer is not full for the stream
+            match self
+                .is_full
+                .get(&stream.0)
+                .map(|is_full| is_full.load(Ordering::Relaxed))
+            {
+                Some(true) => {
                     // FIXME: add metrics
-                    info!(%self.stream_name, "stream is full");
+                    if counter >= 500 {
+                        warn!(stream=?stream.0, "stream is full (throttled logging)");
+                        counter = 0;
+                    }
+                    counter += 1;
+                    // FIXME: consider buffer-full strategy
                 }
-                false => match js_ctx
-                    .publish(self.stream_name.clone(), Bytes::from(payload.clone()))
+                Some(false) => match js_ctx
+                    .publish(stream.0.clone(), Bytes::from(payload.clone()))
                     .await
                 {
                     Ok(paf) => {
@@ -177,40 +188,36 @@ impl JetstreamWriter {
                         error!(?e, "publishing failed, retrying");
                     }
                 },
+                None => {
+                    error!("Stream {} not found in is_full map", stream.0);
+                }
             }
             // short-circuit out in failure mode if shutdown has been initiated
             if self.cancel_token.is_cancelled() {
                 error!("Shutdown signal received, exiting write loop");
-                callee_tx
-                    .send(Err(Error::ISB("Shutdown signal received".to_string())))
-                    .unwrap();
-                return;
             }
             // sleep to avoid busy looping
             sleep(self.config.retry_interval).await;
         };

-        // send the paf and callee_tx over
-        self.paf_resolver_tx
-            .send(ResolveAndPublishResult {
-                paf,
-                payload,
-                callee_tx,
-            })
-            .await
-            .expect("send should not fail");
+        paf
     }

     /// Writes the message to the JetStream ISB and returns the PublishAck. It will do
     /// infinite retries until the message gets published successfully. If it returns
     /// an error it means it is fatal non-retryable error.
-    pub(super) async fn blocking_write(&self, payload: Vec<u8>) -> Result<PublishAck> {
+    pub(super) async fn blocking_write(
+        &self,
+        stream: Stream,
+        payload: Vec<u8>,
+    ) -> Result<PublishAck> {
         let js_ctx = self.js_ctx.clone();
-        let start_time = tokio::time::Instant::now();
+        let start_time = Instant::now();
+        info!("Blocking write for stream {}", stream.0);
         loop {
             match js_ctx
-                .publish(self.stream_name.clone(), Bytes::from(payload.clone()))
+                .publish(stream.0.clone(), Bytes::from(payload.clone()))
                 .await
             {
                 Ok(paf) => match paf.await {
@@ -219,7 +226,7 @@ impl JetstreamWriter {
                         // should we return an error here? Because duplicate messages are not fatal
                         // But it can mess up the watermark progression because the offset will be
                         // same as the previous message offset
-                        warn!(ack = ?ack, "Duplicate message detected, ignoring");
+                        warn!(?ack, "Duplicate message detected, ignoring");
                     }
                     debug!(
                         elapsed_ms = start_time.elapsed().as_millis(),
@@ -245,81 +252,109 @@ impl JetstreamWriter {
 }

 /// ResolveAndPublishResult resolves the result of the write PAF operation.
-/// It contains the PublishAckFuture which can be awaited to get the PublishAck. Once PAF has
+/// It contains the list of PAFs (one message can be written to multiple streams)
+/// and the payload that was written. Once the PAFs for all the streams have been
 /// resolved, the information is published to callee_tx.
 #[derive(Debug)]
-pub(super) struct ResolveAndPublishResult {
-    paf: PublishAckFuture,
-    payload: Vec<u8>,
-    callee_tx: oneshot::Sender<Result<Offset>>,
+pub(crate) struct ResolveAndPublishResult {
+    pub(crate) pafs: Vec<(Stream, PublishAckFuture)>,
+    pub(crate) payload: Vec<u8>,
+    // Acknowledgement oneshot to notify the reader that the message has been written
+    pub(crate) ack_tx: oneshot::Sender<ReadAck>,
 }

 /// Resolves the PAF from the write call, if not successful it will do a blocking write so that
 /// it is eventually successful.
Once the PAF has been resolved (by either means) it will notify /// the top-level callee via the oneshot rx. -struct PafResolverActor { +pub(crate) struct PafResolver { + sem: Arc, js_writer: JetstreamWriter, - receiver: Receiver, } -impl PafResolverActor { - fn new(js_writer: JetstreamWriter, receiver: Receiver) -> Self { - PafResolverActor { +impl PafResolver { + pub(crate) fn new(concurrency: usize, js_writer: JetstreamWriter) -> Self { + PafResolver { + sem: Arc::new(Semaphore::new(concurrency)), // concurrency limit for resolving PAFs js_writer, - receiver, } } - /// Tries to the resolve the original PAF from the write call. If it is successful, will send - /// the successful result to the top-level callee's oneshot channel. If the original PAF does - /// not successfully resolve, it will do blocking write till write to JetStream succeeds. - async fn successfully_resolve_paf(&mut self, result: ResolveAndPublishResult) { - match result.paf.await { - Ok(ack) => { - if ack.duplicate { - warn!("Duplicate message detected, ignoring {:?}", ack); - } - result - .callee_tx - .send(Ok(Offset::Int(IntOffset::new( - ack.sequence, - self.js_writer.partition_idx, - )))) - .unwrap_or_else(|e| { - error!("Failed to send offset: {:?}", e); - }) - } - Err(e) => { - error!(?e, "Failed to resolve the future, trying blocking write"); - match self.js_writer.blocking_write(result.payload.clone()).await { + /// resolve_pafs resolves the PAFs for the given result. It will try to resolve the PAFs + /// asynchronously, if it fails it will do a blocking write to resolve the PAFs. + /// At any point in time, we will only have X PAF resolvers running, this will help us create a + /// natural backpressure. + pub(crate) async fn resolve_pafs(&self, result: ResolveAndPublishResult) -> Result<()> { + let start_time = Instant::now(); + let permit = Arc::clone(&self.sem) + .acquire_owned() + .await + .map_err(|_e| Error::ISB("Failed to acquire semaphore permit".to_string()))?; + let mut offsets = Vec::new(); + + let js_writer = self.js_writer.clone(); + tokio::spawn(async move { + let _permit = permit; + for (stream, paf) in result.pafs { + match paf.await { Ok(ack) => { if ack.duplicate { - warn!("Duplicate message detected, ignoring {:?}", ack); + warn!( + "Duplicate message detected for stream {}, ignoring {:?}", + stream.0, ack + ); } - result - .callee_tx - .send(Ok(Offset::Int(IntOffset::new( - ack.sequence, - self.js_writer.partition_idx, - )))) - .unwrap() + offsets.push(( + stream.clone(), + Offset::Int(IntOffset::new(ack.sequence, stream.1)), + )); } Err(e) => { - error!(?e, "Blocking write failed"); - result - .callee_tx - .send(Err(Error::ISB("Shutdown signal received".to_string()))) - .unwrap() + error!( + ?e, + "Failed to resolve the future for stream {}, trying blocking write", + stream.0 + ); + match js_writer + .blocking_write(stream.clone(), result.payload.clone()) + .await + { + Ok(ack) => { + if ack.duplicate { + warn!( + "Duplicate message detected for stream {}, ignoring {:?}", + stream.0, ack + ); + } + offsets.push(( + stream.clone(), + Offset::Int(IntOffset::new(ack.sequence, stream.1)), + )); + } + Err(e) => { + error!(?e, "Blocking write failed for stream {}", stream.0); + // Since we failed to write to the stream, we need to send a NAK to the reader + result.ack_tx.send(ReadAck::Nak).unwrap_or_else(|e| { + error!("Failed to send error for stream {}: {:?}", stream.0, e); + }); + return; + } + } } } } - } - } - async fn run(&mut self) { - while let Some(result) = self.receiver.recv().await { - 
self.successfully_resolve_paf(result).await; - } + // Send an ack to the reader + result.ack_tx.send(ReadAck::Ack).unwrap_or_else(|e| { + error!("Failed to send ack: {:?}", e); + }); + + pipeline_metrics() + .isb + .paf_resolution_time + .get_or_create(pipeline_isb_metric_labels()) + .observe(start_time.elapsed().as_micros() as f64); + }); + Ok(()) } } @@ -334,7 +369,7 @@ mod tests { use chrono::Utc; use super::*; - use crate::message::{Message, MessageID, Offset}; + use crate::message::{Message, MessageID}; #[cfg(feature = "nats-tests")] #[tokio::test] @@ -368,11 +403,9 @@ mod tests { .unwrap(); let writer = JetstreamWriter::new( - stream_name.to_string(), - 0, + vec![(stream_name.to_string(), 0)], Default::default(), context.clone(), - 500, cln_token.clone(), ); @@ -389,10 +422,11 @@ mod tests { headers: HashMap::new(), }; - let (success_tx, success_rx) = oneshot::channel::>(); let message_bytes: BytesMut = message.try_into().unwrap(); - writer.write(message_bytes.into(), success_tx).await; - assert!(success_rx.await.is_ok()); + let paf = writer + .write((stream_name.to_string(), 0), message_bytes.into()) + .await; + assert!(paf.await.is_ok()); context.delete_stream(stream_name).await.unwrap(); } @@ -429,11 +463,9 @@ mod tests { .unwrap(); let writer = JetstreamWriter::new( - stream_name.to_string(), - 0, + vec![(stream_name.to_string(), 0)], Default::default(), context.clone(), - 500, cln_token.clone(), ); @@ -451,7 +483,9 @@ mod tests { }; let message_bytes: BytesMut = message.try_into().unwrap(); - let result = writer.blocking_write(message_bytes.into()).await; + let result = writer + .blocking_write((stream_name.to_string(), 0), message_bytes.into()) + .await; assert!(result.is_ok()); let publish_ack = result.unwrap(); @@ -493,11 +527,9 @@ mod tests { let cancel_token = CancellationToken::new(); let writer = JetstreamWriter::new( - stream_name.to_string(), - 0, + vec![(stream_name.to_string(), 0)], Default::default(), context.clone(), - 500, cancel_token.clone(), ); @@ -516,10 +548,11 @@ mod tests { }, headers: HashMap::new(), }; - let (success_tx, success_rx) = oneshot::channel::>(); let message_bytes: BytesMut = message.try_into().unwrap(); - writer.write(message_bytes.into(), success_tx).await; - result_receivers.push(success_rx); + let paf = writer + .write((stream_name.to_string(), 0), message_bytes.into()) + .await; + result_receivers.push(paf); } // Attempt to publish a message which has a payload size greater than the max_message_size @@ -536,32 +569,28 @@ mod tests { }, headers: HashMap::new(), }; - let (success_tx, success_rx) = oneshot::channel::>(); let message_bytes: BytesMut = message.try_into().unwrap(); - writer.write(message_bytes.into(), success_tx).await; - result_receivers.push(success_rx); + let paf = writer + .write((stream_name.to_string(), 0), message_bytes.into()) + .await; + result_receivers.push(paf); // Cancel the token to exit the retry loop cancel_token.cancel(); // Check the results for (i, receiver) in result_receivers.into_iter().enumerate() { - let result = receiver.await.unwrap(); if i < 10 { assert!( - result.is_ok(), + receiver.await.is_ok(), "Message {} should be published successfully", i ); } else { assert!( - result.is_err(), + receiver.await.is_err(), "Message 11 should fail with cancellation error" ); - assert_eq!( - result.err().unwrap().to_string(), - "ISB Error - Shutdown signal received", - ); } } @@ -677,14 +706,12 @@ mod tests { let cancel_token = CancellationToken::new(); let writer = JetstreamWriter::new( - stream_name.to_string(), - 
0, + vec![(stream_name.to_string(), 0)], BufferWriterConfig { max_length: 100, ..Default::default() }, context.clone(), - 500, cancel_token.clone(), ); @@ -703,13 +730,23 @@ mod tests { } let start_time = Instant::now(); - while !writer.is_full.load(Ordering::Relaxed) && start_time.elapsed().as_millis() < 1000 { + while !writer + .is_full + .get(stream_name) + .map(|is_full| is_full.load(Ordering::Relaxed)) + .unwrap() + && start_time.elapsed().as_millis() < 1000 + { sleep(Duration::from_millis(5)).await; } // Verify the is_full flag assert!( - writer.is_full.load(Ordering::Relaxed), + writer + .is_full + .get(stream_name) + .map(|is_full| is_full.load(Ordering::Relaxed)) + .unwrap(), "Buffer should be full after publishing messages" ); diff --git a/rust/numaflow-core/src/shared.rs b/rust/numaflow-core/src/shared.rs index 63753fe85..0117040c4 100644 --- a/rust/numaflow-core/src/shared.rs +++ b/rust/numaflow-core/src/shared.rs @@ -1,2 +1,12 @@ -pub mod server_info; -pub mod utils; +/// All SDKs have to provide server info for all gRPC endpoints, so there is a lot of share. +pub(crate) mod server_info; + +/// All utilities related to gRPC. +pub(crate) mod grpc; + +/// Start metrics servers, pending readers, and possible other metrics related helpers. +pub(crate) mod metrics; + +/// Shared methods for creating Sources, Sinks, Transformers, etc. as they are required for both +/// MonoVertex and Pipeline. +pub(crate) mod create_components; diff --git a/rust/numaflow-core/src/shared/create_components.rs b/rust/numaflow-core/src/shared/create_components.rs new file mode 100644 index 000000000..b09f243de --- /dev/null +++ b/rust/numaflow-core/src/shared/create_components.rs @@ -0,0 +1,402 @@ +use std::time::Duration; + +use numaflow_pb::clients::sink::sink_client::SinkClient; +use numaflow_pb::clients::source::source_client::SourceClient; +use numaflow_pb::clients::sourcetransformer::source_transform_client::SourceTransformClient; +use tokio_util::sync::CancellationToken; +use tonic::transport::Channel; + +use crate::config::components::sink::{SinkConfig, SinkType}; +use crate::config::components::source::{SourceConfig, SourceType}; +use crate::config::components::transformer::TransformerConfig; +use crate::shared::grpc; +use crate::shared::server_info::{sdk_server_info, ContainerType}; +use crate::sink::{SinkClientType, SinkWriter, SinkWriterBuilder}; +use crate::source::generator::new_generator; +use crate::source::pulsar::new_pulsar_source; +use crate::source::user_defined::new_source; +use crate::source::Source; +use crate::transformer::Transformer; +use crate::{config, error, metrics, source}; + +/// Creates a sink writer based on the configuration +pub(crate) async fn create_sink_writer( + batch_size: usize, + read_timeout: Duration, + primary_sink: SinkConfig, + fallback_sink: Option, + cln_token: &CancellationToken, +) -> error::Result<( + SinkWriter, + Option>, + Option>, +)> { + let (sink_writer_builder, sink_rpc_client) = match primary_sink.sink_type.clone() { + SinkType::Log(_) => ( + SinkWriterBuilder::new(batch_size, read_timeout, SinkClientType::Log), + None, + ), + SinkType::Blackhole(_) => ( + SinkWriterBuilder::new(batch_size, read_timeout, SinkClientType::Blackhole), + None, + ), + SinkType::UserDefined(ud_config) => { + let sink_server_info = + sdk_server_info(ud_config.server_info_path.clone().into(), cln_token.clone()) + .await?; + + let metric_labels = metrics::sdk_info_labels( + config::get_component_type().to_string(), + config::get_vertex_name().to_string(), + 
sink_server_info.language,
+                sink_server_info.version,
+                ContainerType::Sourcer.to_string(),
+            );
+
+            metrics::global_metrics()
+                .sdk_info
+                .get_or_create(&metric_labels)
+                .set(1);
+
+            let mut sink_grpc_client = SinkClient::new(
+                grpc::create_rpc_channel(ud_config.socket_path.clone().into()).await?,
+            )
+            .max_encoding_message_size(ud_config.grpc_max_message_size)
+            .max_decoding_message_size(ud_config.grpc_max_message_size);
+            grpc::wait_until_sink_ready(cln_token, &mut sink_grpc_client).await?;
+            (
+                SinkWriterBuilder::new(
+                    batch_size,
+                    read_timeout,
+                    SinkClientType::UserDefined(sink_grpc_client.clone()),
+                )
+                .retry_config(primary_sink.retry_config.unwrap_or_default()),
+                Some(sink_grpc_client),
+            )
+        }
+    };
+
+    if let Some(fb_sink) = fallback_sink {
+        return match fb_sink.sink_type.clone() {
+            SinkType::Log(_) => Ok((
+                sink_writer_builder
+                    .fb_sink_client(SinkClientType::Log)
+                    .build()
+                    .await?,
+                sink_rpc_client.clone(),
+                None,
+            )),
+            SinkType::Blackhole(_) => Ok((
+                sink_writer_builder
+                    .fb_sink_client(SinkClientType::Blackhole)
+                    .build()
+                    .await?,
+                sink_rpc_client.clone(),
+                None,
+            )),
+            SinkType::UserDefined(ud_config) => {
+                let fb_server_info =
+                    sdk_server_info(ud_config.server_info_path.clone().into(), cln_token.clone())
+                        .await?;
+
+                let metric_labels = metrics::sdk_info_labels(
+                    config::get_component_type().to_string(),
+                    config::get_vertex_name().to_string(),
+                    fb_server_info.language,
+                    fb_server_info.version,
+                    ContainerType::Sourcer.to_string(),
+                );
+
+                metrics::global_metrics()
+                    .sdk_info
+                    .get_or_create(&metric_labels)
+                    .set(1);
+
+                let mut sink_grpc_client = SinkClient::new(
+                    grpc::create_rpc_channel(ud_config.socket_path.clone().into()).await?,
+                )
+                .max_encoding_message_size(ud_config.grpc_max_message_size)
+                .max_decoding_message_size(ud_config.grpc_max_message_size);
+                grpc::wait_until_sink_ready(cln_token, &mut sink_grpc_client).await?;
+
+                Ok((
+                    sink_writer_builder
+                        .fb_sink_client(SinkClientType::UserDefined(sink_grpc_client.clone()))
+                        .build()
+                        .await?,
+                    sink_rpc_client.clone(),
+                    Some(sink_grpc_client),
+                ))
+            }
+        };
+    }
+    Ok((sink_writer_builder.build().await?, sink_rpc_client, None))
+}
+
+/// Creates a transformer if it is configured
+pub async fn create_transformer(
+    batch_size: usize,
+    transformer_config: Option<TransformerConfig>,
+    cln_token: CancellationToken,
+) -> error::Result<(Option<Transformer>, Option<SourceTransformClient<Channel>>)> {
+    if let Some(transformer_config) = transformer_config {
+        if let config::components::transformer::TransformerType::UserDefined(ud_transformer) =
+            &transformer_config.transformer_type
+        {
+            let server_info = sdk_server_info(
+                ud_transformer.server_info_path.clone().into(),
+                cln_token.clone(),
+            )
+            .await?;
+            let metric_labels = metrics::sdk_info_labels(
+                config::get_component_type().to_string(),
+                config::get_vertex_name().to_string(),
+                server_info.language,
+                server_info.version,
+                ContainerType::Sourcer.to_string(),
+            );
+            metrics::global_metrics()
+                .sdk_info
+                .get_or_create(&metric_labels)
+                .set(1);
+
+            let mut transformer_grpc_client = SourceTransformClient::new(
+                grpc::create_rpc_channel(ud_transformer.socket_path.clone().into()).await?,
+            )
+            .max_encoding_message_size(ud_transformer.grpc_max_message_size)
+            .max_decoding_message_size(ud_transformer.grpc_max_message_size);
+            grpc::wait_until_transformer_ready(&cln_token, &mut transformer_grpc_client).await?;
+            return Ok((
+                Some(
+                    Transformer::new(
+                        batch_size,
+                        transformer_config.concurrency,
+                        transformer_grpc_client.clone(),
+                    )
+                    .await?,
+                ),
+
Some(transformer_grpc_client), + )); + } + } + Ok((None, None)) +} + +/// Creates a source type based on the configuration +pub async fn create_source( + batch_size: usize, + read_timeout: Duration, + source_config: &SourceConfig, + cln_token: CancellationToken, +) -> error::Result<(Source, Option>)> { + match &source_config.source_type { + SourceType::Generator(generator_config) => { + let (generator_read, generator_ack, generator_lag) = + new_generator(generator_config.clone(), batch_size)?; + Ok(( + Source::new( + batch_size, + source::SourceType::Generator(generator_read, generator_ack, generator_lag), + ), + None, + )) + } + SourceType::UserDefined(udsource_config) => { + let server_info = sdk_server_info( + udsource_config.server_info_path.clone().into(), + cln_token.clone(), + ) + .await?; + + let metric_labels = metrics::sdk_info_labels( + config::get_component_type().to_string(), + config::get_vertex_name().to_string(), + server_info.language, + server_info.version, + ContainerType::Sourcer.to_string(), + ); + metrics::global_metrics() + .sdk_info + .get_or_create(&metric_labels) + .set(1); + + // TODO: Add sdk info metric + let mut source_grpc_client = SourceClient::new( + grpc::create_rpc_channel(udsource_config.socket_path.clone().into()).await?, + ) + .max_encoding_message_size(udsource_config.grpc_max_message_size) + .max_encoding_message_size(udsource_config.grpc_max_message_size); + grpc::wait_until_source_ready(&cln_token, &mut source_grpc_client).await?; + let (ud_read, ud_ack, ud_lag) = + new_source(source_grpc_client.clone(), batch_size, read_timeout).await?; + Ok(( + Source::new( + batch_size, + source::SourceType::UserDefinedSource(ud_read, ud_ack, ud_lag), + ), + Some(source_grpc_client), + )) + } + SourceType::Pulsar(pulsar_config) => { + let pulsar = new_pulsar_source(pulsar_config.clone(), batch_size, read_timeout).await?; + Ok(( + Source::new(batch_size, source::SourceType::Pulsar(pulsar)), + None, + )) + } + } +} + +// Retrieve value from mounted secret volume +// "/var/numaflow/secrets/${secretRef.name}/${secretRef.key}" is expected to be the file path +pub(crate) fn get_secret_from_volume(name: &str, key: &str) -> Result { + let path = format!("/var/numaflow/secrets/{name}/{key}"); + let val = std::fs::read_to_string(path.clone()) + .map_err(|e| format!("Reading secret from file {path}: {e:?}"))?; + Ok(val.trim().into()) +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use numaflow::source::{Message, Offset, SourceReadRequest}; + use numaflow::{sink, source, sourcetransform}; + use numaflow_pb::clients::sink::sink_client::SinkClient; + use numaflow_pb::clients::source::source_client::SourceClient; + use numaflow_pb::clients::sourcetransformer::source_transform_client::SourceTransformClient; + use tokio::sync::mpsc; + use tokio::sync::mpsc::Sender; + use tokio::time::sleep; + use tokio_util::sync::CancellationToken; + + use crate::shared::grpc::{ + create_rpc_channel, wait_until_sink_ready, wait_until_source_ready, + wait_until_transformer_ready, + }; + + struct SimpleSource {} + + #[tonic::async_trait] + impl source::Sourcer for SimpleSource { + async fn read(&self, _request: SourceReadRequest, _transmitter: Sender) {} + + async fn ack(&self, _offset: Vec) {} + + async fn pending(&self) -> usize { + 0 + } + + async fn partitions(&self) -> Option> { + Some(vec![0]) + } + } + + struct SimpleTransformer; + #[tonic::async_trait] + impl sourcetransform::SourceTransformer for SimpleTransformer { + async fn transform( + &self, + _input: 
sourcetransform::SourceTransformRequest, + ) -> Vec { + vec![] + } + } + + struct InMemorySink {} + + #[tonic::async_trait] + impl sink::Sinker for InMemorySink { + async fn sink(&self, mut _input: mpsc::Receiver) -> Vec { + vec![] + } + } + + #[tokio::test] + async fn test_wait_until_ready() { + // Start the source server + let (source_shutdown_tx, source_shutdown_rx) = tokio::sync::oneshot::channel(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let source_sock_file = tmp_dir.path().join("source.sock"); + let server_info_file = tmp_dir.path().join("source-server-info"); + + let server_info = server_info_file.clone(); + let source_socket = source_sock_file.clone(); + let source_server_handle = tokio::spawn(async move { + source::Server::new(SimpleSource {}) + .with_socket_file(source_socket) + .with_server_info_file(server_info) + .start_with_shutdown(source_shutdown_rx) + .await + .unwrap(); + }); + + // Start the sink server + let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); + let sink_tmp_dir = tempfile::TempDir::new().unwrap(); + let sink_sock_file = sink_tmp_dir.path().join("sink.sock"); + let server_info_file = sink_tmp_dir.path().join("sink-server-info"); + + let server_info = server_info_file.clone(); + let sink_socket = sink_sock_file.clone(); + let sink_server_handle = tokio::spawn(async move { + sink::Server::new(InMemorySink {}) + .with_socket_file(sink_socket) + .with_server_info_file(server_info) + .start_with_shutdown(sink_shutdown_rx) + .await + .unwrap(); + }); + + // Start the transformer server + let (transformer_shutdown_tx, transformer_shutdown_rx) = tokio::sync::oneshot::channel(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let transformer_sock_file = tmp_dir.path().join("transformer.sock"); + let server_info_file = tmp_dir.path().join("transformer-server-info"); + + let server_info = server_info_file.clone(); + let transformer_socket = transformer_sock_file.clone(); + let transformer_server_handle = tokio::spawn(async move { + sourcetransform::Server::new(SimpleTransformer {}) + .with_socket_file(transformer_socket) + .with_server_info_file(server_info) + .start_with_shutdown(transformer_shutdown_rx) + .await + .unwrap(); + }); + + // Wait for the servers to start + sleep(Duration::from_millis(100)).await; + + let cln_token = CancellationToken::new(); + + let mut source_grpc_client = + SourceClient::new(create_rpc_channel(source_sock_file.clone()).await.unwrap()); + wait_until_source_ready(&cln_token, &mut source_grpc_client) + .await + .unwrap(); + + let mut sink_grpc_client = + SinkClient::new(create_rpc_channel(sink_sock_file.clone()).await.unwrap()); + wait_until_sink_ready(&cln_token, &mut sink_grpc_client) + .await + .unwrap(); + + let mut transformer_grpc_client = Some(SourceTransformClient::new( + create_rpc_channel(transformer_sock_file.clone()) + .await + .unwrap(), + )); + wait_until_transformer_ready(&cln_token, transformer_grpc_client.as_mut().unwrap()) + .await + .unwrap(); + + source_shutdown_tx.send(()).unwrap(); + sink_shutdown_tx.send(()).unwrap(); + transformer_shutdown_tx.send(()).unwrap(); + + source_server_handle.await.unwrap(); + sink_server_handle.await.unwrap(); + transformer_server_handle.await.unwrap(); + } +} diff --git a/rust/numaflow-core/src/shared/grpc.rs b/rust/numaflow-core/src/shared/grpc.rs new file mode 100644 index 000000000..d6246b60a --- /dev/null +++ b/rust/numaflow-core/src/shared/grpc.rs @@ -0,0 +1,127 @@ +use std::path::PathBuf; +use std::time::Duration; + +use axum::http::Uri; 
+use backoff::retry::Retry;
+use backoff::strategy::fixed;
+use chrono::{DateTime, TimeZone, Timelike, Utc};
+use numaflow_pb::clients::sink::sink_client::SinkClient;
+use numaflow_pb::clients::source::source_client::SourceClient;
+use numaflow_pb::clients::sourcetransformer::source_transform_client::SourceTransformClient;
+use prost_types::Timestamp;
+use tokio::net::UnixStream;
+use tokio::time::sleep;
+use tokio_util::sync::CancellationToken;
+use tonic::transport::{Channel, Endpoint};
+use tonic::Request;
+use tower::service_fn;
+use tracing::info;
+
+use crate::error;
+use crate::error::Error;
+
+/// Waits until the source server is ready by doing health checks
+pub(crate) async fn wait_until_source_ready(
+    cln_token: &CancellationToken,
+    client: &mut SourceClient<Channel>,
+) -> error::Result<()> {
+    info!("Waiting for source client to be ready...");
+    loop {
+        if cln_token.is_cancelled() {
+            return Err(Error::Forwarder(
+                "Cancellation token is cancelled".to_string(),
+            ));
+        }
+        match client.is_ready(Request::new(())).await {
+            Ok(_) => break,
+            Err(_) => sleep(Duration::from_secs(1)).await,
+        }
+        info!("Waiting for source client to be ready...");
+    }
+    Ok(())
+}
+
+/// Waits until the sink server is ready by doing health checks
+pub(crate) async fn wait_until_sink_ready(
+    cln_token: &CancellationToken,
+    client: &mut SinkClient<Channel>,
+) -> error::Result<()> {
+    loop {
+        if cln_token.is_cancelled() {
+            return Err(Error::Forwarder(
+                "Cancellation token is cancelled".to_string(),
+            ));
+        }
+        match client.is_ready(Request::new(())).await {
+            Ok(_) => break,
+            Err(_) => sleep(Duration::from_secs(1)).await,
+        }
+        info!("Waiting for sink client to be ready...");
+    }
+    Ok(())
+}
+
+/// Waits until the transformer server is ready by doing health checks
+pub(crate) async fn wait_until_transformer_ready(
+    cln_token: &CancellationToken,
+    client: &mut SourceTransformClient<Channel>,
+) -> error::Result<()> {
+    loop {
+        if cln_token.is_cancelled() {
+            return Err(Error::Forwarder(
+                "Cancellation token is cancelled".to_string(),
+            ));
+        }
+        match client.is_ready(Request::new(())).await {
+            Ok(_) => break,
+            Err(_) => sleep(Duration::from_secs(1)).await,
+        }
+        info!("Waiting for transformer client to be ready...");
+    }
+    Ok(())
+}
+
+pub(crate) fn prost_timestamp_from_utc(t: DateTime<Utc>) -> Option<Timestamp> {
+    Some(Timestamp {
+        seconds: t.timestamp(),
+        nanos: t.nanosecond() as i32,
+    })
+}
+
+pub(crate) async fn create_rpc_channel(socket_path: PathBuf) -> error::Result<Channel> {
+    const RECONNECT_INTERVAL: u64 = 1000;
+    const MAX_RECONNECT_ATTEMPTS: usize = 5;
+
+    let interval = fixed::Interval::from_millis(RECONNECT_INTERVAL).take(MAX_RECONNECT_ATTEMPTS);
+
+    let channel = Retry::retry(
+        interval,
+        || async { connect_with_uds(socket_path.clone()).await },
+        |_: &Error| true,
+    )
+    .await?;
+    Ok(channel)
+}
+
+/// Connects to the UDS socket and returns a channel
+pub(crate) async fn connect_with_uds(uds_path: PathBuf) -> error::Result<Channel> {
+    let channel = Endpoint::try_from("http://[::]:50051")
+        .map_err(|e| Error::Connection(format!("Failed to create endpoint: {:?}", e)))?
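        // The URI above is only a placeholder: tonic insists on a syntactically valid
        // Endpoint, but the custom connector below ignores it and dials the Unix
        // domain socket path instead.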
+        .connect_with_connector(service_fn(move |_: Uri| {
+            let uds_socket = uds_path.clone();
+            async move {
+                Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new(
+                    UnixStream::connect(uds_socket).await?,
+                ))
+            }
+        }))
+        .await
+        .map_err(|e| Error::Connection(format!("Failed to connect: {:?}", e)))?;
+    Ok(channel)
+}
+
+pub(crate) fn utc_from_timestamp(t: Option<Timestamp>) -> DateTime<Utc> {
+    t.map_or(Utc.timestamp_nanos(-1), |t| {
+        DateTime::from_timestamp(t.seconds, t.nanos as u32).unwrap_or(Utc.timestamp_nanos(-1))
+    })
+}
diff --git a/rust/numaflow-core/src/shared/metrics.rs b/rust/numaflow-core/src/shared/metrics.rs
new file mode 100644
index 000000000..0fe06e05d
--- /dev/null
+++ b/rust/numaflow-core/src/shared/metrics.rs
@@ -0,0 +1,44 @@
+use std::net::SocketAddr;
+use std::time::Duration;
+
+use tokio::task::JoinHandle;
+use tracing::error;
+
+use crate::config::components::metrics::MetricsConfig;
+use crate::metrics::{
+    start_metrics_https_server, PendingReader, PendingReaderBuilder, UserDefinedContainerState,
+};
+use crate::source::Source;
+
+/// Starts the metrics server
+pub(crate) async fn start_metrics_server(
+    metrics_config: MetricsConfig,
+    metrics_state: UserDefinedContainerState,
+) -> JoinHandle<()> {
+    tokio::spawn(async move {
+        // Start the metrics server, which serves the prometheus metrics.
+        let metrics_addr: SocketAddr =
+            format!("0.0.0.0:{}", metrics_config.metrics_server_listen_port)
+                .parse()
+                .expect("Invalid address");
+
+        if let Err(e) = start_metrics_https_server(metrics_addr, metrics_state).await {
+            error!("metrics server error: {:?}", e);
+        }
+    })
+}
+
+/// Creates a pending reader
+pub(crate) async fn create_pending_reader(
+    metrics_config: &MetricsConfig,
+    lag_reader_grpc_client: Source,
+) -> PendingReader {
+    PendingReaderBuilder::new(lag_reader_grpc_client)
+        .lag_checking_interval(Duration::from_secs(
+            metrics_config.lag_check_interval_in_secs.into(),
+        ))
+        .refresh_interval(Duration::from_secs(
+            metrics_config.lag_refresh_interval_in_secs.into(),
+        ))
+        .build()
+}
diff --git a/rust/numaflow-core/src/shared/server_info.rs b/rust/numaflow-core/src/shared/server_info.rs
index 9e7cf0b04..40ec6b37d 100644
--- a/rust/numaflow-core/src/shared/server_info.rs
+++ b/rust/numaflow-core/src/shared/server_info.rs
@@ -88,7 +88,7 @@ pub(crate) struct ServerInfo {
     pub(crate) metadata: Option<HashMap<String, String>>, // Metadata is optional
 }
 
-/// check_for_server_compatibility waits until the server info file is ready and check whether the
+/// sdk_server_info waits until the server info file is ready and checks whether the
 /// server is compatible with Numaflow.
pub(crate) async fn sdk_server_info( file_path: PathBuf, diff --git a/rust/numaflow-core/src/shared/utils.rs b/rust/numaflow-core/src/shared/utils.rs deleted file mode 100644 index c035c67fc..000000000 --- a/rust/numaflow-core/src/shared/utils.rs +++ /dev/null @@ -1,354 +0,0 @@ -use std::net::SocketAddr; -use std::path::PathBuf; -use std::time::Duration; - -use axum::http::Uri; -use backoff::retry::Retry; -use backoff::strategy::fixed; -use chrono::{DateTime, TimeZone, Timelike, Utc}; -use numaflow_pb::clients::sink::sink_client::SinkClient; -use numaflow_pb::clients::source::source_client::SourceClient; -use numaflow_pb::clients::sourcetransformer::source_transform_client::SourceTransformClient; -use prost_types::Timestamp; -use tokio::net::UnixStream; -use tokio::task::JoinHandle; -use tokio::time::sleep; -use tokio_util::sync::CancellationToken; -use tonic::transport::{Channel, Endpoint}; -use tonic::Request; -use tower::service_fn; -use tracing::info; - -use crate::config::components::metrics::MetricsConfig; -use crate::config::components::sink::SinkType; -use crate::config::monovertex::MonovertexConfig; -use crate::error; -use crate::metrics::{ - start_metrics_https_server, PendingReader, PendingReaderBuilder, UserDefinedContainerState, -}; -use crate::shared::server_info::sdk_server_info; -use crate::sink::{SinkClientType, SinkHandle}; -use crate::source::SourceHandle; -use crate::Error; -use crate::Result; - -pub(crate) async fn start_metrics_server( - metrics_config: MetricsConfig, - metrics_state: UserDefinedContainerState, -) -> JoinHandle<()> { - tokio::spawn(async move { - // Start the metrics server, which server the prometheus metrics. - let metrics_addr: SocketAddr = - format!("0.0.0.0:{}", metrics_config.metrics_server_listen_port) - .parse() - .expect("Invalid address"); - - if let Err(e) = start_metrics_https_server(metrics_addr, metrics_state).await { - error!("metrics server error: {:?}", e); - } - }) -} - -pub(crate) async fn create_pending_reader( - mvtx_config: &MonovertexConfig, - lag_reader_grpc_client: SourceHandle, -) -> PendingReader { - PendingReaderBuilder::new( - mvtx_config.name.clone(), - mvtx_config.replica, - lag_reader_grpc_client, - ) - .lag_checking_interval(Duration::from_secs( - mvtx_config.metrics_config.lag_check_interval_in_secs.into(), - )) - .refresh_interval(Duration::from_secs( - mvtx_config - .metrics_config - .lag_refresh_interval_in_secs - .into(), - )) - .build() -} -pub(crate) async fn wait_until_source_ready( - cln_token: &CancellationToken, - client: &mut SourceClient, -) -> Result<()> { - info!("Waiting for source client to be ready..."); - loop { - if cln_token.is_cancelled() { - return Err(Error::Forwarder( - "Cancellation token is cancelled".to_string(), - )); - } - match client.is_ready(Request::new(())).await { - Ok(_) => break, - Err(_) => sleep(Duration::from_secs(1)).await, - } - info!("Waiting for source client to be ready..."); - } - Ok(()) -} - -pub(crate) async fn wait_until_sink_ready( - cln_token: &CancellationToken, - client: &mut SinkClient, -) -> Result<()> { - loop { - if cln_token.is_cancelled() { - return Err(Error::Forwarder( - "Cancellation token is cancelled".to_string(), - )); - } - match client.is_ready(Request::new(())).await { - Ok(_) => break, - Err(_) => sleep(Duration::from_secs(1)).await, - } - info!("Waiting for sink client to be ready..."); - } - Ok(()) -} - -pub(crate) async fn wait_until_transformer_ready( - cln_token: &CancellationToken, - client: &mut SourceTransformClient, -) -> Result<()> { - loop 
{ - if cln_token.is_cancelled() { - return Err(Error::Forwarder( - "Cancellation token is cancelled".to_string(), - )); - } - match client.is_ready(Request::new(())).await { - Ok(_) => break, - Err(_) => sleep(Duration::from_secs(1)).await, - } - info!("Waiting for transformer client to be ready..."); - } - Ok(()) -} - -pub(crate) fn utc_from_timestamp(t: Option) -> DateTime { - t.map_or(Utc.timestamp_nanos(-1), |t| { - DateTime::from_timestamp(t.seconds, t.nanos as u32).unwrap_or(Utc.timestamp_nanos(-1)) - }) -} - -pub(crate) fn prost_timestamp_from_utc(t: DateTime) -> Option { - Some(Timestamp { - seconds: t.timestamp(), - nanos: t.nanosecond() as i32, - }) -} - -pub(crate) async fn create_rpc_channel(socket_path: PathBuf) -> Result { - const RECONNECT_INTERVAL: u64 = 1000; - const MAX_RECONNECT_ATTEMPTS: usize = 5; - - let interval = fixed::Interval::from_millis(RECONNECT_INTERVAL).take(MAX_RECONNECT_ATTEMPTS); - - let channel = Retry::retry( - interval, - || async { connect_with_uds(socket_path.clone()).await }, - |_: &Error| true, - ) - .await?; - Ok(channel) -} - -pub(crate) async fn connect_with_uds(uds_path: PathBuf) -> Result { - let channel = Endpoint::try_from("http://[::]:50051") - .map_err(|e| Error::Connection(format!("Failed to create endpoint: {:?}", e)))? - .connect_with_connector(service_fn(move |_: Uri| { - let uds_socket = uds_path.clone(); - async move { - Ok::<_, std::io::Error>(hyper_util::rt::TokioIo::new( - UnixStream::connect(uds_socket).await?, - )) - } - })) - .await - .map_err(|e| Error::Connection(format!("Failed to connect: {:?}", e)))?; - Ok(channel) -} - -pub(crate) async fn create_sink_handle( - batch_size: usize, - sink_type: &SinkType, - cln_token: &CancellationToken, -) -> Result<(SinkHandle, Option>)> { - match sink_type { - SinkType::Log(_) => Ok(( - SinkHandle::new(SinkClientType::Log, batch_size).await?, - None, - )), - SinkType::Blackhole(_) => Ok(( - SinkHandle::new(SinkClientType::Blackhole, batch_size).await?, - None, - )), - SinkType::UserDefined(ud_config) => { - _ = sdk_server_info(ud_config.server_info_path.clone().into(), cln_token.clone()) - .await?; - let mut sink_grpc_client = - SinkClient::new(create_rpc_channel(ud_config.socket_path.clone().into()).await?) - .max_encoding_message_size(ud_config.grpc_max_message_size) - .max_encoding_message_size(ud_config.grpc_max_message_size); - wait_until_sink_ready(cln_token, &mut sink_grpc_client).await?; - // TODO: server info? 
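The three wait_until_*_ready helpers deleted here share one shape: poll is_ready in a loop, bail out when the cancellation token fires, and sleep between attempts. A generic sketch of that shape (the helper name, error type, and one-second cadence are illustrative, not part of the codebase):

use std::future::Future;
use std::time::Duration;

use tokio::time::sleep;
use tokio_util::sync::CancellationToken;

/// Polls `probe` once a second until it succeeds or `token` is cancelled.
async fn wait_until_ready<F, Fut, E>(token: &CancellationToken, mut probe: F) -> Result<(), String>
where
    F: FnMut() -> Fut,
    Fut: Future<Output = Result<(), E>>,
{
    loop {
        if token.is_cancelled() {
            return Err("cancelled while waiting for readiness".to_string());
        }
        if probe().await.is_ok() {
            return Ok(());
        }
        sleep(Duration::from_secs(1)).await;
    }
}

#[tokio::main]
async fn main() {
    let token = CancellationToken::new();
    let mut calls = 0;
    // Succeeds on the third probe, standing in for a gRPC is_ready check.
    let result = wait_until_ready(&token, || {
        calls += 1;
        let ok = calls >= 3;
        async move { if ok { Ok::<(), ()>(()) } else { Err(()) } }
    })
    .await;
    assert!(result.is_ok());
}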
- - Ok(( - SinkHandle::new( - SinkClientType::UserDefined(sink_grpc_client.clone()), - batch_size, - ) - .await?, - Some(sink_grpc_client), - )) - } - } -} - -// Retrieve value from mounted secret volume -// "/var/numaflow/secrets/${secretRef.name}/${secretRef.key}" is expected to be the file path -pub(crate) fn get_secret_from_volume(name: &str, key: &str) -> std::result::Result { - let path = format!("/var/numaflow/secrets/{name}/{key}"); - let val = std::fs::read_to_string(path.clone()) - .map_err(|e| format!("Reading secret from file {path}: {e:?}"))?; - Ok(val.trim().into()) -} - -#[cfg(test)] -mod tests { - use numaflow::source::{Message, Offset, SourceReadRequest}; - use numaflow::{sink, source, sourcetransform}; - use tokio::sync::mpsc; - use tokio::sync::mpsc::Sender; - use tokio_util::sync::CancellationToken; - - use super::*; - use crate::shared::utils::create_rpc_channel; - - struct SimpleSource {} - - #[tonic::async_trait] - impl source::Sourcer for SimpleSource { - async fn read(&self, _request: SourceReadRequest, _transmitter: Sender) {} - - async fn ack(&self, _offset: Vec) {} - - async fn pending(&self) -> usize { - 0 - } - - async fn partitions(&self) -> Option> { - Some(vec![0]) - } - } - - struct SimpleTransformer; - #[tonic::async_trait] - impl sourcetransform::SourceTransformer for SimpleTransformer { - async fn transform( - &self, - _input: sourcetransform::SourceTransformRequest, - ) -> Vec { - vec![] - } - } - - struct InMemorySink {} - - #[tonic::async_trait] - impl sink::Sinker for InMemorySink { - async fn sink(&self, mut _input: mpsc::Receiver) -> Vec { - vec![] - } - } - - #[tokio::test] - async fn test_wait_until_ready() { - // Start the source server - let (source_shutdown_tx, source_shutdown_rx) = tokio::sync::oneshot::channel(); - let tmp_dir = tempfile::TempDir::new().unwrap(); - let source_sock_file = tmp_dir.path().join("source.sock"); - let server_info_file = tmp_dir.path().join("source-server-info"); - - let server_info = server_info_file.clone(); - let source_socket = source_sock_file.clone(); - let source_server_handle = tokio::spawn(async move { - source::Server::new(SimpleSource {}) - .with_socket_file(source_socket) - .with_server_info_file(server_info) - .start_with_shutdown(source_shutdown_rx) - .await - .unwrap(); - }); - - // Start the sink server - let (sink_shutdown_tx, sink_shutdown_rx) = tokio::sync::oneshot::channel(); - let sink_tmp_dir = tempfile::TempDir::new().unwrap(); - let sink_sock_file = sink_tmp_dir.path().join("sink.sock"); - let server_info_file = sink_tmp_dir.path().join("sink-server-info"); - - let server_info = server_info_file.clone(); - let sink_socket = sink_sock_file.clone(); - let sink_server_handle = tokio::spawn(async move { - sink::Server::new(InMemorySink {}) - .with_socket_file(sink_socket) - .with_server_info_file(server_info) - .start_with_shutdown(sink_shutdown_rx) - .await - .unwrap(); - }); - - // Start the transformer server - let (transformer_shutdown_tx, transformer_shutdown_rx) = tokio::sync::oneshot::channel(); - let tmp_dir = tempfile::TempDir::new().unwrap(); - let transformer_sock_file = tmp_dir.path().join("transformer.sock"); - let server_info_file = tmp_dir.path().join("transformer-server-info"); - - let server_info = server_info_file.clone(); - let transformer_socket = transformer_sock_file.clone(); - let transformer_server_handle = tokio::spawn(async move { - sourcetransform::Server::new(SimpleTransformer {}) - .with_socket_file(transformer_socket) - .with_server_info_file(server_info) - 
.start_with_shutdown(transformer_shutdown_rx) - .await - .unwrap(); - }); - - // Wait for the servers to start - sleep(Duration::from_millis(100)).await; - - let cln_token = CancellationToken::new(); - - let mut source_grpc_client = - SourceClient::new(create_rpc_channel(source_sock_file.clone()).await.unwrap()); - wait_until_source_ready(&cln_token, &mut source_grpc_client) - .await - .unwrap(); - - let mut sink_grpc_client = - SinkClient::new(create_rpc_channel(sink_sock_file.clone()).await.unwrap()); - wait_until_sink_ready(&cln_token, &mut sink_grpc_client) - .await - .unwrap(); - - let mut transformer_grpc_client = Some(SourceTransformClient::new( - create_rpc_channel(transformer_sock_file.clone()) - .await - .unwrap(), - )); - wait_until_transformer_ready(&cln_token, transformer_grpc_client.as_mut().unwrap()) - .await - .unwrap(); - - source_shutdown_tx.send(()).unwrap(); - sink_shutdown_tx.send(()).unwrap(); - transformer_shutdown_tx.send(()).unwrap(); - - source_server_handle.await.unwrap(); - sink_server_handle.await.unwrap(); - transformer_server_handle.await.unwrap(); - } -} diff --git a/rust/numaflow-core/src/sink.rs b/rust/numaflow-core/src/sink.rs index b289dc94c..144cefef1 100644 --- a/rust/numaflow-core/src/sink.rs +++ b/rust/numaflow-core/src/sink.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::time::Duration; use numaflow_pb::clients::sink::sink_client::SinkClient; use tokio::sync::mpsc::Receiver; @@ -6,14 +7,14 @@ use tokio::sync::{mpsc, oneshot}; use tokio::task::JoinHandle; use tokio::time::sleep; use tokio::{pin, time}; +use tokio_stream::wrappers::ReceiverStream; use tokio_stream::StreamExt; use tokio_util::sync::CancellationToken; use tonic::transport::Channel; -use tracing::{debug, error, warn}; +use tracing::{debug, error, info, warn}; use user_defined::UserDefinedSink; use crate::config::components::sink::{OnFailureStrategy, RetryConfig}; -use crate::config::pipeline::SinkVtxConfig; use crate::error::Error; use crate::message::{Message, ReadAck, ReadMessage, ResponseFromSink, ResponseStatusFromSink}; use crate::Result; @@ -35,6 +36,7 @@ pub(crate) trait LocalSink { async fn sink(&mut self, messages: Vec<Message>) -> Result<Vec<ResponseFromSink>>; } +/// ActorMessage is a message that is sent to the SinkActor. enum ActorMessage { Sink { messages: Vec<Message>, @@ -42,8 +44,9 @@ enum ActorMessage { }, } +/// SinkActor is an actor that handles messages sent to the Sink. struct SinkActor<T> { - actor_messages: mpsc::Receiver<ActorMessage>, + actor_messages: Receiver<ActorMessage>, sink: T, } @@ -51,7 +54,7 @@ impl<T> SinkActor<T> where T: Sink, { - fn new(actor_messages: mpsc::Receiver<ActorMessage>, sink: T) -> Self { + fn new(actor_messages: Receiver<ActorMessage>, sink: T) -> Self { Self { actor_messages, sink, @@ -71,21 +74,57 @@ where } } -#[derive(Clone)] -pub(crate) struct SinkHandle { - sender: mpsc::Sender<ActorMessage>, -} - pub(crate) enum SinkClientType { Log, Blackhole, UserDefined(SinkClient<Channel>), } -impl SinkHandle { - pub(crate) async fn new(sink_client: SinkClientType, batch_size: usize) -> Result<Self> { - let (sender, receiver) = mpsc::channel(batch_size); - match sink_client { +/// SinkWriter is a writer that writes messages to the Sink. +#[derive(Clone)] +pub(super) struct SinkWriter { + batch_size: usize, + chunk_timeout: Duration, + retry_config: RetryConfig, + sink_handle: mpsc::Sender<ActorMessage>, + fb_sink_handle: Option<mpsc::Sender<ActorMessage>>, +} + +/// SinkWriterBuilder is a builder to build a SinkWriter.
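SinkActor above is the conventional Tokio actor: state owned by a single task, commands arriving over an mpsc channel, each command carrying a oneshot sender for its reply. The same loop in miniature, with toy types standing in for the crate's:

use tokio::sync::{mpsc, oneshot};

enum Command {
    Echo {
        payload: String,
        respond_to: oneshot::Sender<String>,
    },
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel::<Command>(16);

    // The actor: sole owner of its state, drains the mailbox until all senders drop.
    tokio::spawn(async move {
        while let Some(Command::Echo { payload, respond_to }) = rx.recv().await {
            let _ = respond_to.send(payload.to_uppercase());
        }
    });

    // A caller: send a command, then await the oneshot reply.
    let (reply_tx, reply_rx) = oneshot::channel();
    tx.send(Command::Echo { payload: "hello".into(), respond_to: reply_tx })
        .await
        .unwrap();
    assert_eq!(reply_rx.await.unwrap(), "HELLO");
}

The SinkWriterBuilder that follows is what spawns one such actor per configured sink.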
+pub struct SinkWriterBuilder { + batch_size: usize, + chunk_timeout: Duration, + retry_config: RetryConfig, + sink_client: SinkClientType, + fb_sink_client: Option, +} + +impl SinkWriterBuilder { + pub fn new(batch_size: usize, chunk_timeout: Duration, sink_type: SinkClientType) -> Self { + Self { + batch_size, + chunk_timeout, + retry_config: RetryConfig::default(), + sink_client: sink_type, + fb_sink_client: None, + } + } + + pub fn retry_config(mut self, retry_config: RetryConfig) -> Self { + self.retry_config = retry_config; + self + } + + pub fn fb_sink_client(mut self, fb_sink_client: SinkClientType) -> Self { + self.fb_sink_client = Some(fb_sink_client); + self + } + + /// Build the SinkWriter, it also starts the SinkActor to handle messages. + pub async fn build(self) -> Result { + let (sender, receiver) = mpsc::channel(self.batch_size); + + match self.sink_client { SinkClientType::Log => { let log_sink = log::LogSink; tokio::spawn(async { @@ -114,87 +153,140 @@ impl SinkHandle { }); } }; - Ok(Self { sender }) + + let fb_sink_handle = if let Some(fb_sink_client) = self.fb_sink_client { + let (fb_sender, fb_receiver) = mpsc::channel(self.batch_size); + match fb_sink_client { + SinkClientType::Log => { + let log_sink = log::LogSink; + tokio::spawn(async { + let mut actor = SinkActor::new(fb_receiver, log_sink); + while let Some(msg) = actor.actor_messages.recv().await { + actor.handle_message(msg).await; + } + }); + } + SinkClientType::Blackhole => { + let blackhole_sink = blackhole::BlackholeSink; + tokio::spawn(async { + let mut actor = SinkActor::new(fb_receiver, blackhole_sink); + while let Some(msg) = actor.actor_messages.recv().await { + actor.handle_message(msg).await; + } + }); + } + SinkClientType::UserDefined(sink_client) => { + let sink = UserDefinedSink::new(sink_client).await?; + tokio::spawn(async { + let mut actor = SinkActor::new(fb_receiver, sink); + while let Some(msg) = actor.actor_messages.recv().await { + actor.handle_message(msg).await; + } + }); + } + }; + Some(fb_sender) + } else { + None + }; + + Ok(SinkWriter { + batch_size: self.batch_size, + chunk_timeout: self.chunk_timeout, + retry_config: self.retry_config, + sink_handle: sender, + fb_sink_handle, + }) } +} - pub(crate) async fn sink(&self, messages: Vec) -> Result> { +impl SinkWriter { + /// Sink the messages to the Sink. + async fn sink(&self, messages: Vec) -> Result> { let (tx, rx) = oneshot::channel(); let msg = ActorMessage::Sink { messages, respond_to: tx, }; - let _ = self.sender.send(msg).await; + let _ = self.sink_handle.send(msg).await; rx.await.unwrap() } -} -#[derive(Clone)] -pub(super) struct SinkWriter { - batch_size: usize, - read_timeout: time::Duration, - config: SinkVtxConfig, - sink_handle: SinkHandle, - fb_sink_handle: Option, -} + /// Sink the messages to the Fallback Sink. 
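Assuming the builder lands as written, wiring a writer is declarative. A fragment (not a standalone program) showing how crate-internal code might call it, with a builtin log primary and a blackhole fallback; the numbers are illustrative:

use std::time::Duration;

// Inside an async fn returning the crate's Result; retry_config falls back
// to RetryConfig::default() when not supplied.
let sink_writer = SinkWriterBuilder::new(
    500,                       // batch size, also the actor mailbox depth
    Duration::from_millis(10), // flush a partial batch after this long
    SinkClientType::Log,
)
.fb_sink_client(SinkClientType::Blackhole)
.build()
.await?;

The fb_sink method just below is where that optional fallback handle is exercised.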
+ async fn fb_sink(&self, messages: Vec<Message>) -> Result<Vec<ResponseFromSink>> { + if self.fb_sink_handle.is_none() { + return Err(Error::Sink( + "Response contains fallback messages but no fallback sink is configured" + .to_string(), + )); + } -impl SinkWriter { - pub(super) async fn new( - batch_size: usize, - read_timeout: time::Duration, - config: SinkVtxConfig, - sink_handle: SinkHandle, - fb_sink_handle: Option<SinkHandle>, - ) -> Result<Self> { - Ok(Self { - batch_size, - read_timeout, - config, - sink_handle, - fb_sink_handle, - }) + let (tx, rx) = oneshot::channel(); + let msg = ActorMessage::Sink { + messages, + respond_to: tx, + }; + let _ = self.fb_sink_handle.as_ref().unwrap().send(msg).await; + rx.await.unwrap() } - pub(super) async fn start( + /// Streaming write the messages to the Sink; it will keep writing messages until the stream is + /// closed or the cancellation token is triggered. + pub(super) async fn streaming_write( &self, - messages_rx: Receiver<ReadMessage>, + messages_stream: ReceiverStream<ReadMessage>, cancellation_token: CancellationToken, ) -> Result<JoinHandle<Result<()>>> { let handle: JoinHandle<Result<()>> = tokio::spawn({ let mut this = self.clone(); async move { - let chunk_stream = tokio_stream::wrappers::ReceiverStream::new(messages_rx) - .chunks_timeout(this.batch_size, this.read_timeout); + let chunk_stream = + messages_stream.chunks_timeout(this.batch_size, this.chunk_timeout); pin!(chunk_stream); - while let Some(batch) = chunk_stream.next().await { + let mut processed_msgs_count: usize = 0; + let mut last_logged_at = std::time::Instant::now(); + + loop { + let batch = match chunk_stream.next().await { + Some(batch) => batch, + None => { + break; + } + }; + if batch.is_empty() { continue; } - let messages: Vec<Message> = - batch.iter().map(|rm| rm.message.clone()).collect(); + let n = batch.len(); + let (messages, senders): (Vec<_>, Vec<_>) = + batch.into_iter().map(|rm| (rm.message, rm.ack)).unzip(); - match this - .write_to_sink(messages, cancellation_token.clone()) - .await - { + match this.write(messages, cancellation_token.clone()).await { Ok(_) => { - for rm in batch { - let _ = rm.ack.send(ReadAck::Ack); + for sender in senders { + let _ = sender.send(ReadAck::Ack); } } Err(e) => { error!(?e, "Error writing to sink"); - for rm in batch { - let _ = rm.ack.send(ReadAck::Nak); + for sender in senders { + let _ = sender.send(ReadAck::Nak); } } } - if cancellation_token.is_cancelled() { - warn!("Cancellation token is cancelled. Exiting SinkWriter"); - break; + processed_msgs_count += n; + if last_logged_at.elapsed().as_millis() >= 1000 { + info!( + "Processed {} messages at {:?}", + processed_msgs_count, + std::time::Instant::now() + ); + processed_msgs_count = 0; + last_logged_at = std::time::Instant::now(); } } @@ -204,8 +296,8 @@ impl SinkWriter { Ok(handle) } - // Writes the messages to the sink and handles fallback messages if present - async fn write_to_sink( + /// Write the messages to the Sink. + pub(crate) async fn write( &mut self, messages: Vec<Message>, cln_token: CancellationToken, @@ -223,12 +315,7 @@ impl SinkWriter { // only breaks out of this loop based on the retry strategy unless all the messages have been written to sink // successfully.
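A step back to the batching above: streaming_write leans entirely on tokio-stream's chunks_timeout (behind its time feature), which yields a Vec as soon as it holds batch_size items, or earlier with whatever arrived when the timeout lapses. A self-contained demonstration with toy u32 payloads:

use std::time::Duration;

use tokio::{pin, sync::mpsc};
use tokio_stream::{wrappers::ReceiverStream, StreamExt};

#[tokio::main]
async fn main() {
    let (tx, rx) = mpsc::channel(16);

    tokio::spawn(async move {
        for i in 0..5u32 {
            tx.send(i).await.unwrap();
        }
        // tx drops here; the stream ends once drained.
    });

    // Up to 3 items per batch, or a partial batch after 50ms of quiet.
    let batches = ReceiverStream::new(rx).chunks_timeout(3, Duration::from_millis(50));
    pin!(batches);

    while let Some(batch) = batches.next().await {
        println!("{:?}", batch); // prints [0, 1, 2], then [3, 4]
    }
}

Back inside write(), the loop below consults the configured retry strategy.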
- let retry_config = &self - .config - .sink_config - .retry_config - .clone() - .unwrap_or_default(); + let retry_config = &self.retry_config.clone(); loop { while attempts < retry_config.sink_max_retry_attempts { @@ -347,11 +434,8 @@ impl SinkWriter { messages_to_send: &mut Vec, retry_config: &RetryConfig, ) -> Result { - let start_time = time::Instant::now(); - match self.sink_handle.sink(messages_to_send.clone()).await { + match self.sink(messages_to_send.clone()).await { Ok(response) => { - debug!("Sink latency - {}ms", start_time.elapsed().as_millis()); - // create a map of id to result, since there is no strict requirement // for the udsink to return the results in the same order as the requests let result_map = response @@ -385,7 +469,7 @@ impl SinkWriter { return Ok(true); } - sleep(tokio::time::Duration::from_millis( + sleep(Duration::from_millis( retry_config.sink_retry_interval_in_ms as u64, )) .await; @@ -410,7 +494,6 @@ impl SinkWriter { )); } - let fallback_client = self.fb_sink_handle.as_mut().unwrap(); let mut attempts = 0; let mut fallback_error_map = HashMap::new(); // start with the original set of message to be sent. @@ -426,8 +509,8 @@ impl SinkWriter { let sleep_interval = default_retry.interval.unwrap(); while attempts < max_attempts { - let start_time = tokio::time::Instant::now(); - match fallback_client.sink(messages_to_send.clone()).await { + let start_time = time::Instant::now(); + match self.fb_sink(messages_to_send.clone()).await { Ok(fb_response) => { debug!( "Fallback sink latency - {}ms", @@ -496,3 +579,274 @@ impl SinkWriter { Ok(()) } } + +impl Drop for SinkWriter { + fn drop(&mut self) {} +} + +#[cfg(test)] +mod tests { + use chrono::Utc; + use numaflow::sink; + use tokio::time::Duration; + use tokio_util::sync::CancellationToken; + + use super::*; + use crate::message::{Message, MessageID}; + use crate::shared::grpc::create_rpc_channel; + + struct SimpleSink; + #[tonic::async_trait] + impl sink::Sinker for SimpleSink { + async fn sink(&self, mut input: Receiver) -> Vec { + let mut responses: Vec = Vec::new(); + while let Some(datum) = input.recv().await { + if datum.keys.first().unwrap() == "fallback" { + responses.push(sink::Response::fallback(datum.id)); + continue; + } else if datum.keys.first().unwrap() == "error" { + responses.push(sink::Response::failure( + datum.id, + "simple error".to_string(), + )); + } else { + responses.push(sink::Response::ok(datum.id)); + } + } + responses + } + } + + #[tokio::test] + async fn test_write() { + let mut sink_writer = + SinkWriterBuilder::new(10, Duration::from_secs(1), SinkClientType::Log) + .build() + .await + .unwrap(); + + let messages: Vec = (0..5) + .map(|i| Message { + keys: vec![format!("key_{}", i)], + value: format!("message {}", i).as_bytes().to_vec().into(), + offset: None, + event_time: Utc::now(), + id: MessageID { + vertex_name: "vertex".to_string(), + offset: format!("offset_{}", i), + index: i, + }, + headers: HashMap::new(), + }) + .collect(); + + let result = sink_writer + .write(messages.clone(), CancellationToken::new()) + .await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn test_streaming_write() { + let sink_writer = + SinkWriterBuilder::new(10, Duration::from_millis(100), SinkClientType::Log) + .build() + .await + .unwrap(); + + let messages: Vec = (0..10) + .map(|i| Message { + keys: vec![format!("key_{}", i)], + value: format!("message {}", i).as_bytes().to_vec().into(), + offset: None, + event_time: Utc::now(), + id: MessageID { + vertex_name: "vertex".to_string(), + 
offset: format!("offset_{}", i), + index: i, + }, + headers: HashMap::new(), + }) + .collect(); + + let (tx, rx) = mpsc::channel(10); + let mut ack_rxs = vec![]; + for msg in messages { + let (ack_tx, ack_rx) = oneshot::channel(); + let _ = tx + .send(ReadMessage { + message: msg, + ack: ack_tx, + }) + .await; + ack_rxs.push(ack_rx); + } + drop(tx); + + let handle = sink_writer + .streaming_write(ReceiverStream::new(rx), CancellationToken::new()) + .await + .unwrap(); + + let _ = handle.await.unwrap(); + for ack_rx in ack_rxs { + assert_eq!(ack_rx.await.unwrap(), ReadAck::Ack); + } + } + + #[tokio::test] + async fn test_streaming_write_error() { + // start the server + let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let sock_file = tmp_dir.path().join("sink.sock"); + let server_info_file = tmp_dir.path().join("sink-server-info"); + + let server_info = server_info_file.clone(); + let server_socket = sock_file.clone(); + + let _server_handle = tokio::spawn(async move { + sink::Server::new(SimpleSink) + .with_socket_file(server_socket) + .with_server_info_file(server_info) + .start_with_shutdown(shutdown_rx) + .await + .expect("failed to start sink server"); + }); + + // wait for the server to start + sleep(Duration::from_millis(100)).await; + + let sink_writer = SinkWriterBuilder::new( + 10, + Duration::from_millis(100), + SinkClientType::UserDefined(SinkClient::new( + create_rpc_channel(sock_file).await.unwrap(), + )), + ) + .build() + .await + .unwrap(); + + let messages: Vec = (0..10) + .map(|i| Message { + keys: vec!["error".to_string()], + value: format!("message {}", i).as_bytes().to_vec().into(), + offset: None, + event_time: Utc::now(), + id: MessageID { + vertex_name: "vertex".to_string(), + offset: format!("offset_{}", i), + index: i, + }, + headers: HashMap::new(), + }) + .collect(); + + let (tx, rx) = mpsc::channel(10); + let mut ack_rxs = vec![]; + for msg in messages { + let (ack_tx, ack_rx) = oneshot::channel(); + let _ = tx + .send(ReadMessage { + message: msg, + ack: ack_tx, + }) + .await; + ack_rxs.push(ack_rx); + } + drop(tx); + let cln_token = CancellationToken::new(); + let handle = sink_writer + .streaming_write(ReceiverStream::new(rx), cln_token.clone()) + .await + .unwrap(); + + // cancel the token after 1 second to exit from the retry loop + tokio::spawn(async move { + sleep(Duration::from_secs(1)).await; + cln_token.cancel(); + }); + + let _ = handle.await.unwrap(); + // since the writes fail, all the messages will be NAKed + for ack_rx in ack_rxs { + assert_eq!(ack_rx.await.unwrap(), ReadAck::Nak); + } + } + + #[tokio::test] + async fn test_fallback_write() { + // start the server + let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let sock_file = tmp_dir.path().join("sink.sock"); + let server_info_file = tmp_dir.path().join("sink-server-info"); + + let server_info = server_info_file.clone(); + let server_socket = sock_file.clone(); + + let _server_handle = tokio::spawn(async move { + sink::Server::new(SimpleSink) + .with_socket_file(server_socket) + .with_server_info_file(server_info) + .start_with_shutdown(shutdown_rx) + .await + .expect("failed to start sink server"); + }); + + // wait for the server to start + sleep(Duration::from_millis(100)).await; + + let sink_writer = SinkWriterBuilder::new( + 10, + Duration::from_millis(100), + SinkClientType::UserDefined(SinkClient::new( + 
create_rpc_channel(sock_file).await.unwrap(), + )), + ) + .fb_sink_client(SinkClientType::Log) + .build() + .await + .unwrap(); + + let messages: Vec = (0..20) + .map(|i| Message { + keys: vec!["fallback".to_string()], + value: format!("message {}", i).as_bytes().to_vec().into(), + offset: None, + event_time: Utc::now(), + id: MessageID { + vertex_name: "vertex".to_string(), + offset: format!("offset_{}", i), + index: i, + }, + headers: HashMap::new(), + }) + .collect(); + + let (tx, rx) = mpsc::channel(20); + let mut ack_rxs = vec![]; + for msg in messages { + let (ack_tx, ack_rx) = oneshot::channel(); + let _ = tx + .send(ReadMessage { + message: msg, + ack: ack_tx, + }) + .await; + ack_rxs.push(ack_rx); + } + drop(tx); + let cln_token = CancellationToken::new(); + let handle = sink_writer + .streaming_write(ReceiverStream::new(rx), cln_token.clone()) + .await + .unwrap(); + + let _ = handle.await.unwrap(); + for ack_rx in ack_rxs { + assert_eq!(ack_rx.await.unwrap(), ReadAck::Ack); + } + } +} diff --git a/rust/numaflow-core/src/sink/blackhole.rs b/rust/numaflow-core/src/sink/blackhole.rs index 41ddfd06d..d3cc7a53c 100644 --- a/rust/numaflow-core/src/sink/blackhole.rs +++ b/rust/numaflow-core/src/sink/blackhole.rs @@ -23,10 +23,8 @@ mod tests { use super::BlackholeSink; use crate::message::IntOffset; - use crate::{ - message::{Message, MessageID, Offset, ResponseFromSink, ResponseStatusFromSink}, - sink::Sink, - }; + use crate::message::{Message, MessageID, Offset, ResponseFromSink, ResponseStatusFromSink}; + use crate::sink::Sink; #[tokio::test] async fn test_black_hole() { diff --git a/rust/numaflow-core/src/sink/log.rs b/rust/numaflow-core/src/sink/log.rs index 4e53d8b79..970ab66bd 100644 --- a/rust/numaflow-core/src/sink/log.rs +++ b/rust/numaflow-core/src/sink/log.rs @@ -1,7 +1,7 @@ +use crate::sink::Sink; use crate::{ error, message::{Message, ResponseFromSink, ResponseStatusFromSink}, - sink::Sink, }; pub(crate) struct LogSink; @@ -39,10 +39,8 @@ mod tests { use super::LogSink; use crate::message::IntOffset; - use crate::{ - message::{Message, MessageID, Offset, ResponseFromSink, ResponseStatusFromSink}, - sink::Sink, - }; + use crate::message::{Message, MessageID, Offset, ResponseFromSink, ResponseStatusFromSink}; + use crate::sink::Sink; #[tokio::test] async fn test_log_sink() { diff --git a/rust/numaflow-core/src/sink/user_defined.rs b/rust/numaflow-core/src/sink/user_defined.rs index 5799291ea..81ac3d202 100644 --- a/rust/numaflow-core/src/sink/user_defined.rs +++ b/rust/numaflow-core/src/sink/user_defined.rs @@ -1,7 +1,3 @@ -use crate::message::{Message, ResponseFromSink}; -use crate::sink::Sink; -use crate::Error; -use crate::Result; use numaflow_pb::clients::sink::sink_client::SinkClient; use numaflow_pb::clients::sink::{Handshake, SinkRequest, SinkResponse, TransmissionStatus}; use tokio::sync::mpsc; @@ -9,9 +5,14 @@ use tokio_stream::wrappers::ReceiverStream; use tonic::transport::Channel; use tonic::{Request, Streaming}; +use crate::message::{Message, ResponseFromSink}; +use crate::sink::Sink; +use crate::Error; +use crate::Result; + const DEFAULT_CHANNEL_SIZE: usize = 1000; -/// User-Defined Sink code writes messages to a custom [Sink]. +/// User-Defined Sink code writes messages to a custom [SinkWriter]. pub struct UserDefinedSink { sink_tx: mpsc::Sender, resp_stream: Streaming, @@ -44,7 +45,7 @@ impl UserDefinedSink { "failed to receive handshake response".to_string(), ))?; - // Handshake cannot be None during the initial phase and it has to set `sot` to true. 
+ // Handshake cannot be None during the initial phase, and it has to set `sot` to true. if handshake_response.handshake.map_or(true, |h| !h.sot) { return Err(Error::Sink("invalid handshake response".to_string())); } @@ -125,7 +126,7 @@ mod tests { use super::*; use crate::error::Result; use crate::message::{Message, MessageID}; - use crate::shared::utils::create_rpc_channel; + use crate::shared::grpc::create_rpc_channel; use crate::sink::user_defined::UserDefinedSink; struct Logger; diff --git a/rust/numaflow-core/src/source.rs b/rust/numaflow-core/src/source.rs index f851268b7..b48f85222 100644 --- a/rust/numaflow-core/src/source.rs +++ b/rust/numaflow-core/src/source.rs @@ -1,9 +1,22 @@ +use numaflow_pulsar::source::PulsarSource; +use tokio::sync::{mpsc, oneshot}; +use tokio::task::JoinHandle; +use tokio::time; +use tokio_stream::wrappers::ReceiverStream; +use tokio_util::sync::CancellationToken; +use tracing::{error, info}; + +use crate::config::{get_vertex_name, is_mono_vertex}; +use crate::message::{ReadAck, ReadMessage}; +use crate::metrics::{ + monovertex_metrics, mvtx_forward_metric_labels, pipeline_forward_metric_labels, + pipeline_isb_metric_labels, pipeline_metrics, +}; +use crate::Result; use crate::{ message::{Message, Offset}, reader::LagReader, }; -use numaflow_pulsar::source::PulsarSource; -use tokio::sync::{mpsc, oneshot}; /// [User-Defined Source] extends Numaflow to add custom sources supported outside the builtins. /// @@ -26,7 +39,7 @@ pub(crate) trait SourceReader { /// Name of the source. fn name(&self) -> &'static str; - async fn read(&mut self) -> crate::Result<Vec<Message>>; + async fn read(&mut self) -> Result<Vec<Message>>; #[allow(dead_code)] /// number of partitions processed by this source. @@ -36,7 +49,21 @@ pub(crate) trait SourceReader { /// Set of Ack related items that has to be implemented to become a Source. pub(crate) trait SourceAcker { /// acknowledge an offset. The implementor might choose to do it in an asynchronous way. - async fn ack(&mut self, _: Vec<Offset>) -> crate::Result<()>; + async fn ack(&mut self, _: Vec<Offset>) -> Result<()>; +} + +pub(crate) enum SourceType { + UserDefinedSource( + user_defined::UserDefinedSourceRead, + user_defined::UserDefinedSourceAck, + user_defined::UserDefinedSourceLagReader, + ), + Generator( + generator::GeneratorRead, + generator::GeneratorAck, + generator::GeneratorLagReader, + ), + Pulsar(PulsarSource), } enum ActorMessage { @@ -45,14 +72,14 @@ respond_to: oneshot::Sender<&'static str>, }, Read { - respond_to: oneshot::Sender<crate::Result<Vec<Message>>>, + respond_to: oneshot::Sender<Result<Vec<Message>>>, }, Ack { - respond_to: oneshot::Sender<crate::Result<()>>, + respond_to: oneshot::Sender<Result<()>>, offsets: Vec<Offset>, }, Pending { - respond_to: oneshot::Sender<crate::Result<Option<usize>>>, + respond_to: oneshot::Sender<Result<Option<usize>>>, }, } @@ -103,13 +130,16 @@ where } } +/// Source is used to read, ack, and get the pending messages count from the source. #[derive(Clone)] -pub(crate) struct SourceHandle { +pub(crate) struct Source { + read_batch_size: usize, sender: mpsc::Sender<ActorMessage>, } -impl SourceHandle { - pub(crate) fn new(src_type: SourceType, batch_size: usize) -> Self { +impl Source { + /// Create a new Source. It starts the read and ack actors in the background.
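SourceType trades trait objects for an enum: one match at construction time, static dispatch afterwards. The pattern in miniature, with toy reader variants standing in for the real read/ack/lag-reader triples:

// Each variant owns a concrete reader; no Box<dyn ...>, no vtable.
enum Reader {
    Generator(GeneratorReader),
    Static(StaticReader),
}

struct GeneratorReader {
    counter: u64,
}

struct StaticReader {
    items: Vec<String>,
}

impl Reader {
    fn read(&mut self) -> Option<String> {
        match self {
            Reader::Generator(g) => {
                g.counter += 1;
                Some(format!("generated-{}", g.counter))
            }
            Reader::Static(s) => s.items.pop(),
        }
    }
}

fn main() {
    let mut r = Reader::Generator(GeneratorReader { counter: 0 });
    assert_eq!(r.read().as_deref(), Some("generated-1"));
}

new(), next, performs that match once and then hides the chosen variant behind the actor channel.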
+ pub(crate) fn new(batch_size: usize, src_type: SourceType) -> Self { let (sender, receiver) = mpsc::channel(batch_size); match src_type { SourceType::UserDefinedSource(reader, acker, lag_reader) => { @@ -142,21 +172,26 @@ impl SourceHandle { }); } }; - Self { sender } + Self { + read_batch_size: batch_size, + sender, + } } - pub(crate) async fn read(&self) -> crate::Result<Vec<Message>> { + /// Read messages from the source by communicating with the read actor. + async fn read(source_handle: mpsc::Sender<ActorMessage>) -> Result<Vec<Message>> { let (sender, receiver) = oneshot::channel(); let msg = ActorMessage::Read { respond_to: sender }; // Ignore send errors. If send fails, so does the recv.await below. There's no reason // to check for the same failure twice. - let _ = self.sender.send(msg).await; + let _ = source_handle.send(msg).await; receiver .await .map_err(|e| crate::error::Error::ActorPatternRecv(e.to_string()))? } - pub(crate) async fn ack(&self, offsets: Vec<Offset>) -> crate::Result<()> { + /// Ack the offsets by communicating with the ack actor. + async fn ack(source_handle: mpsc::Sender<ActorMessage>, offsets: Vec<Offset>) -> Result<()> { let (sender, receiver) = oneshot::channel(); let msg = ActorMessage::Ack { respond_to: sender, @@ -164,13 +199,14 @@ impl SourceHandle { }; // Ignore send errors. If send fails, so does the recv.await below. There's no reason // to check for the same failure twice. - let _ = self.sender.send(msg).await; + let _ = source_handle.send(msg).await; receiver .await .map_err(|e| crate::error::Error::ActorPatternRecv(e.to_string()))? } - pub(crate) async fn pending(&self) -> crate::error::Result<Option<usize>> { + /// Get the pending messages count by communicating with the pending actor. + pub(crate) async fn pending(&self) -> Result<Option<usize>> { let (sender, receiver) = oneshot::channel(); let msg = ActorMessage::Pending { respond_to: sender }; // Ignore send errors. If send fails, so does the recv.await below. There's no reason @@ -180,18 +216,335 @@ impl SourceHandle { .await .map_err(|e| crate::error::Error::ActorPatternRecv(e.to_string()))? } + + /// Starts streaming messages from the source. It returns a stream of messages and + /// a handle to the spawned task. + pub(crate) fn streaming_read( + &self, + cln_token: CancellationToken, + ) -> Result<(ReceiverStream<ReadMessage>, JoinHandle<Result<()>>)> { + let batch_size = self.read_batch_size; + let (messages_tx, messages_rx) = mpsc::channel(batch_size); + let source_handle = self.sender.clone(); + + let pipeline_labels = pipeline_forward_metric_labels("Source", Some(get_vertex_name())); + let mvtx_labels = mvtx_forward_metric_labels(); + + info!("Started streaming source with batch size: {}", batch_size); + let handle = tokio::spawn(async move { + let mut processed_msgs_count: usize = 0; + let mut last_logged_at = tokio::time::Instant::now(); + + loop { + if cln_token.is_cancelled() { + info!("Cancellation token is cancelled. Stopping the source."); + return Ok(()); + } + let permit_time = tokio::time::Instant::now(); + // Reserve the permits before invoking the read method.
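That reservation is tokio's Sender::reserve_many (tokio 1.36 or later), and it is the back-pressure hinge of the loop: the task suspends until the channel can absorb an entire batch, so the source is never read faster than downstream drains. The mechanism in isolation:

use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    let (tx, mut rx) = mpsc::channel::<u32>(8);

    // Suspend until 4 slots are free, then send into them without awaiting.
    let permits = tx.reserve_many(4).await.expect("channel closed");
    for (i, permit) in permits.enumerate() {
        permit.send(i as u32); // infallible: the slot is already reserved
    }

    for _ in 0..4 {
        assert!(rx.recv().await.is_some());
    }
}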
+ let mut permit = match messages_tx.reserve_many(batch_size).await { + Ok(permit) => { + info!( + "Reserved permits for {} messages in {:?}", + batch_size, + permit_time.elapsed() + ); + permit + } + Err(e) => { + error!("Error while reserving permits: {:?}", e); + return Err(crate::error::Error::Source(e.to_string())); + } + }; + + let read_start_time = tokio::time::Instant::now(); + let messages = match Self::read(source_handle.clone()).await { + Ok(messages) => messages, + Err(e) => { + error!("Error while reading messages: {:?}", e); + return Err(e); + } + }; + let n = messages.len(); + if is_mono_vertex() { + monovertex_metrics() + .read_total + .get_or_create(mvtx_labels) + .inc_by(n as u64); + monovertex_metrics() + .read_time + .get_or_create(mvtx_labels) + .observe(read_start_time.elapsed().as_micros() as f64); + } else { + pipeline_metrics() + .forwarder + .read_total + .get_or_create(pipeline_labels) + .inc_by(n as u64); + pipeline_metrics() + .forwarder + .read_time + .get_or_create(pipeline_labels) + .observe(read_start_time.elapsed().as_micros() as f64); + } + + let mut ack_batch = Vec::with_capacity(n); + for message in messages { + let (resp_ack_tx, resp_ack_rx) = oneshot::channel(); + let offset = message.offset.clone().unwrap(); + + let read_message = ReadMessage { + message, + ack: resp_ack_tx, + }; + + // store the ack one shot in the batch to invoke ack later. + ack_batch.push((offset, resp_ack_rx)); + + match permit.next() { + Some(permit) => { + permit.send(read_message); + } + None => { + unreachable!( + "Permits should be reserved for all messages in the batch" + ); + } + } + } + + // start a background task to invoke ack on the source for the offsets that are acked. + tokio::spawn(Self::invoke_ack( + read_start_time, + source_handle.clone(), + ack_batch, + )); + + processed_msgs_count += n; + if last_logged_at.elapsed().as_secs() >= 1 { + info!( + "Processed {} messages in {:?}", + processed_msgs_count, + std::time::Instant::now() + ); + processed_msgs_count = 0; + last_logged_at = tokio::time::Instant::now(); + } + } + }); + Ok((ReceiverStream::new(messages_rx), handle)) + } + + /// Listens to the oneshot receivers and invokes ack on the source for the offsets that are acked. 
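Each ReadMessage carries its own oneshot; the spawned acker holds the receiving halves and folds the positive outcomes into one batched ack call. The collection step, reduced to a toy offset type:

use tokio::sync::oneshot;

enum Ack {
    Ack,
    Nak,
}

#[tokio::main]
async fn main() {
    // Three in-flight messages, each with its own ack oneshot.
    let mut pending = Vec::new();
    for offset in 0u64..3 {
        let (tx, rx) = oneshot::channel::<Ack>();
        pending.push((offset, rx));
        // Downstream decides each message's fate; offset 1 gets a Nak here.
        let verdict = if offset == 1 { Ack::Nak } else { Ack::Ack };
        let _ = tx.send(verdict);
    }

    // Only positively acked offsets make it into the batched ack.
    let mut to_ack = Vec::new();
    for (offset, rx) in pending {
        match rx.await {
            Ok(Ack::Ack) => to_ack.push(offset),
            Ok(Ack::Nak) | Err(_) => eprintln!("offset {} not acked", offset),
        }
    }
    assert_eq!(to_ack, vec![0, 2]);
}

invoke_ack below is this same fold with latency and throughput metrics attached.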
+ async fn invoke_ack( + e2e_start_time: time::Instant, + source_handle: mpsc::Sender, + ack_rx_batch: Vec<(Offset, oneshot::Receiver)>, + ) -> Result<()> { + let n = ack_rx_batch.len(); + let mut offsets_to_ack = Vec::with_capacity(n); + + for (offset, oneshot_rx) in ack_rx_batch { + match oneshot_rx.await { + Ok(ReadAck::Ack) => { + offsets_to_ack.push(offset); + } + Ok(ReadAck::Nak) => { + error!("Nak received for offset: {:?}", offset); + } + Err(e) => { + error!( + "Error receiving ack for offset: {:?}, error: {:?}", + offset, e + ); + } + } + } + + let start = time::Instant::now(); + if !offsets_to_ack.is_empty() { + Self::ack(source_handle, offsets_to_ack).await?; + } + + if is_mono_vertex() { + monovertex_metrics() + .ack_time + .get_or_create(mvtx_forward_metric_labels()) + .observe(start.elapsed().as_micros() as f64); + + monovertex_metrics() + .ack_total + .get_or_create(mvtx_forward_metric_labels()) + .inc_by(n as u64); + + monovertex_metrics() + .e2e_time + .get_or_create(mvtx_forward_metric_labels()) + .observe(e2e_start_time.elapsed().as_micros() as f64); + } else { + pipeline_metrics() + .forwarder + .ack_time + .get_or_create(pipeline_isb_metric_labels()) + .observe(start.elapsed().as_micros() as f64); + + pipeline_metrics() + .forwarder + .ack_total + .get_or_create(pipeline_isb_metric_labels()) + .inc_by(n as u64); + + pipeline_metrics() + .forwarder + .processed_time + .get_or_create(pipeline_isb_metric_labels()) + .observe(e2e_start_time.elapsed().as_micros() as f64); + } + Ok(()) + } } -pub(crate) enum SourceType { - UserDefinedSource( - user_defined::UserDefinedSourceRead, - user_defined::UserDefinedSourceAck, - user_defined::UserDefinedSourceLagReader, - ), - Generator( - generator::GeneratorRead, - generator::GeneratorAck, - generator::GeneratorLagReader, - ), - Pulsar(PulsarSource), +#[cfg(test)] +mod tests { + use std::collections::HashSet; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::time::Duration; + + use chrono::Utc; + use futures::StreamExt; + use numaflow::source; + use numaflow::source::{Message, Offset, SourceReadRequest}; + use numaflow_pb::clients::source::source_client::SourceClient; + use tokio::sync::mpsc::Sender; + use tokio_util::sync::CancellationToken; + + use crate::shared::grpc::create_rpc_channel; + use crate::source::user_defined::new_source; + use crate::source::{Source, SourceType}; + + struct SimpleSource { + num: usize, + sent_count: AtomicUsize, + yet_to_ack: std::sync::RwLock>, + } + + impl SimpleSource { + fn new(num: usize) -> Self { + Self { + num, + sent_count: AtomicUsize::new(0), + yet_to_ack: std::sync::RwLock::new(HashSet::new()), + } + } + } + + #[tonic::async_trait] + impl source::Sourcer for SimpleSource { + async fn read(&self, request: SourceReadRequest, transmitter: Sender) { + let event_time = Utc::now(); + let mut message_offsets = Vec::with_capacity(request.count); + + for i in 0..request.count { + if self.sent_count.load(Ordering::SeqCst) >= self.num { + return; + } + + let offset = format!("{}-{}", event_time.timestamp_nanos_opt().unwrap(), i); + transmitter + .send(Message { + value: b"hello".to_vec(), + event_time, + offset: Offset { + offset: offset.clone().into_bytes(), + partition_id: 0, + }, + keys: vec![], + headers: Default::default(), + }) + .await + .unwrap(); + message_offsets.push(offset); + self.sent_count.fetch_add(1, Ordering::SeqCst); + } + self.yet_to_ack.write().unwrap().extend(message_offsets); + } + + async fn ack(&self, offsets: Vec) { + for offset in offsets { + self.yet_to_ack + 
.write() + .unwrap() + .remove(&String::from_utf8(offset.offset).unwrap()); + } + } + + async fn pending(&self) -> usize { + self.yet_to_ack.read().unwrap().len() + } + + async fn partitions(&self) -> Option> { + Some(vec![1, 2]) + } + } + + #[tokio::test] + async fn test_source() { + // start the server + let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel(); + let tmp_dir = tempfile::TempDir::new().unwrap(); + let sock_file = tmp_dir.path().join("source.sock"); + let server_info_file = tmp_dir.path().join("source-server-info"); + + let server_info = server_info_file.clone(); + let server_socket = sock_file.clone(); + let server_handle = tokio::spawn(async move { + // a simple source which generates total of 100 messages + source::Server::new(SimpleSource::new(100)) + .with_socket_file(server_socket) + .with_server_info_file(server_info) + .start_with_shutdown(shutdown_rx) + .await + .unwrap() + }); + + // wait for the server to start + // TODO: flaky + tokio::time::sleep(Duration::from_millis(100)).await; + + let client = SourceClient::new(create_rpc_channel(sock_file).await.unwrap()); + + let (src_read, src_ack, lag_reader) = new_source(client, 5, Duration::from_millis(1000)) + .await + .map_err(|e| panic!("failed to create source reader: {:?}", e)) + .unwrap(); + + let source = Source::new( + 5, + SourceType::UserDefinedSource(src_read, src_ack, lag_reader), + ); + + let cln_token = CancellationToken::new(); + + let (mut stream, handle) = source.streaming_read(cln_token.clone()).unwrap(); + let mut offsets = vec![]; + // we should read all the 100 messages + for _ in 0..100 { + let message = stream.next().await.unwrap(); + assert_eq!(message.message.value, "hello".as_bytes()); + offsets.push(message.message.offset.clone().unwrap()); + } + + // ack all the messages + Source::ack(source.sender.clone(), offsets).await.unwrap(); + + // since we acked all the messages, pending should be 0 + let pending = source.pending().await.unwrap(); + assert_eq!(pending, Some(0)); + + cln_token.cancel(); + let _ = handle.await.unwrap(); + drop(source); + let _ = shutdown_tx.send(()); + server_handle.await.unwrap(); + } } diff --git a/rust/numaflow-core/src/source/generator.rs b/rust/numaflow-core/src/source/generator.rs index 3c91bbf1c..22bdf94d5 100644 --- a/rust/numaflow-core/src/source/generator.rs +++ b/rust/numaflow-core/src/source/generator.rs @@ -33,10 +33,8 @@ mod stream_generator { use tracing::warn; use crate::config::components::source::GeneratorConfig; - use crate::message::{ - get_vertex_name, get_vertex_replica, Message, MessageID, Offset, StringOffset, - }; - + use crate::config::{get_vertex_name, get_vertex_replica}; + use crate::message::{Message, MessageID, Offset, StringOffset}; #[pin_project] pub(super) struct StreamGenerator { /// the content generated by Generator. 
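Every test in this changeset stands up its gRPC counterpart the same way: a tempdir-scoped Unix socket, a server-info file, a oneshot for shutdown, and a fixed 100ms nap before dialing (the "TODO: flaky" above concedes the nap is a shortcut). The harness condensed, using the numaflow crate's sink server as the tests do; the AckAll sinker is illustrative:

use numaflow::sink;
use tokio::sync::{mpsc, oneshot};

struct AckAll;

#[tonic::async_trait]
impl sink::Sinker for AckAll {
    async fn sink(&self, mut input: mpsc::Receiver<sink::SinkRequest>) -> Vec<sink::Response> {
        let mut responses = Vec::new();
        while let Some(datum) = input.recv().await {
            responses.push(sink::Response::ok(datum.id));
        }
        responses
    }
}

#[tokio::main]
async fn main() {
    let (shutdown_tx, shutdown_rx) = oneshot::channel();
    let tmp_dir = tempfile::TempDir::new().unwrap();
    let sock_file = tmp_dir.path().join("sink.sock");
    let server_info_file = tmp_dir.path().join("sink-server-info");

    let server_socket = sock_file.clone();
    let server_info = server_info_file.clone();
    let server_handle = tokio::spawn(async move {
        sink::Server::new(AckAll)
            .with_socket_file(server_socket)
            .with_server_info_file(server_info)
            .start_with_shutdown(shutdown_rx)
            .await
            .unwrap();
    });

    // Give the server a moment to bind the socket before dialing.
    tokio::time::sleep(std::time::Duration::from_millis(100)).await;

    // ... dial sock_file with create_rpc_channel and run assertions here ...

    shutdown_tx.send(()).unwrap();
    server_handle.await.unwrap();
}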
diff --git a/rust/numaflow-core/src/source/pulsar.rs b/rust/numaflow-core/src/source/pulsar.rs index 6d8d1d33f..0b81f2615 100644 --- a/rust/numaflow-core/src/source/pulsar.rs +++ b/rust/numaflow-core/src/source/pulsar.rs @@ -1,9 +1,11 @@ use std::time::Duration; +use numaflow_pulsar::source::{PulsarMessage, PulsarSource, PulsarSourceConfig}; + +use crate::config::get_vertex_name; use crate::error::Error; -use crate::message::{get_vertex_name, IntOffset, Message, MessageID, Offset}; +use crate::message::{IntOffset, Message, MessageID, Offset}; use crate::source; -use numaflow_pulsar::source::{PulsarMessage, PulsarSource, PulsarSourceConfig}; impl TryFrom<PulsarMessage> for Message { type Error = Error; diff --git a/rust/numaflow-core/src/source/user_defined.rs b/rust/numaflow-core/src/source/user_defined.rs index 03162b53a..b75564bfb 100644 --- a/rust/numaflow-core/src/source/user_defined.rs +++ b/rust/numaflow-core/src/source/user_defined.rs @@ -242,7 +242,7 @@ mod tests { use tokio::sync::mpsc::Sender; use super::*; - use crate::shared::utils::create_rpc_channel; + use crate::shared::grpc::create_rpc_channel; struct SimpleSource { num: usize, diff --git a/rust/numaflow-core/src/transformer.rs b/rust/numaflow-core/src/transformer.rs index af407e159..d987d6205 100644 --- a/rust/numaflow-core/src/transformer.rs +++ b/rust/numaflow-core/src/transformer.rs @@ -1,4 +1,305 @@ +use std::sync::Arc; + +use numaflow_pb::clients::sourcetransformer::source_transform_client::SourceTransformClient; +use tokio::sync::{mpsc, oneshot, OwnedSemaphorePermit, Semaphore}; +use tokio::task::JoinHandle; +use tokio_stream::wrappers::ReceiverStream; +use tokio_stream::StreamExt; +use tonic::transport::Channel; +use user_defined::ActorMessage; + +use crate::message::{ReadAck, ReadMessage}; +use crate::transformer::user_defined::UserDefinedTransformer; +use crate::Result; + /// User-Defined Transformer extends Numaflow to add custom sources supported outside the builtins. /// /// [User-Defined Transformer]: https://numaflow.numaproj.io/user-guide/sources/transformer/overview/#build-your-own-transformer pub(crate) mod user_defined; + +/// Transformer transforms messages in a streaming fashion. +pub(crate) struct Transformer { + batch_size: usize, + sender: mpsc::Sender<ActorMessage>, + concurrency: usize, +} +impl Transformer { + pub(crate) async fn new( + batch_size: usize, + concurrency: usize, + client: SourceTransformClient<Channel>, + ) -> Result<Self> { + let (sender, mut receiver) = mpsc::channel(batch_size); + let mut client = UserDefinedTransformer::new(batch_size, client).await?; + + tokio::spawn(async move { + while let Some(msg) = receiver.recv().await { + client.handle_message(msg).await; + } + }); + + Ok(Self { + batch_size, + concurrency, + sender, + }) + } + + /// Applies the transformation on the message and sends it to the next stage; it blocks if the + /// concurrency limit is reached.
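That blocking is an Arc<Semaphore>: every in-flight transform holds an owned permit, and acquisition suspends once `concurrency` permits are out. The mechanism in isolation:

use std::sync::Arc;
use tokio::sync::Semaphore;

#[tokio::main]
async fn main() {
    // At most 2 tasks run the guarded section concurrently.
    let semaphore = Arc::new(Semaphore::new(2));
    let mut handles = Vec::new();

    for i in 0..5u32 {
        // acquire_owned ties the permit to the spawned task rather than
        // to this loop's stack frame.
        let permit = semaphore.clone().acquire_owned().await.unwrap();
        handles.push(tokio::spawn(async move {
            let _permit = permit; // released when the task finishes
            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
            i * 2
        }));
    }

    for handle in handles {
        let _ = handle.await.unwrap();
    }
}

transform, below, parks its permit inside the spawned task for exactly the lifetime of one call.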
+ pub(crate) async fn transform( + transform_handle: mpsc::Sender<ActorMessage>, + permit: OwnedSemaphorePermit, + read_msg: ReadMessage, + output_tx: mpsc::Sender<ReadMessage>, + ) -> Result<()> { + // only if we have tasks < max_concurrency + + let output_tx = output_tx.clone(); + + // invoke transformer and then wait for the one-shot + tokio::spawn(async move { + let _permit = permit; + let message = read_msg.message.clone(); + + let (sender, receiver) = oneshot::channel(); + let msg = ActorMessage::Transform { + message, + respond_to: sender, + }; + + // invoke the transformer + transform_handle.send(msg).await.unwrap(); + + // wait for one-shot + match receiver.await { + Ok(Ok(mut transformed_messages)) => { + // FIXME: handle the case where the transformer does flat map operation + if let Some(transformed_msg) = transformed_messages.pop() { + output_tx + .send(ReadMessage { + message: transformed_msg, + ack: read_msg.ack, + }) + .await + .unwrap(); + } + } + Err(_) | Ok(Err(_)) => { + let _ = read_msg.ack.send(ReadAck::Nak); + } + } + }); + + Ok(()) + } + + /// Starts reading messages in the form of chunks and transforms them and + /// sends them to the next stage. + pub(crate) fn transform_stream( + &self, + input_stream: ReceiverStream<ReadMessage>, + ) -> Result<(ReceiverStream<ReadMessage>, JoinHandle<Result<()>>)> { + let (output_tx, output_rx) = mpsc::channel(self.batch_size); + + let transform_handle = self.sender.clone(); + // FIXME: batch_size should not be used, introduce a new config called udf concurrency + let semaphore = Arc::new(Semaphore::new(self.concurrency)); + + let handle = tokio::spawn(async move { + let mut input_stream = input_stream; + + while let Some(read_msg) = input_stream.next().await { + let permit = semaphore.clone().acquire_owned().await.unwrap(); + + Self::transform( + transform_handle.clone(), + permit, + read_msg, + output_tx.clone(), + ) + .await?; + } + Ok(()) + }); + + Ok((ReceiverStream::new(output_rx), handle)) + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use numaflow::sourcetransform; + use numaflow_pb::clients::sourcetransformer::source_transform_client::SourceTransformClient; + use tempfile::TempDir; + use tokio::sync::oneshot; + + use super::*; + use crate::message::{Message, MessageID, Offset, ReadMessage}; + use crate::shared::grpc::create_rpc_channel; + + struct SimpleTransformer; + + #[tonic::async_trait] + impl sourcetransform::SourceTransformer for SimpleTransformer { + async fn transform( + &self, + input: sourcetransform::SourceTransformRequest, + ) -> Vec<sourcetransform::Message> { + let message = sourcetransform::Message::new(input.value, chrono::offset::Utc::now()) + .keys(input.keys); + vec![message] + } + } + + #[tokio::test] + async fn transformer_operations() -> Result<()> { + let (shutdown_tx, shutdown_rx) = oneshot::channel(); + let tmp_dir = TempDir::new().unwrap(); + let sock_file = tmp_dir.path().join("sourcetransform.sock"); + let server_info_file = tmp_dir.path().join("sourcetransformer-server-info"); + + let server_info = server_info_file.clone(); + let server_socket = sock_file.clone(); + let handle = tokio::spawn(async move { + sourcetransform::Server::new(SimpleTransformer) + .with_socket_file(server_socket) + .with_server_info_file(server_info) + .start_with_shutdown(shutdown_rx) + .await + .expect("server failed"); + }); + + // wait for the server to start + tokio::time::sleep(Duration::from_millis(100)).await; + + let client = SourceTransformClient::new(create_rpc_channel(sock_file).await?); + let transformer = Transformer::new(500, 10, client).await?; + + let message = Message { + keys:
vec!["first".into()], + value: "hello".into(), + offset: Some(Offset::String(crate::message::StringOffset::new( + "0".to_string(), + 0, + ))), + event_time: chrono::Utc::now(), + id: MessageID { + vertex_name: "vertex_name".to_string(), + offset: "0".to_string(), + index: 0, + }, + headers: Default::default(), + }; + + let (tx, _) = oneshot::channel(); + + let read_message = ReadMessage { + message: message.clone(), + ack: tx, + }; + + let (output_tx, mut output_rx) = mpsc::channel(10); + + let semaphore = Arc::new(Semaphore::new(10)); + let permit = semaphore.clone().acquire_owned().await.unwrap(); + Transformer::transform(transformer.sender.clone(), permit, read_message, output_tx).await?; + + let transformed_message = output_rx.recv().await.unwrap(); + assert_eq!(transformed_message.message.value, "hello"); + + // we need to drop the transformer, because if there are any in-flight requests + // server fails to shut down. https://github.com/numaproj/numaflow-rs/issues/85 + drop(transformer); + + shutdown_tx + .send(()) + .expect("failed to send shutdown signal"); + tokio::time::sleep(Duration::from_millis(50)).await; + assert!( + handle.is_finished(), + "Expected gRPC server to have shut down" + ); + Ok(()) + } + + #[tokio::test] + async fn test_transform_stream() -> Result<()> { + let (shutdown_tx, shutdown_rx) = oneshot::channel(); + let tmp_dir = TempDir::new().unwrap(); + let sock_file = tmp_dir.path().join("sourcetransform.sock"); + let server_info_file = tmp_dir.path().join("sourcetransformer-server-info"); + + let server_info = server_info_file.clone(); + let server_socket = sock_file.clone(); + let handle = tokio::spawn(async move { + sourcetransform::Server::new(SimpleTransformer) + .with_socket_file(server_socket) + .with_server_info_file(server_info) + .start_with_shutdown(shutdown_rx) + .await + .expect("server failed"); + }); + + // wait for the server to start + tokio::time::sleep(Duration::from_millis(100)).await; + + let client = SourceTransformClient::new(create_rpc_channel(sock_file).await?); + let transformer = Transformer::new(500, 10, client).await?; + + let (input_tx, input_rx) = mpsc::channel(10); + let input_stream = ReceiverStream::new(input_rx); + + for i in 0..5 { + let message = Message { + keys: vec![format!("key_{}", i)], + value: format!("value_{}", i).into(), + offset: Some(Offset::String(crate::message::StringOffset::new( + i.to_string(), + 0, + ))), + event_time: chrono::Utc::now(), + id: MessageID { + vertex_name: "vertex_name".to_string(), + offset: i.to_string(), + index: i as i32, + }, + headers: Default::default(), + }; + let (tx, _) = oneshot::channel(); + let read_message = ReadMessage { message, ack: tx }; + + input_tx.send(read_message).await.unwrap(); + } + drop(input_tx); + + let (output_stream, transform_handle) = transformer.transform_stream(input_stream)?; + + let mut output_rx = output_stream.into_inner(); + + for i in 0..5 { + let transformed_message = output_rx.recv().await.unwrap(); + assert_eq!(transformed_message.message.value, format!("value_{}", i)); + } + + // we need to drop the transformer, because if there are any in-flight requests + // server fails to shut down. 
https://github.com/numaproj/numaflow-rs/issues/85 + drop(transformer); + + shutdown_tx + .send(()) + .expect("failed to send shutdown signal"); + tokio::time::sleep(Duration::from_millis(50)).await; + assert!( + handle.is_finished(), + "Expected gRPC server to have shut down" + ); + assert!( + transform_handle.is_finished(), + "Expected transformer to have shut down" + ); + Ok(()) + } +} diff --git a/rust/numaflow-core/src/transformer/user_defined.rs b/rust/numaflow-core/src/transformer/user_defined.rs index bbacfbbfa..8ebb40971 100644 --- a/rust/numaflow-core/src/transformer/user_defined.rs +++ b/rust/numaflow-core/src/transformer/user_defined.rs @@ -1,40 +1,52 @@ use std::collections::HashMap; +use std::sync::{Arc, Mutex}; use numaflow_pb::clients::sourcetransformer::{ self, source_transform_client::SourceTransformClient, SourceTransformRequest, SourceTransformResponse, }; use tokio::sync::{mpsc, oneshot}; -use tokio::task::JoinHandle; use tokio_stream::wrappers::ReceiverStream; -use tokio_util::sync::CancellationToken; use tonic::transport::Channel; use tonic::{Request, Streaming}; -use tracing::warn; +use crate::config::get_vertex_name; use crate::error::{Error, Result}; -use crate::message::{get_vertex_name, Message, MessageID, Offset}; -use crate::shared::utils::utc_from_timestamp; +use crate::message::{Message, MessageID, Offset}; +use crate::shared::grpc::utc_from_timestamp; -const DROP: &str = "U+005C__DROP__"; +type ResponseSenderMap = + Arc>>)>>>; -/// TransformerClient is a client to interact with the transformer server. -struct SourceTransformer { - actor_messages: mpsc::Receiver, +// fields which will not be changed +struct ParentMessageInfo { + offset: Offset, + headers: HashMap, +} + +pub enum ActorMessage { + Transform { + message: Message, + respond_to: oneshot::Sender>>, + }, +} + +/// UserDefinedTransformer exposes methods to do user-defined transformations. +pub(super) struct UserDefinedTransformer { read_tx: mpsc::Sender, - resp_stream: Streaming, + senders: ResponseSenderMap, } -impl SourceTransformer { - async fn new( +impl UserDefinedTransformer { + /// Performs handshake with the server and creates a new UserDefinedTransformer. + pub(super) async fn new( batch_size: usize, mut client: SourceTransformClient, - actor_messages: mpsc::Receiver, ) -> Result { let (read_tx, read_rx) = mpsc::channel(batch_size); let read_stream = ReceiverStream::new(read_rx); - // do a handshake for read with the server before we start sending read requests + // perform handshake let handshake_request = SourceTransformRequest { request: None, handshake: Some(sourcetransformer::Handshake { sot: true }), @@ -49,184 +61,82 @@ impl SourceTransformer { .await? .into_inner(); - // first response from the server will be the handshake response. We need to check if the - // server has accepted the handshake. let handshake_response = resp_stream.message().await?.ok_or(Error::Transformer( "failed to receive handshake response".to_string(), ))?; - // handshake cannot to None during the initial phase and it has to set `sot` to true. 
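The guard being deleted here (and its replacement earlier in this diff) compresses three cases into one map_or: no handshake at all, a handshake without start-of-transmission, and a valid one. Spelled out with a hypothetical helper, purely to make the polarity explicit:

struct Handshake {
    sot: bool,
}

/// Returns true when the handshake must be rejected.
fn reject_handshake(handshake: Option<Handshake>) -> bool {
    handshake.map_or(true, |h| !h.sot)
}

fn main() {
    assert!(reject_handshake(None)); // absent: reject
    assert!(reject_handshake(Some(Handshake { sot: false }))); // sot unset: reject
    assert!(!reject_handshake(Some(Handshake { sot: true }))); // sot set: accept
}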
if handshake_response.handshake.map_or(true, |h| !h.sot) { return Err(Error::Transformer("invalid handshake response".to_string())); } - Ok(Self { - actor_messages, - read_tx, - resp_stream, - }) - } + // map to track the oneshot sender for each request along with the message info + let sender_map = Arc::new(Mutex::new(HashMap::new())); - async fn handle_message(&mut self, message: ActorMessage) { - match message { - ActorMessage::Transform { - messages, - respond_to, - } => { - let result = self.transform_fn(messages).await; - let _ = respond_to.send(result); - } - } - } + let transformer = Self { + read_tx, + senders: sender_map.clone(), + }; - async fn transform_fn(&mut self, messages: Vec) -> Result> { - // fields which will not be changed - struct MessageInfo { - offset: Offset, - headers: HashMap, - } + // background task to receive responses from the server and send them to the appropriate + // oneshot sender based on the message id + tokio::spawn(Self::receive_responses(sender_map, resp_stream)); - let mut tracker: HashMap = HashMap::with_capacity(messages.len()); - for message in &messages { - tracker.insert( - message.id.to_string(), - MessageInfo { - offset: message - .offset - .clone() - .ok_or(Error::Transformer("Message offset is missing".to_string()))?, - headers: message.headers.clone(), - }, - ); - } + Ok(transformer) + } - // Cancellation token is used to cancel either sending task (if an error occurs while receiving) or receiving messages (if an error occurs on sending task) - let token = CancellationToken::new(); - - // Send transform requests to the source transformer server - let sender_task: JoinHandle> = tokio::spawn({ - let read_tx = self.read_tx.clone(); - let token = token.clone(); - async move { - for msg in messages { - let result = tokio::select! { - result = read_tx.send(msg.into()) => result, - _ = token.cancelled() => { - warn!("Cancellation token was cancelled while sending source transform requests"); - return Ok(()); + // receive responses from the server and gets the corresponding oneshot sender from the map + // and sends the response. + async fn receive_responses( + sender_map: ResponseSenderMap, + mut resp_stream: Streaming, + ) { + while let Some(resp) = resp_stream.message().await.unwrap() { + let msg_id = resp.id; + for (i, result) in resp.results.into_iter().enumerate() { + if let Some((msg_info, sender)) = sender_map + .lock() + .expect("map entry should always be present") + .remove(&msg_id) + { + let message = Message { + id: MessageID { + vertex_name: get_vertex_name().to_string(), + index: i as i32, + offset: msg_info.offset.to_string(), }, + keys: result.keys, + value: result.value.into(), + offset: None, + event_time: utc_from_timestamp(result.event_time), + headers: msg_info.headers.clone(), }; - - match result { - Ok(()) => continue, - Err(e) => { - token.cancel(); - return Err(Error::Transformer(e.to_string())); - } - }; + let _ = sender.send(Ok(vec![message])); } - Ok(()) - } - }); - - // Receive transformer results - let mut messages = Vec::new(); - while !tracker.is_empty() { - let resp = tokio::select! { - _ = token.cancelled() => { - break; - }, - resp = self.resp_stream.message() => {resp} - }; - - let resp = match resp { - Ok(Some(val)) => val, - Ok(None) => { - // Logging at warning level since we don't expect this to happen - warn!("Source transformer server closed its sending end of the stream. 
No more messages to receive"); - token.cancel(); - break; - } - Err(e) => { - token.cancel(); - return Err(Error::Transformer(format!( - "gRPC error while receiving messages from source transformer server: {e:?}" - ))); - } - }; - - let Some((_, msg_info)) = tracker.remove_entry(&resp.id) else { - token.cancel(); - return Err(Error::Transformer(format!( - "Received message with unknown ID {}", - resp.id - ))); - }; - - for (i, result) in resp.results.into_iter().enumerate() { - // TODO: Expose metrics - if result.tags.iter().any(|x| x == DROP) { - continue; - } - let message = Message { - id: MessageID { - vertex_name: get_vertex_name().to_string(), - index: i as i32, - offset: msg_info.offset.to_string(), - }, - keys: result.keys, - value: result.value.into(), - offset: None, - event_time: utc_from_timestamp(result.event_time), - headers: msg_info.headers.clone(), - }; - messages.push(message); } } - - sender_task.await.unwrap().map_err(|e| { - Error::Transformer(format!( - "Sending messages to gRPC transformer failed: {e:?}", - )) - })?; - - Ok(messages) } -} -enum ActorMessage { - Transform { - messages: Vec, - respond_to: oneshot::Sender>>, - }, -} + /// Handles the incoming message and sends it to the server for transformation. + pub(super) async fn handle_message(&mut self, message: ActorMessage) { + match message { + ActorMessage::Transform { + message, + respond_to, + } => { + let msg_id = message.id.to_string(); + let msg_info = ParentMessageInfo { + offset: message.offset.clone().unwrap(), + headers: message.headers.clone(), + }; -#[derive(Clone)] -pub(crate) struct SourceTransformHandle { - sender: mpsc::Sender, -} + self.senders + .lock() + .unwrap() + .insert(msg_id, (msg_info, respond_to)); -impl SourceTransformHandle { - pub(crate) async fn new(client: SourceTransformClient) -> Result { - let batch_size = 500; - let (sender, receiver) = mpsc::channel(batch_size); - let mut client = SourceTransformer::new(batch_size, client, receiver).await?; - tokio::spawn(async move { - while let Some(msg) = client.actor_messages.recv().await { - client.handle_message(msg).await; + self.read_tx.send(message.into()).await.unwrap(); } - }); - Ok(Self { sender }) - } - - pub(crate) async fn transform(&self, messages: Vec) -> Result> { - let (sender, receiver) = oneshot::channel(); - let msg = ActorMessage::Transform { - messages, - respond_to: sender, - }; - let _ = self.sender.send(msg).await; - receiver.await.unwrap() + } } } @@ -240,9 +150,8 @@ mod tests { use tempfile::TempDir; use crate::message::{MessageID, StringOffset}; - use crate::shared::utils::create_rpc_channel; - use crate::transformer::user_defined::SourceTransformHandle; - + use crate::shared::grpc::create_rpc_channel; + use crate::transformer::user_defined::{ActorMessage, UserDefinedTransformer}; struct NowCat; #[tonic::async_trait] @@ -279,9 +188,10 @@ mod tests { // wait for the server to start tokio::time::sleep(Duration::from_millis(100)).await; - let client = SourceTransformHandle::new(SourceTransformClient::new( - create_rpc_channel(sock_file).await?, - )) + let mut client = UserDefinedTransformer::new( + 500, + SourceTransformClient::new(create_rpc_channel(sock_file).await?), + ) .await?; let message = crate::message::Message { @@ -300,9 +210,20 @@ mod tests { headers: Default::default(), }; - let resp = - tokio::time::timeout(Duration::from_secs(2), client.transform(vec![message])).await??; - assert_eq!(resp.len(), 1); + let (tx, rx) = tokio::sync::oneshot::channel(); + + let _ = tokio::time::timeout( + 
@@ -318,75 +239,4 @@ mod tests {
         );
         Ok(())
     }
-
-    struct FilterCat;
-
-    #[tonic::async_trait]
-    impl sourcetransform::SourceTransformer for FilterCat {
-        async fn transform(
-            &self,
-            input: sourcetransform::SourceTransformRequest,
-        ) -> Vec<sourcetransform::Message> {
-            let message = sourcetransform::Message::new(input.value, chrono::offset::Utc::now())
-                .keys(input.keys)
-                .tags(vec![crate::transformer::user_defined::DROP.to_string()]);
-            vec![message]
-        }
-    }
-
-    #[tokio::test]
-    async fn transformer_operations_with_drop() -> Result<(), Box<dyn std::error::Error>> {
-        let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel();
-        let tmp_dir = TempDir::new()?;
-        let sock_file = tmp_dir.path().join("sourcetransform.sock");
-        let server_info_file = tmp_dir.path().join("sourcetransformer-server-info");
-
-        let server_info = server_info_file.clone();
-        let server_socket = sock_file.clone();
-        let handle = tokio::spawn(async move {
-            sourcetransform::Server::new(FilterCat)
-                .with_socket_file(server_socket)
-                .with_server_info_file(server_info)
-                .start_with_shutdown(shutdown_rx)
-                .await
-                .expect("server failed");
-        });
-
-        // wait for the server to start
-        tokio::time::sleep(Duration::from_millis(100)).await;
-
-        let client = SourceTransformHandle::new(SourceTransformClient::new(
-            create_rpc_channel(sock_file).await?,
-        ))
-        .await?;
-
-        let message = crate::message::Message {
-            keys: vec!["second".into()],
-            value: "hello".into(),
-            offset: Some(crate::message::Offset::String(StringOffset::new(
-                "0".to_string(),
-                0,
-            ))),
-            event_time: chrono::Utc::now(),
-            id: MessageID {
-                vertex_name: "vertex_name".to_string(),
-                offset: "0".to_string(),
-                index: 0,
-            },
-            headers: Default::default(),
-        };
-
-        let resp = client.transform(vec![message]).await?;
-        assert!(resp.is_empty());
-
-        // we need to drop the client, because if there are any in-flight requests
-        // server fails to shut down. https://github.com/numaproj/numaflow-rs/issues/85
-        drop(client);
-
-        shutdown_tx
-            .send(())
-            .expect("failed to send shutdown signal");
-        handle.await.expect("failed to join server task");
-        Ok(())
-    }
 }
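Both tests follow the same shutdown discipline: drop the client first, since in-flight streams keep the server alive (numaflow-rs issue #85), then signal the oneshot and verify the server task actually finished. A condensed sketch of that scaffold, under the same assumptions as the tests above:

```rust
// Condensed sketch of the shutdown discipline used by the tests above: release
// the client before signalling shutdown, then verify the server task finished.
use std::time::Duration;
use tokio::{sync::oneshot, task::JoinHandle};

async fn shutdown_cleanly<C>(
    client: C,
    shutdown_tx: oneshot::Sender<()>,
    server_handle: JoinHandle<()>,
) {
    // In-flight requests keep the gRPC server alive, so drop the client first
    // (https://github.com/numaproj/numaflow-rs/issues/85).
    drop(client);
    shutdown_tx
        .send(())
        .expect("failed to send shutdown signal");
    // Give the server a moment to wind down, then check that it exited.
    tokio::time::sleep(Duration::from_millis(50)).await;
    assert!(
        server_handle.is_finished(),
        "Expected gRPC server to have shut down"
    );
}
```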
diff --git a/rust/servesink/Cargo.toml b/rust/servesink/Cargo.toml
index 3e5f8677f..72f2802c5 100644
--- a/rust/servesink/Cargo.toml
+++ b/rust/servesink/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2021"
 
 [dependencies]
 tonic = "0.12.3"
-tokio = { version = "1.0", features = ["macros", "rt-multi-thread"] }
+tokio = { version = "1.41.1", features = ["macros", "rt-multi-thread"] }
 numaflow = { git = "https://github.com/numaproj/numaflow-rs.git", rev = "ddd879588e11455921f1ca958ea2b3c076689293" }
 tracing = "0.1.40"
 tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
diff --git a/rust/serving/Cargo.toml b/rust/serving/Cargo.toml
index 0af3c74e9..04fa96c28 100644
--- a/rust/serving/Cargo.toml
+++ b/rust/serving/Cargo.toml
@@ -16,7 +16,7 @@ axum-macros = "0.4.1"
 hyper-util = { version = "0.1.6", features = ["client-legacy"] }
 serde = { version = "1.0.204", features = ["derive"] }
 serde_json = "1.0.120"
-tokio = { version = "1.39.3", features = ["full"] }
+tokio = { version = "1.41.1", features = ["full"] }
 tower = "0.4.13"
 tower-http = { version = "0.5.2", features = ["trace", "timeout"] }
 tracing = "0.1.40"
diff --git a/rust/src/bin/main.rs b/rust/src/bin/main.rs
index 4ffd64ed7..d56cfff59 100644
--- a/rust/src/bin/main.rs
+++ b/rust/src/bin/main.rs
@@ -1,5 +1,7 @@
 use std::env;
+use std::time::Duration;
 
+use tokio::time;
 use tracing::{error, info};
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::util::SubscriberInitExt;