From b478cb588c72edc99ac85516ab4a52073109d7c4 Mon Sep 17 00:00:00 2001
From: Yihai Lin
Date: Thu, 16 Jan 2025 14:49:28 +0800
Subject: [PATCH] feat(json): Add `json_object_keys` function.

---
 src/common/function/src/scalars/json.rs       |   2 +
 .../src/scalars/json/json_object_keys.rs      | 173 ++++++++++++++++++
 .../common/function/json/json.result          |  25 +++
 .../standalone/common/function/json/json.sql  |   7 +
 4 files changed, 207 insertions(+)
 create mode 100644 src/common/function/src/scalars/json/json_object_keys.rs

diff --git a/src/common/function/src/scalars/json.rs b/src/common/function/src/scalars/json.rs
index 2c420c1661e1..fab40fcb2e93 100644
--- a/src/common/function/src/scalars/json.rs
+++ b/src/common/function/src/scalars/json.rs
@@ -15,6 +15,7 @@
 use std::sync::Arc;
 mod json_get;
 mod json_is;
+mod json_object_keys;
 mod json_path_exists;
 mod json_path_match;
 mod json_to_string;
@@ -49,6 +50,7 @@ impl JsonFunction {
         registry.register(Arc::new(JsonIsArray));
         registry.register(Arc::new(JsonIsObject));
 
+        registry.register(Arc::new(json_object_keys::JsonObjectKeysFunction));
         registry.register(Arc::new(json_path_exists::JsonPathExistsFunction));
         registry.register(Arc::new(json_path_match::JsonPathMatchFunction));
     }
diff --git a/src/common/function/src/scalars/json/json_object_keys.rs b/src/common/function/src/scalars/json/json_object_keys.rs
new file mode 100644
index 000000000000..7e08e523bef3
--- /dev/null
+++ b/src/common/function/src/scalars/json/json_object_keys.rs
@@ -0,0 +1,173 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::{self, Display};
+
+use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
+use common_query::prelude::Signature;
+use datafusion::logical_expr::Volatility;
+use datatypes::data_type::ConcreteDataType;
+use datatypes::prelude::VectorRef;
+use datatypes::scalars::ScalarVectorBuilder;
+use datatypes::vectors::{MutableVector, StringVectorBuilder};
+use snafu::ensure;
+
+use crate::function::{Function, FunctionContext};
+
+/// Get all the keys from the JSON object.
+#[derive(Clone, Debug, Default)]
+pub struct JsonObjectKeysFunction;
+
+const NAME: &str = "json_object_keys";
+
+impl Function for JsonObjectKeysFunction {
+    fn name(&self) -> &str {
+        NAME
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::string_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::exact(
+            vec![ConcreteDataType::json_datatype()],
+            Volatility::Immutable,
+        )
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure!(
+            columns.len() == 1,
+            InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not correct, expect exactly one, have: {}",
+                    columns.len()
+                ),
+            }
+        );
+        let jsons = &columns[0];
+
+        let size = jsons.len();
+        let mut results = StringVectorBuilder::with_capacity(size);
+
+        for i in 0..size {
+            let json = jsons.get_ref(i);
+            match json.data_type() {
+                // JSON data type uses binary vector
+                ConcreteDataType::Binary(_) => {
+                    if let Ok(Some(json)) = json.as_binary()
+                        && let Ok(json) = jsonb::from_slice(json)
+                        && let Some(keys) = json.object_keys()
+                    {
+                        results.push(Some(&keys.to_string()));
+                    } else {
+                        results.push(None)
+                    }
+                }
+
+                _ => {
+                    return UnsupportedInputDataTypeSnafu {
+                        function: NAME,
+                        datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
+                    }
+                    .fail();
+                }
+            }
+        }
+
+        Ok(results.to_vector())
+    }
+}
+
+impl Display for JsonObjectKeysFunction {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "JSON_OBJECT_KEYS")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use common_query::prelude::TypeSignature;
+    use datatypes::vectors::BinaryVector;
+
+    use super::*;
+
+    #[test]
+    fn test_json_object_keys_function() {
+        let json_object_keys = JsonObjectKeysFunction;
+
+        assert_eq!("json_object_keys", json_object_keys.name());
+        assert_eq!(
+            ConcreteDataType::string_datatype(),
+            json_object_keys
+                .return_type(&[ConcreteDataType::string_datatype()])
+                .unwrap()
+        );
+
+        assert!(matches!(json_object_keys.signature(),
+                         Signature {
+                             type_signature: TypeSignature::Exact(valid_types),
+                             volatility: Volatility::Immutable
+                         } if valid_types == vec![ConcreteDataType::json_datatype()],
+        ));
+
+        let json_strings = [
+            Some(r#"{"a": {"b": 2}, "b": 2, "c": 3}"#.to_string()),
+            Some(r#"{"a": 1, "b": [1,2,3]}"#.to_string()),
+            Some(r#"[1,2,3]"#.to_string()),
+            Some(r#"{"a":1,"b":[1,2,3]}"#.to_string()),
+            Some(r#"null"#.to_string()),
+            Some(r#"null"#.to_string()),
+        ];
+
+        let results = [
+            Some(r#"["a","b","c"]"#),
+            Some(r#"["a","b"]"#),
+            None,
+            Some(r#"["a","b"]"#),
+            None,
+            None,
+        ];
+
+        let jsonbs = json_strings
+            .into_iter()
+            .map(|s| s.map(|json| jsonb::parse_value(json.as_bytes()).unwrap().to_vec()))
+            .collect::<Vec<_>>();
+
+        let json_vector = BinaryVector::from(jsonbs);
+        let args: Vec<VectorRef> = vec![Arc::new(json_vector)];
+        let vector = json_object_keys
+            .eval(FunctionContext::default(), &args)
+            .unwrap();
+
+        assert_eq!(6, vector.len());
+
+        for (i, expected) in results.iter().enumerate() {
+            let result = vector.get_ref(i);
+            match expected {
+                Some(expected_value) => {
+                    assert!(!result.is_null());
+                    let result_value = result.as_string().unwrap().unwrap();
+                    assert_eq!(*expected_value, result_value);
+                }
+                None => {
+                    assert!(result.is_null());
+                }
+            }
+        }
+    }
+}
diff --git a/tests/cases/standalone/common/function/json/json.result b/tests/cases/standalone/common/function/json/json.result
index 62c562cb9fbe..7edd3568c2d5 100644
--- a/tests/cases/standalone/common/function/json/json.result
+++ b/tests/cases/standalone/common/function/json/json.result
@@ -104,3 +104,28 @@ SELECT json_path_match(parse_json('null'), '$.a == 1');
 |                                                            |
 +------------------------------------------------------------+
 
+--- json_object_keys ---
+SELECT json_object_keys(parse_json('{"a": 1, "b": [1, 2, 3]}')) AS keys;
+
++-----------+
+| keys      |
++-----------+
+| ["a","b"] |
++-----------+
+
+SELECT json_object_keys(parse_json('{"a": 1, "b": 2, "c": 3}')) AS keys;
+
++---------------+
+| keys          |
++---------------+
+| ["a","b","c"] |
++---------------+
+
+SELECT json_object_keys(parse_json('[1, 2, 3]')) AS keys;
+
++------+
+| keys |
++------+
+|      |
++------+
+
diff --git a/tests/cases/standalone/common/function/json/json.sql b/tests/cases/standalone/common/function/json/json.sql
index f8d6527ecc96..c6a3734056a5 100644
--- a/tests/cases/standalone/common/function/json/json.sql
+++ b/tests/cases/standalone/common/function/json/json.sql
@@ -26,3 +26,10 @@ SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), '$.b[1 to last] >= 2')
 SELECT json_path_match(parse_json('{"a":1,"b":[1,2,3]}'), 'null');
 
 SELECT json_path_match(parse_json('null'), '$.a == 1');
+
+--- json_object_keys ---
+SELECT json_object_keys(parse_json('{"a": 1, "b": [1, 2, 3]}')) AS keys;
+
+SELECT json_object_keys(parse_json('{"a": 1, "b": 2, "c": 3}')) AS keys;
+
+SELECT json_object_keys(parse_json('[1, 2, 3]')) AS keys;
\ No newline at end of file