From 88d4540a14ae045ba6eab9db60eff6bfd79d88f0 Mon Sep 17 00:00:00 2001 From: Timofey Stepanov Date: Thu, 9 Jan 2025 06:47:43 -0800 Subject: [PATCH] `ExpectHaveCommonSchema` So far I used a one-line error message, aligned with other `Expect*` set of functions. If we will want to switch to multiline error messages, we should probably do it consistently for all the functions. PiperOrigin-RevId: 713654374 Change-Id: I5bd9e9b8703a4de3bf9e208bc742fcab3e4e2e2a --- koladata/operators/comparison.cc | 9 ++---- koladata/operators/masking.h | 6 ++-- koladata/schema_utils.cc | 17 +++++++++++ koladata/schema_utils.h | 5 ++++ koladata/schema_utils_test.cc | 19 ++++++++++++ .../operators/tests/comparison_equal_test.py | 29 ++++++++++++------- .../tests/comparison_full_equal_test.py | 29 ++++++++++++------- .../operators/tests/masking_coalesce_test.py | 6 ++-- .../operators/tests/masking_cond_test.py | 8 ++++- .../tests/masking_disjoint_coalesce_test.py | 8 ++--- 10 files changed, 96 insertions(+), 40 deletions(-) diff --git a/koladata/operators/comparison.cc b/koladata/operators/comparison.cc index 1fbde235..6181b5d8 100644 --- a/koladata/operators/comparison.cc +++ b/koladata/operators/comparison.cc @@ -103,13 +103,8 @@ absl::StatusOr GreaterEqual(const DataSlice& x, const DataSlice& y) { absl::StatusOr Equal(const DataSlice& x, const DataSlice& y) { // NOTE: Casting is handled internally by EqualOp. The schema compatibility is // still verified to ensure that e.g. ITEMID and OBJECT are not compared. - RETURN_IF_ERROR( - schema::CommonSchema(x.GetSchemaImpl(), y.GetSchemaImpl()).status()) - .With([&](const absl::Status& status) { - return AssembleErrorMessage(status, - {.db = DataBag::ImmutableEmptyWithFallbacks( - {x.GetBag(), y.GetBag()})}); - }); + RETURN_IF_ERROR(ExpectHaveCommonSchema({"x", "y"}, x, y)) + .With(OpError("kd.comparison.equal")); return DataSliceOp()( x, y, internal::DataItem(schema::kMask), nullptr); } diff --git a/koladata/operators/masking.h b/koladata/operators/masking.h index a2dfa3aa..98ebdb53 100644 --- a/koladata/operators/masking.h +++ b/koladata/operators/masking.h @@ -32,7 +32,6 @@ #include "koladata/internal/op_utils/presence_or.h" #include "koladata/internal/op_utils/utils.h" #include "koladata/operators/arolla_bridge.h" -#include "koladata/repr_utils.h" #include "koladata/schema_utils.h" #include "arolla/util/status_macros_backport.h" @@ -52,9 +51,10 @@ inline absl::StatusOr ApplyMask(const DataSlice& obj, // kde.masking.coalesce. inline absl::StatusOr Coalesce(const DataSlice& x, const DataSlice& y) { + RETURN_IF_ERROR(ExpectHaveCommonSchema({"x", "y"}, x, y)) + .With(OpError("kd.masking.coalesce")); auto res_db = DataBag::CommonDataBag({x.GetBag(), y.GetBag()}); - ASSIGN_OR_RETURN(auto aligned_slices, AlignSchemas({x, y}), - AssembleErrorMessage(_, {.db = res_db})); + ASSIGN_OR_RETURN(auto aligned_slices, AlignSchemas({x, y})); return DataSliceOp()( aligned_slices.slices[0], aligned_slices.slices[1], aligned_slices.common_schema, std::move(res_db)); diff --git a/koladata/schema_utils.cc b/koladata/schema_utils.cc index 4c64c8c2..10997240 100644 --- a/koladata/schema_utils.cc +++ b/koladata/schema_utils.cc @@ -220,6 +220,23 @@ absl::Status ExpectConsistentStringOrBytesImpl( } // namespace schema_utils_internal +absl::Status ExpectHaveCommonSchema( + absl::Span arg_names, const DataSlice& lhs, + const DataSlice& rhs) { + if (arg_names.size() != 2) { + return absl::InternalError("arg_names must have exactly 2 elements"); + } + if (schema::CommonSchema(lhs.GetSchemaImpl(), rhs.GetSchemaImpl()).ok()) { + return absl::OkStatus(); + } + return absl::InvalidArgumentError( + absl::StrFormat("arguments `%s` and `%s` must contain values castable to " + "a common type, got %s and %s", + arg_names[0], arg_names[1], + schema_utils_internal::DescribeSliceSchema(lhs), + schema_utils_internal::DescribeSliceSchema(rhs))); +} + absl::Status ExpectHaveCommonPrimitiveSchema( absl::Span arg_names, const DataSlice& lhs, const DataSlice& rhs) { diff --git a/koladata/schema_utils.h b/koladata/schema_utils.h index 08a4726e..4d207ee2 100644 --- a/koladata/schema_utils.h +++ b/koladata/schema_utils.h @@ -90,6 +90,11 @@ inline absl::Status ExpectConsistentStringOrBytes(absl::string_view arg_name, {&arg}); } +// Returns OK if the DataSlices contain values castable to a common type. +absl::Status ExpectHaveCommonSchema( + absl::Span arg_names, const DataSlice& lhs, + const DataSlice& rhs); + // Returns OK if the DataSlices contain values castable to a common primitive // type. // NOTE: arg_names must have exactly 2 elements. diff --git a/koladata/schema_utils_test.cc b/koladata/schema_utils_test.cc index 71daf59f..0d8256b3 100644 --- a/koladata/schema_utils_test.cc +++ b/koladata/schema_utils_test.cc @@ -481,6 +481,25 @@ TEST(SchemaUtilsTest, ExpectConsistentStringOrBytes) { "slice of OBJECT with items of types BYTES, STRING")); } +TEST(SchemaUtilsTest, ExpectHaveCommonSchema) { + auto empty_and_unknown = test::DataItem(std::nullopt, schema::kObject); + auto integer = test::DataSlice({1, 2, std::nullopt}); + auto floating = test::DataSlice({1, 2, std::nullopt}); + auto bytes = test::DataSlice({"a", "b", std::nullopt}); + auto bytes_any = + test::DataSlice({"a", "b", std::nullopt}, schema::kAny); + auto schema = test::DataItem(std::nullopt, schema::kSchema); + + EXPECT_THAT(ExpectHaveCommonSchema({"foo", "bar"}, bytes, empty_and_unknown), + IsOk()); + EXPECT_THAT(ExpectHaveCommonSchema({"foo", "bar"}, bytes, bytes_any), IsOk()); + EXPECT_THAT(ExpectHaveCommonSchema({"foo", "bar"}, integer, bytes), IsOk()); + EXPECT_THAT(ExpectHaveCommonSchema({"foo", "bar"}, integer, schema), + StatusIs(absl::StatusCode::kInvalidArgument, + "arguments `foo` and `bar` must contain values castable " + "to a common type, got INT32 and SCHEMA")); +} + TEST(SchemaUtilsTest, ExpectHaveCommonPrimitiveSchema) { auto empty_and_unknown = test::DataItem(std::nullopt, schema::kObject); auto integer = test::DataSlice({1, 2, std::nullopt}); diff --git a/py/koladata/operators/tests/comparison_equal_test.py b/py/koladata/operators/tests/comparison_equal_test.py index 4f08bca7..9fc6f2f5 100644 --- a/py/koladata/operators/tests/comparison_equal_test.py +++ b/py/koladata/operators/tests/comparison_equal_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for kde.comparison.equal.""" +import re from absl.testing import absltest from absl.testing import parameterized @@ -115,32 +115,39 @@ def test_qtype_signatures(self): def test_raises_on_incompatible_schemas(self): with self.assertRaisesRegex( exceptions.KodaError, - r"""cannot find a common schema for provided schemas - - the common schema\(s\) INT32: INT32 - the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""", + re.escape( + 'kd.comparison.equal: arguments `x` and `y` must contain values' + ' castable to a common type, got SCHEMA() and INT32' + ), ): expr_eval.eval(kde.comparison.equal(ENTITY_1, ds(1))) db = data_bag.DataBag.empty() with self.assertRaisesRegex( exceptions.KodaError, - r"""cannot find a common schema for provided schemas - - the common schema\(s\) [0-9a-f]{32}:0: SCHEMA\(x=INT32\) - the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""", + re.escape( + 'kd.comparison.equal: arguments `x` and `y` must contain values' + ' castable to a common type, got SCHEMA(x=INT32) and SCHEMA()' + ), ): expr_eval.eval(kde.comparison.equal(db.new(x=1), db.new())) with self.assertRaisesRegex( exceptions.KodaError, - 'cannot find a common schema for provided schemas', + re.escape( + 'kd.comparison.equal: arguments `x` and `y` must contain values' + ' castable to a common type, got SCHEMA(x=INT32) and OBJECT with an' + ' item of type ITEMID' + ), ): expr_eval.eval(kde.comparison.equal(db.new(x=1), db.obj())) with self.assertRaisesRegex( exceptions.KodaError, - 'cannot find a common schema for provided schemas', + re.escape( + 'kd.comparison.equal: arguments `x` and `y` must contain values' + ' castable to a common type, got SCHEMA(x=INT32) and ITEMID' + ), ): expr_eval.eval( kde.comparison.equal( diff --git a/py/koladata/operators/tests/comparison_full_equal_test.py b/py/koladata/operators/tests/comparison_full_equal_test.py index 52fcbe09..49526192 100644 --- a/py/koladata/operators/tests/comparison_full_equal_test.py +++ b/py/koladata/operators/tests/comparison_full_equal_test.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re + from absl.testing import absltest from absl.testing import parameterized from arolla import arolla @@ -109,32 +111,39 @@ def test_qtype_signatures(self): def test_raises_on_incompatible_schemas(self): with self.assertRaisesRegex( exceptions.KodaError, - r"""cannot find a common schema for provided schemas - - the common schema\(s\) INT32: INT32 - the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""", + re.escape( + 'kd.comparison.equal: arguments `x` and `y` must contain values' + ' castable to a common type, got SCHEMA() and INT32' + ), ): expr_eval.eval(kde.comparison.full_equal(ENTITY_1, ds(1))) db = data_bag.DataBag.empty() with self.assertRaisesRegex( exceptions.KodaError, - r"""cannot find a common schema for provided schemas - - the common schema\(s\) [0-9a-f]{32}:0: SCHEMA\(x=INT32\) - the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""", + re.escape( + 'kd.comparison.equal: arguments `x` and `y` must contain values' + ' castable to a common type, got SCHEMA(x=INT32) and SCHEMA()' + ), ): expr_eval.eval(kde.comparison.full_equal(db.new(x=1), db.new())) with self.assertRaisesRegex( exceptions.KodaError, - 'cannot find a common schema for provided schemas', + re.escape( + 'kd.comparison.equal: arguments `x` and `y` must contain values' + ' castable to a common type, got SCHEMA(x=INT32) and OBJECT with an' + ' item of type ITEMID' + ), ): expr_eval.eval(kde.comparison.full_equal(db.new(x=1), db.obj())) with self.assertRaisesRegex( exceptions.KodaError, - 'cannot find a common schema for provided schemas', + re.escape( + 'kd.comparison.equal: arguments `x` and `y` must contain values' + ' castable to a common type, got SCHEMA(x=INT32) and ITEMID' + ), ): expr_eval.eval( kde.comparison.full_equal( diff --git a/py/koladata/operators/tests/masking_coalesce_test.py b/py/koladata/operators/tests/masking_coalesce_test.py index fa016bdf..043014f1 100644 --- a/py/koladata/operators/tests/masking_coalesce_test.py +++ b/py/koladata/operators/tests/masking_coalesce_test.py @@ -143,10 +143,8 @@ def test_incompatible_schema_error(self): y = data_bag.DataBag.empty().new() with self.assertRaisesRegex( exceptions.KodaError, - r"""cannot find a common schema for provided schemas - - the common schema\(s\) INT32: INT32 - the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""", + 'kd.masking.coalesce: arguments `x` and `y` must contain values' + ' castable to a common type, got INT32 and SCHEMA()', ): expr_eval.eval(kde.masking.coalesce(x, y)) diff --git a/py/koladata/operators/tests/masking_cond_test.py b/py/koladata/operators/tests/masking_cond_test.py index 98602743..d27da1f8 100644 --- a/py/koladata/operators/tests/masking_cond_test.py +++ b/py/koladata/operators/tests/masking_cond_test.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re + from absl.testing import absltest from absl.testing import parameterized from arolla import arolla @@ -184,7 +186,11 @@ def test_incompatible_schema_error(self): x = ds([1, None]) y = data_bag.DataBag.empty().new() with self.assertRaisesRegex( - exceptions.KodaError, 'cannot find a common schema for provided schemas' + exceptions.KodaError, + re.escape( + 'kd.masking.coalesce: arguments `x` and `y` must contain values' + ' castable to a common type, got INT32 and SCHEMA()' + ), ): expr_eval.eval(kde.masking.cond(ds(arolla.present()), x, y)) diff --git a/py/koladata/operators/tests/masking_disjoint_coalesce_test.py b/py/koladata/operators/tests/masking_disjoint_coalesce_test.py index e882a536..e0999f6c 100644 --- a/py/koladata/operators/tests/masking_disjoint_coalesce_test.py +++ b/py/koladata/operators/tests/masking_disjoint_coalesce_test.py @@ -151,10 +151,10 @@ def test_incompatible_schema_error(self): y = data_bag.DataBag.empty().new() & ds(arolla.missing()) with self.assertRaisesRegex( exceptions.KodaError, - r"""cannot find a common schema for provided schemas - - the common schema\(s\) INT32: INT32 - the first conflicting schema [0-9a-f]{32}:0: SCHEMA\(\)""", + re.escape( + 'kd.masking.coalesce: arguments `x` and `y` must contain values' + ' castable to a common type, got INT32 and SCHEMA()' + ), ): expr_eval.eval(kde.masking.disjoint_coalesce(x, y))