From a2d00f147245e90c8e4e14fb84bb5a475a5166b7 Mon Sep 17 00:00:00 2001 From: Gijs Burghoorn Date: Tue, 1 Oct 2024 17:59:42 +0200 Subject: [PATCH] feat: Bitwise operations / aggregations (#18994) Co-authored-by: ritchie --- crates/polars-compute/src/bitwise/mod.rs | 285 ++++++++++++++++++ crates/polars-compute/src/lib.rs | 1 + crates/polars-core/Cargo.toml | 1 + .../src/chunked_array/ops/bitwise_reduce.rs | 80 +++++ .../polars-core/src/chunked_array/ops/mod.rs | 12 + crates/polars-core/src/frame/column/mod.rs | 28 ++ .../frame/group_by/aggregations/boolean.rs | 50 +++ .../src/frame/group_by/aggregations/mod.rs | 71 +++++ crates/polars-core/src/frame/group_by/mod.rs | 32 +- .../src/series/implementations/boolean.rs | 62 ++++ .../src/series/implementations/floats.rs | 40 ++- .../src/series/implementations/mod.rs | 37 +++ crates/polars-core/src/series/series_trait.rs | 33 ++ crates/polars-core/src/utils/mod.rs | 8 +- crates/polars-expr/Cargo.toml | 1 + .../src/expressions/aggregation.rs | 28 ++ crates/polars-expr/src/planner.rs | 2 + crates/polars-lazy/Cargo.toml | 7 + crates/polars-ops/Cargo.toml | 1 + crates/polars-ops/src/series/ops/bitwise.rs | 57 ++++ crates/polars-ops/src/series/ops/mod.rs | 2 + crates/polars-plan/Cargo.toml | 2 + crates/polars-plan/src/dsl/bitwise.rs | 50 +++ crates/polars-plan/src/dsl/expr.rs | 4 + .../src/dsl/function_expr/bitwise.rs | 114 +++++++ .../polars-plan/src/dsl/function_expr/mod.rs | 12 + .../src/dsl/function_expr/schema.rs | 2 + crates/polars-plan/src/dsl/mod.rs | 2 + crates/polars-plan/src/plans/aexpr/mod.rs | 8 + crates/polars-plan/src/plans/aexpr/schema.rs | 7 + .../polars-plan/src/plans/aexpr/traverse.rs | 4 + .../src/plans/conversion/expr_to_ir.rs | 5 + .../src/plans/conversion/ir_to_dsl.rs | 5 + crates/polars-plan/src/plans/format.rs | 10 + crates/polars-plan/src/plans/ir/format.rs | 10 + crates/polars-plan/src/plans/iterator.rs | 2 + crates/polars-plan/src/plans/visitor/expr.rs | 2 + crates/polars-python/Cargo.toml | 3 +- 
crates/polars-python/src/expr/bitwise.rs | 42 +++ crates/polars-python/src/expr/mod.rs | 2 + .../src/lazyframe/visitor/expr_nodes.rs | 13 + crates/polars-stream/Cargo.toml | 1 + .../src/physical_plan/lower_expr.rs | 6 + crates/polars/Cargo.toml | 1 + .../reference/expressions/computation.rst | 8 + .../source/reference/series/computation.rst | 8 + py-polars/polars/expr/expr.py | 36 +++ py-polars/polars/series/series.py | 27 ++ .../tests/unit/operations/test_bitwise.py | 188 ++++++++++++ 49 files changed, 1403 insertions(+), 9 deletions(-) create mode 100644 crates/polars-compute/src/bitwise/mod.rs create mode 100644 crates/polars-core/src/chunked_array/ops/bitwise_reduce.rs create mode 100644 crates/polars-ops/src/series/ops/bitwise.rs create mode 100644 crates/polars-plan/src/dsl/bitwise.rs create mode 100644 crates/polars-plan/src/dsl/function_expr/bitwise.rs create mode 100644 crates/polars-python/src/expr/bitwise.rs diff --git a/crates/polars-compute/src/bitwise/mod.rs b/crates/polars-compute/src/bitwise/mod.rs new file mode 100644 index 000000000000..578acb91b1ac --- /dev/null +++ b/crates/polars-compute/src/bitwise/mod.rs @@ -0,0 +1,285 @@ +use std::convert::identity; + +use arrow::array::{BooleanArray, PrimitiveArray}; +use arrow::datatypes::ArrowDataType; +use arrow::legacy::utils::CustomIterTools; +use bytemuck::Zeroable; + +pub trait BitwiseKernel { + type Scalar; + + fn count_ones(&self) -> PrimitiveArray; + fn count_zeros(&self) -> PrimitiveArray; + + fn leading_ones(&self) -> PrimitiveArray; + fn leading_zeros(&self) -> PrimitiveArray; + + fn trailing_ones(&self) -> PrimitiveArray; + fn trailing_zeros(&self) -> PrimitiveArray; + + fn reduce_and(&self) -> Option; + fn reduce_or(&self) -> Option; + fn reduce_xor(&self) -> Option; + + fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar; + fn bit_or(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar; + fn bit_xor(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar; +} + +macro_rules! 
impl_bitwise_kernel { + ($(($T:ty, $to_bits:expr, $from_bits:expr)),+ $(,)?) => { + $( + impl BitwiseKernel for PrimitiveArray<$T> { + type Scalar = $T; + + #[inline(never)] + fn count_ones(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt32, + self.values() + .iter() + .map(|&v| $to_bits(v).count_ones()) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn count_zeros(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt32, + self + .values() + .iter() + .map(|&v| $to_bits(v).count_zeros()) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn leading_ones(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt32, + self.values() + .iter() + .map(|&v| $to_bits(v).leading_ones()) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn leading_zeros(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt32, + self.values() + .iter() + .map(|&v| $to_bits(v).leading_zeros()) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn trailing_ones(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt32, + self.values() + .iter() + .map(|&v| $to_bits(v).trailing_ones()) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn trailing_zeros(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt32, + self.values().iter() + .map(|&v| $to_bits(v).trailing_zeros()) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn reduce_and(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some($from_bits(values.iter().fold(!$to_bits(<$T>::zeroed()), |a, &b| a & $to_bits(b)))) + } + + #[inline(never)] + fn 
reduce_or(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some($from_bits(values.iter().fold($to_bits(<$T>::zeroed()), |a, &b| a | $to_bits(b)))) + } + + #[inline(never)] + fn reduce_xor(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some($from_bits(values.iter().fold($to_bits(<$T>::zeroed()), |a, &b| a ^ $to_bits(b)))) + } + + fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + $from_bits($to_bits(lhs) & $to_bits(rhs)) + } + fn bit_or(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + $from_bits($to_bits(lhs) | $to_bits(rhs)) + } + fn bit_xor(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + $from_bits($to_bits(lhs) ^ $to_bits(rhs)) + } + } + )+ + }; +} + +impl_bitwise_kernel! { + (i8, identity, identity), + (i16, identity, identity), + (i32, identity, identity), + (i64, identity, identity), + (u8, identity, identity), + (u16, identity, identity), + (u32, identity, identity), + (u64, identity, identity), + (f32, f32::to_bits, f32::from_bits), + (f64, f64::to_bits, f64::from_bits), +} + +impl BitwiseKernel for BooleanArray { + type Scalar = bool; + + #[inline(never)] + fn count_ones(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt32, + self.values() + .iter() + .map(u32::from) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(never)] + fn count_zeros(&self) -> PrimitiveArray { + PrimitiveArray::new( + ArrowDataType::UInt32, + self.values() + .iter() + .map(|v| u32::from(!v)) + .collect_trusted::>() + .into(), + self.validity().cloned(), + ) + } + + #[inline(always)] + fn leading_ones(&self) -> PrimitiveArray { + self.count_ones() + } + + #[inline(always)] + fn leading_zeros(&self) -> PrimitiveArray { + 
self.count_zeros() + } + + #[inline(always)] + fn trailing_ones(&self) -> PrimitiveArray { + self.count_ones() + } + + #[inline(always)] + fn trailing_zeros(&self) -> PrimitiveArray { + self.count_zeros() + } + + fn reduce_and(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some(values.unset_bits() == 0) + } + + fn reduce_or(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some(values.set_bits() > 0) + } + + fn reduce_xor(&self) -> Option { + if self.validity().map_or(false, |v| v.unset_bits() > 0) { + return None; + } + + let values = self.values(); + + if values.is_empty() { + return None; + } + + Some(values.set_bits() % 2 == 1) + } + + fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + lhs & rhs + } + fn bit_or(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + lhs | rhs + } + fn bit_xor(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar { + lhs ^ rhs + } +} diff --git a/crates/polars-compute/src/lib.rs b/crates/polars-compute/src/lib.rs index df8e1df6585a..a89303ff8f7f 100644 --- a/crates/polars-compute/src/lib.rs +++ b/crates/polars-compute/src/lib.rs @@ -9,6 +9,7 @@ use arrow::types::NativeType; pub mod arithmetic; pub mod arity; +pub mod bitwise; pub mod comparisons; pub mod filter; pub mod float_sum; diff --git a/crates/polars-core/Cargo.toml b/crates/polars-core/Cargo.toml index a3f477e84dd6..a3c5b26e8386 100644 --- a/crates/polars-core/Cargo.toml +++ b/crates/polars-core/Cargo.toml @@ -75,6 +75,7 @@ fmt_no_tty = ["comfy-table"] rows = [] # operations +bitwise = ["algorithm_group_by"] zip_with = [] round_series = [] checked_arithmetic = [] diff --git a/crates/polars-core/src/chunked_array/ops/bitwise_reduce.rs 
b/crates/polars-core/src/chunked_array/ops/bitwise_reduce.rs new file mode 100644 index 000000000000..5e033b53ab5d --- /dev/null +++ b/crates/polars-core/src/chunked_array/ops/bitwise_reduce.rs @@ -0,0 +1,80 @@ +use arrow::array::{Array, PrimitiveArray}; +use arrow::types::NativeType; +use polars_compute::bitwise::BitwiseKernel; + +use super::{BooleanType, ChunkBitwiseReduce, ChunkedArray, PolarsNumericType}; + +impl ChunkBitwiseReduce for ChunkedArray +where + T: PolarsNumericType, + T::Native: NativeType, + PrimitiveArray: BitwiseKernel, +{ + type Physical = T::Native; + + fn and_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| BitwiseKernel::reduce_and(arr).unwrap()) + .reduce( as BitwiseKernel>::bit_and) + } + fn or_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| BitwiseKernel::reduce_or(arr).unwrap()) + .reduce( as BitwiseKernel>::bit_or) + } + fn xor_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| BitwiseKernel::reduce_xor(arr).unwrap()) + .reduce( as BitwiseKernel>::bit_xor) + } +} + +impl ChunkBitwiseReduce for ChunkedArray { + type Physical = bool; + + fn and_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| BitwiseKernel::reduce_and(arr).unwrap()) + .reduce(|a, b| a & b) + } + fn or_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| BitwiseKernel::reduce_or(arr).unwrap()) + .reduce(|a, b| a | b) + } + fn xor_reduce(&self) -> Option { + if self.null_count() > 0 { + return None; + } + + self.downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| 
BitwiseKernel::reduce_xor(arr).unwrap()) + .reduce(|a, b| a ^ b) + } +} diff --git a/crates/polars-core/src/chunked_array/ops/mod.rs b/crates/polars-core/src/chunked_array/ops/mod.rs index 4d2db88ea38c..3de8df28d746 100644 --- a/crates/polars-core/src/chunked_array/ops/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/mod.rs @@ -9,6 +9,8 @@ pub(crate) mod append; mod apply; pub mod arity; mod bit_repr; +#[cfg(feature = "bitwise")] +mod bitwise_reduce; pub(crate) mod chunkops; pub(crate) mod compare_inner; #[cfg(feature = "dtype-decimal")] @@ -295,6 +297,16 @@ pub trait ChunkVar { } } +/// Bitwise Reduction Operations. +#[cfg(feature = "bitwise")] +pub trait ChunkBitwiseReduce { + type Physical; + + fn and_reduce(&self) -> Option; + fn or_reduce(&self) -> Option; + fn xor_reduce(&self) -> Option; +} + /// Compare [`Series`] and [`ChunkedArray`]'s and get a `boolean` mask that /// can be used to filter rows. /// diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index 3a6343415a6a..e1a447d437bb 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -525,6 +525,7 @@ impl Column { /// # Safety /// /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] pub unsafe fn agg_first(&self, groups: &GroupsProxy) -> Self { // @scalar-opt unsafe { self.as_materialized_series().agg_first(groups) }.into() @@ -533,6 +534,7 @@ impl Column { /// # Safety /// /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] pub unsafe fn agg_last(&self, groups: &GroupsProxy) -> Self { // @scalar-opt unsafe { self.as_materialized_series().agg_last(groups) }.into() @@ -541,6 +543,7 @@ impl Column { /// # Safety /// /// Does no bounds checks, groups must be correct. 
+ #[cfg(feature = "algorithm_group_by")] pub unsafe fn agg_n_unique(&self, groups: &GroupsProxy) -> Self { // @scalar-opt unsafe { self.as_materialized_series().agg_n_unique(groups) }.into() @@ -549,6 +552,7 @@ impl Column { /// # Safety /// /// Does no bounds checks, groups must be correct. + #[cfg(feature = "algorithm_group_by")] pub unsafe fn agg_quantile( &self, groups: &GroupsProxy, @@ -1003,6 +1007,30 @@ impl Column { }, } } + + pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column { + match self { + Column::Series(s) => f(s).into(), + Column::Scalar(s) => { + ScalarColumn::from_single_value_series(f(&s.as_single_value_series()), s.len()) + .into() + }, + } + } + + pub fn try_apply_unary_elementwise( + &self, + f: impl Fn(&Series) -> PolarsResult, + ) -> PolarsResult { + match self { + Column::Series(s) => f(s).map(Column::from), + Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series( + f(&s.as_single_value_series())?, + s.len(), + ) + .into()), + } + } } impl Default for Column { diff --git a/crates/polars-core/src/frame/group_by/aggregations/boolean.rs b/crates/polars-core/src/frame/group_by/aggregations/boolean.rs index fd7e537dc0ab..36cd8e9a8d41 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/boolean.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/boolean.rs @@ -17,6 +17,56 @@ where ca.into_series() } +#[cfg(feature = "bitwise")] +unsafe fn bitwise_agg( + ca: &BooleanChunked, + groups: &GroupsProxy, + f: fn(&BooleanChunked) -> Option, +) -> Series { + // Prevent a rechunk for every individual group. + let s = if groups.len() > 1 { + ca.rechunk() + } else { + ca.clone() + }; + + match groups { + GroupsProxy::Idx(groups) => _agg_helper_idx_bool::<_>(groups, |(_, idx)| { + debug_assert!(idx.len() <= s.len()); + if idx.is_empty() { + None + } else { + let take = s.take_unchecked(idx); + f(&take) + } + }), + GroupsProxy::Slice { groups, .. 
} => _agg_helper_slice_bool::<_>(groups, |[first, len]| { + debug_assert!(len <= s.len() as IdxSize); + if len == 0 { + None + } else { + let take = _slice_from_offsets(&s, first, len); + f(&take) + } + }), + } +} + +#[cfg(feature = "bitwise")] +impl BooleanChunked { + pub(crate) unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + bitwise_agg(self, groups, ChunkBitwiseReduce::and_reduce) + } + + pub(crate) unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + bitwise_agg(self, groups, ChunkBitwiseReduce::or_reduce) + } + + pub(crate) unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + bitwise_agg(self, groups, ChunkBitwiseReduce::xor_reduce) + } +} + impl BooleanChunked { pub(crate) unsafe fn agg_min(&self, groups: &GroupsProxy) -> Series { // faster paths diff --git a/crates/polars-core/src/frame/group_by/aggregations/mod.rs b/crates/polars-core/src/frame/group_by/aggregations/mod.rs index fa7cd62df9a5..092d660fb4d2 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/mod.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/mod.rs @@ -455,6 +455,77 @@ where } } +/// # Safety +/// +/// No bounds checks on `groups`. +#[cfg(feature = "bitwise")] +unsafe fn bitwise_agg( + ca: &ChunkedArray, + groups: &GroupsProxy, + f: fn(&ChunkedArray) -> Option, +) -> Series +where + ChunkedArray: + ChunkTakeUnchecked<[IdxSize]> + ChunkBitwiseReduce + IntoSeries, +{ + // Prevent a rechunk for every individual group. + let s = if groups.len() > 1 { + ca.rechunk() + } else { + ca.clone() + }; + + match groups { + GroupsProxy::Idx(groups) => agg_helper_idx_on_all::(groups, |idx| { + debug_assert!(idx.len() <= s.len()); + if idx.is_empty() { + None + } else { + let take = unsafe { s.take_unchecked(idx) }; + f(&take) + } + }), + GroupsProxy::Slice { groups, .. 
} => _agg_helper_slice::(groups, |[first, len]| { + debug_assert!(len <= s.len() as IdxSize); + if len == 0 { + None + } else { + let take = _slice_from_offsets(&s, first, len); + f(&take) + } + }), + } +} + +#[cfg(feature = "bitwise")] +impl ChunkedArray +where + T: PolarsNumericType, + ChunkedArray: + ChunkTakeUnchecked<[IdxSize]> + ChunkBitwiseReduce + IntoSeries, +{ + /// # Safety + /// + /// No bounds checks on `groups`. + pub(crate) unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + unsafe { bitwise_agg(self, groups, ChunkBitwiseReduce::and_reduce) } + } + + /// # Safety + /// + /// No bounds checks on `groups`. + pub(crate) unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + unsafe { bitwise_agg(self, groups, ChunkBitwiseReduce::or_reduce) } + } + + /// # Safety + /// + /// No bounds checks on `groups`. + pub(crate) unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + unsafe { bitwise_agg(self, groups, ChunkBitwiseReduce::xor_reduce) } + } +} + impl ChunkedArray where T: PolarsNumericType + Sync, diff --git a/crates/polars-core/src/frame/group_by/mod.rs b/crates/polars-core/src/frame/group_by/mod.rs index e2fbb90d6e74..89c72f5a0eac 100644 --- a/crates/polars-core/src/frame/group_by/mod.rs +++ b/crates/polars-core/src/frame/group_by/mod.rs @@ -869,10 +869,22 @@ pub enum GroupByMethod { Groups, NUnique, Quantile(f64, QuantileInterpolOptions), - Count { include_nulls: bool }, + Count { + include_nulls: bool, + }, Implode, Std(u8), Var(u8), + #[cfg(feature = "bitwise")] + Bitwise(GroupByBitwiseMethod), +} + +#[cfg(feature = "bitwise")] +#[derive(Copy, Clone, Debug)] +pub enum GroupByBitwiseMethod { + And, + Or, + Xor, } impl Display for GroupByMethod { @@ -895,11 +907,27 @@ impl Display for GroupByMethod { Implode => "list", Std(_) => "std", Var(_) => "var", + #[cfg(feature = "bitwise")] + Bitwise(t) => { + f.write_str("bitwise_")?; + return Display::fmt(t, f); + }, }; write!(f, "{s}") } } +#[cfg(feature = "bitwise")] +impl Display for 
GroupByBitwiseMethod { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::And => f.write_str("and"), + Self::Or => f.write_str("or"), + Self::Xor => f.write_str("xor"), + } + } +} + // Formatting functions used in eager and lazy code for renaming grouped columns pub fn fmt_group_by_column(name: &str, method: GroupByMethod) -> PlSmallStr { use GroupByMethod::*; @@ -920,6 +948,8 @@ pub fn fmt_group_by_column(name: &str, method: GroupByMethod) -> PlSmallStr { Quantile(quantile, _interpol) => format_pl_smallstr!("{name}_quantile_{quantile:.2}"), Std(_) => format_pl_smallstr!("{name}_agg_std"), Var(_) => format_pl_smallstr!("{name}_agg_var"), + #[cfg(feature = "bitwise")] + Bitwise(f) => format_pl_smallstr!("{name}_agg_bitwise_{f}"), } } diff --git a/crates/polars-core/src/series/implementations/boolean.rs b/crates/polars-core/src/series/implementations/boolean.rs index a4e9d662226d..14409fbdb91c 100644 --- a/crates/polars-core/src/series/implementations/boolean.rs +++ b/crates/polars-core/src/series/implementations/boolean.rs @@ -84,6 +84,19 @@ impl private::PrivateSeries for SeriesWrap { .agg_var(groups, _ddof) } + #[cfg(feature = "bitwise")] + unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + self.0.agg_and(groups) + } + #[cfg(feature = "bitwise")] + unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + self.0.agg_or(groups) + } + #[cfg(feature = "bitwise")] + unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + self.0.agg_xor(groups) + } + #[cfg(feature = "algorithm_group_by")] fn group_tuples(&self, multithreaded: bool, sorted: bool) -> PolarsResult { IntoGroupsProxy::group_tuples(&self.0, multithreaded, sorted) @@ -308,6 +321,55 @@ impl SeriesTrait for SeriesWrap { let v = sc.value().cast(&DataType::Float64); Ok(Scalar::new(DataType::Float64, v)) } + fn and_reduce(&self) -> PolarsResult { + let dt = DataType::Boolean; + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + 
Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_and(arr).unwrap()) + .reduce(|a, b| a & b) + .map_or(AnyValue::Null, Into::into), + )) + } + fn or_reduce(&self) -> PolarsResult { + let dt = DataType::Boolean; + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_or(arr).unwrap()) + .reduce(|a, b| a | b) + .map_or(AnyValue::Null, Into::into), + )) + } + fn xor_reduce(&self) -> PolarsResult { + let dt = DataType::Boolean; + if self.0.null_count() > 0 { + return Ok(Scalar::new(dt, AnyValue::Null)); + } + + Ok(Scalar::new( + dt, + self.0 + .downcast_iter() + .filter(|arr| !arr.is_empty()) + .map(|arr| polars_compute::bitwise::BitwiseKernel::reduce_xor(arr).unwrap()) + .reduce(|a, b| a ^ b) + .map_or(AnyValue::Null, Into::into), + )) + } + fn clone_inner(&self) -> Arc { Arc::new(SeriesWrap(Clone::clone(&self.0))) } diff --git a/crates/polars-core/src/series/implementations/floats.rs b/crates/polars-core/src/series/implementations/floats.rs index 6f83811561e3..eeba7c9b0a6b 100644 --- a/crates/polars-core/src/series/implementations/floats.rs +++ b/crates/polars-core/src/series/implementations/floats.rs @@ -5,7 +5,7 @@ use crate::frame::group_by::*; use crate::prelude::*; macro_rules! impl_dyn_series { - ($ca: ident) => { + ($ca: ident, $pdt:ident) => { impl private::PrivateSeries for SeriesWrap<$ca> { fn compute_len(&mut self) { self.0.compute_len() @@ -96,6 +96,19 @@ macro_rules! 
impl_dyn_series { self.0.agg_list(groups) } + #[cfg(feature = "bitwise")] + unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + self.0.agg_and(groups) + } + #[cfg(feature = "bitwise")] + unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + self.0.agg_or(groups) + } + #[cfg(feature = "bitwise")] + unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + self.0.agg_xor(groups) + } + fn subtract(&self, rhs: &Series) -> PolarsResult { polars_ensure!( self.dtype() == rhs.dtype(), @@ -356,6 +369,27 @@ macro_rules! impl_dyn_series { ) -> PolarsResult { QuantileAggSeries::quantile_reduce(&self.0, quantile, interpol) } + #[cfg(feature = "bitwise")] + fn and_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + let av = self.0.and_reduce().map_or(AnyValue::Null, Into::into); + + Ok(Scalar::new(dt, av)) + } + #[cfg(feature = "bitwise")] + fn or_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + let av = self.0.or_reduce().map_or(AnyValue::Null, Into::into); + + Ok(Scalar::new(dt, av)) + } + #[cfg(feature = "bitwise")] + fn xor_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + let av = self.0.xor_reduce().map_or(AnyValue::Null, Into::into); + + Ok(Scalar::new(dt, av)) + } fn clone_inner(&self) -> Arc { Arc::new(SeriesWrap(Clone::clone(&self.0))) @@ -372,5 +406,5 @@ macro_rules! impl_dyn_series { }; } -impl_dyn_series!(Float32Chunked); -impl_dyn_series!(Float64Chunked); +impl_dyn_series!(Float32Chunked, Float32Type); +impl_dyn_series!(Float64Chunked, Float64Type); diff --git a/crates/polars-core/src/series/implementations/mod.rs b/crates/polars-core/src/series/implementations/mod.rs index 6094dff6a838..8d4d2caa3ddf 100644 --- a/crates/polars-core/src/series/implementations/mod.rs +++ b/crates/polars-core/src/series/implementations/mod.rs @@ -169,6 +169,19 @@ macro_rules! 
impl_dyn_series { self.0.agg_list(groups) } + #[cfg(feature = "bitwise")] + unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + self.0.agg_and(groups) + } + #[cfg(feature = "bitwise")] + unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + self.0.agg_or(groups) + } + #[cfg(feature = "bitwise")] + unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + self.0.agg_xor(groups) + } + fn subtract(&self, rhs: &Series) -> PolarsResult { polars_ensure!( self.dtype() == rhs.dtype(), @@ -460,6 +473,30 @@ macro_rules! impl_dyn_series { QuantileAggSeries::quantile_reduce(&self.0, quantile, interpol) } + #[cfg(feature = "bitwise")] + fn and_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + let av = self.0.and_reduce().map_or(AnyValue::Null, Into::into); + + Ok(Scalar::new(dt, av)) + } + + #[cfg(feature = "bitwise")] + fn or_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + let av = self.0.or_reduce().map_or(AnyValue::Null, Into::into); + + Ok(Scalar::new(dt, av)) + } + + #[cfg(feature = "bitwise")] + fn xor_reduce(&self) -> PolarsResult { + let dt = <$pdt as PolarsDataType>::get_dtype(); + let av = self.0.xor_reduce().map_or(AnyValue::Null, Into::into); + + Ok(Scalar::new(dt, av)) + } + fn clone_inner(&self) -> Arc { Arc::new(SeriesWrap(Clone::clone(&self.0))) } diff --git a/crates/polars-core/src/series/series_trait.rs b/crates/polars-core/src/series/series_trait.rs index d9e11e5c5e8c..46b45633b74e 100644 --- a/crates/polars-core/src/series/series_trait.rs +++ b/crates/polars-core/src/series/series_trait.rs @@ -147,6 +147,27 @@ pub(crate) mod private { unsafe fn agg_list(&self, groups: &GroupsProxy) -> Series { Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. 
+ #[cfg(feature = "bitwise")] + unsafe fn agg_and(&self, groups: &GroupsProxy) -> Series { + Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) + } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + #[cfg(feature = "bitwise")] + unsafe fn agg_or(&self, groups: &GroupsProxy) -> Series { + Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) + } + /// # Safety + /// + /// Does no bounds checks, groups must be correct. + #[cfg(feature = "bitwise")] + unsafe fn agg_xor(&self, groups: &GroupsProxy) -> Series { + Series::full_null(self._field().name().clone(), groups.len(), self._dtype()) + } fn subtract(&self, _rhs: &Series) -> PolarsResult { polars_bail!(opq = subtract, self._dtype()); @@ -481,6 +502,18 @@ pub trait SeriesTrait: ) -> PolarsResult { polars_bail!(opq = quantile, self._dtype()); } + /// Get the bitwise AND of the Series as a new Series of length 1, + fn and_reduce(&self) -> PolarsResult { + polars_bail!(opq = sum, self._dtype()); + } + /// Get the bitwise OR of the Series as a new Series of length 1, + fn or_reduce(&self) -> PolarsResult { + polars_bail!(opq = sum, self._dtype()); + } + /// Get the bitwise XOR of the Series as a new Series of length 1, + fn xor_reduce(&self) -> PolarsResult { + polars_bail!(opq = sum, self._dtype()); + } /// Clone inner ChunkedArray and wrap in a new Arc fn clone_inner(&self) -> Arc; diff --git a/crates/polars-core/src/utils/mod.rs b/crates/polars-core/src/utils/mod.rs index c123072d8c36..08b33b06c3b1 100644 --- a/crates/polars-core/src/utils/mod.rs +++ b/crates/polars-core/src/utils/mod.rs @@ -521,15 +521,15 @@ macro_rules! with_match_physical_integer_polars_type {( use $crate::datatypes::DataType::*; use $crate::datatypes::*; match $key_type { - #[cfg(feature = "dtype-i8")] + #[cfg(feature = "dtype-i8")] Int8 => __with_ty__! { Int8Type }, - #[cfg(feature = "dtype-i16")] + #[cfg(feature = "dtype-i16")] Int16 => __with_ty__! 
{ Int16Type }, Int32 => __with_ty__! { Int32Type }, Int64 => __with_ty__! { Int64Type }, - #[cfg(feature = "dtype-u8")] + #[cfg(feature = "dtype-u8")] UInt8 => __with_ty__! { UInt8Type }, - #[cfg(feature = "dtype-u16")] + #[cfg(feature = "dtype-u16")] UInt16 => __with_ty__! { UInt16Type }, UInt32 => __with_ty__! { UInt32Type }, UInt64 => __with_ty__! { UInt64Type }, diff --git a/crates/polars-expr/Cargo.toml b/crates/polars-expr/Cargo.toml index 7a1f974b41ff..1b2b6063de9b 100644 --- a/crates/polars-expr/Cargo.toml +++ b/crates/polars-expr/Cargo.toml @@ -68,6 +68,7 @@ dtype-u8 = ["polars-plan/dtype-u8"] approx_unique = ["polars-plan/approx_unique"] is_in = ["polars-plan/is_in", "polars-ops/is_in"] +bitwise = ["polars-core/bitwise", "polars-plan/bitwise"] round_series = ["polars-plan/round_series", "polars-ops/round_series"] is_between = ["polars-plan/is_between"] dynamic_group_by = ["polars-plan/dynamic_group_by", "polars-time", "temporal"] diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index e41886a29590..e1d2a1e716ab 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -175,6 +175,24 @@ impl PhysicalExpr for AggregationExpr { .var_reduce(ddof) .map(|sc| sc.into_series(s.name().clone())), GroupByMethod::Quantile(_, _) => unimplemented!(), + #[cfg(feature = "bitwise")] + GroupByMethod::Bitwise(f) => match f { + GroupByBitwiseMethod::And => parallel_op_series( + |s| s.and_reduce().map(|sc| sc.into_series(s.name().clone())), + s, + allow_threading, + ), + GroupByBitwiseMethod::Or => parallel_op_series( + |s| s.or_reduce().map(|sc| sc.into_series(s.name().clone())), + s, + allow_threading, + ), + GroupByBitwiseMethod::Xor => parallel_op_series( + |s| s.xor_reduce().map(|sc| sc.into_series(s.name().clone())), + s, + allow_threading, + ), + }, } } #[allow(clippy::ptr_arg)] @@ -407,6 +425,16 @@ impl PhysicalExpr for AggregationExpr { // 
implemented explicitly in AggQuantile struct unimplemented!() }, + #[cfg(feature = "bitwise")] + GroupByMethod::Bitwise(f) => { + let (s, groups) = ac.get_final_aggregation(); + let agg_s = match f { + GroupByBitwiseMethod::And => s.agg_and(&groups), + GroupByBitwiseMethod::Or => s.agg_or(&groups), + GroupByBitwiseMethod::Xor => s.agg_xor(&groups), + }; + AggregatedScalar(rename_series(agg_s, keep_name)) + }, GroupByMethod::NanMin => { #[cfg(feature = "propagate_nans")] { diff --git a/crates/polars-expr/src/planner.rs b/crates/polars-expr/src/planner.rs index c7208b10d63f..b771a717050d 100644 --- a/crates/polars-expr/src/planner.rs +++ b/crates/polars-expr/src/planner.rs @@ -386,6 +386,8 @@ fn create_physical_expr_inner( }, I::Std(_, ddof) => GBM::Std(*ddof), I::Var(_, ddof) => GBM::Var(*ddof), + #[cfg(feature = "bitwise")] + I::Bitwise(_, f) => GBM::Bitwise((*f).into()), I::AggGroups(_) => { polars_bail!(InvalidOperation: "agg groups expression only supported in aggregation context") }, diff --git a/crates/polars-lazy/Cargo.toml b/crates/polars-lazy/Cargo.toml index 333fdc1211d2..2dfd642cde1f 100644 --- a/crates/polars-lazy/Cargo.toml +++ b/crates/polars-lazy/Cargo.toml @@ -159,6 +159,13 @@ true_div = ["polars-plan/true_div"] extract_jsonpath = ["polars-plan/extract_jsonpath", "polars-ops/extract_jsonpath"] # operations +bitwise = [ + "polars-plan/bitwise", + "polars-expr/bitwise", + "polars-core/bitwise", + "polars-stream/bitwise", + "polars-ops/bitwise", +] approx_unique = ["polars-plan/approx_unique"] is_in = ["polars-plan/is_in", "polars-ops/is_in", "polars-expr/is_in"] repeat_by = ["polars-plan/repeat_by"] diff --git a/crates/polars-ops/Cargo.toml b/crates/polars-ops/Cargo.toml index 2f37857c9cd2..027d846b485e 100644 --- a/crates/polars-ops/Cargo.toml +++ b/crates/polars-ops/Cargo.toml @@ -90,6 +90,7 @@ binary_encoding = ["base64", "hex"] string_encoding = ["base64", "hex"] # ops +bitwise = ["polars-core/bitwise"] to_dummies = [] interpolate = [] 
interpolate_by = [] diff --git a/crates/polars-ops/src/series/ops/bitwise.rs b/crates/polars-ops/src/series/ops/bitwise.rs new file mode 100644 index 000000000000..1471e35feb63 --- /dev/null +++ b/crates/polars-ops/src/series/ops/bitwise.rs @@ -0,0 +1,57 @@ +use polars_core::chunked_array::ops::arity::unary_mut_values; +use polars_core::chunked_array::ChunkedArray; +use polars_core::prelude::DataType; +use polars_core::series::Series; +use polars_core::{with_match_physical_float_polars_type, with_match_physical_integer_polars_type}; +use polars_error::{polars_bail, PolarsResult}; + +use super::*; + +macro_rules! apply_bitwise_op { + ($($op:ident),+ $(,)?) => { + $( + pub fn $op(s: &Series) -> PolarsResult { + match s.dtype() { + DataType::Boolean => { + let ca: &ChunkedArray = s.as_any().downcast_ref().unwrap(); + Ok(unary_mut_values::( + ca, + |a| polars_compute::bitwise::BitwiseKernel::$op(a), + ).into_series()) + }, + dt if dt.is_integer() => { + with_match_physical_integer_polars_type!(dt, |$T| { + let ca: &ChunkedArray<$T> = s.as_any().downcast_ref().unwrap(); + Ok(unary_mut_values::<$T, UInt32Type, _, _>( + ca, + |a| polars_compute::bitwise::BitwiseKernel::$op(a), + ).into_series()) + }) + }, + dt if dt.is_float() => { + with_match_physical_float_polars_type!(dt, |$T| { + let ca: &ChunkedArray<$T> = s.as_any().downcast_ref().unwrap(); + Ok(unary_mut_values::<$T, UInt32Type, _, _>( + ca, + |a| polars_compute::bitwise::BitwiseKernel::$op(a), + ).into_series()) + }) + }, + dt => { + polars_bail!(InvalidOperation: "dtype {:?} not supported in '{}' operation", dt, stringify!($op)) + }, + } + } + )+ + + }; +} + +apply_bitwise_op! 
{ + count_ones, + count_zeros, + leading_ones, + leading_zeros, + trailing_ones, + trailing_zeros, +} diff --git a/crates/polars-ops/src/series/ops/mod.rs b/crates/polars-ops/src/series/ops/mod.rs index ed4a446f3cca..88b509ff6450 100644 --- a/crates/polars-ops/src/series/ops/mod.rs +++ b/crates/polars-ops/src/series/ops/mod.rs @@ -5,6 +5,7 @@ mod approx_algo; #[cfg(feature = "approx_unique")] mod approx_unique; mod arg_min_max; +mod bitwise; #[cfg(feature = "business")] mod business; mod clip; @@ -71,6 +72,7 @@ pub use approx_algo::*; #[cfg(feature = "approx_unique")] pub use approx_unique::*; pub use arg_min_max::ArgAgg; +pub use bitwise::*; #[cfg(feature = "business")] pub use business::*; pub use clip::*; diff --git a/crates/polars-plan/Cargo.toml b/crates/polars-plan/Cargo.toml index 7edc15ea8616..d41be032bcf0 100644 --- a/crates/polars-plan/Cargo.toml +++ b/crates/polars-plan/Cargo.toml @@ -106,6 +106,7 @@ nightly = ["polars-utils/nightly", "polars-ops/nightly"] extract_jsonpath = ["polars-ops/extract_jsonpath"] # operations +bitwise = ["polars-core/bitwise", "polars-ops/bitwise"] approx_unique = ["polars-ops/approx_unique"] is_in = ["polars-ops/is_in"] repeat_by = ["polars-ops/repeat_by"] @@ -192,6 +193,7 @@ panic_on_schema = [] [package.metadata.docs.rs] features = [ + "bitwise", "temporal", "serde", "rolling_window", diff --git a/crates/polars-plan/src/dsl/bitwise.rs b/crates/polars-plan/src/dsl/bitwise.rs new file mode 100644 index 000000000000..b9e66c2c632e --- /dev/null +++ b/crates/polars-plan/src/dsl/bitwise.rs @@ -0,0 +1,50 @@ +use std::sync::Arc; + +use super::{AggExpr, BitwiseAggFunction, BitwiseFunction, Expr, FunctionExpr}; + +impl Expr { + /// Evaluate the number of set bits. + pub fn bitwise_count_ones(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::CountOnes)) + } + + /// Evaluate the number of unset bits. 
+ pub fn bitwise_count_zeros(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::CountZeros)) + } + + /// Evaluate the number of most-significant set bits before seeing an unset bit. + pub fn bitwise_leading_ones(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::LeadingOnes)) + } + + /// Evaluate the number of most-significant unset bits before seeing a set bit. + pub fn bitwise_leading_zeros(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::LeadingZeros)) + } + + /// Evaluate the number of least-significant set bits before seeing an unset bit. + pub fn bitwise_trailing_ones(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::TrailingOnes)) + } + + /// Evaluate the number of least-significant unset bits before seeing a set bit. + pub fn bitwise_trailing_zeros(self) -> Self { + self.apply_private(FunctionExpr::Bitwise(BitwiseFunction::TrailingZeros)) + } + + /// Perform an aggregation of bitwise ANDs + pub fn bitwise_and(self) -> Self { + Expr::Agg(AggExpr::Bitwise(Arc::new(self), BitwiseAggFunction::And)) + } + + /// Perform an aggregation of bitwise ORs + pub fn bitwise_or(self) -> Self { + Expr::Agg(AggExpr::Bitwise(Arc::new(self), BitwiseAggFunction::Or)) + } + + /// Perform an aggregation of bitwise XORs + pub fn bitwise_xor(self) -> Self { + Expr::Agg(AggExpr::Bitwise(Arc::new(self), BitwiseAggFunction::Xor)) + } +} diff --git a/crates/polars-plan/src/dsl/expr.rs b/crates/polars-plan/src/dsl/expr.rs index 0bbecd7e1d77..2b9dff7cb5f2 100644 --- a/crates/polars-plan/src/dsl/expr.rs +++ b/crates/polars-plan/src/dsl/expr.rs @@ -37,6 +37,8 @@ pub enum AggExpr { AggGroups(Arc), Std(Arc, u8), Var(Arc, u8), + #[cfg(feature = "bitwise")] + Bitwise(Arc, super::function_expr::BitwiseAggFunction), } impl AsRef for AggExpr { @@ -57,6 +59,8 @@ impl AsRef for AggExpr { AggGroups(e) => e, Std(e, _) => e, Var(e, _) => e, + #[cfg(feature = "bitwise")] + Bitwise(e, _) => e, } } } diff --git 
a/crates/polars-plan/src/dsl/function_expr/bitwise.rs b/crates/polars-plan/src/dsl/function_expr/bitwise.rs new file mode 100644 index 000000000000..2d4dd779cff0 --- /dev/null +++ b/crates/polars-plan/src/dsl/function_expr/bitwise.rs @@ -0,0 +1,114 @@ +use std::fmt; +use std::sync::Arc; + +use polars_core::prelude::*; + +use super::{ColumnsUdf, SpecialEq}; +use crate::dsl::FieldsMapper; +use crate::map; + +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash)] +pub enum BitwiseFunction { + CountOnes, + CountZeros, + + LeadingOnes, + LeadingZeros, + + TrailingOnes, + TrailingZeros, +} + +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash)] +pub enum BitwiseAggFunction { + And, + Or, + Xor, +} + +impl fmt::Display for BitwiseFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { + use BitwiseFunction as B; + + let s = match self { + B::CountOnes => "count_ones", + B::CountZeros => "count_zeros", + B::LeadingOnes => "leading_ones", + B::LeadingZeros => "leading_zeros", + B::TrailingOnes => "trailing_ones", + B::TrailingZeros => "trailing_zeros", + }; + + f.write_str(s) + } +} + +impl From for SpecialEq> { + fn from(func: BitwiseFunction) -> Self { + use BitwiseFunction as B; + + match func { + B::CountOnes => map!(count_ones), + B::CountZeros => map!(count_zeros), + B::LeadingOnes => map!(leading_ones), + B::LeadingZeros => map!(leading_zeros), + B::TrailingOnes => map!(trailing_ones), + B::TrailingZeros => map!(trailing_zeros), + } + } +} + +impl From for GroupByBitwiseMethod { + fn from(value: BitwiseAggFunction) -> Self { + match value { + BitwiseAggFunction::And => Self::And, + BitwiseAggFunction::Or => Self::Or, + BitwiseAggFunction::Xor => Self::Xor, + } + } +} + +impl BitwiseFunction { + pub(super) fn get_field(&self, mapper: FieldsMapper) -> PolarsResult { + mapper.try_map_dtype(|dtype| { + 
let is_valid = match dtype { + DataType::Boolean => true, + dt if dt.is_integer() => true, + dt if dt.is_float() => true, + _ => false, + }; + + if !is_valid { + polars_bail!(InvalidOperation: "dtype {} not supported in '{}' operation", dtype, self); + } + + Ok(DataType::UInt32) + }) + } +} + +fn count_ones(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::count_ones) +} + +fn count_zeros(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::count_zeros) +} + +fn leading_ones(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::leading_ones) +} + +fn leading_zeros(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::leading_zeros) +} + +fn trailing_ones(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::trailing_ones) +} + +fn trailing_zeros(c: &Column) -> PolarsResult { + c.try_apply_unary_elementwise(polars_ops::series::trailing_zeros) +} diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index 6347f6cee7b4..0458b2b4a1d0 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -5,6 +5,8 @@ mod arg_where; #[cfg(feature = "dtype-array")] mod array; mod binary; +#[cfg(feature = "bitwise")] +mod bitwise; mod boolean; mod bounds; #[cfg(feature = "business")] @@ -89,6 +91,8 @@ use schema::FieldsMapper; use serde::{Deserialize, Serialize}; pub(crate) use self::binary::BinaryFunction; +#[cfg(feature = "bitwise")] +pub use self::bitwise::{BitwiseAggFunction, BitwiseFunction}; pub use self::boolean::BooleanFunction; #[cfg(feature = "business")] pub(super) use self::business::BusinessFunction; @@ -127,6 +131,8 @@ pub enum FunctionExpr { StructExpr(StructFunction), #[cfg(feature = "temporal")] TemporalExpr(TemporalFunction), + #[cfg(feature = "bitwise")] + Bitwise(BitwiseFunction), // Other expressions 
Boolean(BooleanFunction), @@ -376,6 +382,8 @@ impl Hash for FunctionExpr { StructExpr(f) => f.hash(state), #[cfg(feature = "temporal")] TemporalExpr(f) => f.hash(state), + #[cfg(feature = "bitwise")] + Bitwise(f) => f.hash(state), // Other expressions Boolean(f) => f.hash(state), @@ -602,6 +610,8 @@ impl Display for FunctionExpr { StructExpr(func) => return write!(f, "{func}"), #[cfg(feature = "temporal")] TemporalExpr(func) => return write!(f, "{func}"), + #[cfg(feature = "bitwise")] + Bitwise(func) => return write!(f, "bitwise_{func}"), // Other expressions Boolean(func) => return write!(f, "{func}"), @@ -871,6 +881,8 @@ impl From for SpecialEq> { StructExpr(func) => func.into(), #[cfg(feature = "temporal")] TemporalExpr(func) => func.into(), + #[cfg(feature = "bitwise")] + Bitwise(func) => func.into(), // Other expressions Boolean(func) => func.into(), diff --git a/crates/polars-plan/src/dsl/function_expr/schema.rs b/crates/polars-plan/src/dsl/function_expr/schema.rs index 11b190b41d50..7cc5b8c5c7ad 100644 --- a/crates/polars-plan/src/dsl/function_expr/schema.rs +++ b/crates/polars-plan/src/dsl/function_expr/schema.rs @@ -26,6 +26,8 @@ impl FunctionExpr { StructExpr(s) => s.get_field(mapper), #[cfg(feature = "temporal")] TemporalExpr(fun) => fun.get_field(mapper), + #[cfg(feature = "bitwise")] + Bitwise(fun) => fun.get_field(mapper), // Other expressions Boolean(func) => func.get_field(mapper), diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index 21be2f77252c..477d97b9c299 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -13,6 +13,8 @@ mod arity; #[cfg(feature = "dtype-array")] mod array; pub mod binary; +#[cfg(feature = "bitwise")] +mod bitwise; #[cfg(feature = "temporal")] pub mod dt; mod expr; diff --git a/crates/polars-plan/src/plans/aexpr/mod.rs b/crates/polars-plan/src/plans/aexpr/mod.rs index 42bfff7cabab..53bf24ff838e 100644 --- a/crates/polars-plan/src/plans/aexpr/mod.rs +++ 
b/crates/polars-plan/src/plans/aexpr/mod.rs @@ -50,6 +50,8 @@ pub enum IRAggExpr { Count(Node, bool), Std(Node, u8), Var(Node, u8), + #[cfg(feature = "bitwise")] + Bitwise(Node, BitwiseAggFunction), AggGroups(Node), } @@ -62,6 +64,8 @@ impl Hash for IRAggExpr { }, Self::Quantile { interpol, .. } => interpol.hash(state), Self::Std(_, v) | Self::Var(_, v) => v.hash(state), + #[cfg(feature = "bitwise")] + Self::Bitwise(_, f) => f.hash(state), _ => {}, } } @@ -91,6 +95,8 @@ impl IRAggExpr { (Quantile { interpol: l, .. }, Quantile { interpol: r, .. }) => l == r, (Std(_, l), Std(_, r)) => l == r, (Var(_, l), Var(_, r)) => l == r, + #[cfg(feature = "bitwise")] + (Bitwise(_, l), Bitwise(_, r)) => l == r, _ => std::mem::discriminant(self) == std::mem::discriminant(other), } } @@ -124,6 +130,8 @@ impl From for GroupByMethod { Count(_, include_nulls) => GroupByMethod::Count { include_nulls }, Std(_, ddof) => GroupByMethod::Std(ddof), Var(_, ddof) => GroupByMethod::Var(ddof), + #[cfg(feature = "bitwise")] + Bitwise(_, f) => GroupByMethod::Bitwise(f.into()), AggGroups(_) => GroupByMethod::Groups, Quantile { .. } => unreachable!(), } diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs index b33cac4aa259..af37357502ee 100644 --- a/crates/polars-plan/src/plans/aexpr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/schema.rs @@ -217,6 +217,13 @@ impl AExpr { float_type(&mut field); Ok(field) }, + #[cfg(feature = "bitwise")] + Bitwise(expr, _) => { + *nested = nested.saturating_sub(1); + let field = arena.get(*expr).to_field_impl(schema, arena, nested)?; + // @Q? Do we need to coerce here? + Ok(field) + }, } }, Cast { expr, dtype, .. 
} => { diff --git a/crates/polars-plan/src/plans/aexpr/traverse.rs b/crates/polars-plan/src/plans/aexpr/traverse.rs index 29999ef6995f..7163e18de165 100644 --- a/crates/polars-plan/src/plans/aexpr/traverse.rs +++ b/crates/polars-plan/src/plans/aexpr/traverse.rs @@ -197,6 +197,8 @@ impl IRAggExpr { Std(input, _) => Single(*input), Var(input, _) => Single(*input), AggGroups(input) => Single(*input), + #[cfg(feature = "bitwise")] + Bitwise(input, _) => Single(*input), } } pub fn set_input(&mut self, input: Node) { @@ -216,6 +218,8 @@ impl IRAggExpr { Std(input, _) => input, Var(input, _) => input, AggGroups(input) => input, + #[cfg(feature = "bitwise")] + Bitwise(input, _) => input, }; *node = input; } diff --git a/crates/polars-plan/src/plans/conversion/expr_to_ir.rs b/crates/polars-plan/src/plans/conversion/expr_to_ir.rs index fe13dd1d3592..95eca45a9bf6 100644 --- a/crates/polars-plan/src/plans/conversion/expr_to_ir.rs +++ b/crates/polars-plan/src/plans/conversion/expr_to_ir.rs @@ -260,6 +260,11 @@ pub(super) fn to_aexpr_impl( AggExpr::AggGroups(expr) => { IRAggExpr::AggGroups(to_aexpr_impl_materialized_lit(owned(expr), arena, state)?) 
}, + #[cfg(feature = "bitwise")] + AggExpr::Bitwise(expr, f) => IRAggExpr::Bitwise( + to_aexpr_impl_materialized_lit(owned(expr), arena, state)?, + f, + ), }; AExpr::Agg(a_agg) }, diff --git a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs index c90590914e47..5d2e4c373b30 100644 --- a/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs +++ b/crates/polars-plan/src/plans/conversion/ir_to_dsl.rs @@ -160,6 +160,11 @@ pub fn node_to_expr(node: Node, expr_arena: &Arena) -> Expr { let expr = node_to_expr(expr, expr_arena); AggExpr::Count(Arc::new(expr), include_nulls).into() }, + #[cfg(feature = "bitwise")] + IRAggExpr::Bitwise(expr, f) => { + let expr = node_to_expr(expr, expr_arena); + AggExpr::Bitwise(Arc::new(expr), f).into() + }, }, AExpr::Ternary { predicate, diff --git a/crates/polars-plan/src/plans/format.rs b/crates/polars-plan/src/plans/format.rs index d39f3dd35cc9..c72cfad20e39 100644 --- a/crates/polars-plan/src/plans/format.rs +++ b/crates/polars-plan/src/plans/format.rs @@ -120,6 +120,16 @@ impl fmt::Debug for Expr { Var(expr, _) => write!(f, "{expr:?}.var()"), Std(expr, _) => write!(f, "{expr:?}.std()"), Quantile { expr, .. } => write!(f, "{expr:?}.quantile()"), + #[cfg(feature = "bitwise")] + Bitwise(expr, t) => { + let t = match t { + BitwiseAggFunction::And => "and", + BitwiseAggFunction::Or => "or", + BitwiseAggFunction::Xor => "xor", + }; + + write!(f, "{expr:?}.bitwise.{t}()") + }, } }, Cast { diff --git a/crates/polars-plan/src/plans/ir/format.rs b/crates/polars-plan/src/plans/ir/format.rs index c461b525cb9a..4ccb74f66238 100644 --- a/crates/polars-plan/src/plans/ir/format.rs +++ b/crates/polars-plan/src/plans/ir/format.rs @@ -593,6 +593,16 @@ impl<'a> Display for ExprIRDisplay<'a> { Var(expr, _) => write!(f, "{}.var()", self.with_root(expr)), Std(expr, _) => write!(f, "{}.std()", self.with_root(expr)), Quantile { expr, .. 
} => write!(f, "{}.quantile()", self.with_root(expr)), + #[cfg(feature = "bitwise")] + Bitwise(expr, t) => { + let t = match t { + BitwiseAggFunction::And => "and", + BitwiseAggFunction::Or => "or", + BitwiseAggFunction::Xor => "xor", + }; + + write!(f, "{}.bitwise.{t}()", self.with_root(expr)) + }, } }, Cast { diff --git a/crates/polars-plan/src/plans/iterator.rs b/crates/polars-plan/src/plans/iterator.rs index 2dc13870b553..997e38fa9d12 100644 --- a/crates/polars-plan/src/plans/iterator.rs +++ b/crates/polars-plan/src/plans/iterator.rs @@ -56,6 +56,8 @@ macro_rules! push_expr { AggGroups(e) => $push($c, e), Std(e, _) => $push($c, e), Var(e, _) => $push($c, e), + #[cfg(feature = "bitwise")] + Bitwise(e, _) => $push($c, e), } }, Ternary { diff --git a/crates/polars-plan/src/plans/visitor/expr.rs b/crates/polars-plan/src/plans/visitor/expr.rs index 2f5fce9bc283..71b287d03b85 100644 --- a/crates/polars-plan/src/plans/visitor/expr.rs +++ b/crates/polars-plan/src/plans/visitor/expr.rs @@ -72,6 +72,8 @@ impl TreeWalker for Expr { AggGroups(x) => AggGroups(am(x, f)?), Std(x, ddf) => Std(am(x, f)?, ddf), Var(x, ddf) => Var(am(x, f)?, ddf), + #[cfg(feature = "bitwise")] + Bitwise(x, t) => Bitwise(am(x, f)?, t), }), Ternary { predicate, truthy, falsy } => Ternary { predicate: am(predicate, &mut f)?, truthy: am(truthy, &mut f)?, falsy: am(falsy, f)? 
}, Function { input, function, options } => Function { input: input.into_iter().map(f).collect::>()?, function, options }, diff --git a/crates/polars-python/Cargo.toml b/crates/polars-python/Cargo.toml index 9ed35648c89f..1df741a4f51f 100644 --- a/crates/polars-python/Cargo.toml +++ b/crates/polars-python/Cargo.toml @@ -13,7 +13,7 @@ polars-core = { workspace = true, features = ["python"] } polars-error = { workspace = true } polars-io = { workspace = true } polars-lazy = { workspace = true, features = ["python"] } -polars-ops = { workspace = true } +polars-ops = { workspace = true, features = ["bitwise"] } polars-parquet = { workspace = true, optional = true } polars-plan = { workspace = true } polars-time = { workspace = true } @@ -48,6 +48,7 @@ features = [ "approx_unique", "array_any_all", "arg_where", + "bitwise", "business", "concat_str", "cum_agg", diff --git a/crates/polars-python/src/expr/bitwise.rs b/crates/polars-python/src/expr/bitwise.rs new file mode 100644 index 000000000000..550b1930942e --- /dev/null +++ b/crates/polars-python/src/expr/bitwise.rs @@ -0,0 +1,42 @@ +use pyo3::prelude::*; + +use crate::PyExpr; + +#[pymethods] +impl PyExpr { + fn bitwise_count_ones(&self) -> Self { + self.inner.clone().bitwise_count_ones().into() + } + + fn bitwise_count_zeros(&self) -> Self { + self.inner.clone().bitwise_count_zeros().into() + } + + fn bitwise_leading_ones(&self) -> Self { + self.inner.clone().bitwise_leading_ones().into() + } + + fn bitwise_leading_zeros(&self) -> Self { + self.inner.clone().bitwise_leading_zeros().into() + } + + fn bitwise_trailing_ones(&self) -> Self { + self.inner.clone().bitwise_trailing_ones().into() + } + + fn bitwise_trailing_zeros(&self) -> Self { + self.inner.clone().bitwise_trailing_zeros().into() + } + + fn bitwise_and(&self) -> Self { + self.inner.clone().bitwise_and().into() + } + + fn bitwise_or(&self) -> Self { + self.inner.clone().bitwise_or().into() + } + + fn bitwise_xor(&self) -> Self { + 
self.inner.clone().bitwise_xor().into() + } +} diff --git a/crates/polars-python/src/expr/mod.rs b/crates/polars-python/src/expr/mod.rs index 85d44fefbf98..93a00018a683 100644 --- a/crates/polars-python/src/expr/mod.rs +++ b/crates/polars-python/src/expr/mod.rs @@ -3,6 +3,8 @@ mod array; #[cfg(feature = "pymethods")] mod binary; #[cfg(feature = "pymethods")] +mod bitwise; +#[cfg(feature = "pymethods")] mod categorical; #[cfg(feature = "pymethods")] mod datetime; diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs index a1e5b26f1e27..a05ea6891e15 100644 --- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs @@ -723,6 +723,16 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { arguments: vec![n.0], options: py.None(), }, + IRAggExpr::Bitwise(n, f) => Agg { + name: "bitwise".to_object(py), + arguments: vec![n.0], + options: match f { + polars::prelude::BitwiseAggFunction::And => "and", + polars::prelude::BitwiseAggFunction::Or => "or", + polars::prelude::BitwiseAggFunction::Xor => "xor", + } + .to_object(py), + }, } .into_py(py), AExpr::Ternary { @@ -758,6 +768,9 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { FunctionExpr::ListExpr(_) => { return Err(PyNotImplementedError::new_err("list expr")) }, + FunctionExpr::Bitwise(_) => { + return Err(PyNotImplementedError::new_err("bitwise expr")) + }, FunctionExpr::StringExpr(strfun) => match strfun { StringFunction::ConcatHorizontal { delimiter, diff --git a/crates/polars-stream/Cargo.toml b/crates/polars-stream/Cargo.toml index b1a70997a3c3..78cdbc9115d0 100644 --- a/crates/polars-stream/Cargo.toml +++ b/crates/polars-stream/Cargo.toml @@ -37,3 +37,4 @@ version_check = { workspace = true } [features] nightly = [] +bitwise = ["polars-core/bitwise", "polars-plan/bitwise"] diff --git a/crates/polars-stream/src/physical_plan/lower_expr.rs 
b/crates/polars-stream/src/physical_plan/lower_expr.rs index 39493af054c2..618ec358f209 100644 --- a/crates/polars-stream/src/physical_plan/lower_expr.rs +++ b/crates/polars-stream/src/physical_plan/lower_expr.rs @@ -603,6 +603,12 @@ fn lower_exprs_with_ctx( fallback_subset.push(ExprIR::new(expr, OutputName::Alias(out_name.clone()))); transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(out_name))); }, + #[cfg(feature = "bitwise")] + IRAggExpr::Bitwise(_, _) => { + let out_name = unique_column_name(); + fallback_subset.push(ExprIR::new(expr, OutputName::Alias(out_name.clone()))); + transformed_exprs.push(ctx.expr_arena.add(AExpr::Column(out_name))); + }, }, AExpr::Len => { let out_name = unique_column_name(); diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index b858dbc36678..a23d23c7c1ae 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -132,6 +132,7 @@ array_any_all = ["polars-lazy?/array_any_all", "dtype-array"] asof_join = ["polars-lazy?/asof_join", "polars-ops/asof_join"] iejoin = ["polars-lazy?/iejoin"] binary_encoding = ["polars-ops/binary_encoding", "polars-lazy?/binary_encoding", "polars-sql?/binary_encoding"] +bitwise = ["polars-core/bitwise", "polars-plan?/bitwise", "polars-ops/bitwise", "polars-lazy?/bitwise"] business = ["polars-lazy?/business", "polars-ops/business"] checked_arithmetic = ["polars-core/checked_arithmetic"] chunked_ids = ["polars-ops?/chunked_ids"] diff --git a/py-polars/docs/source/reference/expressions/computation.rst b/py-polars/docs/source/reference/expressions/computation.rst index 46dba474834f..4ad8e68a1bfd 100644 --- a/py-polars/docs/source/reference/expressions/computation.rst +++ b/py-polars/docs/source/reference/expressions/computation.rst @@ -15,6 +15,14 @@ Computation Expr.arctan Expr.arctanh Expr.arg_unique + Expr.bitwise_count_ones + Expr.bitwise_count_zeros + Expr.bitwise_leading_ones + Expr.bitwise_trailing_ones + Expr.bitwise_trailing_zeros + Expr.bitwise_and + Expr.bitwise_or + 
Expr.bitwise_xor Expr.cbrt Expr.cos Expr.cosh diff --git a/py-polars/docs/source/reference/series/computation.rst b/py-polars/docs/source/reference/series/computation.rst index 9e3edb3ac0f6..887fed5b0ec2 100644 --- a/py-polars/docs/source/reference/series/computation.rst +++ b/py-polars/docs/source/reference/series/computation.rst @@ -15,6 +15,14 @@ Computation Series.arctanh Series.arg_true Series.arg_unique + Series.bitwise_count_ones + Series.bitwise_count_zeros + Series.bitwise_leading_ones + Series.bitwise_trailing_ones + Series.bitwise_trailing_zeros + Series.bitwise_and + Series.bitwise_or + Series.bitwise_xor Series.cbrt Series.cos Series.cosh diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index b1baee228c77..505d71cafecd 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -10474,6 +10474,42 @@ def replace_strict( self._pyexpr.replace_strict(old, new, default, return_dtype) ) + def bitwise_count_ones(self) -> Expr: + """Evaluate the number of set bits.""" + return self._from_pyexpr(self._pyexpr.bitwise_count_ones()) + + def bitwise_count_zeros(self) -> Expr: + """Evaluate the number of unset bits.""" + return self._from_pyexpr(self._pyexpr.bitwise_count_zeros()) + + def bitwise_leading_ones(self) -> Expr: + """Evaluate the number most-significant set bits before seeing an unset bit.""" + return self._from_pyexpr(self._pyexpr.bitwise_leading_ones()) + + def bitwise_leading_zeros(self) -> Expr: + """Evaluate the number most-significant unset bits before seeing a set bit.""" + return self._from_pyexpr(self._pyexpr.bitwise_leading_zeros()) + + def bitwise_trailing_ones(self) -> Expr: + """Evaluate the number least-significant set bits before seeing an unset bit.""" + return self._from_pyexpr(self._pyexpr.bitwise_trailing_ones()) + + def bitwise_trailing_zeros(self) -> Expr: + """Evaluate the number least-significant unset bits before seeing a set bit.""" + return 
self._from_pyexpr(self._pyexpr.bitwise_trailing_zeros()) + + def bitwise_and(self) -> Expr: + """Perform an aggregation of bitwise ANDs.""" + return self._from_pyexpr(self._pyexpr.bitwise_and()) + + def bitwise_or(self) -> Expr: + """Perform an aggregation of bitwise ORs.""" + return self._from_pyexpr(self._pyexpr.bitwise_or()) + + def bitwise_xor(self) -> Expr: + """Perform an aggregation of bitwise XORs.""" + return self._from_pyexpr(self._pyexpr.bitwise_xor()) + @deprecate_function( "Use `polars.plugins.register_plugin_function` instead.", version="0.20.16" ) diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index c9dcc7423dff..11c160e22874 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -7364,6 +7364,33 @@ def implode(self) -> Self: ] """ + def bitwise_count_ones(self) -> Self: + """Evaluate the number of set bits.""" + + def bitwise_count_zeros(self) -> Self: + """Evaluate the number of unset bits.""" + + def bitwise_leading_ones(self) -> Self: + """Evaluate the number most-significant set bits before seeing an unset bit.""" + + def bitwise_leading_zeros(self) -> Self: + """Evaluate the number most-significant unset bits before seeing a set bit.""" + + def bitwise_trailing_ones(self) -> Self: + """Evaluate the number least-significant set bits before seeing an unset bit.""" + + def bitwise_trailing_zeros(self) -> Self: + """Evaluate the number least-significant unset bits before seeing a set bit.""" + + def bitwise_and(self) -> Self: + """Perform an aggregation of bitwise ANDs.""" + + def bitwise_or(self) -> Self: + """Perform an aggregation of bitwise ORs.""" + + def bitwise_xor(self) -> Self: + """Perform an aggregation of bitwise XORs.""" + # Keep the `list` and `str` properties below at the end of the definition of Series, # as to not confuse mypy with the type annotation `str` and `list` diff --git a/py-polars/tests/unit/operations/test_bitwise.py 
b/py-polars/tests/unit/operations/test_bitwise.py index e69fe2b218bd..e7a957fb534b 100644 --- a/py-polars/tests/unit/operations/test_bitwise.py +++ b/py-polars/tests/unit/operations/test_bitwise.py @@ -1,6 +1,12 @@ +from __future__ import annotations + +import sys +import typing + import pytest import polars as pl +from polars.testing import assert_frame_equal, assert_series_equal @pytest.mark.parametrize("op", ["and_", "or_"]) @@ -17,3 +23,185 @@ def test_bitwise_single_null_value_schema(op: str) -> None: result_schema = q.collect_schema() assert result_schema.len() == 1 assert "a" in result_schema + + +def leading_zeros(v: int | None, nb: int) -> int | None: + if v is None: + return None + + b = bin(v)[2:] + blen = len(b) - len(b.lstrip("0")) + if blen == len(b): + return nb + else: + return nb - len(b) + blen + + +def leading_ones(v: int | None, nb: int) -> int | None: + if v is None: + return None + + b = bin(v)[2:] + if len(b) < nb: + return 0 + else: + return len(b) - len(b.lstrip("1")) + + +def trailing_zeros(v: int | None, nb: int) -> int | None: + if v is None: + return None + + b = bin(v)[2:] + blen = len(b) - len(b.rstrip("0")) + if blen == len(b): + return nb + else: + return blen + + +def trailing_ones(v: int | None) -> int | None: + if v is None: + return None + + b = bin(v)[2:] + return len(b) - len(b.rstrip("1")) + + +@pytest.mark.parametrize( + "value", + [ + 0x00, + 0x01, + 0xFCEF_0123, + 0xFFFF_FFFF, + 0xFFF0_FFE1_ABCD_EF01, + 0xAAAA_AAAA_AAAA_AAAA, + None, + ], +) +@pytest.mark.parametrize( + "dtype", + [ + pl.Int8, + pl.Int16, + pl.Int32, + pl.Int64, + pl.UInt8, + pl.UInt16, + pl.UInt32, + pl.UInt64, + pl.Boolean, + ], +) +@pytest.mark.skipif(sys.version_info < (3, 10), reason="bit_count introduced in 3.10") +@typing.no_type_check +def test_bit_counts(value: int, dtype: pl.DataType) -> None: + bitsize = 8 + if "Boolean" in str(dtype): + bitsize = 1 + if "16" in str(dtype): + bitsize = 16 + elif "32" in str(dtype): + bitsize = 32 + elif "64" in 
str(dtype): + bitsize = 64 + + if bitsize == 1 and value is not None: + value = value & 1 != 0 + + co = 1 if value else 0 + cz = 0 if value else 1 + elif value is not None: + value = value & ((1 << bitsize) - 1) + + if dtype.is_signed_integer() and value >> (bitsize - 1) > 0: + value = value - pow(2, bitsize - 1) + + co = value.bit_count() + cz = bitsize - co + else: + co = None + cz = None + + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_count_ones(), + pl.Series("a", [co], pl.UInt32), + ) + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_count_zeros(), + pl.Series("a", [cz], pl.UInt32), + ) + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_leading_ones(), + pl.Series("a", [leading_ones(value, bitsize)], pl.UInt32), + ) + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_leading_zeros(), + pl.Series("a", [leading_zeros(value, bitsize)], pl.UInt32), + ) + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_trailing_ones(), + pl.Series("a", [trailing_ones(value)], pl.UInt32), + ) + assert_series_equal( + pl.Series("a", [value], dtype).bitwise_trailing_zeros(), + pl.Series("a", [trailing_zeros(value, bitsize)], pl.UInt32), + ) + + +@pytest.mark.parametrize( + "dtype", + [pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64], +) +def test_bit_aggregations(dtype: pl.DataType) -> None: + s = pl.Series("a", [0x74, 0x1C, 0x05], dtype) + + df = s.to_frame().select( + AND=pl.col.a.bitwise_and(), + OR=pl.col.a.bitwise_or(), + XOR=pl.col.a.bitwise_xor(), + ) + + assert_frame_equal( + df, + pl.DataFrame( + [ + pl.Series("AND", [0x04], dtype), + pl.Series("OR", [0x7D], dtype), + pl.Series("XOR", [0x6D], dtype), + ] + ), + ) + + +@pytest.mark.parametrize( + "dtype", + [pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64], +) +def test_bit_group_by(dtype: pl.DataType) -> None: + df = pl.DataFrame( + [ + pl.Series("g", [1, 1, 2, 3, 2, 4, 4], pl.Int8), + 
pl.Series("a", [0x74, 0x1C, 0x05, None, 0x70, 0x01, None], dtype), + ] + ) + + df = df.group_by("g").agg( + AND=pl.col.a.bitwise_and(), + OR=pl.col.a.bitwise_or(), + XOR=pl.col.a.bitwise_xor(), + ) + + assert_frame_equal( + df, + pl.DataFrame( + [ + pl.Series("g", [1, 2, 3, 4], pl.Int8), + pl.Series("AND", [0x74 & 0x1C, 0x05 & 0x70, None, None], dtype), + pl.Series("OR", [0x74 | 0x1C, 0x05 | 0x70, None, None], dtype), + pl.Series("XOR", [0x74 ^ 0x1C, 0x05 ^ 0x70, None, None], dtype), + ] + ), + check_row_order=False, + )