Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Bitwise aggregations should ignore null values #19067

Merged
merged 3 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions crates/polars-arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
.unwrap_or(0)
}

#[inline]
fn has_nulls(&self) -> bool {
self.null_count() > 0
}

/// Returns whether slot `i` is null.
/// # Panic
/// Panics iff `i >= self.len()`.
Expand Down
115 changes: 38 additions & 77 deletions crates/polars-compute/src/bitwise/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use std::convert::identity;

use arrow::array::{BooleanArray, PrimitiveArray};
use arrow::array::{Array, BooleanArray, PrimitiveArray};
use arrow::datatypes::ArrowDataType;
use arrow::legacy::utils::CustomIterTools;
use bytemuck::Zeroable;

pub trait BitwiseKernel {
type Scalar;
Expand Down Expand Up @@ -36,8 +35,7 @@ macro_rules! impl_bitwise_kernel {
fn count_ones(&self) -> PrimitiveArray<u32> {
PrimitiveArray::new(
ArrowDataType::UInt32,
self.values()
.iter()
self.values_iter()
.map(|&v| $to_bits(v).count_ones())
.collect_trusted::<Vec<_>>()
.into(),
Expand All @@ -49,9 +47,7 @@ macro_rules! impl_bitwise_kernel {
fn count_zeros(&self) -> PrimitiveArray<u32> {
PrimitiveArray::new(
ArrowDataType::UInt32,
self
.values()
.iter()
self.values_iter()
.map(|&v| $to_bits(v).count_zeros())
.collect_trusted::<Vec<_>>()
.into(),
Expand All @@ -63,8 +59,7 @@ macro_rules! impl_bitwise_kernel {
fn leading_ones(&self) -> PrimitiveArray<u32> {
PrimitiveArray::new(
ArrowDataType::UInt32,
self.values()
.iter()
self.values_iter()
.map(|&v| $to_bits(v).leading_ones())
.collect_trusted::<Vec<_>>()
.into(),
Expand All @@ -76,8 +71,7 @@ macro_rules! impl_bitwise_kernel {
fn leading_zeros(&self) -> PrimitiveArray<u32> {
PrimitiveArray::new(
ArrowDataType::UInt32,
self.values()
.iter()
self.values_iter()
.map(|&v| $to_bits(v).leading_zeros())
.collect_trusted::<Vec<_>>()
.into(),
Expand All @@ -89,8 +83,7 @@ macro_rules! impl_bitwise_kernel {
fn trailing_ones(&self) -> PrimitiveArray<u32> {
PrimitiveArray::new(
ArrowDataType::UInt32,
self.values()
.iter()
self.values_iter()
.map(|&v| $to_bits(v).trailing_ones())
.collect_trusted::<Vec<_>>()
.into(),
Expand All @@ -112,47 +105,29 @@ macro_rules! impl_bitwise_kernel {

#[inline(never)]
fn reduce_and(&self) -> Option<Self::Scalar> {
if self.validity().map_or(false, |v| v.unset_bits() > 0) {
return None;
if !self.has_nulls() {
self.values_iter().copied().map($to_bits).reduce(|a, b| a & b).map($from_bits)
} else {
self.non_null_values_iter().map($to_bits).reduce(|a, b| a & b).map($from_bits)
}

let values = self.values();

if values.is_empty() {
return None;
}

Some($from_bits(values.iter().fold(!$to_bits(<$T>::zeroed()), |a, &b| a & $to_bits(b))))
}

#[inline(never)]
fn reduce_or(&self) -> Option<Self::Scalar> {
if self.validity().map_or(false, |v| v.unset_bits() > 0) {
return None;
}

let values = self.values();

if values.is_empty() {
return None;
if !self.has_nulls() {
self.values_iter().copied().map($to_bits).reduce(|a, b| a | b).map($from_bits)
} else {
self.non_null_values_iter().map($to_bits).reduce(|a, b| a | b).map($from_bits)
}

Some($from_bits(values.iter().fold($to_bits(<$T>::zeroed()), |a, &b| a | $to_bits(b))))
}

#[inline(never)]
fn reduce_xor(&self) -> Option<Self::Scalar> {
if self.validity().map_or(false, |v| v.unset_bits() > 0) {
return None;
}

let values = self.values();

if values.is_empty() {
return None;
if !self.has_nulls() {
self.values_iter().copied().map($to_bits).reduce(|a, b| a ^ b).map($from_bits)
} else {
self.non_null_values_iter().map($to_bits).reduce(|a, b| a ^ b).map($from_bits)
}

Some($from_bits(values.iter().fold($to_bits(<$T>::zeroed()), |a, &b| a ^ $to_bits(b))))
}

fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar {
Expand Down Expand Up @@ -189,8 +164,7 @@ impl BitwiseKernel for BooleanArray {
fn count_ones(&self) -> PrimitiveArray<u32> {
PrimitiveArray::new(
ArrowDataType::UInt32,
self.values()
.iter()
self.values_iter()
.map(u32::from)
.collect_trusted::<Vec<_>>()
.into(),
Expand All @@ -202,8 +176,7 @@ impl BitwiseKernel for BooleanArray {
fn count_zeros(&self) -> PrimitiveArray<u32> {
PrimitiveArray::new(
ArrowDataType::UInt32,
self.values()
.iter()
self.values_iter()
.map(|v| u32::from(!v))
.collect_trusted::<Vec<_>>()
.into(),
Expand Down Expand Up @@ -232,45 +205,33 @@ impl BitwiseKernel for BooleanArray {
}

fn reduce_and(&self) -> Option<Self::Scalar> {
if self.validity().map_or(false, |v| v.unset_bits() > 0) {
return None;
if self.len() == self.null_count() {
None
} else if !self.has_nulls() {
Some(self.values().unset_bits() == 0)
} else {
Some((self.values() & self.validity().unwrap()).unset_bits() == 0)
}

let values = self.values();

if values.is_empty() {
return None;
}

Some(values.unset_bits() == 0)
}

fn reduce_or(&self) -> Option<Self::Scalar> {
if self.validity().map_or(false, |v| v.unset_bits() > 0) {
return None;
}

let values = self.values();

if values.is_empty() {
return None;
if self.len() == self.null_count() {
None
} else if !self.has_nulls() {
Some(self.values().set_bits() > 0)
} else {
Some((self.values() & self.validity().unwrap()).set_bits() > 0)
}

Some(values.set_bits() > 0)
}

fn reduce_xor(&self) -> Option<Self::Scalar> {
if self.validity().map_or(false, |v| v.unset_bits() > 0) {
return None;
}

let values = self.values();

if values.is_empty() {
return None;
if self.len() == self.null_count() {
None
} else if !self.has_nulls() {
Some(self.values().set_bits() % 2 == 1)
} else {
Some((self.values() & self.validity().unwrap()).set_bits() % 2 == 1)
}

Some(values.set_bits() % 2 == 1)
}

fn bit_and(lhs: Self::Scalar, rhs: Self::Scalar) -> Self::Scalar {
Expand Down
48 changes: 11 additions & 37 deletions crates/polars-core/src/chunked_array/ops/bitwise_reduce.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use arrow::array::{Array, PrimitiveArray};
use arrow::array::PrimitiveArray;
use arrow::types::NativeType;
use polars_compute::bitwise::BitwiseKernel;

Expand All @@ -13,33 +13,20 @@ where
type Physical = T::Native;

fn and_reduce(&self) -> Option<Self::Physical> {
if self.null_count() > 0 {
return None;
}

self.downcast_iter()
.filter(|arr| !arr.is_empty())
.map(|arr| BitwiseKernel::reduce_and(arr).unwrap())
.filter_map(BitwiseKernel::reduce_and)
.reduce(<PrimitiveArray<T::Native> as BitwiseKernel>::bit_and)
}
fn or_reduce(&self) -> Option<Self::Physical> {
if self.null_count() > 0 {
return None;
}

fn or_reduce(&self) -> Option<Self::Physical> {
self.downcast_iter()
.filter(|arr| !arr.is_empty())
.map(|arr| BitwiseKernel::reduce_or(arr).unwrap())
.filter_map(BitwiseKernel::reduce_or)
.reduce(<PrimitiveArray<T::Native> as BitwiseKernel>::bit_or)
}
fn xor_reduce(&self) -> Option<Self::Physical> {
if self.null_count() > 0 {
return None;
}

fn xor_reduce(&self) -> Option<Self::Physical> {
self.downcast_iter()
.filter(|arr| !arr.is_empty())
.map(|arr| BitwiseKernel::reduce_xor(arr).unwrap())
.filter_map(BitwiseKernel::reduce_xor)
.reduce(<PrimitiveArray<T::Native> as BitwiseKernel>::bit_xor)
}
}
Expand All @@ -48,33 +35,20 @@ impl ChunkBitwiseReduce for ChunkedArray<BooleanType> {
type Physical = bool;

fn and_reduce(&self) -> Option<Self::Physical> {
if self.null_count() > 0 {
return None;
}

self.downcast_iter()
.filter(|arr| !arr.is_empty())
.map(|arr| BitwiseKernel::reduce_and(arr).unwrap())
.filter_map(BitwiseKernel::reduce_and)
.reduce(|a, b| a & b)
}
fn or_reduce(&self) -> Option<Self::Physical> {
if self.null_count() > 0 {
return None;
}

fn or_reduce(&self) -> Option<Self::Physical> {
self.downcast_iter()
.filter(|arr| !arr.is_empty())
.map(|arr| BitwiseKernel::reduce_or(arr).unwrap())
.filter_map(BitwiseKernel::reduce_or)
.reduce(|a, b| a | b)
}
fn xor_reduce(&self) -> Option<Self::Physical> {
if self.null_count() > 0 {
return None;
}

fn xor_reduce(&self) -> Option<Self::Physical> {
self.downcast_iter()
.filter(|arr| !arr.is_empty())
.map(|arr| BitwiseKernel::reduce_xor(arr).unwrap())
.filter_map(BitwiseKernel::reduce_xor)
.reduce(|a, b| a ^ b)
}
}
2 changes: 2 additions & 0 deletions crates/polars-python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ peaks = ["polars/peaks"]
hist = ["polars/hist"]
find_many = ["polars/find_many"]
new_streaming = ["polars-lazy/new_streaming"]
bitwise = ["polars/bitwise"]

dtype-i8 = []
dtype-i16 = []
Expand All @@ -182,6 +183,7 @@ dtypes = [
operations = [
"array_any_all",
"array_count",
"bitwise",
"is_in",
"repeat_by",
"trigonometry",
Expand Down
33 changes: 33 additions & 0 deletions crates/polars-python/src/series/aggregation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,37 @@ impl PySeries {
)
.into_py(py))
}

#[cfg(feature = "bitwise")]
fn bitwise_and(&self, py: Python) -> PyResult<PyObject> {
Ok(Wrap(
self.series
.and_reduce()
.map_err(PyPolarsErr::from)?
.as_any_value(),
)
.into_py(py))
}

#[cfg(feature = "bitwise")]
fn bitwise_or(&self, py: Python) -> PyResult<PyObject> {
Ok(Wrap(
self.series
.or_reduce()
.map_err(PyPolarsErr::from)?
.as_any_value(),
)
.into_py(py))
}

#[cfg(feature = "bitwise")]
fn bitwise_xor(&self, py: Python) -> PyResult<PyObject> {
Ok(Wrap(
self.series
.xor_reduce()
.map_err(PyPolarsErr::from)?
.as_any_value(),
)
.into_py(py))
}
}
3 changes: 3 additions & 0 deletions py-polars/docs/source/reference/expressions/aggregation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ Aggregation
Expr.approx_n_unique
Expr.arg_max
Expr.arg_min
Expr.bitwise_and
Expr.bitwise_or
Expr.bitwise_xor
Expr.count
Expr.first
Expr.implode
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,9 @@ Computation
Expr.bitwise_count_ones
Expr.bitwise_count_zeros
Expr.bitwise_leading_ones
Expr.bitwise_leading_zeros
Expr.bitwise_trailing_ones
Expr.bitwise_trailing_zeros
Expr.bitwise_and
Expr.bitwise_or
Expr.bitwise_xor
Expr.cbrt
Expr.cos
Expr.cosh
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Computation
Series.bitwise_count_ones
Series.bitwise_count_zeros
Series.bitwise_leading_ones
Series.bitwise_leading_zeros
Series.bitwise_trailing_ones
Series.bitwise_trailing_zeros
Series.bitwise_and
Expand Down
Loading
Loading