diff --git a/Cargo.lock b/Cargo.lock index 605a6db2f..2f7d8207a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2283,6 +2283,7 @@ dependencies = [ "serde_json", "similar-asserts", "smallvec", + "stdutil", "strsim", "textwrap", "tokio", @@ -3031,6 +3032,10 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "stdutil" +version = "0.0.94" + [[package]] name = "stringprep" version = "0.1.5" diff --git a/crates/docgen/src/markdown_table.rs b/crates/docgen/src/markdown_table.rs index 70a6dc4e8..51783e48b 100644 --- a/crates/docgen/src/markdown_table.rs +++ b/crates/docgen/src/markdown_table.rs @@ -1,7 +1,7 @@ use std::fmt; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::arrays::format::{FormatOptions, Formatter}; @@ -13,7 +13,7 @@ const FORMATTER: Formatter = Formatter::new(FormatOptions { pub fn write_markdown_table<'a>( output: &mut dyn fmt::Write, schema: &Schema, - batches: impl IntoIterator, + batches: impl IntoIterator, ) -> Result<()> { // 'field1 | field2 | field3' let header = schema @@ -54,7 +54,7 @@ pub fn write_markdown_table<'a>( #[cfg(test)] mod tests { - use rayexec_execution::arrays::array::Array; + use rayexec_execution::arrays::array::Array2; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::field::Field; @@ -62,9 +62,9 @@ mod tests { #[test] fn simple() { - let batch = Batch::try_new([ - Array::from_iter([1, 2, 3]), - Array::from_iter(["cat", "dog", "mouse"]), + let batch = Batch2::try_new([ + Array2::from_iter([1, 2, 3]), + Array2::from_iter(["cat", "dog", "mouse"]), ]) .unwrap(); diff --git a/crates/rayexec_csv/src/copy_to.rs b/crates/rayexec_csv/src/copy_to.rs index aef93f92e..130703e87 100644 --- a/crates/rayexec_csv/src/copy_to.rs +++ b/crates/rayexec_csv/src/copy_to.rs @@ -1,7 +1,7 @@ use futures::future::BoxFuture; use futures::FutureExt; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::execution::operators::sink::PartitionSink; use rayexec_execution::functions::copy::CopyToFunction; @@ -53,7 +53,7 @@ pub struct CsvCopyToSink { } impl CsvCopyToSink { - async fn push_inner(&mut self, batch: Batch) -> Result<()> { + async fn push_inner(&mut self, batch: Batch2) -> Result<()> { let mut buf = Vec::with_capacity(1024); self.encoder.encode(&batch, &mut buf)?; self.sink.write_all(buf.into()).await?; @@ -68,7 +68,7 @@ impl CsvCopyToSink { } impl PartitionSink for CsvCopyToSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { self.push_inner(batch).boxed() } diff --git a/crates/rayexec_csv/src/datatable.rs b/crates/rayexec_csv/src/datatable.rs index 197822514..4bf1cda7b 100644 --- a/crates/rayexec_csv/src/datatable.rs +++ b/crates/rayexec_csv/src/datatable.rs @@ -2,7 +2,7 @@ use std::fmt::{self, Debug}; use futures::future::BoxFuture; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::runtime::Runtime; use rayexec_execution::storage::table_storage::{ DataTable, @@ -60,7 +60,7 @@ pub struct CsvFileScan { } impl DataTableScan for CsvFileScan { - fn pull(&mut self) -> 
BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.reader.read_next().await }) } } diff --git a/crates/rayexec_csv/src/reader.rs b/crates/rayexec_csv/src/reader.rs index 448bedebf..f733344f1 100644 --- a/crates/rayexec_csv/src/reader.rs +++ b/crates/rayexec_csv/src/reader.rs @@ -23,8 +23,8 @@ use bytes::Bytes; use futures::stream::BoxStream; use futures::StreamExt; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::array::{Array, ArrayData}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::array::{Array2, ArrayData2}; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::bitmap::Bitmap; use rayexec_execution::arrays::compute::cast::parse::{ BoolParser, @@ -342,7 +342,7 @@ impl AsyncCsvReader { AsyncCsvReader { stream } } - pub async fn read_next(&mut self) -> Result> { + pub async fn read_next(&mut self) -> Result> { self.stream.next_batch().await } } @@ -387,7 +387,7 @@ struct AsyncCsvStream { } impl AsyncCsvStream { - async fn next_batch(&mut self) -> Result> { + async fn next_batch(&mut self) -> Result> { loop { let (buf, offset) = match self.buf.take() { Some(buf) => (buf, self.buf_offset), @@ -455,7 +455,7 @@ impl AsyncCsvStream { completed: CompletedRecords, schema: &Schema, skip_header: bool, - ) -> Result { + ) -> Result { let skip_records = if skip_header { 1 } else { 0 }; let mut arrs = Vec::with_capacity(schema.fields.len()); @@ -483,14 +483,14 @@ impl AsyncCsvStream { arrs.push(arr); } - Batch::try_new(arrs) + Batch2::try_new(arrs) } fn build_boolean( completed: &CompletedRecords, field_idx: usize, skip_records: usize, - ) -> Result { + ) -> Result { let mut values = Bitmap::with_capacity(completed.num_completed()); let mut validity = Bitmap::with_capacity(completed.num_completed()); @@ -507,7 +507,7 @@ impl AsyncCsvStream { } } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( DataType::Boolean, validity, BooleanStorage::from(values), @@ -520,11 +520,11 @@ impl AsyncCsvStream { field_idx: usize, skip_records: usize, mut parser: P, - ) -> Result + ) -> Result where T: Default, P: Parser, - PrimitiveStorage: Into, + PrimitiveStorage: Into, { let mut values = Vec::with_capacity(completed.num_completed()); let mut validity = Bitmap::with_capacity(completed.num_completed()); @@ -544,7 +544,7 @@ impl AsyncCsvStream { } } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( datatype.clone(), validity, PrimitiveStorage::from(values), @@ -555,7 +555,7 @@ impl AsyncCsvStream { completed: &CompletedRecords, field_idx: usize, skip_records: usize, - ) -> Result { + ) -> Result { let mut values = GermanVarlenBuffer::with_len(completed.num_completed() - skip_records); let mut validity = Bitmap::with_capacity(completed.num_completed()); @@ -569,7 +569,7 @@ impl AsyncCsvStream { } } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( DataType::Utf8, validity, values.into_data(), diff --git a/crates/rayexec_csv/src/writer.rs b/crates/rayexec_csv/src/writer.rs index d6034f225..42e049310 100644 --- a/crates/rayexec_csv/src/writer.rs +++ b/crates/rayexec_csv/src/writer.rs @@ -2,7 +2,7 @@ use std::io::Write as _; use csv::ByteRecord; use rayexec_error::{Result, ResultExt}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use 
rayexec_execution::arrays::format::{FormatOptions, Formatter}; @@ -38,7 +38,7 @@ impl CsvEncoder { } } - pub fn encode(&mut self, batch: &Batch, output_buf: &mut Vec) -> Result<()> { + pub fn encode(&mut self, batch: &Batch2, output_buf: &mut Vec) -> Result<()> { const FORMATTER: Formatter = Formatter::new(FormatOptions::new()); let mut csv_writer = csv::WriterBuilder::new() diff --git a/crates/rayexec_debug/src/discard.rs b/crates/rayexec_debug/src/discard.rs index 5ffab5eca..3a32bbc31 100644 --- a/crates/rayexec_debug/src/discard.rs +++ b/crates/rayexec_debug/src/discard.rs @@ -1,6 +1,6 @@ use futures::future::BoxFuture; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::execution::operators::sink::PartitionSink; use rayexec_execution::functions::copy::CopyToFunction; @@ -33,7 +33,7 @@ impl CopyToFunction for DiscardCopyToFunction { struct DiscardCopyToSink; impl PartitionSink for DiscardCopyToSink { - fn push(&mut self, _batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, _batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(async { Ok(()) }) } diff --git a/crates/rayexec_debug/src/table_storage.rs b/crates/rayexec_debug/src/table_storage.rs index a3fb00e5c..3362e26b0 100644 --- a/crates/rayexec_debug/src/table_storage.rs +++ b/crates/rayexec_debug/src/table_storage.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use futures::future::BoxFuture; use parking_lot::Mutex; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Field; use rayexec_execution::database::catalog_entry::CatalogEntry; use rayexec_execution::execution::operators::sink::PartitionSink; @@ -28,7 +28,7 @@ pub struct TablePreload { pub schema: String, pub name: String, pub columns: Vec, - pub data: Batch, + pub data: Batch2, } #[derive(Debug, Default)] @@ -124,7 +124,7 @@ impl TableStorage for DebugTableStorage { #[derive(Debug, Clone, Default)] pub struct DebugDataTable { - data: Arc>>, + data: Arc>>, } impl DataTable for DebugDataTable { @@ -168,23 +168,23 @@ impl DataTable for DebugDataTable { #[derive(Debug)] pub struct DebugDataTableScan { - data: Vec, + data: Vec, } impl DataTableScan for DebugDataTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { Ok(self.data.pop()) }) } } #[derive(Debug)] pub struct DebugDataTableInsert { - collected: Vec, - data: Arc>>, + collected: Vec, + data: Arc>>, } impl PartitionSink for DebugDataTableInsert { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(async { self.collected.push(batch); Ok(()) diff --git a/crates/rayexec_delta/src/datatable.rs b/crates/rayexec_delta/src/datatable.rs index 003182e7c..4b004eb38 100644 --- a/crates/rayexec_delta/src/datatable.rs +++ b/crates/rayexec_delta/src/datatable.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use futures::future::BoxFuture; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::storage::table_storage::{DataTable, DataTableScan, Projections}; use crate::protocol::table::{Table, TableScan}; @@ -34,7 +34,7 @@ struct DeltaTableScan { } impl DataTableScan for DeltaTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn 
pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.scan.read_next().await }) } } diff --git a/crates/rayexec_delta/src/protocol/table.rs b/crates/rayexec_delta/src/protocol/table.rs index 2e99e2d7b..379c2f723 100644 --- a/crates/rayexec_delta/src/protocol/table.rs +++ b/crates/rayexec_delta/src/protocol/table.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use futures::{StreamExt, TryStreamExt}; use rayexec_error::{not_implemented, RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::{DataType, DecimalTypeMeta, TimeUnit, TimestampTypeMeta}; use rayexec_execution::arrays::field::{Field, Schema}; use rayexec_execution::arrays::scalar::decimal::{Decimal128Type, DecimalType}; @@ -182,7 +182,7 @@ pub struct TableScan { impl TableScan { /// Read the next batch. - pub async fn read_next(&mut self) -> Result> { + pub async fn read_next(&mut self) -> Result> { loop { if self.current.is_none() { let path = match self.paths.pop_front() { diff --git a/crates/rayexec_error/src/lib.rs b/crates/rayexec_error/src/lib.rs index 3d1e6a084..dacda57d5 100644 --- a/crates/rayexec_error/src/lib.rs +++ b/crates/rayexec_error/src/lib.rs @@ -16,12 +16,10 @@ macro_rules! not_implemented { } // TODO: Implement partial eq on msg -#[derive(Debug)] pub struct RayexecError { inner: Box, } -#[derive(Debug)] struct RayexecErrorInner { /// Message for the error. pub msg: String, @@ -153,6 +151,14 @@ impl fmt::Display for RayexecError { } } +impl fmt::Debug for RayexecError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Just use the Display impl for Debug, significantly easier to read + // especially when the error contains a backtrace. + write!(f, "{self}") + } +} + impl Error for RayexecError { fn source(&self) -> Option<&(dyn Error + 'static)> { self.inner.source.as_ref().map(|e| e.as_ref() as _) diff --git a/crates/rayexec_execution/Cargo.toml b/crates/rayexec_execution/Cargo.toml index cf3426671..bb23dab95 100644 --- a/crates/rayexec_execution/Cargo.toml +++ b/crates/rayexec_execution/Cargo.toml @@ -10,6 +10,7 @@ rayexec_parser = { path = "../rayexec_parser" } # rayexec_bullet = { path = "../rayexec_bullet" } rayexec_io = { path = "../rayexec_io" } fmtutil = { path = "../fmtutil" } +stdutil = { path = "../stdutil" } # stackutil = { path = "../stackutil" } TODO: psm hash issues when compiling to wasm on macos ahash = { workspace = true } diff --git a/crates/rayexec_execution/src/arrays/array/array_data.rs b/crates/rayexec_execution/src/arrays/array/array_data.rs new file mode 100644 index 000000000..8337bebd6 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/array/array_data.rs @@ -0,0 +1,157 @@ +use std::ops::Deref; + +use rayexec_error::{RayexecError, Result}; + +use crate::arrays::buffer::buffer_manager::{BufferManager, NopBufferManager}; +use crate::arrays::buffer::ArrayBuffer; + +/// Abstraction layer for determining where an array's buffer resides. +#[derive(Debug)] +pub struct ArrayData { + inner: ArrayDataInner, +} + +#[derive(Debug)] +enum ArrayDataInner { + /// Array buffer is being managed and is behind a shared pointer. + Managed(B::CowPtr>, Option>), + Owned(ArrayBuffer), + Uninit, +} + +impl ArrayData +where + B: BufferManager, +{ + pub(crate) fn owned(buffer: ArrayBuffer) -> Self { + ArrayData { + inner: ArrayDataInner::Owned(buffer), + } + } + + /// Set this array data to point to a buffer that's being managed. 
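+    /// Typically called with the pointer returned by `make_managed`, e.g. when
+    /// `Array::clone_from` shares one array's buffer with another.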
+ /// + /// If this array data was previously holding onto an owned buffer, we store + /// that so we can quickly reset back to it as needed without needing to + /// allocate an additional buffer. + pub(crate) fn set_managed(&mut self, managed: B::CowPtr>) -> Result<()> { + match std::mem::replace(&mut self.inner, ArrayDataInner::Uninit) { + ArrayDataInner::Managed(_, cached) => { + // Nothing fancy, just update the managed array. + self.inner = ArrayDataInner::Managed(managed, cached); + } + ArrayDataInner::Owned(owned) => { + // Cache our owned version so we can reset the data to a mutable + // variant as needed. + self.inner = ArrayDataInner::Managed(managed, Some(owned)) + } + ArrayDataInner::Uninit => panic!("Array data in invalid state"), + } + + Ok(()) + } + + pub fn is_managed(&self) -> bool { + matches!(self.inner, ArrayDataInner::Managed(_, _)) + } + + pub fn is_owned(&self) -> bool { + matches!(self.inner, ArrayDataInner::Owned(_)) + } + + /// Try to reset the array data for writes. + /// + /// If the buffer is already owned, nothing is done. If the buffer is + /// managed, but we have a cached owned buffer, we use the cached buffer to + /// make this `Owned`. + /// + /// Returns `Ok(())` if the reset was successful, `Err(())` otherwise. If + /// `Err(())` is returned, this remains unchanged. + pub(crate) fn try_reset_for_write(&mut self) -> Result<(), ()> { + match &mut self.inner { + ArrayDataInner::Managed(_, cached) => { + if let Some(cached) = cached.take() { + self.inner = ArrayDataInner::Owned(cached); + Ok(()) + } else { + // No cached buffer. + Err(()) + } + } + ArrayDataInner::Owned(_) => Ok(()), // Nothing to do, already writable. + ArrayDataInner::Uninit => panic!("Array data in invalid state"), + } + } + + /// Try to make the array managed by the buffer manager. + /// + /// Does nothing if the array is already managed. + /// + /// Returns an error if the array cannot be made to be managed. The array is + /// still valid (and remains in the 'owned' state). + /// + /// A cloned pointer to the newly managed array will be returned. + pub(crate) fn make_managed(&mut self, manager: &B) -> Result>> { + match &mut self.inner { + ArrayDataInner::Managed(m, _) => Ok(m.clone()), // Already managed. + ArrayDataInner::Owned(_) => { + let orig = std::mem::replace(&mut self.inner, ArrayDataInner::Uninit); + let array = match orig { + ArrayDataInner::Owned(array) => array, + _ => unreachable!("variant already checked"), + }; + + match manager.make_cow(array) { + Ok(managed) => { + self.inner = ArrayDataInner::Managed(managed, None); // Manager took ownership, nothing to cache. + match &self.inner { + ArrayDataInner::Managed(m, _) => Ok(m.clone()), + _ => unreachable!("variant just set"), + } + } + Err(orig) => { + // Manager rejected it, put it back as owned and return + // an error. 
+ self.inner = ArrayDataInner::Owned(orig); + Err(RayexecError::new("Failed to make batch array managed")) + } + } + } + ArrayDataInner::Uninit => panic!("array in uninit state"), + } + } + + pub fn try_as_mut(&mut self) -> Result<&mut ArrayBuffer> { + match &mut self.inner { + ArrayDataInner::Managed(_, _) => Err(RayexecError::new( + "Mut references from managed arrays not yet supported", + )), + ArrayDataInner::Owned(array) => Ok(array), + ArrayDataInner::Uninit => panic!("array in uninit state"), + } + } +} + +impl AsRef> for ArrayData +where + B: BufferManager, +{ + fn as_ref(&self) -> &ArrayBuffer { + match &self.inner { + ArrayDataInner::Managed(m, _) => m.as_ref(), + ArrayDataInner::Owned(array) => array, + ArrayDataInner::Uninit => panic!("array in uninit state"), + } + } +} + +impl Deref for ArrayData +where + B: BufferManager, +{ + type Target = ArrayBuffer; + + fn deref(&self) -> &Self::Target { + ArrayData::as_ref(self) + } +} diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs new file mode 100644 index 000000000..a726359f0 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -0,0 +1,862 @@ +use half::f16; +use rayexec_error::{not_implemented, RayexecError, Result}; +use stdutil::iter::{IntoExactSizeIterator, TryFromExactSizeIterator}; + +use super::array_data::ArrayData; +use super::flat::FlatArrayView; +use super::validity::Validity; +use crate::arrays::buffer::buffer_manager::{BufferManager, NopBufferManager}; +use crate::arrays::buffer::physical_type::{ + Addressable, + AddressableMut, + MutablePhysicalStorage, + PhysicalBinary, + PhysicalBool, + PhysicalDictionary, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalInterval, + PhysicalList, + PhysicalStorage, + PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUntypedNull, + PhysicalUtf8, +}; +use crate::arrays::buffer::string_view::StringViewHeap; +use crate::arrays::buffer::{ + ArrayBuffer, + DictionaryBuffer, + ListBuffer, + ListItemMetadata, + SecondaryBuffer, +}; +use crate::arrays::datatype::DataType; +use crate::arrays::scalar::decimal::{Decimal128Scalar, Decimal64Scalar}; +use crate::arrays::scalar::interval::Interval; +use crate::arrays::scalar::timestamp::TimestampScalar; +use crate::arrays::scalar::ScalarValue; + +#[derive(Debug)] +pub struct Array { + pub(crate) datatype: DataType, + pub(crate) validity: Validity, + pub(crate) data: ArrayData, +} + +impl Array +where + B: BufferManager, +{ + /// Create a new array with the given capacity. + /// + /// This will take care of initalizing the primary and secondary data + /// buffers depending on the type. 
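+    ///
+    /// Illustrative usage (a sketch, not exercised in this patch), relying on the
+    /// `NopBufferManager` default:
+    ///
+    /// ```ignore
+    /// let mut arr = Array::new(&NopBufferManager, DataType::Int32, 4)?;
+    /// arr.set_value(0, &ScalarValue::Int32(42))?;
+    /// let v = arr.get_value(0)?; // ScalarValue::Int32(42)
+    /// ```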
+ pub fn new(manager: &B, datatype: DataType, capacity: usize) -> Result { + let buffer = array_buffer_for_datatype(manager, &datatype, capacity)?; + let validity = Validity::new_all_valid(capacity); + + Ok(Array { + datatype, + validity, + data: ArrayData::owned(buffer), + }) + } + + pub fn datatype(&self) -> &DataType { + &self.datatype + } + + pub fn physical_type(&self) -> PhysicalType { + self.data().physical_type() + } + + pub fn data(&self) -> &ArrayData { + &self.data + } + + pub fn data_mut(&mut self) -> &mut ArrayData { + &mut self.data + } + + pub fn validity(&self) -> &Validity { + &self.validity + } + + pub fn validity_mut(&mut self) -> &mut Validity { + &mut self.validity + } + + pub fn put_validity(&mut self, validity: Validity) -> Result<()> { + if validity.len() != self.data().capacity() { + return Err(RayexecError::new("Invalid validity length") + .with_field("got", validity.len()) + .with_field("want", self.data.capacity())); + } + self.validity = validity; + Ok(()) + } + + pub fn capacity(&self) -> usize { + self.data.capacity() + } + + /// If this array is a dictionary array. + pub fn is_dictionary(&self) -> bool { + self.data.physical_type() == PhysicalType::Dictionary + } + + /// Return a flat array view for this array. + pub fn flat_view(&self) -> Result> { + FlatArrayView::from_array(self) + } + + /// Copy rows from self to another array. + /// + /// `mapping` provides a mapping of source indices to destination indices in + /// (source, dest) pairs. + pub fn copy_rows( + &self, + mapping: impl IntoExactSizeIterator, + dest: &mut Self, + ) -> Result<()> { + match self.datatype.physical_type() { + PhysicalType::Boolean => copy_rows::(self, mapping, dest)?, + PhysicalType::Int8 => copy_rows::(self, mapping, dest)?, + PhysicalType::Int16 => copy_rows::(self, mapping, dest)?, + PhysicalType::Int32 => copy_rows::(self, mapping, dest)?, + PhysicalType::Int64 => copy_rows::(self, mapping, dest)?, + PhysicalType::Int128 => copy_rows::(self, mapping, dest)?, + PhysicalType::UInt8 => copy_rows::(self, mapping, dest)?, + PhysicalType::UInt16 => copy_rows::(self, mapping, dest)?, + PhysicalType::UInt32 => copy_rows::(self, mapping, dest)?, + PhysicalType::UInt64 => copy_rows::(self, mapping, dest)?, + PhysicalType::UInt128 => copy_rows::(self, mapping, dest)?, + PhysicalType::Float16 => copy_rows::(self, mapping, dest)?, + PhysicalType::Float32 => copy_rows::(self, mapping, dest)?, + PhysicalType::Float64 => copy_rows::(self, mapping, dest)?, + PhysicalType::Interval => copy_rows::(self, mapping, dest)?, + PhysicalType::Utf8 => copy_rows::(self, mapping, dest)?, + _ => unimplemented!(), + } + + Ok(()) + } + + /// Selects indice from the array. + /// + /// This will convert the underlying array buffer into a dictionary buffer. + pub fn select( + &mut self, + manager: &B, + selection: impl IntoExactSizeIterator, + ) -> Result<()> { + if self.is_dictionary() { + // Already dictionary, select the selection. + let sel = selection.into_iter(); + let mut new_buf = + ArrayBuffer::with_primary_capacity::(manager, sel.len())?; + + let old_sel = self.data.try_as_slice::()?; + let new_sel = new_buf.try_as_slice_mut::()?; + + for (sel_idx, sel_buf) in sel.zip(new_sel) { + let idx = old_sel[sel_idx]; + *sel_buf = idx; + } + + // Now swap the secondary buffers, the dictionary buffer will now be + // on `new_buf`. + std::mem::swap( + self.data.try_as_mut()?.get_secondary_mut(), // TODO: Should just clone the pointer if managed. 
+ new_buf.get_secondary_mut(), + ); + + // And set the new buf, old buf gets dropped. + self.data = ArrayData::owned(new_buf); + + debug_assert!(matches!( + self.data.get_secondary(), + SecondaryBuffer::Dictionary(_) + )); + + return Ok(()); + } + + let sel = selection.into_iter(); + let mut new_buf = + ArrayBuffer::with_primary_capacity::(manager, sel.len())?; + + let new_buf_slice = new_buf.try_as_slice_mut::()?; + + // Set all selection indices in the new array buffer. + for (sel_idx, sel_buf) in sel.zip(new_buf_slice) { + *sel_buf = sel_idx + } + + // TODO: Probably verify selection all in bounds. + + // Now replace the original buffer, and put the original buffer in the + // secondary buffer. + let orig_validity = std::mem::replace( + &mut self.validity, + Validity::new_all_valid(new_buf.capacity()), + ); + let orig_buffer = std::mem::replace(&mut self.data, ArrayData::owned(new_buf)); + // TODO: Should just clone the pointer if managed. + self.data + .try_as_mut()? + .put_secondary_buffer(SecondaryBuffer::Dictionary(DictionaryBuffer { + validity: orig_validity, + buffer: orig_buffer, + })); + + debug_assert!(matches!( + self.data.get_secondary(), + SecondaryBuffer::Dictionary(_) + )); + + Ok(()) + } + + /// "Clones" some other array into this array. + /// + /// This will try to make the buffer from the other array managed to make it + /// cheaply cloneable and shared with this array. + /// + /// Array capacities and datatypes must be the same for both arrays. + pub fn clone_from(&mut self, manager: &B, other: &mut Self) -> Result<()> { + if self.datatype != other.datatype { + return Err(RayexecError::new( + "Attempted clone array from other array with different data types", + ) + .with_field("own_datatype", self.datatype.clone()) + .with_field("other_datatype", other.datatype.clone())); + } + + // TODO: Do we want this check? Dictionaries right now can have differing capacities based + // on selection inputs. + // if self.capacity() != other.capacity() { + // return Err(RayexecError::new( + // "Attempted to clone into array from other array with different capacity", + // ) + // .with_field("own_capacity", self.capacity()) + // .with_field("other_capacity", other.capacity())); + // } + + let managed = other.data.make_managed(manager)?; + self.data.set_managed(managed)?; + self.validity = other.validity.clone(); + + Ok(()) + } + + /// Resets self to prepare for writing to the array. + /// + /// This will: + /// - Reset validity to all 'valid'. + /// - Create or reuse a writeable buffer for array data. No guarantees are + /// made about the contents of the buffer. + /// + /// Bfuffer values _must_ be written for a row before attempting to read a + /// value for that row after calling this function. Underlying storage may + /// be cleared resulting in stale metadata (and thus invalid reads). + pub fn reset_for_write(&mut self, manager: &B) -> Result<()> { + self.validity = Validity::new_all_valid(self.capacity()); + + // Check if dictionary first since we want to try to get the underlying + // buffer from that. We should only have layer of "dictionary", so we + // shouldn't need to recurse. 
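+        //
+        // For example, after `select` turns an array into a dictionary, the
+        // original buffer is stored in the dictionary's secondary buffer; pulling
+        // it back out here lets the next write reuse that allocation instead of
+        // building a brand new buffer.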
+ if self.data.as_ref().physical_type() == PhysicalType::Dictionary { + let secondary = self.data.try_as_mut()?.get_secondary_mut(); + let dict = match std::mem::replace(secondary, SecondaryBuffer::None) { + SecondaryBuffer::Dictionary(dict) => dict, + other => { + return Err(RayexecError::new(format!( + "Expected dictionary secondary buffer, got {other:?}", + ))) + } + }; + + // TODO: Not sure what to do if capacities don't match. Currently + // dictionaries are only created through 'select' and the index + // buffer gets initialized to the length of the selection. + self.data = dict.buffer; + } + + if let Err(()) = self.data.try_reset_for_write() { + // Need to create a new buffer and set that. + let buffer = array_buffer_for_datatype(manager, &self.datatype, self.capacity())?; + self.data = ArrayData::owned(buffer) + } + + // Reset secondary buffers. + match self.data.try_as_mut()?.get_secondary_mut() { + SecondaryBuffer::StringViewHeap(heap) => { + heap.clear(); + // All metadata is stale. Panics may occur if attempting to read + // prior to writing new values for a row. + } + SecondaryBuffer::List(list) => { + list.entries = 0; + // Child array keeps its capacity, it'll be overwritten. List + // item metadata will become stale, but technically won't error. + } + SecondaryBuffer::Dictionary(_) => (), + SecondaryBuffer::None => (), + } + + Ok(()) + } + + pub fn get_value(&self, idx: usize) -> Result { + if idx >= self.capacity() { + return Err(RayexecError::new("Index out of bounds") + .with_field("idx", idx) + .with_field("capacity", self.capacity())); + } + + let flat = self.flat_view()?; + + if !flat.validity.is_valid(idx) { + return Ok(ScalarValue::Null); + } + + match &self.datatype { + DataType::Boolean => { + let v = PhysicalBool::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Boolean(*v)) + } + DataType::Int8 => { + let v = PhysicalI8::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Int8(*v)) + } + DataType::Int16 => { + let v = PhysicalI16::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Int16(*v)) + } + DataType::Int32 => { + let v = PhysicalI32::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Int32(*v)) + } + DataType::Int64 => { + let v = PhysicalI64::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Int64(*v)) + } + DataType::Int128 => { + let v = PhysicalI128::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Int128(*v)) + } + DataType::UInt8 => { + let v = PhysicalU8::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::UInt8(*v)) + } + DataType::UInt16 => { + let v = PhysicalU16::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::UInt16(*v)) + } + DataType::UInt32 => { + let v = PhysicalU32::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::UInt32(*v)) + } + DataType::UInt64 => { + let v = PhysicalU64::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::UInt64(*v)) + } + DataType::UInt128 => { + let v = PhysicalU128::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::UInt128(*v)) + } + DataType::Float16 => { + let v = PhysicalF16::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Float16(*v)) + } + DataType::Float32 => { + let v = PhysicalF32::get_addressable(flat.array_buffer)? 
+ .get(idx) + .unwrap(); + Ok(ScalarValue::Float32(*v)) + } + DataType::Float64 => { + let v = PhysicalF64::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Float64(*v)) + } + DataType::Decimal64(m) => { + let v = PhysicalI64::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Decimal64(Decimal64Scalar { + precision: m.precision, + scale: m.scale, + value: *v, + })) + } + DataType::Decimal128(m) => { + let v = PhysicalI128::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Decimal128(Decimal128Scalar { + precision: m.precision, + scale: m.scale, + value: *v, + })) + } + DataType::Interval => { + let v = PhysicalInterval::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Interval(*v)) + } + DataType::Timestamp(m) => { + let v = PhysicalI64::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Timestamp(TimestampScalar { + unit: m.unit, + value: *v, + })) + } + DataType::Utf8 => { + let addressable = PhysicalUtf8::get_addressable(flat.array_buffer)?; + // TODO: Don't allocate. Doesn't matter too much since this is + // just for constant eval right now. + let v = addressable.get(idx).unwrap().to_string(); + Ok(ScalarValue::Utf8(v.into())) + } + DataType::Binary => { + let addressable = PhysicalBinary::get_addressable(flat.array_buffer)?; + let v = addressable.get(idx).unwrap().to_vec(); + Ok(ScalarValue::Binary(v.into())) + } + + _ => not_implemented!("get value for scalar type"), + } + } + + /// Set a scalar value at a given index. + pub fn set_value(&mut self, idx: usize, val: &ScalarValue) -> Result<()> { + if idx >= self.capacity() { + return Err(RayexecError::new("Index out of bounds") + .with_field("idx", idx) + .with_field("capacity", self.capacity())); + } + + self.validity.set_valid(idx); + let data = self.data.try_as_mut()?; + + match val { + ScalarValue::Null => { + self.validity.set_invalid(idx); + } + ScalarValue::Boolean(val) => { + PhysicalBool::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Int8(val) => { + PhysicalI8::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Int16(val) => { + PhysicalI16::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Int32(val) => { + PhysicalI32::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Int64(val) => { + PhysicalI64::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Int128(val) => { + PhysicalI128::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::UInt8(val) => { + PhysicalU8::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::UInt16(val) => { + PhysicalU16::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::UInt32(val) => { + PhysicalU32::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::UInt64(val) => { + PhysicalU64::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::UInt128(val) => { + PhysicalU128::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Float16(val) => { + PhysicalF16::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Float32(val) => { + PhysicalF32::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Float64(val) => { + PhysicalF64::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Decimal64(val) => { + PhysicalI64::get_addressable_mut(data)?.put(idx, &val.value); + } + ScalarValue::Decimal128(val) => { + PhysicalI128::get_addressable_mut(data)?.put(idx, &val.value); + } + ScalarValue::Date32(val) => { + 
PhysicalI32::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Date64(val) => { + PhysicalI64::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Timestamp(val) => { + PhysicalI64::get_addressable_mut(data)?.put(idx, &val.value); + } + ScalarValue::Interval(val) => { + PhysicalInterval::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Utf8(val) => { + PhysicalUtf8::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Binary(val) => { + PhysicalBinary::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::List(list) => { + let secondary = self.data.try_as_mut()?.get_secondary_mut().get_list_mut()?; + + // Ensure we have space to push. + let rem_cap = secondary.child.capacity() - secondary.entries; + if rem_cap < list.len() { + // TODO: Just resize secondary. + return Err(RayexecError::new( + "Secondary list buffer does not have required capacity", + ) + .with_field("remaining", rem_cap) + .with_field("need", list.len())); + } + + for (child_idx, val) in (secondary.entries..).zip(list) { + secondary.child.set_value(child_idx, val)?; + } + + // Now update entry count in child. Original value is our offset + // index. + let start_offset = secondary.entries; + secondary.entries += list.len(); + + // Set metadata pointing to new list. + PhysicalList::get_addressable_mut(self.data.try_as_mut()?)?.put( + idx, + &ListItemMetadata { + offset: start_offset as i32, + len: list.len() as i32, + }, + ); + } + ScalarValue::Struct(_) => not_implemented!("set value for struct"), + } + + Ok(()) + } +} + +/// Create a new array buffer for a datatype. +fn array_buffer_for_datatype( + manager: &B, + datatype: &DataType, + capacity: usize, +) -> Result> +where + B: BufferManager, +{ + let buffer = match datatype.physical_type() { + PhysicalType::UntypedNull => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Boolean => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Int8 => ArrayBuffer::with_primary_capacity::(manager, capacity)?, + PhysicalType::Int16 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Int32 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Int64 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Int128 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::UInt8 => ArrayBuffer::with_primary_capacity::(manager, capacity)?, + PhysicalType::UInt16 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::UInt32 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::UInt64 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::UInt128 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Float16 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Float32 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Float64 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Interval => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? 
+ } + PhysicalType::Utf8 => { + let mut buffer = ArrayBuffer::with_primary_capacity::(manager, capacity)?; + buffer.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + buffer + } + PhysicalType::List => { + let inner_type = match &datatype { + DataType::List(m) => m.datatype.as_ref().clone(), + other => { + return Err(RayexecError::new(format!( + "Expected list datatype, got {other}" + ))) + } + }; + + let child = Array::new(manager, inner_type, capacity)?; + + let mut buffer = ArrayBuffer::with_primary_capacity::(manager, capacity)?; + buffer.put_secondary_buffer(SecondaryBuffer::List(ListBuffer::new(child))); + + buffer + } + other => not_implemented!("create array buffer for physical type {other}"), + }; + + Ok(buffer) +} + +/// Helper for copying rows. +fn copy_rows( + from: &Array, + mapping: impl IntoExactSizeIterator, + to: &mut Array, +) -> Result<()> +where + S: MutablePhysicalStorage, + B: BufferManager, +{ + let from_flat = from.flat_view()?; + let from_storage = S::get_addressable(from_flat.array_buffer)?; + + let to_data = to.data.try_as_mut()?; + let mut to_storage = S::get_addressable_mut(to_data)?; + + if from_flat.validity.all_valid() && to.validity.all_valid() { + for (from_idx, to_idx) in mapping.into_iter() { + let from_idx = from_flat.selection.get(from_idx).unwrap(); + let v = from_storage.get(from_idx).unwrap(); + to_storage.put(to_idx, v); + } + } else { + for (from_idx, to_idx) in mapping.into_iter() { + let from_idx = from_flat.selection.get(from_idx).unwrap(); + if from_flat.validity.is_valid(from_idx) { + let v = from_storage.get(from_idx).unwrap(); + to_storage.put(to_idx, v); + } else { + to.validity.set_invalid(to_idx); + } + } + } + + Ok(()) +} + +/// Implements `try_from_iter` for primitive types. +/// +/// Note these create arrays using Nop buffer manager and so really only +/// suitable for tests right now. +macro_rules! 
impl_primitive_from_iter { + ($prim:ty, $phys:ty, $typ_variant:ident) => { + impl TryFromExactSizeIterator<$prim> for Array { + type Error = RayexecError; + + fn try_from_iter>( + iter: T, + ) -> Result { + let iter = iter.into_iter(); + + let manager = NopBufferManager; + + let mut array = Array::new(&manager, DataType::$typ_variant, iter.len())?; + let slice = array.data.try_as_mut()?.try_as_slice_mut::<$phys>()?; + + for (dest, v) in slice.iter_mut().zip(iter) { + *dest = v; + } + + Ok(array) + } + } + }; +} + +impl_primitive_from_iter!(bool, PhysicalBool, Boolean); + +impl_primitive_from_iter!(i8, PhysicalI8, Int8); +impl_primitive_from_iter!(i16, PhysicalI16, Int16); +impl_primitive_from_iter!(i32, PhysicalI32, Int32); +impl_primitive_from_iter!(i64, PhysicalI64, Int64); +impl_primitive_from_iter!(i128, PhysicalI128, Int128); + +impl_primitive_from_iter!(u8, PhysicalU8, UInt8); +impl_primitive_from_iter!(u16, PhysicalU16, UInt16); +impl_primitive_from_iter!(u32, PhysicalU32, UInt32); +impl_primitive_from_iter!(u64, PhysicalU64, UInt64); +impl_primitive_from_iter!(u128, PhysicalU128, UInt128); + +impl_primitive_from_iter!(f16, PhysicalF16, Float16); +impl_primitive_from_iter!(f32, PhysicalF32, Float32); +impl_primitive_from_iter!(f64, PhysicalF64, Float64); + +impl_primitive_from_iter!(Interval, PhysicalInterval, Interval); + +impl<'a> TryFromExactSizeIterator<&'a str> for Array { + type Error = RayexecError; + + fn try_from_iter>( + iter: T, + ) -> Result { + let iter = iter.into_iter(); + let len = iter.len(); + + let mut buffer = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, len)?; + buffer.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + + let mut addressable = buffer.try_as_string_view_addressable_mut()?; + + for (idx, v) in iter.enumerate() { + addressable.put(idx, v); + } + + Ok(Array { + datatype: DataType::Utf8, + validity: Validity::new_all_valid(len), + data: ArrayData::owned(buffer), + }) + } +} + +/// From iterator implementation that creates an array from optionally valid +/// values. Some is treated as valid, None as invalid. +impl TryFromExactSizeIterator> for Array +where + V: Default, + Array: TryFromExactSizeIterator, +{ + type Error = RayexecError; + + fn try_from_iter>>( + iter: T, + ) -> Result { + let iter = iter.into_iter(); + let len = iter.len(); + + let mut validity = Validity::new_all_valid(len); + + // New iterator that just uses the default value for missing values, and + // sets the validity as appropriate. + let iter = iter.enumerate().map(|(idx, v)| { + if v.is_none() { + validity.set_invalid(idx); + } + v.unwrap_or_default() + }); + + let mut array = Self::try_from_iter(iter)?; + array.put_validity(validity)?; + + Ok(array) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::arrays::testutil::assert_arrays_eq; + + #[test] + fn reset_after_clone_from() { + let mut a1 = Array::try_from_iter(["a", "bb", "ccc"]).unwrap(); + let mut a2 = Array::try_from_iter(["d", "ee", "fff"]).unwrap(); + + a1.clone_from(&NopBufferManager, &mut a2).unwrap(); + + let expected = Array::try_from_iter(["d", "ee", "fff"]).unwrap(); + assert_arrays_eq(&expected, &a1); + assert_arrays_eq(&expected, &a2); + + a1.reset_for_write(&NopBufferManager).unwrap(); + + // Ensure we can write to it. 
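+        // After the reset, `a1` is back on its cached owned buffer, so writing
+        // here does not touch the managed buffer that `a2` still reads from.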
+ let mut strings = a1 + .data_mut() + .try_as_mut() + .unwrap() + .try_as_string_view_addressable_mut() + .unwrap(); + + strings.put(0, "hello"); + strings.put(1, "world"); + strings.put(2, "goodbye"); + + let expected = Array::try_from_iter(["hello", "world", "goodbye"]).unwrap(); + assert_arrays_eq(&expected, &a1); + } + + #[test] + fn reset_resets_validity() { + let mut a = Array::try_from_iter([Some("a"), None, Some("c")]).unwrap(); + assert!(!a.validity().all_valid()); + + a.reset_for_write(&NopBufferManager).unwrap(); + assert!(a.validity().all_valid()); + } +} diff --git a/crates/rayexec_execution/src/arrays/array/flat.rs b/crates/rayexec_execution/src/arrays/array/flat.rs new file mode 100644 index 000000000..f5756de0a --- /dev/null +++ b/crates/rayexec_execution/src/arrays/array/flat.rs @@ -0,0 +1,52 @@ +use rayexec_error::{RayexecError, Result}; + +use super::exp::Array; +use super::selection::Selection; +use super::validity::Validity; +use crate::arrays::buffer::buffer_manager::{BufferManager, NopBufferManager}; +use crate::arrays::buffer::physical_type::PhysicalDictionary; +use crate::arrays::buffer::{ArrayBuffer, SecondaryBuffer}; + +/// A view on top of normal arrays flattening some parts of the nested +/// structure. +#[derive(Debug)] +pub struct FlatArrayView<'a, B: BufferManager = NopBufferManager> { + pub(crate) validity: &'a Validity, + pub(crate) array_buffer: &'a ArrayBuffer, + pub(crate) selection: Selection<'a>, +} + +impl<'a, B> FlatArrayView<'a, B> +where + B: BufferManager, +{ + pub fn from_array(array: &'a Array) -> Result { + if array.is_dictionary() { + let selection = array.data.try_as_slice::()?; + let dict_buffer = match array.data.get_secondary() { + SecondaryBuffer::Dictionary(dict) => dict, + _ => { + return Err(RayexecError::new( + "Secondary buffer not a dictionary buffer", + )) + } + }; + + Ok(FlatArrayView { + validity: &dict_buffer.validity, + array_buffer: &dict_buffer.buffer, + selection: Selection::selection(selection), + }) + } else { + Ok(FlatArrayView { + validity: &array.validity, + array_buffer: &array.data, + selection: Selection::linear(array.capacity()), + }) + } + } + + pub fn logical_len(&self) -> usize { + self.selection.len() + } +} diff --git a/crates/rayexec_execution/src/arrays/array/mod.rs b/crates/rayexec_execution/src/arrays/array/mod.rs index 1278c58ec..882a21775 100644 --- a/crates/rayexec_execution/src/arrays/array/mod.rs +++ b/crates/rayexec_execution/src/arrays/array/mod.rs @@ -1,3 +1,9 @@ +pub mod array_data; +pub mod exp; +pub mod flat; +pub mod selection; +pub mod validity; + mod shared_or_owned; use std::fmt::Debug; @@ -17,26 +23,26 @@ use crate::arrays::executor::builder::{ }; use crate::arrays::executor::physical_type::{ PhysicalAny, - PhysicalBinary, - PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, - PhysicalType, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUtf8, + PhysicalBinary_2, + PhysicalBool_2, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, + PhysicalType2, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUtf8_2, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::scalar::decimal::{Decimal128Scalar, Decimal64Scalar}; use 
crate::arrays::scalar::interval::Interval; use crate::arrays::scalar::timestamp::TimestampScalar; @@ -59,7 +65,7 @@ pub type PhysicalValidity = SharedOrOwned; pub type LogicalSelection = SharedOrOwned; #[derive(Debug, Clone, PartialEq)] -pub struct Array { +pub struct Array2 { /// Data type of the array. pub(crate) datatype: DataType, /// Selection of rows for the array. @@ -74,10 +80,10 @@ pub struct Array { /// into account the selection vector, and always maps directly to the data. pub(crate) validity: Option, /// The physical data. - pub(crate) data: ArrayData, + pub(crate) data: ArrayData2, } -impl Array { +impl Array2 { pub fn new_untyped_null_array(len: usize) -> Self { // Note that we're adding a bitmap here even though the data already // returns NULL. This allows the executors (especially for aggregates) @@ -87,7 +93,7 @@ impl Array { let selection = SelectionVector::repeated(len, 0); let data = UntypedNullStorage(1); - Array { + Array2 { datatype: DataType::Null, selection: Some(selection.into()), validity: Some(validity.into()), @@ -99,11 +105,11 @@ impl Array { pub fn new_typed_null_array(datatype: DataType, len: usize) -> Result { // Create physical array data of length 1, and use a selection vector to // extend it out to the desired size. - let data = datatype.physical_type()?.zeroed_array_data(1); + let data = datatype.physical_type2()?.zeroed_array_data(1); let validity = Bitmap::new_with_all_false(1); let selection = SelectionVector::repeated(len, 0); - Ok(Array { + Ok(Array2 { datatype, selection: Some(selection.into()), validity: Some(validity.into()), @@ -111,8 +117,8 @@ impl Array { }) } - pub fn new_with_array_data(datatype: DataType, data: impl Into) -> Self { - Array { + pub fn new_with_array_data(datatype: DataType, data: impl Into) -> Self { + Array2 { datatype, selection: None, validity: None, @@ -123,9 +129,9 @@ impl Array { pub fn new_with_validity_and_array_data( datatype: DataType, validity: impl Into, - data: impl Into, + data: impl Into, ) -> Self { - Array { + Array2 { datatype, selection: None, validity: Some(validity.into()), @@ -137,9 +143,9 @@ impl Array { datatype: DataType, validity: impl Into, selection: impl Into, - data: impl Into, + data: impl Into, ) -> Self { - Array { + Array2 { datatype, selection: Some(selection.into()), validity: Some(validity.into()), @@ -242,20 +248,20 @@ impl Array { /// Returns the array data. /// /// ArrayData can be cheaply cloned. - pub fn array_data(&self) -> &ArrayData { + pub fn array_data(&self) -> &ArrayData2 { &self.data } - pub fn into_array_data(self) -> ArrayData { + pub fn into_array_data(self) -> ArrayData2 { self.data } /// Gets the physical type of the array. 
- pub fn physical_type(&self) -> PhysicalType { + pub fn physical_type(&self) -> PhysicalType2 { match self.data.physical_type() { - PhysicalType::Binary => match self.datatype { - DataType::Utf8 => PhysicalType::Utf8, - _ => PhysicalType::Binary, + PhysicalType2::Binary => match self.datatype { + DataType::Utf8 => PhysicalType2::Utf8, + _ => PhysicalType2::Binary, }, other => other, } @@ -297,13 +303,13 @@ impl Array { } match self.array_data() { - ArrayData::UntypedNull(_) => Ok(Array { + ArrayData2::UntypedNull(_) => Ok(Array2 { datatype: self.datatype.clone(), selection: None, validity: None, data: UntypedNullStorage(self.logical_len()).into(), }), - ArrayData::Boolean(_) => UnaryExecutor::execute::( + ArrayData2::Boolean(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -311,7 +317,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::Int8(_) => UnaryExecutor::execute::( + ArrayData2::Int8(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -319,7 +325,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::Int16(_) => UnaryExecutor::execute::( + ArrayData2::Int16(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -327,7 +333,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::Int32(_) => UnaryExecutor::execute::( + ArrayData2::Int32(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -335,7 +341,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::Int64(_) => UnaryExecutor::execute::( + ArrayData2::Int64(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -343,7 +349,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::Int128(_) => UnaryExecutor::execute::( + ArrayData2::Int128(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -351,7 +357,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::UInt8(_) => UnaryExecutor::execute::( + ArrayData2::UInt8(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -359,7 +365,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::UInt16(_) => UnaryExecutor::execute::( + ArrayData2::UInt16(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -367,7 +373,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::UInt32(_) => UnaryExecutor::execute::( + ArrayData2::UInt32(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -375,7 +381,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::UInt64(_) => UnaryExecutor::execute::( + ArrayData2::UInt64(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -383,7 +389,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::UInt128(_) => UnaryExecutor::execute::( + ArrayData2::UInt128(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -391,7 +397,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::Float16(_) => UnaryExecutor::execute::( + ArrayData2::Float16(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -399,7 +405,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::Float32(_) => UnaryExecutor::execute::( + ArrayData2::Float32(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -407,7 +413,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::Float64(_) 
=> UnaryExecutor::execute::( + ArrayData2::Float64(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -415,7 +421,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::Interval(_) => UnaryExecutor::execute::( + ArrayData2::Interval(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -423,7 +429,7 @@ impl Array { }, |v, buf| buf.put(&v), ), - ArrayData::Binary(_) => { + ArrayData2::Binary(_) => { // Use the german varlen storage for all output varlen arrays, // even if the input use using some other variant. // @@ -431,7 +437,7 @@ impl Array { // data while just selecting the appropriate metadata. Instead // this will just copy everything. if self.datatype().is_utf8() { - UnaryExecutor::execute::( + UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -440,7 +446,7 @@ impl Array { |v, buf| buf.put(v), ) } else { - UnaryExecutor::execute::( + UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -450,7 +456,7 @@ impl Array { ) } } - ArrayData::List(_) => Err(RayexecError::new("Cannot yet unselect list arrays")), + ArrayData2::List(_) => Err(RayexecError::new("Cannot yet unselect list arrays")), } } @@ -460,67 +466,67 @@ impl Array { pub fn physical_scalar(&self, idx: usize) -> Result { Ok(match &self.datatype { DataType::Null => match &self.data { - ArrayData::UntypedNull(_) => ScalarValue::Null, + ArrayData2::UntypedNull(_) => ScalarValue::Null, _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Boolean => match &self.data { - ArrayData::Boolean(arr) => arr.as_ref().as_ref().value(idx).into(), + ArrayData2::Boolean(arr) => arr.as_ref().as_ref().value(idx).into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Float16 => match &self.data { - ArrayData::Float16(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Float16(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Float32 => match &self.data { - ArrayData::Float32(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Float32(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Float64 => match &self.data { - ArrayData::Float64(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Float64(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Int8 => match &self.data { - ArrayData::Int8(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Int8(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Int16 => match &self.data { - ArrayData::Int16(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Int16(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Int32 => match &self.data { - ArrayData::Int32(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Int32(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Int64 => match &self.data { - ArrayData::Int64(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Int64(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Int128 => match &self.data { - ArrayData::Int64(arr) => 
arr.as_ref().as_ref()[idx].into(), + ArrayData2::Int64(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::UInt8 => match &self.data { - ArrayData::UInt8(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::UInt8(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::UInt16 => match &self.data { - ArrayData::UInt16(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::UInt16(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::UInt32 => match &self.data { - ArrayData::UInt32(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::UInt32(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::UInt64 => match &self.data { - ArrayData::UInt64(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::UInt64(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::UInt128 => match &self.data { - ArrayData::UInt64(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::UInt64(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Decimal64(m) => match &self.data { - ArrayData::Int64(arr) => ScalarValue::Decimal64(Decimal64Scalar { + ArrayData2::Int64(arr) => ScalarValue::Decimal64(Decimal64Scalar { precision: m.precision, scale: m.scale, value: arr.as_ref().as_ref()[idx], @@ -528,7 +534,7 @@ impl Array { _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Decimal128(m) => match &self.data { - ArrayData::Int128(arr) => ScalarValue::Decimal128(Decimal128Scalar { + ArrayData2::Int128(arr) => ScalarValue::Decimal128(Decimal128Scalar { precision: m.precision, scale: m.scale, value: arr.as_ref().as_ref()[idx], @@ -536,33 +542,33 @@ impl Array { _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Date32 => match &self.data { - ArrayData::Int32(arr) => ScalarValue::Date32(arr.as_ref().as_ref()[idx]), + ArrayData2::Int32(arr) => ScalarValue::Date32(arr.as_ref().as_ref()[idx]), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Date64 => match &self.data { - ArrayData::Int64(arr) => ScalarValue::Date64(arr.as_ref().as_ref()[idx]), + ArrayData2::Int64(arr) => ScalarValue::Date64(arr.as_ref().as_ref()[idx]), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Timestamp(m) => match &self.data { - ArrayData::Int64(arr) => ScalarValue::Timestamp(TimestampScalar { + ArrayData2::Int64(arr) => ScalarValue::Timestamp(TimestampScalar { unit: m.unit, value: arr.as_ref().as_ref()[idx], }), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Interval => match &self.data { - ArrayData::Interval(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Interval(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Utf8 => { let v = match &self.data { - ArrayData::Binary(BinaryData::Binary(arr)) => arr + ArrayData2::Binary(BinaryData::Binary(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, - ArrayData::Binary(BinaryData::LargeBinary(arr)) => arr + ArrayData2::Binary(BinaryData::LargeBinary(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, - 
ArrayData::Binary(BinaryData::German(arr)) => arr + ArrayData2::Binary(BinaryData::German(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, _other => return Err(array_not_valid_for_type_err(&self.datatype)), @@ -572,13 +578,13 @@ impl Array { } DataType::Binary => { let v = match &self.data { - ArrayData::Binary(BinaryData::Binary(arr)) => arr + ArrayData2::Binary(BinaryData::Binary(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, - ArrayData::Binary(BinaryData::LargeBinary(arr)) => arr + ArrayData2::Binary(BinaryData::LargeBinary(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, - ArrayData::Binary(BinaryData::German(arr)) => arr + ArrayData2::Binary(BinaryData::German(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, _other => return Err(array_not_valid_for_type_err(&self.datatype)), @@ -587,7 +593,7 @@ impl Array { } DataType::Struct(_) => not_implemented!("get value: struct"), DataType::List(_) => match &self.data { - ArrayData::List(list) => { + ArrayData2::List(list) => { let meta = list .metadata .as_slice() @@ -613,118 +619,118 @@ impl Array { match scalar { ScalarValue::Null => { - UnaryExecutor::value_at::(self, row).map(|arr_val| arr_val.is_none()) + UnaryExecutor2::value_at::(self, row).map(|arr_val| arr_val.is_none()) } // None == NULL ScalarValue::Boolean(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int8(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int16(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int32(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int64(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int128(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt8(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt16(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt32(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt64(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } 
ScalarValue::UInt128(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Float32(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Float64(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Date32(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Date64(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } - ScalarValue::Interval(v) => UnaryExecutor::value_at::(self, row).map( - |arr_val| match arr_val { + ScalarValue::Interval(v) => UnaryExecutor2::value_at::(self, row) + .map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, - }, - ), + }), ScalarValue::Utf8(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == v.as_ref(), None => false, }) } ScalarValue::Binary(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val + { Some(arr_val) => arr_val == v.as_ref(), None => false, }) } ScalarValue::Timestamp(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| { + UnaryExecutor2::value_at::(self, row).map(|arr_val| { // Assumes time unit is the same match arr_val { Some(arr_val) => arr_val == v.value, @@ -733,7 +739,7 @@ impl Array { }) } ScalarValue::Decimal64(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| { + UnaryExecutor2::value_at::(self, row).map(|arr_val| { // Assumes precision/scale are the same. match arr_val { Some(arr_val) => arr_val == v.value, @@ -742,7 +748,7 @@ impl Array { }) } ScalarValue::Decimal128(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| { + UnaryExecutor2::value_at::(self, row).map(|arr_val| { // Assumes precision/scale are the same. 
match arr_val { Some(arr_val) => arr_val == v.value, @@ -768,7 +774,7 @@ impl Array { None => SelectionVector::with_range(offset..(offset + count)), }; - Array { + Array2 { datatype: self.datatype.clone(), selection: Some(selection.into()), validity: self.validity.clone(), @@ -781,10 +787,10 @@ fn array_not_valid_for_type_err(datatype: &DataType) -> RayexecError { RayexecError::new(format!("Array data not valid for data type: {datatype}")) } -impl FromIterator> for Array +impl FromIterator> for Array2 where F: Default, - Array: FromIterator, + Array2: FromIterator, { fn from_iter>>(iter: T) -> Self { // TODO: Make a bit more performant, this is used for more than just @@ -803,14 +809,14 @@ where } } - let mut array = Array::from_iter(new_vals); + let mut array = Array2::from_iter(new_vals); array.validity = Some(validity.into()); array } } -impl FromIterator for Array { +impl FromIterator for Array2 { fn from_iter>(iter: T) -> Self { let iter = iter.into_iter(); let (lower, _) = iter.size_hint(); @@ -820,16 +826,16 @@ impl FromIterator for Array { german.try_push(s.as_bytes()).unwrap(); } - Array { + Array2 { datatype: DataType::Utf8, selection: None, validity: None, - data: ArrayData::Binary(BinaryData::German(Arc::new(german))), + data: ArrayData2::Binary(BinaryData::German(Arc::new(german))), } } } -impl<'a> FromIterator<&'a str> for Array { +impl<'a> FromIterator<&'a str> for Array2 { fn from_iter>(iter: T) -> Self { let iter = iter.into_iter(); let (lower, _) = iter.size_hint(); @@ -839,25 +845,25 @@ impl<'a> FromIterator<&'a str> for Array { german.try_push(s.as_bytes()).unwrap(); } - Array { + Array2 { datatype: DataType::Utf8, selection: None, validity: None, - data: ArrayData::Binary(BinaryData::German(Arc::new(german))), + data: ArrayData2::Binary(BinaryData::German(Arc::new(german))), } } } macro_rules! 
impl_primitive_from_iter { ($prim:ty, $variant:ident) => { - impl FromIterator<$prim> for Array { + impl FromIterator<$prim> for Array2 { fn from_iter>(iter: T) -> Self { let vals: Vec<_> = iter.into_iter().collect(); - Array { + Array2 { datatype: DataType::$variant, selection: None, validity: None, - data: ArrayData::$variant(Arc::new(vals.into())), + data: ArrayData2::$variant(Arc::new(vals.into())), } } } @@ -878,20 +884,20 @@ impl_primitive_from_iter!(f16, Float16); impl_primitive_from_iter!(f32, Float32); impl_primitive_from_iter!(f64, Float64); -impl FromIterator for Array { +impl FromIterator for Array2 { fn from_iter>(iter: T) -> Self { let vals: Bitmap = iter.into_iter().collect(); - Array { + Array2 { datatype: DataType::Boolean, selection: None, validity: None, - data: ArrayData::Boolean(Arc::new(vals.into())), + data: ArrayData2::Boolean(Arc::new(vals.into())), } } } #[derive(Debug, Clone, PartialEq)] -pub enum ArrayData { +pub enum ArrayData2 { UntypedNull(UntypedNullStorage), Boolean(Arc), Float16(Arc>), @@ -912,27 +918,27 @@ pub enum ArrayData { List(Arc), } -impl ArrayData { - pub fn physical_type(&self) -> PhysicalType { +impl ArrayData2 { + pub fn physical_type(&self) -> PhysicalType2 { match self { - Self::UntypedNull(_) => PhysicalType::UntypedNull, - Self::Boolean(_) => PhysicalType::Boolean, - Self::Float16(_) => PhysicalType::Float16, - Self::Float32(_) => PhysicalType::Float32, - Self::Float64(_) => PhysicalType::Float64, - Self::Int8(_) => PhysicalType::Int8, - Self::Int16(_) => PhysicalType::Int16, - Self::Int32(_) => PhysicalType::Int32, - Self::Int64(_) => PhysicalType::Int64, - Self::Int128(_) => PhysicalType::Int128, - Self::UInt8(_) => PhysicalType::UInt8, - Self::UInt16(_) => PhysicalType::UInt16, - Self::UInt32(_) => PhysicalType::UInt32, - Self::UInt64(_) => PhysicalType::UInt64, - Self::UInt128(_) => PhysicalType::UInt128, - Self::Interval(_) => PhysicalType::Interval, - Self::Binary(_) => PhysicalType::Binary, - Self::List(_) => PhysicalType::List, + Self::UntypedNull(_) => PhysicalType2::UntypedNull, + Self::Boolean(_) => PhysicalType2::Boolean, + Self::Float16(_) => PhysicalType2::Float16, + Self::Float32(_) => PhysicalType2::Float32, + Self::Float64(_) => PhysicalType2::Float64, + Self::Int8(_) => PhysicalType2::Int8, + Self::Int16(_) => PhysicalType2::Int16, + Self::Int32(_) => PhysicalType2::Int32, + Self::Int64(_) => PhysicalType2::Int64, + Self::Int128(_) => PhysicalType2::Int128, + Self::UInt8(_) => PhysicalType2::UInt8, + Self::UInt16(_) => PhysicalType2::UInt16, + Self::UInt32(_) => PhysicalType2::UInt32, + Self::UInt64(_) => PhysicalType2::UInt64, + Self::UInt128(_) => PhysicalType2::UInt128, + Self::Interval(_) => PhysicalType2::Interval, + Self::Binary(_) => PhysicalType2::Binary, + Self::List(_) => PhysicalType2::List, } } @@ -959,7 +965,7 @@ impl ArrayData { BinaryData::LargeBinary(s) => s.len(), BinaryData::German(s) => s.len(), }, - ArrayData::List(s) => s.len(), + ArrayData2::List(s) => s.len(), } } @@ -988,111 +994,111 @@ impl BinaryData { } } -impl From for ArrayData { +impl From for ArrayData2 { fn from(value: UntypedNullStorage) -> Self { - ArrayData::UntypedNull(value) + ArrayData2::UntypedNull(value) } } -impl From for ArrayData { +impl From for ArrayData2 { fn from(value: BooleanStorage) -> Self { - ArrayData::Boolean(value.into()) + ArrayData2::Boolean(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Float16(value.into()) + 
ArrayData2::Float16(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Float32(value.into()) + ArrayData2::Float32(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Float64(value.into()) + ArrayData2::Float64(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Int8(value.into()) + ArrayData2::Int8(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Int16(value.into()) + ArrayData2::Int16(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Int32(value.into()) + ArrayData2::Int32(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Int64(value.into()) + ArrayData2::Int64(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Int128(value.into()) + ArrayData2::Int128(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::UInt8(value.into()) + ArrayData2::UInt8(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::UInt16(value.into()) + ArrayData2::UInt16(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::UInt32(value.into()) + ArrayData2::UInt32(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::UInt64(value.into()) + ArrayData2::UInt64(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::UInt128(value.into()) + ArrayData2::UInt128(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Interval(value.into()) + ArrayData2::Interval(value.into()) } } -impl From for ArrayData { +impl From for ArrayData2 { fn from(value: GermanVarlenStorage) -> Self { - ArrayData::Binary(BinaryData::German(Arc::new(value))) + ArrayData2::Binary(BinaryData::German(Arc::new(value))) } } -impl From for ArrayData { +impl From for ArrayData2 { fn from(value: ListStorage) -> Self { - ArrayData::List(Arc::new(value)) + ArrayData2::List(Arc::new(value)) } } @@ -1103,7 +1109,7 @@ mod tests { #[test] fn select_mut_no_change() { - let mut arr = Array::from_iter(["a", "b", "c"]); + let mut arr = Array2::from_iter(["a", "b", "c"]); let selection = SelectionVector::with_range(0..3); arr.select_mut(selection); @@ -1115,7 +1121,7 @@ mod tests { #[test] fn select_mut_prune_rows() { - let mut arr = Array::from_iter(["a", "b", "c"]); + let mut arr = Array2::from_iter(["a", "b", "c"]); let selection = SelectionVector::from_iter([0, 2]); arr.select_mut(selection); @@ -1127,7 +1133,7 @@ mod tests { #[test] fn select_mut_expand_rows() { - let mut arr = Array::from_iter(["a", "b", "c"]); + let mut arr = Array2::from_iter(["a", "b", "c"]); let selection = SelectionVector::from_iter([0, 1, 1, 2]); arr.select_mut(selection); @@ -1141,7 +1147,7 @@ mod tests { #[test] fn select_mut_existing_selection() { - let mut arr = Array::from_iter(["a", 
"b", "c"]); + let mut arr = Array2::from_iter(["a", "b", "c"]); let selection = SelectionVector::from_iter([0, 2]); // => ["a", "c"] @@ -1158,7 +1164,7 @@ mod tests { #[test] fn scalar_value_logical_eq_i32() { - let arr = Array::from_iter([1, 2, 3]); + let arr = Array2::from_iter([1, 2, 3]); let scalar = ScalarValue::Int32(2); assert!(!arr.scalar_value_logically_eq(&scalar, 0).unwrap()); @@ -1167,7 +1173,7 @@ mod tests { #[test] fn scalar_value_logical_eq_null() { - let arr = Array::from_iter([Some(1), None, Some(3)]); + let arr = Array2::from_iter([Some(1), None, Some(3)]); let scalar = ScalarValue::Null; assert!(!arr.scalar_value_logically_eq(&scalar, 0).unwrap()); diff --git a/crates/rayexec_execution/src/arrays/array/selection.rs b/crates/rayexec_execution/src/arrays/array/selection.rs new file mode 100644 index 000000000..fe40a5588 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/array/selection.rs @@ -0,0 +1,110 @@ +#[derive(Debug, Clone, Copy)] +pub enum Selection<'a> { + /// Constant selection. + /// + /// All indices point to the same location. + Constant { len: usize, loc: usize }, + /// Represents a linear selection. + /// + /// '0..len' + Linear { len: usize }, + /// Represents the true location to use for some index. + Selection(&'a [usize]), +} + +impl<'a> Selection<'a> { + pub fn constant(len: usize, loc: usize) -> Self { + Self::Constant { len, loc } + } + + pub fn linear(len: usize) -> Self { + Self::Linear { len } + } + + pub fn selection(sel: &'a [usize]) -> Self { + Self::Selection(sel) + } + + pub fn is_linear(&self) -> bool { + matches!(self, Selection::Linear { .. }) + } + + pub fn iter(&self) -> FlatSelectionIter { + FlatSelectionIter { idx: 0, sel: *self } + } + + pub fn len(&self) -> usize { + match self { + Self::Constant { len, .. } => *len, + Self::Linear { len } => *len, + Self::Selection(sel) => sel.len(), + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + #[inline] + pub fn get(&self, idx: usize) -> Option { + match self { + Self::Constant { len, loc } => { + if idx >= *len { + None + } else { + Some(*loc) + } + } + Self::Linear { len } => { + if idx >= *len { + None + } else { + Some(idx) + } + } + Self::Selection(sel) => sel.get(idx).copied(), + } + } +} + +impl<'a> IntoIterator for Selection<'a> { + type Item = usize; + type IntoIter = FlatSelectionIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + FlatSelectionIter { idx: 0, sel: self } + } +} + +#[derive(Debug)] +pub struct FlatSelectionIter<'a> { + idx: usize, + sel: Selection<'a>, +} + +impl Iterator for FlatSelectionIter<'_> { + type Item = usize; + + fn next(&mut self) -> Option { + if self.idx >= self.sel.len() { + return None; + } + + let v = match self.sel { + Selection::Constant { loc, .. } => loc, + Selection::Linear { .. } => self.idx, + Selection::Selection(sel) => sel[self.idx], + }; + + self.idx += 1; + + Some(v) + } + + fn size_hint(&self) -> (usize, Option) { + let rem = self.sel.len() - self.idx; + (rem, Some(rem)) + } +} + +impl ExactSizeIterator for FlatSelectionIter<'_> {} diff --git a/crates/rayexec_execution/src/arrays/array/validity.rs b/crates/rayexec_execution/src/arrays/array/validity.rs new file mode 100644 index 000000000..beaa15ff9 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/array/validity.rs @@ -0,0 +1,109 @@ +use crate::arrays::bitmap::Bitmap; + +#[derive(Debug, Clone)] +pub struct Validity { + inner: ValidityInner, +} + +#[derive(Debug, Clone)] +enum ValidityInner { + /// No mask has been set, assume all entries valid. 
+ AllValid { len: usize }, + /// All entries invalid. + AllInvalid { len: usize }, + /// Mask has been set. Bitmap indicates which entries are valid or invalid. + Mask { bitmap: Bitmap }, +} + +impl Validity { + pub fn new_all_valid(len: usize) -> Self { + Validity { + inner: ValidityInner::AllValid { len }, + } + } + + pub fn new_all_invalid(len: usize) -> Self { + Validity { + inner: ValidityInner::AllInvalid { len }, + } + } + + pub fn len(&self) -> usize { + match &self.inner { + ValidityInner::AllValid { len } => *len, + ValidityInner::AllInvalid { len } => *len, + ValidityInner::Mask { bitmap } => bitmap.len(), + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn all_valid(&self) -> bool { + match &self.inner { + ValidityInner::AllValid { .. } => true, + ValidityInner::AllInvalid { .. } => false, + ValidityInner::Mask { bitmap } => bitmap.is_all_true(), + } + } + + pub fn is_valid(&self, idx: usize) -> bool { + match &self.inner { + ValidityInner::AllValid { .. } => true, + ValidityInner::AllInvalid { .. } => false, + ValidityInner::Mask { bitmap } => bitmap.value(idx), + } + } + + pub fn set_valid(&mut self, idx: usize) { + match &mut self.inner { + ValidityInner::AllValid { .. } => (), // Already valid, + ValidityInner::AllInvalid { len } => { + let mut bitmap = Bitmap::new_with_all_false(*len); + bitmap.set_unchecked(idx, true); + self.inner = ValidityInner::Mask { bitmap } + } + ValidityInner::Mask { bitmap } => bitmap.set_unchecked(idx, true), + } + } + + pub fn set_invalid(&mut self, idx: usize) { + match &mut self.inner { + ValidityInner::AllValid { len } => { + let mut bitmap = Bitmap::new_with_all_true(*len); + bitmap.set_unchecked(idx, false); + self.inner = ValidityInner::Mask { bitmap } + } + ValidityInner::AllInvalid { .. } => (), // Nothing to do, already invalid. + ValidityInner::Mask { bitmap } => bitmap.set_unchecked(idx, false), + } + } + + pub fn iter(&self) -> ValidityIter { + ValidityIter { + idx: 0, + validity: self, + } + } +} + +#[derive(Debug)] +pub struct ValidityIter<'a> { + idx: usize, + validity: &'a Validity, +} + +impl Iterator for ValidityIter<'_> { + type Item = bool; + + fn next(&mut self) -> Option { + if self.idx >= self.validity.len() { + return None; + } + + let val = self.validity.is_valid(self.idx); + self.idx += 1; + Some(val) + } +} diff --git a/crates/rayexec_execution/src/arrays/batch.rs b/crates/rayexec_execution/src/arrays/batch.rs index 61e6b2628..413db2822 100644 --- a/crates/rayexec_execution/src/arrays/batch.rs +++ b/crates/rayexec_execution/src/arrays/batch.rs @@ -2,32 +2,32 @@ use std::sync::Arc; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::executor::scalar::concat_with_exact_total_len; use crate::arrays::row::ScalarRow; use crate::arrays::selection::SelectionVector; /// A batch of same-length arrays. #[derive(Debug, Clone, PartialEq)] -pub struct Batch { +pub struct Batch2 { /// Columns that make up this batch. - cols: Vec, + cols: Vec, /// Number of rows in this batch. Needed to allow for a batch that has no /// columns but a non-zero number of rows. num_rows: usize, } -impl Batch { +impl Batch2 { pub const fn empty() -> Self { - Batch { + Batch2 { cols: Vec::new(), num_rows: 0, } } pub fn empty_with_num_rows(num_rows: usize) -> Self { - Batch { + Batch2 { cols: Vec::new(), num_rows, } @@ -36,7 +36,7 @@ impl Batch { /// Concat multiple batches into one. /// /// Batches are requried to have the same logical schemas. 
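// A minimal illustrative sketch (hypothetical usage, assuming the Validity API
// introduced above): a mask starts as AllValid and only materializes a Bitmap
// once a single entry is flipped, so fully valid arrays never allocate one.
fn validity_sketch() {
    let mut validity = Validity::new_all_valid(4);
    assert!(validity.all_valid());

    // Marking one entry invalid switches the internal representation to a mask.
    validity.set_invalid(2);
    assert!(!validity.all_valid());
    assert!(validity.is_valid(0));
    assert!(!validity.is_valid(2));

    let vals: Vec<bool> = validity.iter().collect();
    assert_eq!(vec![true, true, false, true], vals);
}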
- pub fn concat(batches: &[Batch]) -> Result { + pub fn concat(batches: &[Batch2]) -> Result { let num_cols = match batches.first() { Some(batch) => batch.num_columns(), None => return Err(RayexecError::new("Cannot concat zero batches")), @@ -57,7 +57,7 @@ impl Batch { // Special case for zero col batches. The true number of rows wouldn't // be reflected if we just attempted to concat no array. if num_cols == 0 { - return Ok(Batch::empty_with_num_rows(num_rows)); + return Ok(Batch2::empty_with_num_rows(num_rows)); } let mut output_cols = Vec::with_capacity(num_cols); @@ -74,13 +74,13 @@ impl Batch { working_arrays.clear(); } - Batch::try_new(output_cols) + Batch2::try_new(output_cols) } /// Create a new batch from some number of arrays. /// /// All arrays should have the same logical length. - pub fn try_new(cols: impl IntoIterator) -> Result { + pub fn try_new(cols: impl IntoIterator) -> Result { let cols: Vec<_> = cols.into_iter().collect(); let len = match cols.first() { Some(arr) => arr.logical_len(), @@ -96,7 +96,7 @@ impl Batch { } } - Ok(Batch { + Ok(Batch2 { cols, num_rows: len, }) @@ -106,7 +106,7 @@ impl Batch { pub fn project(&self, indices: &[usize]) -> Self { let cols = indices.iter().map(|idx| self.cols[*idx].clone()).collect(); - Batch { + Batch2 { cols, num_rows: self.num_rows, } @@ -114,7 +114,7 @@ impl Batch { pub fn slice(&self, offset: usize, count: usize) -> Self { let cols = self.cols.iter().map(|c| c.slice(offset, count)).collect(); - Batch { + Batch2 { cols, num_rows: count, } @@ -124,7 +124,7 @@ impl Batch { /// /// This accepts an Arc selection as it'll be cloned for each array in the /// batch. - pub fn select(&self, selection: Arc) -> Batch { + pub fn select(&self, selection: Arc) -> Batch2 { let cols = self .cols .iter() @@ -135,7 +135,7 @@ impl Batch { }) .collect(); - Batch { + Batch2 { cols, num_rows: selection.as_ref().num_rows(), } @@ -158,15 +158,15 @@ impl Batch { Some(ScalarRow::from_iter(row)) } - pub fn column(&self, idx: usize) -> Option<&Array> { + pub fn column(&self, idx: usize) -> Option<&Array2> { self.cols.get(idx) } - pub fn columns(&self) -> &[Array] { + pub fn columns(&self) -> &[Array2] { &self.cols } - pub fn columns_mut(&mut self) -> &mut [Array] { + pub fn columns_mut(&mut self) -> &mut [Array2] { &mut self.cols } @@ -178,7 +178,7 @@ impl Batch { self.num_rows } - pub fn into_arrays(self) -> Vec { + pub fn into_arrays(self) -> Vec { self.cols } } diff --git a/crates/rayexec_execution/src/arrays/batch_exp.rs b/crates/rayexec_execution/src/arrays/batch_exp.rs new file mode 100644 index 000000000..39a5becad --- /dev/null +++ b/crates/rayexec_execution/src/arrays/batch_exp.rs @@ -0,0 +1,183 @@ +use rayexec_error::{RayexecError, Result}; +use stdutil::iter::IntoExactSizeIterator; + +use super::array::exp::Array; +use super::array::selection::Selection; +use super::buffer::buffer_manager::{BufferManager, NopBufferManager}; +use super::datatype::DataType; + +#[derive(Debug)] +pub struct Batch { + /// Arrays making up the batch. + /// + /// All arrays must have the same capacity (underlying length). + pub(crate) arrays: Vec>, + /// Number of logical rows in the batch. + /// + /// Equal to or less than capacity when batch contains at least one array. + /// If the batch contains no arrays, number of rows can be arbitarily set. + /// + /// This allows "resizing" batches without needed to resize the underlying + /// arrays, allowing for buffer reuse. + pub(crate) num_rows: usize, + /// Capacity (in number of rows) of the batch. 
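// A minimal illustrative sketch (hypothetical usage, assuming the Batch2 API
// above): a batch with zero columns can still carry a row count, which is why
// concat special-cases the no-column case instead of deriving the count from
// the (empty) list of arrays.
fn zero_column_batch_sketch() -> Result<()> {
    let a = Batch2::empty_with_num_rows(3);
    let b = Batch2::empty_with_num_rows(2);

    let combined = Batch2::concat(&[a, b])?;
    assert_eq!(0, combined.num_columns());
    assert_eq!(5, combined.num_rows());

    Ok(())
}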
+ /// + /// This should match the capacity of the arrays. If there are zero arrays + /// in the batch, this should be zero. + pub(crate) capacity: usize, +} + +impl Batch +where + B: BufferManager, +{ + /// Create an empty batch with zero rows. + pub const fn empty() -> Self { + Self::empty_with_num_rows(0) + } + + /// Create an empty batch with some number of rows. + pub const fn empty_with_num_rows(num_rows: usize) -> Self { + Batch { + arrays: Vec::new(), + num_rows, + capacity: 0, + } + } + + /// Create a batch by initializing arrays for the given datatypes. + /// + /// Each array will be initialized to hold `capacity` rows. + pub fn new( + manager: &B, + datatypes: impl IntoExactSizeIterator, + capacity: usize, + ) -> Result { + let datatypes = datatypes.into_iter(); + let mut arrays = Vec::with_capacity(datatypes.len()); + + for datatype in datatypes { + let array = Array::new(manager, datatype, capacity)?; + arrays.push(array) + } + + Ok(Batch { + arrays, + num_rows: 0, + capacity, + }) + } + + /// Create a new batch from some number of arrays. + /// + /// All arrays are expected to have the same capacity. + /// + /// `row_eq_cap` indicates if the logical cardinality of the batch should + /// equal the capacity of the arrays. If false, the logical cardinality will + /// be set to zero. + pub(crate) fn try_from_arrays( + arrays: impl IntoIterator>, + rows_eq_cap: bool, + ) -> Result { + let arrays: Vec<_> = arrays.into_iter().collect(); + let capacity = match arrays.first() { + Some(arr) => arr.capacity(), + None => { + return Ok(Batch { + arrays: Vec::new(), + num_rows: 0, + capacity: 0, + }) + } + }; + + for array in &arrays { + if array.capacity() != capacity { + return Err(RayexecError::new( + "Attempted to create batch from arrays with different capacities", + ) + .with_field("expected", capacity) + .with_field("got", array.capacity())); + } + } + + Ok(Batch { + arrays, + num_rows: if rows_eq_cap { capacity } else { 0 }, + capacity, + }) + } + + /// Clones `other` into self. + pub fn clone_from(&mut self, manager: &B, other: &mut Self) -> Result<()> { + if self.arrays.len() != other.arrays.len() { + return Err(RayexecError::new( + "Attempted to clone from other batch with different number of arrays", + )); + } + + for (a, b) in self.arrays.iter_mut().zip(other.arrays.iter_mut()) { + a.clone_from(manager, b)?; + } + + self.set_num_rows(other.num_rows())?; + + Ok(()) + } + + /// Selects rows from the batch based on `selection`. + pub fn select(&mut self, manager: &B, selection: &[usize]) -> Result<()> { + for arr in &mut self.arrays { + arr.select(manager, selection.iter().copied())?; + } + + self.set_num_rows(selection.len())?; + + Ok(()) + } + + pub fn num_rows(&self) -> usize { + self.num_rows + } + + pub fn set_num_rows(&mut self, rows: usize) -> Result<()> { + if rows > self.capacity { + return Err(RayexecError::new("Number of rows exceeds capacity") + .with_field("capacity", self.capacity) + .with_field("requested_num_rows", rows)); + } + self.num_rows = rows; + + Ok(()) + } + + /// Returns a selection that selects rows [0, num_rows). 
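// A minimal illustrative sketch (hypothetical usage, assuming the new Batch API
// above with the NopBufferManager placeholder, and assuming a plain array of
// DataType satisfies the IntoExactSizeIterator bound): arrays are allocated once
// at `capacity`, while set_num_rows only changes the logical cardinality,
// allowing buffer reuse between fills.
fn batch_capacity_sketch() -> Result<()> {
    let mut batch = Batch::new(&NopBufferManager, [DataType::Int32, DataType::Utf8], 1024)?;
    assert_eq!(0, batch.num_rows());

    // Pretend we filled the first 16 rows of each array.
    batch.set_num_rows(16)?;
    assert_eq!(16, batch.selection().len());

    // Exceeding the underlying capacity is an error.
    assert!(batch.set_num_rows(2048).is_err());

    Ok(())
}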
+ pub fn selection<'a>(&self) -> Selection<'a> { + Selection::Linear { len: self.num_rows } + } + + pub fn arrays(&self) -> &[Array] { + &self.arrays + } + + pub fn arrays_mut(&mut self) -> &mut [Array] { + &mut self.arrays + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + + #[test] + fn from_arrays_all_same_len() { + let a = Array::try_from_iter([3, 4, 5]).unwrap(); + let b = Array::try_from_iter(["a", "b", "c"]).unwrap(); + + let batch = Batch::try_from_arrays([a, b], true).unwrap(); + + assert_eq!(3, batch.selection().len()); + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/any.rs b/crates/rayexec_execution/src/arrays/buffer/any.rs new file mode 100644 index 000000000..be7b4b39d --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/any.rs @@ -0,0 +1,35 @@ +use super::buffer_manager::BufferManager; +use super::physical_type::Addressable; +use super::ArrayBuffer; + +/// Representation of the existence of a value. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct AnyValue; + +/// Wrapper around an array buffer for providing `AddressableStorage` +/// functionality for any array buffer type. +/// +/// This is used when the values themselves don't matter, only that they exist. +#[derive(Debug)] +pub struct AnyAddressable<'a, B: BufferManager> { + pub(crate) buffer: &'a ArrayBuffer, +} + +impl Addressable for AnyAddressable<'_, B> +where + B: BufferManager, +{ + type T = AnyValue; + + fn len(&self) -> usize { + self.buffer.capacity() + } + + fn get(&self, idx: usize) -> Option<&Self::T> { + if idx < self.buffer.capacity() { + Some(&AnyValue) + } else { + None + } + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs b/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs new file mode 100644 index 000000000..cdbb0651f --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs @@ -0,0 +1,51 @@ +use std::fmt::Debug; +use std::ops::Deref; +use std::sync::Arc; + +use rayexec_error::Result; + +pub trait BufferManager: Debug + Sync + Send + Clone { + type Reservation: Debug; + // TODO: T => Spillable or something. + type CowPtr: CowPtr + where + T: Debug; + + fn reserve_external(&self, additional_bytes: usize) -> Result; + + fn make_cow(&self, item: T) -> Result, T>; +} + +// TODO: Probably rename, I don't think we want the 'cow' logic on this. Instead +// that'll probably be on ArrayData. +pub trait CowPtr: Debug + Clone + AsRef + Deref { + // TODO: Clone on write. + // + // Will need to be able to get the underlying reservation in order to track + // appropriately. + // + // Also might need to recurse to make sure everything is writable, not sure + // yet. +} + +impl CowPtr for Arc where T: Debug {} + +/// Placeholder buffer manager. 
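// A minimal illustrative sketch (hypothetical usage, assuming the BufferManager
// trait above): the placeholder NopBufferManager hands out unit reservations,
// so buffer creation works without any real memory accounting.
fn nop_manager_sketch() -> Result<()> {
    let manager = NopBufferManager;

    // Reservations are a no-op; the unit type stands in for a real accounting handle.
    let _reservation = manager.reserve_external(64 * 1024)?;

    Ok(())
}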
+#[derive(Debug, Clone)] +pub struct NopBufferManager; + +impl BufferManager for NopBufferManager { + type Reservation = (); + type CowPtr + = Arc + where + T: Debug; + + fn reserve_external(&self, _additional_bytes: usize) -> Result { + Ok(()) + } + + fn make_cow(&self, item: T) -> Result, T> { + Ok(Arc::new(item)) + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs new file mode 100644 index 000000000..36b473300 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -0,0 +1,344 @@ +pub mod any; +pub mod buffer_manager; +pub mod physical_type; +pub mod string_view; + +mod raw; + +use any::AnyAddressable; +use buffer_manager::{BufferManager, NopBufferManager}; +use fmtutil::IntoDisplayableSlice; +use physical_type::{PhysicalStorage, PhysicalType}; +use raw::RawBufferParts; +use rayexec_error::{RayexecError, Result}; +use string_view::{ + BinaryViewAddressable, + BinaryViewAddressableMut, + StringViewAddressable, + StringViewAddressableMut, + StringViewHeap, + StringViewMetadataUnion, +}; + +use super::array::array_data::ArrayData; +use super::array::exp::Array; +use super::array::validity::Validity; + +/// Buffer for arrays. +/// +/// Buffers are able to hold a fixed number of elements in the primary buffer. +/// Some types make use of secondary buffers for additional data. In such cases, +/// the primary buffer may hold things like metadata or offsets depending on the +/// type. +#[derive(Debug)] +pub struct ArrayBuffer { + /// Physical type of the buffer. + physical_type: PhysicalType, + /// The primary data buffer. + /// + /// For primitive buffers, this will just contain the primitives themselves. + /// Other buffers like string buffers will store the metadata here. + primary: RawBufferParts, + /// Secondary buffer if needed for the buffer type. + secondary: Box>, +} + +impl ArrayBuffer +where + B: BufferManager, +{ + /// Create an array buffer with the given capacity for the primary data + /// buffer. + /// + /// The secondary buffer will be initialized to None. 
+ pub(crate) fn with_primary_capacity( + manager: &B, + capacity: usize, + ) -> Result { + let primary = RawBufferParts::try_new::(manager, capacity)?; + + Ok(ArrayBuffer { + physical_type: S::PHYSICAL_TYPE, + primary, + secondary: Box::new(SecondaryBuffer::None), + }) + } + + pub(crate) fn put_secondary_buffer(&mut self, secondary: SecondaryBuffer) { + self.secondary = Box::new(secondary) + } + + pub fn capacity(&self) -> usize { + self.primary.len + } + + pub fn physical_type(&self) -> PhysicalType { + self.physical_type + } + + pub fn try_as_slice(&self) -> Result<&[S::PrimaryBufferType]> { + self.check_type(S::PHYSICAL_TYPE)?; + let slice = unsafe { self.primary.as_slice::() }; + + Ok(slice) + } + + pub fn try_as_slice_mut(&mut self) -> Result<&mut [S::PrimaryBufferType]> { + self.check_type(S::PHYSICAL_TYPE)?; + let slice = unsafe { self.primary.as_slice_mut::() }; + + Ok(slice) + } + + pub fn get_secondary(&self) -> &SecondaryBuffer { + &self.secondary + } + + pub fn get_secondary_mut(&mut self) -> &mut SecondaryBuffer { + &mut self.secondary + } + + pub fn as_any_addressable(&self) -> AnyAddressable { + AnyAddressable { buffer: self } + } + + pub fn try_as_string_view_addressable(&self) -> Result { + self.check_type(PhysicalType::Utf8)?; + + let metadata = unsafe { self.primary.as_slice::() }; + let heap = match self.secondary.as_ref() { + SecondaryBuffer::StringViewHeap(heap) => heap, + _ => return Err(RayexecError::new("Missing string heap")), + }; + + Ok(StringViewAddressable { metadata, heap }) + } + + pub fn try_as_string_view_addressable_mut(&mut self) -> Result { + self.check_type(PhysicalType::Utf8)?; + + let metadata = unsafe { self.primary.as_slice_mut::() }; + let heap = match self.secondary.as_mut() { + SecondaryBuffer::StringViewHeap(heap) => heap, + _ => return Err(RayexecError::new("Missing string heap")), + }; + + Ok(StringViewAddressableMut { metadata, heap }) + } + + pub fn try_as_binary_view_addressable(&self) -> Result { + self.check_type_one_of(&[PhysicalType::Utf8, PhysicalType::Binary])?; + + let metadata = unsafe { self.primary.as_slice::() }; + let heap = match self.secondary.as_ref() { + SecondaryBuffer::StringViewHeap(heap) => heap, + _ => return Err(RayexecError::new("Missing string heap")), + }; + + Ok(BinaryViewAddressable { metadata, heap }) + } + + pub fn try_as_binary_view_addressable_mut(&mut self) -> Result { + // Note that unlike the non-mut version of this function, we only allow + // physical binary types here. For reads, treating strings as binary is + // completely fine, but allowing writing raw binary to a logical string + // array could lead to invalid utf8. + self.check_type(PhysicalType::Binary)?; + + let metadata = unsafe { self.primary.as_slice_mut::() }; + let heap = match self.secondary.as_mut() { + SecondaryBuffer::StringViewHeap(heap) => heap, + _ => return Err(RayexecError::new("Missing string heap")), + }; + + Ok(BinaryViewAddressableMut { metadata, heap }) + } + + /// Resize the primary buffer to be able to hold `capacity` elements. + pub fn resize_primary( + &mut self, + manager: &B, + capacity: usize, + ) -> Result<()> { + self.check_type(S::PHYSICAL_TYPE)?; + + unsafe { + self.primary + .resize::(manager, capacity) + } + } + + /// Ensure the primary buffer can hold `capacity` elements. + /// + /// Does nothing if the primary buffer already has enough capacity. 
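// A minimal illustrative sketch (hypothetical crate-internal usage, assuming the
// ArrayBuffer accessors above): var-len buffers keep fixed-size view metadata in
// the primary buffer and the bytes themselves in a secondary StringViewHeap,
// which has to be attached before string access works.
fn string_buffer_sketch() -> Result<()> {
    let mut buffer =
        ArrayBuffer::with_primary_capacity::<PhysicalUtf8>(&NopBufferManager, 4)?;

    // No heap attached yet, so string access errors ("Missing string heap").
    assert!(buffer.try_as_string_view_addressable().is_err());

    buffer.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new()));

    {
        let mut strings = buffer.try_as_string_view_addressable_mut()?;
        strings.put(0, "hello from the heap");
    }

    let strings = buffer.try_as_string_view_addressable()?;
    assert_eq!(Some("hello from the heap"), strings.get(0));

    Ok(())
}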
+ pub fn reserve_primary( + &mut self, + manager: &B, + capacity: usize, + ) -> Result<()> { + self.check_type(S::PHYSICAL_TYPE)?; + + if self.capacity() >= capacity { + return Ok(()); + } + + self.resize_primary::(manager, capacity) + } + + /// Checks that the physical type of this buffer matches `want`. + fn check_type(&self, want: PhysicalType) -> Result<()> { + if want != self.physical_type { + return Err(RayexecError::new("Physical types don't match") + .with_field("have", self.physical_type) + .with_field("want", want)); + } + + Ok(()) + } + + fn check_type_one_of(&self, oneof: &[PhysicalType]) -> Result<()> { + if !oneof.contains(&self.physical_type) { + return Err( + RayexecError::new("Physical type not one of requested types") + .with_field("have", self.physical_type) + .with_field("oneof", oneof.display_as_list().to_string()), + ); + } + + Ok(()) + } +} + +impl Drop for ArrayBuffer { + fn drop(&mut self) { + let ptr = self.primary.ptr; + + let len = self.primary.len * self.physical_type.primary_buffer_mem_size(); + let cap = self.primary.cap * self.physical_type.primary_buffer_mem_size(); + + let vec = unsafe { Vec::from_raw_parts(ptr, len, cap) }; + std::mem::drop(vec); + + // self.primary.reservation.free() + } +} + +#[derive(Debug)] +pub enum SecondaryBuffer { + StringViewHeap(StringViewHeap), + Dictionary(DictionaryBuffer), + List(ListBuffer), + None, +} + +impl SecondaryBuffer +where + B: BufferManager, +{ + pub fn get_list(&self) -> Result<&ListBuffer> { + match self { + Self::List(l) => Ok(l), + _ => Err(RayexecError::new("Expected list buffer")), + } + } + + pub fn get_list_mut(&mut self) -> Result<&mut ListBuffer> { + match self { + Self::List(l) => Ok(l), + _ => Err(RayexecError::new("Expected list buffer")), + } + } +} + +#[derive(Debug)] +pub struct DictionaryBuffer { + pub(crate) validity: Validity, + pub(crate) buffer: ArrayData, +} + +impl DictionaryBuffer +where + B: BufferManager, +{ + pub fn new(buffer: ArrayData, validity: Validity) -> Self { + debug_assert_eq!(buffer.capacity(), validity.len()); + DictionaryBuffer { buffer, validity } + } +} + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct ListItemMetadata { + pub offset: i32, + pub len: i32, +} + +#[derive(Debug)] +pub struct ListBuffer { + /// Number of "filled" entries in the child array. + /// + /// This differs from the child's capacity as we need to be able + /// incrementally push back values. + /// + /// This is only looked at when writing values to the child array. Reads can + /// ignore this as all required info is in the entry metadata. + pub(crate) entries: usize, + pub(crate) child: Array, +} + +impl ListBuffer +where + B: BufferManager, +{ + pub fn new(child: Array) -> Self { + ListBuffer { entries: 0, child } + } +} + +#[cfg(test)] +mod tests { + use physical_type::PhysicalI32; + + use super::*; + + #[test] + fn resize_primitive_increase_size() { + let mut buffer = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 4).unwrap(); + + let s = buffer.try_as_slice::().unwrap(); + assert_eq!(4, s.len()); + + buffer + .resize_primary::(&NopBufferManager, 8) + .unwrap(); + + let s = buffer.try_as_slice_mut::().unwrap(); + assert_eq!(8, s.len()); + + // Sanity check, make sure we can write to it. 
+ s.iter_mut().for_each(|v| *v = 12); + + assert_eq!(vec![12; 8].as_slice(), s); + } + + #[test] + fn resize_primitive_decrease_size() { + let mut buffer = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 4).unwrap(); + + let s = buffer.try_as_slice::().unwrap(); + assert_eq!(4, s.len()); + + buffer + .resize_primary::(&NopBufferManager, 2) + .unwrap(); + + let s = buffer.try_as_slice_mut::().unwrap(); + assert_eq!(2, s.len()); + + // Sanity check, make sure we can write to it. + s.iter_mut().for_each(|v| *v = 12); + + assert_eq!(vec![12; 2].as_slice(), s); + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs new file mode 100644 index 000000000..8b97c9758 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -0,0 +1,380 @@ +use std::fmt::{self, Debug}; + +use half::f16; +use rayexec_error::Result; + +use super::buffer_manager::BufferManager; +use super::string_view::{ + BinaryViewAddressable, + BinaryViewAddressableMut, + StringViewAddressable, + StringViewAddressableMut, + StringViewMetadataUnion, +}; +use super::{ArrayBuffer, ListItemMetadata}; +use crate::arrays::scalar::interval::Interval; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PhysicalType { + UntypedNull, + Boolean, + Int8, + Int16, + Int32, + Int64, + Int128, + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Float16, + Float32, + Float64, + Interval, + Binary, + Utf8, + List, + Struct, + Dictionary, +} + +impl PhysicalType { + pub const fn primary_buffer_mem_size(&self) -> usize { + match self { + Self::UntypedNull => PhysicalUntypedNull::PRIMARY_BUFFER_TYPE_SIZE, + Self::Boolean => PhysicalBool::PRIMARY_BUFFER_TYPE_SIZE, + Self::Int8 => PhysicalI8::PRIMARY_BUFFER_TYPE_SIZE, + Self::Int16 => PhysicalI16::PRIMARY_BUFFER_TYPE_SIZE, + Self::Int32 => PhysicalI32::PRIMARY_BUFFER_TYPE_SIZE, + Self::Int64 => PhysicalI64::PRIMARY_BUFFER_TYPE_SIZE, + Self::Int128 => PhysicalI128::PRIMARY_BUFFER_TYPE_SIZE, + Self::UInt8 => PhysicalU8::PRIMARY_BUFFER_TYPE_SIZE, + Self::UInt16 => PhysicalU16::PRIMARY_BUFFER_TYPE_SIZE, + Self::UInt32 => PhysicalU32::PRIMARY_BUFFER_TYPE_SIZE, + Self::UInt64 => PhysicalU64::PRIMARY_BUFFER_TYPE_SIZE, + Self::UInt128 => PhysicalU128::PRIMARY_BUFFER_TYPE_SIZE, + Self::Float16 => PhysicalF16::PRIMARY_BUFFER_TYPE_SIZE, + Self::Float32 => PhysicalF32::PRIMARY_BUFFER_TYPE_SIZE, + Self::Float64 => PhysicalF64::PRIMARY_BUFFER_TYPE_SIZE, + Self::Interval => PhysicalInterval::PRIMARY_BUFFER_TYPE_SIZE, + Self::Utf8 => PhysicalInterval::PRIMARY_BUFFER_TYPE_SIZE, + Self::List => PhysicalList::PRIMARY_BUFFER_TYPE_SIZE, + Self::Dictionary => PhysicalInterval::PRIMARY_BUFFER_TYPE_SIZE, + + _ => unimplemented!(), + } + } + + pub const fn as_str(&self) -> &'static str { + match self { + Self::UntypedNull => "UntypedNull", + Self::Boolean => "Boolean", + Self::Int8 => "Int8", + Self::Int16 => "Int16", + Self::Int32 => "Int32", + Self::Int64 => "Int64", + Self::Int128 => "Int128", + Self::UInt8 => "UInt8", + Self::UInt16 => "UInt16", + Self::UInt32 => "UInt32", + Self::UInt64 => "UInt64", + Self::UInt128 => "UInt128", + Self::Float16 => "Float16", + Self::Float32 => "Float32", + Self::Float64 => "Float64", + Self::Interval => "Interval", + Self::Binary => "Binary", + Self::Utf8 => "Utf8", + Self::List => "List", + Self::Struct => "Struct", + Self::Dictionary => "Dictionary", + } + } +} + +impl fmt::Display for PhysicalType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + 
write!(f, "{}", self.as_str()) + } +} + +/// Represents an in-memory array that can be indexed into to retrieve values. +pub trait Addressable: Debug { + /// The type that get's returned. + type T: Send + Debug + ?Sized; + + fn len(&self) -> usize; + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Get a value at the given index. + fn get(&self, idx: usize) -> Option<&Self::T>; +} + +impl Addressable for &[T] +where + T: Debug + Send, +{ + type T = T; + + fn len(&self) -> usize { + (**self).len() + } + + fn get(&self, idx: usize) -> Option<&Self::T> { + (**self).get(idx) + } +} + +/// Represents in-memory storage that we can get mutable references to. +pub trait AddressableMut: Debug { + type T: Debug + ?Sized; + + fn len(&self) -> usize; + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Get a mutable reference to a value at the given index. + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T>; + + /// Put a value at the given index. + /// + /// Should panic if index is out of bounds. + fn put(&mut self, idx: usize, val: &Self::T); +} + +impl AddressableMut for &mut [T] +where + T: Debug + Send + Copy, +{ + type T = T; + + fn len(&self) -> usize { + (**self).len() + } + + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T> { + (**self).get_mut(idx) + } + + fn put(&mut self, idx: usize, val: &Self::T) { + self[idx] = *val; + } +} + +/// Trait for determining how we access the underlying storage for arrays. +pub trait PhysicalStorage: Debug + Default + Sync + Send + Clone + Copy + 'static { + const PHYSICAL_TYPE: PhysicalType; + + /// Size in bytes of the type being stored in the primary buffer. + const PRIMARY_BUFFER_TYPE_SIZE: usize = std::mem::size_of::(); + + /// The type that's stored in the primary buffer. + /// + /// This should be small and fixed sized. + type PrimaryBufferType: Sized + Debug + Default + Sync + Send + Clone + Copy; + + /// The logical type being stored that can be accessed. + /// + /// For primitive buffers, this will be the same as the primary buffer type. + type StorageType: Sync + Send + ?Sized; + + /// The type of the addressable storage. + type Addressable<'a>: Addressable; + + /// Get addressable storage for indexing into the array. + fn get_addressable(buffer: &ArrayBuffer) -> Result>; +} + +pub trait MutablePhysicalStorage: PhysicalStorage { + type AddressableMut<'a>: AddressableMut; + + /// Get mutable addressable storage for the array. + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result>; +} + +macro_rules! 
generate_primitive { + ($prim:ty, $name:ident, $phys_typ:ident) => { + #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] + pub struct $name; + + impl PhysicalStorage for $name { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::$phys_typ; + + type PrimaryBufferType = $prim; + type StorageType = Self::PrimaryBufferType; + type Addressable<'a> = &'a [Self::StorageType]; + + fn get_addressable( + buffer: &ArrayBuffer, + ) -> Result> { + buffer.try_as_slice::() + } + } + + impl MutablePhysicalStorage for $name { + type AddressableMut<'a> = &'a mut [Self::StorageType]; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_slice_mut::() + } + } + }; +} + +generate_primitive!(bool, PhysicalBool, Boolean); + +generate_primitive!(i8, PhysicalI8, Int8); +generate_primitive!(i16, PhysicalI16, Int16); +generate_primitive!(i32, PhysicalI32, Int32); +generate_primitive!(i64, PhysicalI64, Int64); +generate_primitive!(i128, PhysicalI128, Int128); + +generate_primitive!(u8, PhysicalU8, UInt8); +generate_primitive!(u16, PhysicalU16, UInt16); +generate_primitive!(u32, PhysicalU32, UInt32); +generate_primitive!(u64, PhysicalU64, UInt64); +generate_primitive!(u128, PhysicalU128, UInt128); + +generate_primitive!(f16, PhysicalF16, Float16); +generate_primitive!(f32, PhysicalF32, Float32); +generate_primitive!(f64, PhysicalF64, Float64); + +generate_primitive!(Interval, PhysicalInterval, Interval); + +/// Marker type representing a null value without an associated type. +/// +/// This will be the type we use for queries like `SELECT NULL` where there's no +/// additional type information in the query. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct UntypedNull; + +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalUntypedNull; + +impl PhysicalStorage for PhysicalUntypedNull { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::UntypedNull; + + type PrimaryBufferType = UntypedNull; + type StorageType = UntypedNull; + + type Addressable<'a> = &'a [UntypedNull]; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_slice::() + } +} + +impl MutablePhysicalStorage for PhysicalUntypedNull { + type AddressableMut<'a> = &'a mut [UntypedNull]; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_slice_mut::() + } +} + +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalUtf8; + +impl PhysicalStorage for PhysicalUtf8 { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::Utf8; + + type PrimaryBufferType = StringViewMetadataUnion; + type StorageType = str; + + type Addressable<'a> = StringViewAddressable<'a>; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_string_view_addressable() + } +} + +impl MutablePhysicalStorage for PhysicalUtf8 { + type AddressableMut<'a> = StringViewAddressableMut<'a>; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_string_view_addressable_mut() + } +} + +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalBinary; + +impl PhysicalStorage for PhysicalBinary { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::Binary; + + type PrimaryBufferType = StringViewMetadataUnion; + type StorageType = [u8]; + + type Addressable<'a> = BinaryViewAddressable<'a>; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_binary_view_addressable() + } +} + +impl MutablePhysicalStorage for PhysicalBinary { + type AddressableMut<'a> = 
BinaryViewAddressableMut<'a>; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_binary_view_addressable_mut() + } +} + +/// Dictionary arrays have the selection vector as the primary data buffer. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalDictionary; + +impl PhysicalStorage for PhysicalDictionary { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::Dictionary; + + type PrimaryBufferType = usize; // The index into the dictionary. + type StorageType = Self::PrimaryBufferType; + + type Addressable<'a> = &'a [usize]; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_slice::() + } +} + +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalList; + +impl PhysicalStorage for PhysicalList { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::List; + + type PrimaryBufferType = ListItemMetadata; + type StorageType = Self::PrimaryBufferType; + + type Addressable<'a> = &'a [Self::StorageType]; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_slice::() + } +} + +impl MutablePhysicalStorage for PhysicalList { + type AddressableMut<'a> = &'a mut [Self::StorageType]; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_slice_mut::() + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/raw.rs b/crates/rayexec_execution/src/arrays/buffer/raw.rs new file mode 100644 index 000000000..65fd91a61 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/raw.rs @@ -0,0 +1,89 @@ +use rayexec_error::Result; + +use super::buffer_manager::BufferManager; + +#[derive(Debug)] +pub struct RawBufferParts { + /// Memory reservation for this buffer. + pub(crate) _reservation: B::Reservation, + /// Raw pointer to start of vec. + pub(crate) ptr: *mut u8, + /// Number of elements `T` in the vec, not bytes. + pub(crate) len: usize, + /// Capacity of vec (`T` not bytes). + pub(crate) cap: usize, +} + +impl RawBufferParts { + pub fn try_new(manager: &B, len: usize) -> Result { + // Note that `vec!` may over-allocate, so we track that too. + // + // See + // > vec![x; n], vec![a, b, c, d], and Vec::with_capacity(n), will all + // > produce a Vec with at least the requested capacity. + let alloc_size = len * std::mem::size_of::(); + let reservation = manager.reserve_external(alloc_size)?; + + let mut data: Vec = vec![T::default(); len]; + + let ptr = data.as_mut_ptr(); + let len = data.len(); + let cap = data.capacity(); + + let additional = (cap * std::mem::size_of::()) - alloc_size; + if additional > 0 { + // TODO: Combine + // let additional = manager.reserve_external(additional)?; + // reservation = reservation.combine(additional); + } + + std::mem::forget(data); + + Ok(RawBufferParts { + _reservation: reservation, + ptr: ptr.cast(), + len, + cap, + }) + } + + pub unsafe fn as_slice(&self) -> &[T] { + std::slice::from_raw_parts(self.ptr.cast::().cast_const(), self.len) + } + + pub unsafe fn as_slice_mut(&mut self) -> &mut [T] { + std::slice::from_raw_parts_mut(self.ptr.cast::(), self.len) + } + + pub unsafe fn resize(&mut self, manager: &B, len: usize) -> Result<()> { + if self.len == 0 { + // Special case when length is zero. + // + // We want to enable the use case where we initialize the buffer to + // nothing (null) and later append to it. However, the `T` that we + // pass in here might have a different alignment which wouldn't be + // safe. + // + // By just creating a new buffer, we can avoid that issue. 
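// A minimal illustrative sketch (hypothetical crate-internal usage, assuming the
// PhysicalStorage impls defined above): the same ArrayBuffer can be written
// through a MutablePhysicalStorage impl and read back through the matching
// immutable storage impl.
fn addressable_sketch() -> Result<()> {
    let mut buffer =
        ArrayBuffer::with_primary_capacity::<PhysicalI32>(&NopBufferManager, 4)?;

    // Writable access goes through the mutable storage trait.
    let mut values = PhysicalI32::get_addressable_mut(&mut buffer)?;
    values.put(0, &42);

    // Read access via the immutable trait sees the same primary buffer.
    let values = PhysicalI32::get_addressable(&buffer)?;
    assert_eq!(Some(&42), values.get(0));

    Ok(())
}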
+ let new_self = Self::try_new::(manager, len)?; + *self = new_self; + return Ok(()); + } + + debug_assert_eq!(self.ptr as usize % std::mem::size_of::(), 0); + + let mut data: Vec = Vec::from_raw_parts(self.ptr.cast(), self.len, self.cap); + + // TODO: Reservation stuff. + + data.resize(len, T::default()); + + self.ptr = data.as_mut_ptr().cast(); + self.len = data.len(); + self.cap = data.capacity(); + + std::mem::forget(data); + + Ok(()) + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/string_view.rs b/crates/rayexec_execution/src/arrays/buffer/string_view.rs new file mode 100644 index 000000000..2bfb476cd --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/string_view.rs @@ -0,0 +1,283 @@ +use std::fmt; + +use super::physical_type::{Addressable, AddressableMut}; + +#[derive(Debug)] +pub struct StringViewAddressable<'a> { + pub(crate) metadata: &'a [StringViewMetadataUnion], + pub(crate) heap: &'a StringViewHeap, +} + +impl Addressable for StringViewAddressable<'_> { + type T = str; + + fn len(&self) -> usize { + self.metadata.len() + } + + fn get(&self, idx: usize) -> Option<&Self::T> { + let m = self.metadata.get(idx)?; + let bs = self.heap.get(m)?; + Some(unsafe { std::str::from_utf8_unchecked(bs) }) + } +} + +#[derive(Debug)] +pub struct StringViewAddressableMut<'a> { + pub(crate) metadata: &'a mut [StringViewMetadataUnion], + pub(crate) heap: &'a mut StringViewHeap, +} + +impl AddressableMut for StringViewAddressableMut<'_> { + type T = str; + + fn len(&self) -> usize { + self.metadata.len() + } + + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T> { + let m = self.metadata.get_mut(idx)?; + let bs = self.heap.get_mut(m)?; + Some(unsafe { std::str::from_utf8_unchecked_mut(bs) }) + } + + fn put(&mut self, idx: usize, val: &Self::T) { + let bs = val.as_bytes(); + let new_m = self.heap.push_bytes(bs); + self.metadata[idx] = new_m; + } +} + +#[derive(Debug)] +pub struct BinaryViewAddressable<'a> { + pub(crate) metadata: &'a [StringViewMetadataUnion], + pub(crate) heap: &'a StringViewHeap, +} + +impl Addressable for BinaryViewAddressable<'_> { + type T = [u8]; + + fn len(&self) -> usize { + self.metadata.len() + } + + fn get(&self, idx: usize) -> Option<&Self::T> { + let m = self.metadata.get(idx)?; + self.heap.get(m) + } +} + +#[derive(Debug)] +pub struct BinaryViewAddressableMut<'a> { + pub(crate) metadata: &'a mut [StringViewMetadataUnion], + pub(crate) heap: &'a mut StringViewHeap, +} + +impl AddressableMut for BinaryViewAddressableMut<'_> { + type T = [u8]; + + fn len(&self) -> usize { + self.metadata.len() + } + + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T> { + let m = self.metadata.get_mut(idx)?; + self.heap.get_mut(m) + } + + fn put(&mut self, idx: usize, val: &Self::T) { + let new_m = self.heap.push_bytes(val); + self.metadata[idx] = new_m; + } +} + +/// Metadata for small (<= 12 bytes) varlen data. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(C)] +pub struct StringViewSmallMetadata { + pub len: i32, + pub inline: [u8; 12], +} + +/// Metadata for large (> 12 bytes) varlen data. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(C)] +pub struct StringViewLargeMetadata { + pub len: i32, + pub prefix: [u8; 4], + pub buffer_idx: i32, + pub offset: i32, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StringViewMetadata<'a> { + Small(&'a StringViewSmallMetadata), + Large(&'a StringViewLargeMetadata), +} + +#[derive(Clone, Copy)] +#[repr(C)] +pub union StringViewMetadataUnion { + small: StringViewSmallMetadata, + large: StringViewLargeMetadata, +} + +impl Default for StringViewMetadataUnion { + #[inline] + fn default() -> Self { + Self::zero() + } +} + +impl StringViewMetadataUnion { + #[inline] + pub fn as_metadata(&self) -> StringViewMetadata { + unsafe { + // i32 len is first field in both, safe to access from either + // variant. + if self.is_small() { + StringViewMetadata::Small(&self.small) + } else { + StringViewMetadata::Large(&self.large) + } + } + } + + pub const fn is_small(&self) -> bool { + // i32 len is first field in both, safe to access from either + // variant. + unsafe { self.small.len <= 12 } + } + + pub fn data_len(&self) -> i32 { + // SAFETY: `len` field is in the same place in both variants. + unsafe { self.small.len } + } + + pub(crate) const fn zero() -> Self { + Self { + small: StringViewSmallMetadata { + len: 0, + inline: [0; 12], + }, + } + } + + fn as_small(&self) -> StringViewSmallMetadata { + debug_assert!(self.is_small()); + unsafe { self.small } + } + + fn as_large(&self) -> StringViewLargeMetadata { + debug_assert!(!self.is_small()); + unsafe { self.large } + } +} + +impl From for StringViewMetadataUnion { + fn from(value: StringViewSmallMetadata) -> Self { + StringViewMetadataUnion { small: value } + } +} + +impl From for StringViewMetadataUnion { + fn from(value: StringViewLargeMetadata) -> Self { + StringViewMetadataUnion { large: value } + } +} + +impl fmt::Debug for StringViewMetadataUnion { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_small() { + let small = self.as_small(); + small.fmt(f) + } else { + let large = self.as_large(); + large.fmt(f) + } + } +} + +#[derive(Debug)] +pub struct StringViewHeap { + /// Buffer containing all blob data. + buffer: Vec, +} + +impl Default for StringViewHeap { + fn default() -> Self { + Self::new() + } +} + +impl StringViewHeap { + // TODO: Tracker + pub const fn new() -> Self { + StringViewHeap { buffer: Vec::new() } + } + + pub fn clear(&mut self) { + self.buffer.clear(); + } + + pub fn push_bytes(&mut self, value: &[u8]) -> StringViewMetadataUnion { + if value.len() as i32 <= 12 { + // Store completely inline. + let mut inline = [0; 12]; + inline[0..value.len()].copy_from_slice(value); + + StringViewSmallMetadata { + len: value.len() as i32, + inline, + } + .into() + } else { + // Store prefix, buf index, and offset in line. Store complete copy + // in buffer. 
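// A minimal illustrative sketch (hypothetical usage, assuming the StringViewHeap
// push/get methods shown here): values of 12 bytes or fewer are stored entirely
// inline in the metadata union, while longer values keep only a 4-byte prefix
// inline and spill the full copy into the heap buffer.
fn string_view_heap_sketch() {
    let mut heap = StringViewHeap::new();

    let small = heap.push_bytes(b"tiny");
    assert!(small.is_small());
    assert_eq!(Some(b"tiny".as_slice()), heap.get(&small));

    let large = heap.push_bytes(b"definitely more than twelve bytes");
    assert!(!large.is_small());
    assert_eq!(
        Some(b"definitely more than twelve bytes".as_slice()),
        heap.get(&large),
    );
}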
+ + let offset = self.buffer.len(); + let mut prefix = [0; 4]; + let prefix_len = std::cmp::min(value.len(), 4); + prefix[0..prefix_len].copy_from_slice(&value[0..prefix_len]); + + self.buffer.extend_from_slice(value); + + StringViewLargeMetadata { + len: value.len() as i32, + prefix, + buffer_idx: 0, + offset: offset as i32, + } + .into() + } + } + + pub fn get<'a, 'b: 'a>(&'b self, metadata: &'a StringViewMetadataUnion) -> Option<&'a [u8]> { + if metadata.is_small() { + unsafe { Some(&metadata.small.inline[..(metadata.small.len as usize)]) } + } else { + unsafe { + let offset = metadata.large.offset as usize; + let len = metadata.large.len as usize; + + self.buffer.get(offset..(offset + len)) + } + } + } + + pub fn get_mut<'a, 'b: 'a>( + &'b mut self, + metadata: &'a mut StringViewMetadataUnion, + ) -> Option<&'a mut [u8]> { + if metadata.is_small() { + unsafe { Some(&mut metadata.small.inline[..(metadata.small.len as usize)]) } + } else { + unsafe { + let offset = metadata.large.offset as usize; + let len = metadata.large.len as usize; + + self.buffer.get_mut(offset..(offset + len)) + } + } + } +} diff --git a/crates/rayexec_execution/src/arrays/compute/cast/array.rs b/crates/rayexec_execution/src/arrays/compute/cast/array.rs index 76845aa06..5896836ec 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/array.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/array.rs @@ -1,6 +1,6 @@ use std::ops::Mul; -use half::f16; +use stdutil::iter::IntoExactSizeIterator; use num::{CheckedDiv, CheckedMul, Float, NumCast, PrimInt, ToPrimitive}; use rayexec_error::{RayexecError, Result}; @@ -48,16 +48,11 @@ use super::parse::{ UInt64Parser, UInt8Parser, }; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::bitmap::Bitmap; -use crate::arrays::datatype::{DataType, TimeUnit}; -use crate::arrays::executor::builder::{ - ArrayBuilder, - BooleanBuffer, - GermanVarlenBuffer, - PrimitiveBuffer, -}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::array::validity::Validity; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, PhysicalBool, PhysicalF16, PhysicalF32, @@ -67,6 +62,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, + PhysicalInterval, PhysicalStorage, PhysicalU128, PhysicalU16, @@ -75,206 +71,234 @@ use crate::arrays::executor::physical_type::{ PhysicalU8, PhysicalUtf8, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::datatype::{DataType, TimeUnit}; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; -use crate::arrays::storage::{AddressableStorage, PrimitiveStorage}; -pub fn cast_array(arr: &Array, to: DataType, behavior: CastFailBehavior) -> Result { - if arr.datatype() == &to { - // TODO: Cow? - return Ok(arr.clone()); +/// Casts an array to another array. +/// +/// The datatype of `out` determines the what we're casting values to. +/// +/// `behavior` determines what happens if casting results in an overflow or some +/// other precision/accuracy error. Note that if we don't have an implementation +/// of casting from one type to another, this will always error. 
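// Editorial aside: a hedged usage sketch of the new out-parameter calling
// convention, pieced together from the tests added later in this patch
// (`Array::try_from_iter`, `Array::new` with `NopBufferManager`, and a `0..n`
// selection). Not authoritative; see the tests at the bottom of this file for
// the exact API.
//
//     let mut arr = Array::try_from_iter(["13", "18"])?;
//     let mut out = Array::new(&NopBufferManager, DataType::Int32, 2)?;
//     cast_array(&mut arr, 0..2, &mut out, CastFailBehavior::Error)?;
//     // `out` now holds [13, 18]; its datatype decided the target type.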
+pub fn cast_array( + arr: &mut Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, + behavior: CastFailBehavior, +) -> Result<()> { + if arr.datatype() == out.datatype() { + out.clone_from(&NopBufferManager, arr)?; + out.select(&NopBufferManager, sel)?; + + return Ok(()); } - let arr = match arr.datatype() { + let to = out.datatype(); + + match arr.datatype() { DataType::Null => { - // Can cast NULL to anything else. - let data = to.physical_type()?.zeroed_array_data(arr.logical_len()); - let validity = Bitmap::new_with_all_false(arr.logical_len()); - Array::new_with_validity_and_array_data(to, validity, data) + // Can cast NULL to anything else. Just set the valid mask to all + // invalid. + out.put_validity(Validity::new_all_invalid(out.capacity()))?; + Ok(()) } // String to anything else. - DataType::Utf8 => cast_from_utf8(arr, to, behavior)?, + DataType::Utf8 => cast_from_utf8(arr, sel, out, behavior), // Primitive numerics to other primitive numerics. DataType::Int8 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Int16 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Int32 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Int64 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Int128 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::UInt8 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::UInt16 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::UInt32 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::UInt64 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::UInt128 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Float16 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Float32 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Float64 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } // Int to date32 - DataType::Int8 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + DataType::Int8 if to == &DataType::Date32 => { + cast_primitive_numeric::(arr, sel, out, behavior) } - DataType::Int16 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? 
+ DataType::Int16 if to == &DataType::Date32 => { + cast_primitive_numeric::(arr, sel, out, behavior) } - DataType::Int32 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + DataType::Int32 if to == &DataType::Date32 => { + cast_primitive_numeric::(arr, sel, out, behavior) } - DataType::UInt8 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + DataType::UInt8 if to == &DataType::Date32 => { + cast_primitive_numeric::(arr, sel, out, behavior) } - DataType::UInt16 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + DataType::UInt16 if to == &DataType::Date32 => { + cast_primitive_numeric::(arr, sel, out, behavior) } // Int to decimal. DataType::Int8 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::Int16 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::Int32 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::Int64 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::Int128 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::UInt8 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::UInt16 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::UInt32 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::UInt64 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::UInt128 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } // Float to decimal. + DataType::Float16 if to.is_decimal() => { + cast_float_to_decimal_helper::(arr, sel, out, behavior) + } DataType::Float32 if to.is_decimal() => { - cast_float_to_decimal_helper::(arr, to, behavior)? + cast_float_to_decimal_helper::(arr, sel, out, behavior) } DataType::Float64 if to.is_decimal() => { - cast_float_to_decimal_helper::(arr, to, behavior)? + cast_float_to_decimal_helper::(arr, sel, out, behavior) } // Decimal to decimal DataType::Decimal64(_) if to.is_decimal() => { - decimal_rescale_helper::(arr, to, behavior)? + decimal_rescale_helper::(arr, sel, out, behavior) } DataType::Decimal128(_) if to.is_decimal() => { - decimal_rescale_helper::(arr, to, behavior)? + decimal_rescale_helper::(arr, sel, out, behavior) } // Decimal to float. 
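// Editorial aside: the decimal-to-float arms below reduce to dividing the
// stored integer by 10^scale (after a NumCast into the float type). A
// dependency-free illustration with a hypothetical name:
fn decimal_i64_to_f64(stored: i64, scale: i8) -> f64 {
    stored as f64 / 10f64.powi(scale as i32)
}
// decimal_i64_to_f64(1500, 3) == 1.5, matching the Decimal64(10, 3) test at
// the bottom of this file.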
DataType::Decimal64(_) => match to { - DataType::Float32 => cast_decimal_to_float::(arr, to, behavior)?, - DataType::Float64 => cast_decimal_to_float::(arr, to, behavior)?, - other => return Err(RayexecError::new(format!("Unhandled data type: {other}"))), + DataType::Float16 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + DataType::Float32 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + DataType::Float64 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), }, DataType::Decimal128(_) => match to { - DataType::Float32 => cast_decimal_to_float::(arr, to, behavior)?, - DataType::Float64 => cast_decimal_to_float::(arr, to, behavior)?, - other => return Err(RayexecError::new(format!("Unhandled data type: {other}"))), + DataType::Float16 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + DataType::Float32 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + DataType::Float64 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), }, // Anything to string. - _ if to.is_utf8() => cast_to_utf8(arr, behavior)?, - - other => { - return Err(RayexecError::new(format!( - "Casting from {other} to {to} not implemented" - ))) - } - }; + _ if to.is_utf8() => cast_to_utf8(arr, sel, out, behavior), - Ok(arr) + other => Err(RayexecError::new(format!( + "Casting from {other} to {to} not implemented", + ))), + } } -fn decimal_rescale_helper<'a, S>( - arr: &'a Array, - to: DataType, +fn decimal_rescale_helper( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: PrimInt, + D1: DecimalType, { - match to { - DataType::Decimal64(_) => decimal_rescale::(arr, to, behavior), - DataType::Decimal128(_) => decimal_rescale::(arr, to, behavior), + match out.datatype() { + DataType::Decimal64(_) => decimal_rescale::(arr, sel, out, behavior), + DataType::Decimal128(_) => decimal_rescale::(arr, sel, out, behavior), other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), } } -pub fn decimal_rescale<'a, S, D>( - arr: &'a Array, - to: DataType, +pub fn decimal_rescale( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage, - D: DecimalType, - S::Type<'a>: PrimInt, - ArrayData: From>, + D1: DecimalType, + D2: DecimalType, { - let new_meta = to.try_get_decimal_type_meta()?; + let new_meta = arr.datatype().try_get_decimal_type_meta()?; let arr_meta = arr.datatype().try_get_decimal_type_meta()?; - let scale_amount = ::from( + let scale_amount = ::from( 10.pow((arr_meta.scale - new_meta.scale).unsigned_abs() as u32), ) .expect("to be in range"); - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: to, - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |v, buf| { + sel, + OutBuffer::from_array(out)?, + |&v, buf| { // Convert to decimal primitive. 
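// Editorial aside: the rescale step below multiplies when the target scale is
// larger and divides when it is smaller, using checked arithmetic so overflow
// surfaces as a cast failure instead of wrapping. A standalone sketch of the
// same arithmetic (illustrative names):
fn rescale_i64(value: i64, from_scale: i8, to_scale: i8) -> Option<i64> {
    let factor = 10i64.checked_pow((from_scale - to_scale).unsigned_abs() as u32)?;
    if from_scale < to_scale {
        value.checked_mul(factor) // e.g. 150 at scale 2 -> 1500 at scale 3
    } else {
        value.checked_div(factor) // e.g. 1500 at scale 3 -> 150 at scale 2
    }
}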
- let v = match ::from(v) { + let v = match ::from(v) { Some(v) => v, None => { - fail_state.set_did_fail(buf.idx); + fail_state.set_error(|| RayexecError::new("Failed cast decimal")); + buf.put_null(); return; } }; @@ -282,169 +306,184 @@ where if arr_meta.scale < new_meta.scale { match v.checked_mul(&scale_amount) { Some(v) => buf.put(&v), - None => fail_state.set_did_fail(buf.idx), + None => { + fail_state.set_error(|| RayexecError::new("Failed cast decimal")); + buf.put_null(); + } } } else { match v.checked_div(&scale_amount) { Some(v) => buf.put(&v), - None => fail_state.set_did_fail(buf.idx), + None => { + fail_state.set_error(|| RayexecError::new("Failed cast decimal")); + buf.put_null(); + } } } }, )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } -fn cast_float_to_decimal_helper<'a, S>( - arr: &'a Array, - to: DataType, +fn cast_float_to_decimal_helper( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where S: PhysicalStorage, - S::Type<'a>: Float, + S::StorageType: Float, { - match to { - DataType::Decimal64(_) => cast_float_to_decimal::(arr, to, behavior), - DataType::Decimal128(_) => cast_float_to_decimal::(arr, to, behavior), + match out.datatype() { + DataType::Decimal64(_) => { + cast_float_to_decimal::(arr, sel, out, behavior) + } + DataType::Decimal128(_) => { + cast_float_to_decimal::(arr, sel, out, behavior) + } other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), } } -fn cast_float_to_decimal<'a, S, D>( - arr: &'a Array, - to: DataType, +fn cast_float_to_decimal( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where S: PhysicalStorage, + S::StorageType: Float, D: DecimalType, - S::Type<'a>: Float, - ArrayData: From>, { - let decimal_meta = to.try_get_decimal_type_meta()?; + let decimal_meta = out.datatype().try_get_decimal_type_meta()?; let scale = decimal_meta.scale; let precision = decimal_meta.precision; - let scale = < as AddressableStorage>::T as NumCast>::from( - 10.pow(scale.unsigned_abs() as u32), - ) - .ok_or_else(|| RayexecError::new(format!("Failed to cast scale {scale} to float")))?; + let scale = ::from(10.pow(scale.unsigned_abs() as u32)) + .ok_or_else(|| RayexecError::new(format!("Failed to cast scale {scale} to float")))?; - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: to, - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |v, buf| { + sel, + OutBuffer::from_array(out)?, + |&v, buf| { // TODO: Properly handle negative scale. let scaled_value = v.mul(scale).round(); match ::from(scaled_value) { Some(v) => { if let Err(err) = D::validate_precision(v, precision) { - fail_state.set_did_fail_with_error(buf.idx, err); + fail_state.set_error(|| err); + buf.put_null(); return; } buf.put(&v) } - None => fail_state.set_did_fail(buf.idx), + None => { + fail_state.set_error(|| RayexecError::new("Failed cast decimal")); + buf.put_null(); + } } }, )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } -// TODO: Weird to specify both the float generic and datatype. 
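// Editorial aside: a self-contained sketch of the float-to-decimal path just
// above: scale by 10^scale, round, then reject values whose digit count
// exceeds the declared precision. The helper name and the precision check are
// illustrative stand-ins for the DecimalType trait method used by this patch.
fn f64_to_decimal_i64(v: f64, precision: u8, scale: i8) -> Option<i64> {
    let scaled = (v * 10f64.powi(scale as i32)).round();
    if !scaled.is_finite() || scaled < i64::MIN as f64 || scaled > i64::MAX as f64 {
        return None;
    }
    let stored = scaled as i64;
    // Precision check: the stored integer may not have more digits than
    // `precision` allows.
    if stored.unsigned_abs() >= 10u64.checked_pow(precision as u32)? {
        return None;
    }
    Some(stored)
}
// f64_to_decimal_i64(1.5, 10, 3) == Some(1500)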
-pub fn cast_decimal_to_float<'a, S, F>( - arr: &'a Array, - to: DataType, +pub fn cast_decimal_to_float( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage, - F: Float + Default + Copy, - <::Storage<'a> as AddressableStorage>::T: ToPrimitive, - ArrayData: From>, + D: DecimalType, + S: MutablePhysicalStorage, + S::StorageType: Float + Copy, { let decimal_meta = arr.datatype().try_get_decimal_type_meta()?; - let scale = ::from((10.0).powi(decimal_meta.scale as i32)).ok_or_else(|| { - RayexecError::new(format!( - "Failed to cast scale {} to float", - decimal_meta.scale - )) - })?; - - let builder = ArrayBuilder { - datatype: to, - buffer: PrimitiveBuffer::::with_len(arr.logical_len()), - }; + let scale = ::from((10.0).powi(decimal_meta.scale as i32)) + .ok_or_else(|| { + RayexecError::new(format!( + "Failed to cast scale {} to float", + decimal_meta.scale + )) + })?; - let mut fail_state = behavior.new_state_for_array(arr); - let output = - UnaryExecutor::execute::(arr, builder, |v, buf| match ::from(v) { + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::( + arr, + sel, + OutBuffer::from_array(out)?, + |&v, buf| match ::from(v) { Some(v) => { - let scaled = v.div(scale); + let scaled = v / scale; buf.put(&scaled); } - None => fail_state.set_did_fail(buf.idx), - })?; + None => { + fail_state.set_error(|| RayexecError::new("Failed to cast float to decimal")); + buf.put_null(); + } + }, + )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } -fn cast_int_to_decimal_helper<'a, S>( - arr: &'a Array, - to: DataType, +fn cast_int_to_decimal_helper( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where S: PhysicalStorage, - S::Type<'a>: PrimInt, + S::StorageType: PrimInt, { - match to { - DataType::Decimal64(_) => cast_int_to_decimal::(arr, to, behavior), - DataType::Decimal128(_) => cast_int_to_decimal::(arr, to, behavior), + match out.datatype() { + DataType::Decimal64(_) => cast_int_to_decimal::(arr, sel, out, behavior), + DataType::Decimal128(_) => { + cast_int_to_decimal::(arr, sel, out, behavior) + } other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), } } -fn cast_int_to_decimal<'a, S, D>( - arr: &'a Array, - to: DataType, +fn cast_int_to_decimal( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where S: PhysicalStorage, D: DecimalType, - S::Type<'a>: PrimInt, - ArrayData: From>, + S::StorageType: PrimInt, { - let decimal_meta = to.try_get_decimal_type_meta()?; + let decimal_meta = out.datatype().try_get_decimal_type_meta()?; let scale = decimal_meta.scale; let precision = decimal_meta.precision; let scale_amount = ::from(10.pow(scale.unsigned_abs() as u32)) .expect("to be in range"); - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: to, - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |v, buf| { + sel, + OutBuffer::from_array(out)?, + |&v, buf| { // Convert to decimal primitive. 
let v = match ::from(v) { Some(v) => v, None => { - fail_state.set_did_fail(buf.idx); + fail_state.set_error(|| RayexecError::new("Failed to cast int to decimal")); + buf.put_null(); return; } }; @@ -454,7 +493,8 @@ where match v.checked_mul(&scale_amount) { Some(v) => v, None => { - fail_state.set_did_fail(buf.idx); + fail_state.set_error(|| RayexecError::new("Failed to cast int to decimal")); + buf.put_null(); return; } } @@ -462,14 +502,16 @@ where match v.checked_div(&scale_amount) { Some(v) => v, None => { - fail_state.set_did_fail(buf.idx); + fail_state.set_error(|| RayexecError::new("Failed to cast int to decimal")); + buf.put_null(); return; } } }; if let Err(err) = D::validate_precision(val, precision) { - fail_state.set_did_fail_with_error(buf.idx, err); + fail_state.set_error(|| err); + buf.put_null(); return; } @@ -477,170 +519,249 @@ where }, )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } -fn cast_primitive_numeric_helper<'a, S>( - arr: &'a Array, - to: DataType, +fn cast_primitive_numeric_helper( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where S: PhysicalStorage, - S::Type<'a>: ToPrimitive, + S::StorageType: ToPrimitive + Sized + Copy, { - match to { - DataType::Int8 => cast_primitive_numeric::(arr, to, behavior), - DataType::Int16 => cast_primitive_numeric::(arr, to, behavior), - DataType::Int32 => cast_primitive_numeric::(arr, to, behavior), - DataType::Int64 => cast_primitive_numeric::(arr, to, behavior), - DataType::Int128 => cast_primitive_numeric::(arr, to, behavior), - DataType::UInt8 => cast_primitive_numeric::(arr, to, behavior), - DataType::UInt16 => cast_primitive_numeric::(arr, to, behavior), - DataType::UInt32 => cast_primitive_numeric::(arr, to, behavior), - DataType::UInt64 => cast_primitive_numeric::(arr, to, behavior), - DataType::UInt128 => cast_primitive_numeric::(arr, to, behavior), - DataType::Float16 => cast_primitive_numeric::(arr, to, behavior), - DataType::Float32 => cast_primitive_numeric::(arr, to, behavior), - DataType::Float64 => cast_primitive_numeric::(arr, to, behavior), + match out.datatype() { + DataType::Int8 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Int16 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Int32 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Int64 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Int128 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::UInt8 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::UInt16 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::UInt32 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::UInt64 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::UInt128 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Float16 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Float32 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Float64 => cast_primitive_numeric::(arr, sel, out, behavior), other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), } } -pub fn cast_primitive_numeric<'a, S, T>( - arr: &'a Array, - datatype: DataType, +/// Cast a primitive number to some other primitive numeric. 
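// Editorial aside: the numeric-to-numeric path below relies on `num::NumCast`,
// which yields `None` when a value cannot be represented in the target type;
// the executor then errors or writes NULL depending on `CastFailBehavior`.
// Minimal standalone illustration (uses the `num` crate already imported by
// this file):
#[test]
fn numcast_overflow_is_none() {
    use num::NumCast;
    let ok: Option<i32> = NumCast::from(123_456_789i64);
    let too_big: Option<i32> = NumCast::from(123_456_789_000_000i64);
    assert_eq!(ok, Some(123_456_789));
    assert_eq!(too_big, None);
}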
+fn cast_primitive_numeric( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: ToPrimitive, - T: NumCast + Default + Copy, - ArrayData: From>, + S1: PhysicalStorage, + S1::StorageType: ToPrimitive + Sized + Copy, + S2: MutablePhysicalStorage, + S2::StorageType: NumCast + Copy, { - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( - arr, - ArrayBuilder { - datatype, - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |v, buf| match T::from(v) { + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::(arr, sel, OutBuffer::from_array(out)?, |&v, buf| { + match NumCast::from(v) { Some(v) => buf.put(&v), - None => fail_state.set_did_fail(buf.idx), - }, - )?; + None => { + fail_state.set_error(|| RayexecError::new("Failed to cast primitive numeric")); + buf.put_null(); + } + } + })?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } pub fn cast_from_utf8( arr: &Array, - datatype: DataType, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result { - match datatype { - DataType::Boolean => cast_parse_bool(arr, behavior), - DataType::Int8 => cast_parse_primitive(arr, datatype, behavior, Int8Parser::default()), - DataType::Int16 => cast_parse_primitive(arr, datatype, behavior, Int16Parser::default()), - DataType::Int32 => cast_parse_primitive(arr, datatype, behavior, Int32Parser::default()), - DataType::Int64 => cast_parse_primitive(arr, datatype, behavior, Int64Parser::default()), - DataType::Int128 => cast_parse_primitive(arr, datatype, behavior, Int128Parser::default()), - DataType::UInt8 => cast_parse_primitive(arr, datatype, behavior, UInt8Parser::default()), - DataType::UInt16 => cast_parse_primitive(arr, datatype, behavior, UInt16Parser::default()), - DataType::UInt32 => cast_parse_primitive(arr, datatype, behavior, UInt32Parser::default()), - DataType::UInt64 => cast_parse_primitive(arr, datatype, behavior, UInt64Parser::default()), - DataType::UInt128 => { - cast_parse_primitive(arr, datatype, behavior, UInt128Parser::default()) +) -> Result<()> { + match out.datatype() { + DataType::Boolean => { + cast_parse_primitive::<_, PhysicalBool>(arr, sel, out, behavior, BoolParser) } - DataType::Float16 => { - cast_parse_primitive(arr, datatype, behavior, Float16Parser::default()) + DataType::Int8 => { + cast_parse_primitive::<_, PhysicalI8>(arr, sel, out, behavior, Int8Parser::default()) } - DataType::Float32 => { - cast_parse_primitive(arr, datatype, behavior, Float32Parser::default()) + DataType::Int16 => { + cast_parse_primitive::<_, PhysicalI16>(arr, sel, out, behavior, Int16Parser::default()) } - DataType::Float64 => { - cast_parse_primitive(arr, datatype, behavior, Float64Parser::default()) + DataType::Int32 => { + cast_parse_primitive::<_, PhysicalI32>(arr, sel, out, behavior, Int32Parser::default()) } - DataType::Decimal64(m) => cast_parse_primitive( + DataType::Int64 => { + cast_parse_primitive::<_, PhysicalI64>(arr, sel, out, behavior, Int64Parser::default()) + } + DataType::Int128 => cast_parse_primitive::<_, PhysicalI128>( + arr, + sel, + out, + behavior, + Int128Parser::default(), + ), + DataType::UInt8 => { + cast_parse_primitive::<_, PhysicalU8>(arr, sel, out, behavior, UInt8Parser::default()) + } + DataType::UInt16 => { + cast_parse_primitive::<_, PhysicalU16>(arr, sel, out, behavior, UInt16Parser::default()) + } + DataType::UInt32 => 
{ + cast_parse_primitive::<_, PhysicalU32>(arr, sel, out, behavior, UInt32Parser::default()) + } + DataType::UInt64 => { + cast_parse_primitive::<_, PhysicalU64>(arr, sel, out, behavior, UInt64Parser::default()) + } + DataType::UInt128 => cast_parse_primitive::<_, PhysicalU128>( + arr, + sel, + out, + behavior, + UInt128Parser::default(), + ), + DataType::Float16 => cast_parse_primitive::<_, PhysicalF16>( + arr, + sel, + out, + behavior, + Float16Parser::default(), + ), + DataType::Float32 => cast_parse_primitive::<_, PhysicalF32>( + arr, + sel, + out, + behavior, + Float32Parser::default(), + ), + DataType::Float64 => cast_parse_primitive::<_, PhysicalF64>( + arr, + sel, + out, + behavior, + Float64Parser::default(), + ), + DataType::Decimal64(m) => cast_parse_primitive::<_, PhysicalI64>( arr, - datatype, + sel, + out, behavior, Decimal64Parser::new(m.precision, m.scale), ), - DataType::Decimal128(m) => cast_parse_primitive( + DataType::Decimal128(m) => cast_parse_primitive::<_, PhysicalI128>( arr, - datatype, + sel, + out, behavior, Decimal128Parser::new(m.precision, m.scale), ), - DataType::Date32 => cast_parse_primitive(arr, datatype, behavior, Date32Parser), - DataType::Interval => { - cast_parse_primitive(arr, datatype, behavior, IntervalParser::default()) + DataType::Date32 => { + cast_parse_primitive::<_, PhysicalI32>(arr, sel, out, behavior, Date32Parser) } + DataType::Interval => cast_parse_primitive::<_, PhysicalInterval>( + arr, + sel, + out, + behavior, + IntervalParser::default(), + ), other => Err(RayexecError::new(format!( "Unable to cast utf8 array to {other}" ))), } } -pub fn cast_to_utf8(arr: &Array, behavior: CastFailBehavior) -> Result { +pub fn cast_to_utf8( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, + behavior: CastFailBehavior, +) -> Result<()> { match arr.datatype() { DataType::Boolean => { - cast_format::(arr, BoolFormatter::default(), behavior) + cast_format::(arr, sel, out, BoolFormatter::default(), behavior) + } + DataType::Int8 => { + cast_format::(arr, sel, out, Int8Formatter::default(), behavior) + } + DataType::Int16 => { + cast_format::(arr, sel, out, Int16Formatter::default(), behavior) + } + DataType::Int32 => { + cast_format::(arr, sel, out, Int32Formatter::default(), behavior) + } + DataType::Int64 => { + cast_format::(arr, sel, out, Int64Formatter::default(), behavior) } - DataType::Int8 => cast_format::(arr, Int8Formatter::default(), behavior), - DataType::Int16 => cast_format::(arr, Int16Formatter::default(), behavior), - DataType::Int32 => cast_format::(arr, Int32Formatter::default(), behavior), - DataType::Int64 => cast_format::(arr, Int64Formatter::default(), behavior), DataType::Int128 => { - cast_format::(arr, Int128Formatter::default(), behavior) + cast_format::(arr, sel, out, Int128Formatter::default(), behavior) + } + DataType::UInt8 => { + cast_format::(arr, sel, out, UInt8Formatter::default(), behavior) } - DataType::UInt8 => cast_format::(arr, UInt8Formatter::default(), behavior), DataType::UInt16 => { - cast_format::(arr, UInt16Formatter::default(), behavior) + cast_format::(arr, sel, out, UInt16Formatter::default(), behavior) } DataType::UInt32 => { - cast_format::(arr, UInt32Formatter::default(), behavior) + cast_format::(arr, sel, out, UInt32Formatter::default(), behavior) } DataType::UInt64 => { - cast_format::(arr, UInt64Formatter::default(), behavior) + cast_format::(arr, sel, out, UInt64Formatter::default(), behavior) } DataType::UInt128 => { - cast_format::(arr, UInt128Formatter::default(), behavior) + 
cast_format::(arr, sel, out, UInt128Formatter::default(), behavior) } DataType::Float32 => { - cast_format::(arr, Float32Formatter::default(), behavior) + cast_format::(arr, sel, out, Float32Formatter::default(), behavior) } DataType::Float64 => { - cast_format::(arr, Float64Formatter::default(), behavior) + cast_format::(arr, sel, out, Float64Formatter::default(), behavior) } DataType::Decimal64(m) => cast_format::( arr, + sel, + out, Decimal64Formatter::new(m.precision, m.scale), behavior, ), DataType::Decimal128(m) => cast_format::( arr, + sel, + out, Decimal128Formatter::new(m.precision, m.scale), behavior, ), DataType::Timestamp(m) => match m.unit { - TimeUnit::Second => { - cast_format::(arr, TimestampSecondsFormatter::default(), behavior) - } + TimeUnit::Second => cast_format::( + arr, + sel, + out, + TimestampSecondsFormatter::default(), + behavior, + ), TimeUnit::Millisecond => cast_format::( arr, + sel, + out, TimestampMillisecondsFormatter::default(), behavior, ), TimeUnit::Microsecond => cast_format::( arr, + sel, + out, TimestampMicrosecondsFormatter::default(), behavior, ), TimeUnit::Nanosecond => cast_format::( arr, + sel, + out, TimestampNanosecondsFormatter::default(), behavior, ), @@ -651,141 +772,129 @@ pub fn cast_to_utf8(arr: &Array, behavior: CastFailBehavior) -> Result { } } -fn cast_format<'a, S, F>( - arr: &'a Array, +/// Cast an array to strings by formatting values. +fn cast_format( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, mut formatter: F, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where S: PhysicalStorage, - F: Formatter>, + F: Formatter, { - let mut fail_state = behavior.new_state_for_array(arr); + let mut fail_state = behavior.new_state(); let mut string_buf = String::new(); - let output = UnaryExecutor::execute::( + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(arr.logical_len()), - }, + sel, + OutBuffer::from_array(out)?, |v, buf| { string_buf.clear(); - match formatter.write(&v, &mut string_buf) { + match formatter.write(v, &mut string_buf) { Ok(_) => buf.put(string_buf.as_str()), - Err(_) => fail_state.set_did_fail(buf.idx), + Err(_) => { + fail_state.set_error(|| RayexecError::new("Failed to cast to utf8")); + buf.put_null(); + } } }, )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } -fn cast_parse_bool(arr: &Array, behavior: CastFailBehavior) -> Result { - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( - arr, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(arr.logical_len()), - }, - |v, buf| match BoolParser.parse(v) { - Some(v) => buf.put(&v), - None => fail_state.set_did_fail(buf.idx), - }, - )?; - - fail_state.check_and_apply(arr, output) -} - -fn cast_parse_primitive( +/// Cast a utf8 array to some other primitive type by parsing string values. 
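// Editorial aside: conceptually each value goes through a `Parser` that
// returns `None` on malformed input, and the failure is then routed through
// the error state (error out, or write NULL). A self-contained sketch using
// std parsing in place of this crate's Parser types (names are illustrative):
fn parse_utf8_to_i32(values: &[&str], null_on_failure: bool) -> Result<Vec<Option<i32>>, String> {
    values
        .iter()
        .map(|s| match s.parse::<i32>() {
            Ok(v) => Ok(Some(v)),
            Err(_) if null_on_failure => Ok(None),
            Err(_) => Err(format!("failed to parse '{s}' as i32")),
        })
        .collect()
}
// parse_utf8_to_i32(&["13", "18", "123456789000000"], true)
//   == Ok(vec![Some(13), Some(18), None])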
+fn cast_parse_primitive( arr: &Array, - datatype: DataType, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, mut parser: P, -) -> Result +) -> Result<()> where - T: Default + Copy, - P: Parser, - ArrayData: From>, + S::StorageType: Sized, + P: Parser, + S: MutablePhysicalStorage, { - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: datatype.clone(), - buffer: PrimitiveBuffer::::with_len(arr.logical_len()), - }, + sel, + OutBuffer::from_array(out)?, |v, buf| match parser.parse(v) { Some(v) => buf.put(&v), - None => fail_state.set_did_fail(buf.idx), + None => { + fail_state.set_error(|| RayexecError::new("Failed to parse value from utf8")); + buf.put_null(); + } }, )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; use crate::arrays::datatype::DecimalTypeMeta; - use crate::arrays::scalar::ScalarValue; + use crate::arrays::testutil::assert_arrays_eq; #[test] fn array_cast_utf8_to_i32() { - let arr = Array::from_iter(["13", "18", "123456789"]); + let mut arr = Array::try_from_iter(["13", "18", "123456789"]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); - let got = cast_array(&arr, DataType::Int32, CastFailBehavior::Error).unwrap(); + cast_array(&mut arr, 0..3, &mut out, CastFailBehavior::Error).unwrap(); - assert_eq!(ScalarValue::from(13), got.logical_value(0).unwrap()); - assert_eq!(ScalarValue::from(18), got.logical_value(1).unwrap()); - assert_eq!(ScalarValue::from(123456789), got.logical_value(2).unwrap()); + let expected = Array::try_from_iter([13, 18, 123456789]).unwrap(); + assert_arrays_eq(&expected, &out); } #[test] fn array_cast_utf8_to_i32_overflow_error() { - let arr = Array::from_iter(["13", "18", "123456789000000"]); - cast_array(&arr, DataType::Int32, CastFailBehavior::Error).unwrap_err(); + let mut arr = Array::try_from_iter(["13", "18", "123456789000000"]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + cast_array(&mut arr, 0..3, &mut out, CastFailBehavior::Error).unwrap_err(); } #[test] fn array_cast_utf8_to_i32_overflow_null() { - let arr = Array::from_iter(["13", "18", "123456789000000"]); + let mut arr = Array::try_from_iter(["13", "18", "123456789000000"]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); - let got = cast_array(&arr, DataType::Int32, CastFailBehavior::Null).unwrap(); + cast_array(&mut arr, 0..3, &mut out, CastFailBehavior::Null).unwrap(); - assert_eq!(ScalarValue::from(13), got.logical_value(0).unwrap()); - assert_eq!(ScalarValue::from(18), got.logical_value(1).unwrap()); - assert_eq!(ScalarValue::Null, got.logical_value(2).unwrap()); + let expected = Array::try_from_iter([Some(13), Some(18), None]).unwrap(); + assert_arrays_eq(&expected, &out); } #[test] fn array_cast_null_to_f32() { - let arr = Array::new_untyped_null_array(3); + let mut arr = Array::new(&NopBufferManager, DataType::Null, 3).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Float32, 3).unwrap(); - let got = cast_array(&arr, DataType::Float32, CastFailBehavior::Error).unwrap(); + cast_array(&mut arr, 0..3, &mut out, CastFailBehavior::Error).unwrap(); - assert_eq!(&DataType::Float32, got.datatype()); + let expected = Array::try_from_iter([None as Option, None, 
None]).unwrap(); - assert_eq!(ScalarValue::Null, got.logical_value(0).unwrap()); - assert_eq!(ScalarValue::Null, got.logical_value(1).unwrap()); - assert_eq!(ScalarValue::Null, got.logical_value(2).unwrap()); + assert_arrays_eq(&expected, &out); } #[test] fn array_cast_decimal64_to_f64() { - let arr = Array::new_with_array_data( - DataType::Decimal64(DecimalTypeMeta { - precision: 10, - scale: 3, - }), - PrimitiveStorage::from(vec![1500_i64, 2000_i64, 2500_i64]), - ); - - let got = cast_array(&arr, DataType::Float64, CastFailBehavior::Error).unwrap(); - - assert_eq!(ScalarValue::Float64(1.5), got.logical_value(0).unwrap()); - assert_eq!(ScalarValue::Float64(2.0), got.logical_value(1).unwrap()); - assert_eq!(ScalarValue::Float64(2.5), got.logical_value(2).unwrap()); + let mut arr = Array::try_from_iter([1500_i64, 2000, 2500]).unwrap(); + // '[1.500, 2.000, 2.500]' + arr.datatype = DataType::Decimal64(DecimalTypeMeta::new(10, 3)); + + let mut out = Array::new(&NopBufferManager, DataType::Float64, 3).unwrap(); + cast_array(&mut arr, 0..3, &mut out, CastFailBehavior::Error).unwrap(); + + let expected = Array::try_from_iter([1.5_f64, 2.0, 2.5]).unwrap(); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs b/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs index f81ca0e32..1de04215d 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs @@ -1,7 +1,5 @@ use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; - /// Behavior when a cast fail due to under/overflow. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CastFailBehavior { @@ -12,84 +10,38 @@ pub enum CastFailBehavior { } impl CastFailBehavior { - pub(crate) fn new_state_for_array(&self, _arr: &Array) -> CastFailState { - match self { - CastFailBehavior::Error => CastFailState::TrackOneAndError(None), - CastFailBehavior::Null => CastFailState::TrackManyAndInvalidate(Vec::new()), + pub(crate) fn new_state(&self) -> CastErrorState { + CastErrorState { + behavior: *self, + error: None, } } } #[derive(Debug)] -pub struct ErrorIndex { - /// Row index that we failed on. - pub idx: usize, - /// Optional error we can use instead of the generic "failed to cast" error. - pub error: Option, -} - -/// State used to track failures casting. -#[derive(Debug)] -pub(crate) enum CastFailState { - /// Keep the row index of the first failure. - TrackOneAndError(Option), - /// Track all failures during casting. - TrackManyAndInvalidate(Vec), +pub struct CastErrorState { + behavior: CastFailBehavior, + error: Option, } -impl CastFailState { - pub(crate) fn set_did_fail(&mut self, idx: usize) { - match self { - Self::TrackOneAndError(maybe_idx) => { - if maybe_idx.is_none() { - *maybe_idx = Some(ErrorIndex { idx, error: None }); - } - } - Self::TrackManyAndInvalidate(indices) => indices.push(idx), - } - } - - pub(crate) fn set_did_fail_with_error(&mut self, idx: usize, error: RayexecError) { - match self { - Self::TrackOneAndError(maybe_idx) => { - if maybe_idx.is_none() { - *maybe_idx = Some(ErrorIndex { - idx, - error: Some(error), - }) - } - } - Self::TrackManyAndInvalidate(indices) => indices.push(idx), // Error ignored, we're replacing with null. +impl CastErrorState { + /// Set the error from a function. + /// + /// If the cast behavior is use NULL on failure, then `error_fn` is not + /// called. 
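// Editorial aside: the pattern here is "remember only the first error, and
// build it lazily so the NULL path pays nothing". A dependency-free sketch of
// the same idea (types renamed; `String` stands in for the error type):
struct FirstError {
    record_errors: bool, // corresponds to the Error variant of the behavior enum
    error: Option<String>,
}

impl FirstError {
    fn set_error<F: FnOnce() -> String>(&mut self, error_fn: F) {
        // Only the first failure is kept, and `error_fn` never runs when the
        // behavior is "replace failures with NULL".
        if self.record_errors && self.error.is_none() {
            self.error = Some(error_fn());
        }
    }

    fn into_result(self) -> Result<(), String> {
        match self.error {
            Some(err) => Err(err),
            None => Ok(()),
        }
    }
}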
+ pub fn set_error(&mut self, error_fn: F) + where + F: FnOnce() -> RayexecError, + { + if self.behavior == CastFailBehavior::Error && self.error.is_none() { + self.error = Some(error_fn()) } } - pub(crate) fn check_and_apply(self, original: &Array, mut output: Array) -> Result { - match self { - Self::TrackOneAndError(None) => Ok(output), - Self::TrackOneAndError(Some(error_idx)) => { - let scalar = original.logical_value(error_idx.idx)?; - match error_idx.error { - Some(error) => Err(RayexecError::with_source( - format!("Failed to cast '{scalar}' to {}", output.datatype()), - Box::new(error), - )), - None => Err(RayexecError::new(format!( - "Failed to cast '{scalar}' to {}", - output.datatype() - ))), - } - } - Self::TrackManyAndInvalidate(indices) => { - if indices.is_empty() { - Ok(output) - } else { - // Apply the nulls. - for idx in indices { - output.set_physical_validity(idx, false); - } - Ok(output) - } - } + pub fn into_result(self) -> Result<()> { + match self.error { + Some(err) => Err(err), + None => Ok(()), } } } diff --git a/crates/rayexec_execution/src/arrays/compute/date.rs b/crates/rayexec_execution/src/arrays/compute/date.rs index 7e02e1498..7f1a2cc4c 100644 --- a/crates/rayexec_execution/src/arrays/compute/date.rs +++ b/crates/rayexec_execution/src/arrays/compute/date.rs @@ -1,11 +1,12 @@ use chrono::{DateTime, Datelike, NaiveDate, Timelike, Utc}; +use stdutil::iter::IntoExactSizeIterator; use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::Array; -use crate::arrays::datatype::{DataType, DecimalTypeMeta, TimeUnit}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI32, PhysicalI64}; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::{PhysicalI32, PhysicalI64}; +use crate::arrays::datatype::{DataType, TimeUnit}; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::scalar::decimal::{Decimal64Type, DecimalType}; pub const EPOCH_NAIVE_DATE: NaiveDate = match NaiveDate::from_ymd_opt(1970, 1, 1) { @@ -69,48 +70,63 @@ pub enum DatePart { /// /// The results should be decimal representing the part extracted, and should /// use the Decimal64 default precision and scale. 
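// Editorial aside: a hedged sketch of the Date32 path below. A Date32 value is
// a day count since the Unix epoch, so extraction converts it to a chrono
// DateTime and reads the requested component; the executor then writes that
// value into a Decimal64 output array. The function name is illustrative.
fn year_from_date32(days_since_epoch: i32) -> i64 {
    use chrono::{DateTime, Datelike};
    const SECONDS_IN_DAY: i64 = 86_400;
    let date = DateTime::from_timestamp(days_since_epoch as i64 * SECONDS_IN_DAY, 0)
        .unwrap_or_default();
    date.year() as i64
}
// year_from_date32(0) == 1970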
-pub fn extract_date_part(part: DatePart, arr: &Array) -> Result { +pub fn extract_date_part( + part: DatePart, + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, +) -> Result<()> { let datatype = arr.datatype(); match datatype { DataType::Date32 => match part { - DatePart::Microseconds => date32_extract_with_fn(arr, extract_microseconds), - DatePart::Milliseconds => date32_extract_with_fn(arr, extract_milliseconds), - DatePart::Second => date32_extract_with_fn(arr, extract_seconds), - DatePart::Minute => date32_extract_with_fn(arr, extract_minute), - DatePart::DayOfWeek => date32_extract_with_fn(arr, extract_day_of_week), - DatePart::IsoDayOfWeek => date32_extract_with_fn(arr, extract_iso_day_of_week), - DatePart::Day => date32_extract_with_fn(arr, extract_day), - DatePart::Month => date32_extract_with_fn(arr, extract_month), - DatePart::Quarter => date32_extract_with_fn(arr, extract_quarter), - DatePart::Year => date32_extract_with_fn(arr, extract_year), + DatePart::Microseconds => date32_extract_with_fn(arr, sel, extract_microseconds, out), + DatePart::Milliseconds => date32_extract_with_fn(arr, sel, extract_milliseconds, out), + DatePart::Second => date32_extract_with_fn(arr, sel, extract_seconds, out), + DatePart::Minute => date32_extract_with_fn(arr, sel, extract_minute, out), + DatePart::DayOfWeek => date32_extract_with_fn(arr, sel, extract_day_of_week, out), + DatePart::IsoDayOfWeek => { + date32_extract_with_fn(arr, sel, extract_iso_day_of_week, out) + } + DatePart::Day => date32_extract_with_fn(arr, sel, extract_day, out), + DatePart::Month => date32_extract_with_fn(arr, sel, extract_month, out), + DatePart::Quarter => date32_extract_with_fn(arr, sel, extract_quarter, out), + DatePart::Year => date32_extract_with_fn(arr, sel, extract_year, out), other => not_implemented!("Extract {other:?} from {datatype}"), }, DataType::Date64 => match part { - DatePart::Microseconds => date64_extract_with_fn(arr, extract_microseconds), - DatePart::Milliseconds => date64_extract_with_fn(arr, extract_milliseconds), - DatePart::Second => date64_extract_with_fn(arr, extract_seconds), - DatePart::Minute => date64_extract_with_fn(arr, extract_minute), - DatePart::DayOfWeek => date64_extract_with_fn(arr, extract_day_of_week), - DatePart::IsoDayOfWeek => date64_extract_with_fn(arr, extract_iso_day_of_week), - DatePart::Day => date64_extract_with_fn(arr, extract_day), - DatePart::Month => date64_extract_with_fn(arr, extract_month), - DatePart::Quarter => date64_extract_with_fn(arr, extract_quarter), - DatePart::Year => date64_extract_with_fn(arr, extract_year), + DatePart::Microseconds => date64_extract_with_fn(arr, sel, extract_microseconds, out), + DatePart::Milliseconds => date64_extract_with_fn(arr, sel, extract_milliseconds, out), + DatePart::Second => date64_extract_with_fn(arr, sel, extract_seconds, out), + DatePart::Minute => date64_extract_with_fn(arr, sel, extract_minute, out), + DatePart::DayOfWeek => date64_extract_with_fn(arr, sel, extract_day_of_week, out), + DatePart::IsoDayOfWeek => { + date64_extract_with_fn(arr, sel, extract_iso_day_of_week, out) + } + DatePart::Day => date64_extract_with_fn(arr, sel, extract_day, out), + DatePart::Month => date64_extract_with_fn(arr, sel, extract_month, out), + DatePart::Quarter => date64_extract_with_fn(arr, sel, extract_quarter, out), + DatePart::Year => date64_extract_with_fn(arr, sel, extract_year, out), other => not_implemented!("Extract {other:?} from {datatype}"), }, DataType::Timestamp(m) => match part { - 
DatePart::Microseconds => timestamp_extract_with_fn(m.unit, arr, extract_microseconds), - DatePart::Milliseconds => timestamp_extract_with_fn(m.unit, arr, extract_milliseconds), - DatePart::Second => timestamp_extract_with_fn(m.unit, arr, extract_seconds), - DatePart::Minute => timestamp_extract_with_fn(m.unit, arr, extract_minute), - DatePart::DayOfWeek => timestamp_extract_with_fn(m.unit, arr, extract_day_of_week), + DatePart::Microseconds => { + timestamp_extract_with_fn(m.unit, arr, sel, extract_microseconds, out) + } + DatePart::Milliseconds => { + timestamp_extract_with_fn(m.unit, arr, sel, extract_milliseconds, out) + } + DatePart::Second => timestamp_extract_with_fn(m.unit, arr, sel, extract_seconds, out), + DatePart::Minute => timestamp_extract_with_fn(m.unit, arr, sel, extract_minute, out), + DatePart::DayOfWeek => { + timestamp_extract_with_fn(m.unit, arr, sel, extract_day_of_week, out) + } DatePart::IsoDayOfWeek => { - timestamp_extract_with_fn(m.unit, arr, extract_iso_day_of_week) + timestamp_extract_with_fn(m.unit, arr, sel, extract_iso_day_of_week, out) } - DatePart::Day => timestamp_extract_with_fn(m.unit, arr, extract_day), - DatePart::Month => timestamp_extract_with_fn(m.unit, arr, extract_month), - DatePart::Quarter => timestamp_extract_with_fn(m.unit, arr, extract_quarter), - DatePart::Year => timestamp_extract_with_fn(m.unit, arr, extract_year), + DatePart::Day => timestamp_extract_with_fn(m.unit, arr, sel, extract_day, out), + DatePart::Month => timestamp_extract_with_fn(m.unit, arr, sel, extract_month, out), + DatePart::Quarter => timestamp_extract_with_fn(m.unit, arr, sel, extract_quarter, out), + DatePart::Year => timestamp_extract_with_fn(m.unit, arr, sel, extract_year, out), other => not_implemented!("Extract {other:?} from {datatype}"), }, other => Err(RayexecError::new(format!( @@ -119,65 +135,84 @@ pub fn extract_date_part(part: DatePart, arr: &Array) -> Result { } } -fn timestamp_extract_with_fn(unit: TimeUnit, arr: &Array, f: F) -> Result +fn timestamp_extract_with_fn( + unit: TimeUnit, + arr: &Array, + sel: impl IntoExactSizeIterator, + f: F, + out: &mut Array, +) -> Result<()> where F: Fn(DateTime) -> i64, { match unit { - TimeUnit::Second => timestamp_extract_with_fn_and_datetime_builder(arr, f, |val| { - DateTime::from_timestamp(val, 0).unwrap_or_default() - }), - TimeUnit::Millisecond => timestamp_extract_with_fn_and_datetime_builder(arr, f, |val| { - DateTime::from_timestamp_millis(val).unwrap_or_default() - }), - TimeUnit::Microsecond => timestamp_extract_with_fn_and_datetime_builder(arr, f, |val| { - DateTime::from_timestamp_micros(val).unwrap_or_default() - }), - TimeUnit::Nanosecond => timestamp_extract_with_fn_and_datetime_builder(arr, f, |val| { - DateTime::from_timestamp_nanos(val) - }), + TimeUnit::Second => timestamp_extract_with_fn_and_datetime_builder( + arr, + sel, + f, + |val| DateTime::from_timestamp(val, 0).unwrap_or_default(), + out, + ), + TimeUnit::Millisecond => timestamp_extract_with_fn_and_datetime_builder( + arr, + sel, + f, + |val| DateTime::from_timestamp_millis(val).unwrap_or_default(), + out, + ), + TimeUnit::Microsecond => timestamp_extract_with_fn_and_datetime_builder( + arr, + sel, + f, + |val| DateTime::from_timestamp_micros(val).unwrap_or_default(), + out, + ), + TimeUnit::Nanosecond => timestamp_extract_with_fn_and_datetime_builder( + arr, + sel, + f, + DateTime::from_timestamp_nanos, + out, + ), } } fn timestamp_extract_with_fn_and_datetime_builder( arr: &Array, + sel: impl IntoExactSizeIterator, f: F, builder: B, -) -> 
Result + out: &mut Array, +) -> Result<()> where B: Fn(i64) -> DateTime, F: Fn(DateTime) -> i64, { - UnaryExecutor::execute::( + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: DataType::Decimal64(DecimalTypeMeta { - precision: Decimal64Type::MAX_PRECISION, - scale: Decimal64Type::DEFAULT_SCALE, - }), - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |val, buf| { + sel, + OutBuffer::from_array(out)?, + |&val, buf| { let date = builder(val); buf.put(&f(date)) }, ) } -fn date32_extract_with_fn(arr: &Array, f: F) -> Result +fn date32_extract_with_fn( + arr: &Array, + sel: impl IntoExactSizeIterator, + f: F, + out: &mut Array, +) -> Result<()> where F: Fn(DateTime) -> i64, { - UnaryExecutor::execute::( + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: DataType::Decimal64(DecimalTypeMeta { - precision: Decimal64Type::MAX_PRECISION, - scale: Decimal64Type::DEFAULT_SCALE, - }), - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |val, buf| { + sel, + OutBuffer::from_array(out)?, + |&val, buf| { // TODO: Can this actually fail? let date = DateTime::from_timestamp(val as i64 * SECONDS_IN_DAY, 0).unwrap_or_default(); buf.put(&f(date)) @@ -185,20 +220,20 @@ where ) } -fn date64_extract_with_fn(arr: &Array, f: F) -> Result +fn date64_extract_with_fn( + arr: &Array, + sel: impl IntoExactSizeIterator, + f: F, + out: &mut Array, +) -> Result<()> where F: Fn(DateTime) -> i64, { - UnaryExecutor::execute::( + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: DataType::Decimal64(DecimalTypeMeta { - precision: Decimal64Type::MAX_PRECISION, - scale: Decimal64Type::DEFAULT_SCALE, - }), - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |val, buf| { + sel, + OutBuffer::from_array(out)?, + |&val, buf| { // TODO: Can this actually fail? 
let date = DateTime::from_timestamp_millis(val).unwrap_or_default(); buf.put(&f(date)) diff --git a/crates/rayexec_execution/src/arrays/datatype.rs b/crates/rayexec_execution/src/arrays/datatype.rs index b6ba28fc3..a05cd0843 100644 --- a/crates/rayexec_execution/src/arrays/datatype.rs +++ b/crates/rayexec_execution/src/arrays/datatype.rs @@ -4,7 +4,8 @@ use rayexec_error::{not_implemented, OptionExt, RayexecError, Result, ResultExt} use rayexec_proto::ProtoConv; use serde::{Deserialize, Serialize}; -use crate::arrays::executor::physical_type::PhysicalType; +use super::buffer::physical_type::PhysicalType; +use crate::arrays::executor::physical_type::PhysicalType2; use crate::arrays::field::Field; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; @@ -445,8 +446,8 @@ impl DataType { } } - pub fn physical_type(&self) -> Result { - Ok(match self { + pub fn physical_type(&self) -> PhysicalType { + match self { DataType::Null => PhysicalType::UntypedNull, DataType::Boolean => PhysicalType::Boolean, DataType::Int8 => PhysicalType::Int8, @@ -470,8 +471,38 @@ impl DataType { DataType::Interval => PhysicalType::Interval, DataType::Utf8 => PhysicalType::Utf8, DataType::Binary => PhysicalType::Binary, - DataType::Struct(_) => not_implemented!("struct data type to physical type"), + DataType::Struct(_) => PhysicalType::Struct, DataType::List(_) => PhysicalType::List, + } + } + + pub fn physical_type2(&self) -> Result { + Ok(match self { + DataType::Null => PhysicalType2::UntypedNull, + DataType::Boolean => PhysicalType2::Boolean, + DataType::Int8 => PhysicalType2::Int8, + DataType::Int16 => PhysicalType2::Int16, + DataType::Int32 => PhysicalType2::Int32, + DataType::Int64 => PhysicalType2::Int64, + DataType::Int128 => PhysicalType2::Int128, + DataType::UInt8 => PhysicalType2::UInt8, + DataType::UInt16 => PhysicalType2::UInt16, + DataType::UInt32 => PhysicalType2::UInt32, + DataType::UInt64 => PhysicalType2::UInt64, + DataType::UInt128 => PhysicalType2::UInt128, + DataType::Float16 => PhysicalType2::Float16, + DataType::Float32 => PhysicalType2::Float32, + DataType::Float64 => PhysicalType2::Float64, + DataType::Decimal64(_) => PhysicalType2::Int64, + DataType::Decimal128(_) => PhysicalType2::Int128, + DataType::Timestamp(_) => PhysicalType2::Int64, + DataType::Date32 => PhysicalType2::Int32, + DataType::Date64 => PhysicalType2::Int64, + DataType::Interval => PhysicalType2::Interval, + DataType::Utf8 => PhysicalType2::Utf8, + DataType::Binary => PhysicalType2::Binary, + DataType::Struct(_) => not_implemented!("struct data type to physical type"), + DataType::List(_) => PhysicalType2::List, }) } diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs index 26e2f30bd..5c1b20fea 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs @@ -1,28 +1,28 @@ use rayexec_error::{RayexecError, Result}; -use super::{AggregateState, RowToStateMapping}; -use crate::arrays::array::Array; -use crate::arrays::executor::physical_type::PhysicalStorage; +use super::{AggregateState2, RowToStateMapping}; +use crate::arrays::array::Array2; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::check_validity; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; /// Updates aggregate states for an aggregate that accepts two inputs. 
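// Editorial aside: a dependency-free sketch of the state pattern this module
// is built around, for an aggregate that consumes two inputs per row (the
// struct and method names below are simplified stand-ins, not this crate's
// trait):
#[derive(Debug, Default)]
struct SumOfProducts {
    sum: i64,
}

impl SumOfProducts {
    // Fold one (a, b) pair into the running state; NULL rows are simply never
    // passed in, which is what the "NonNull" updaters guarantee.
    fn update(&mut self, input: (i64, i64)) {
        self.sum += input.0 * input.1;
    }

    // Merge a state produced by another partition into this one.
    fn merge(&mut self, other: &mut Self) {
        self.sum += other.sum;
    }

    fn finalize(self) -> i64 {
        self.sum
    }
}

#[test]
fn sum_of_products_sketch() {
    let mut a = SumOfProducts::default();
    a.update((2, 3));
    let mut b = SumOfProducts::default();
    b.update((4, 5));
    a.merge(&mut b);
    assert_eq!(a.finalize(), 26);
}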
#[derive(Debug, Clone, Copy)] -pub struct BinaryNonNullUpdater; +pub struct BinaryNonNullUpdater2; -impl BinaryNonNullUpdater { +impl BinaryNonNullUpdater2 { pub fn update<'a, S1, S2, I, State, Output>( - array1: &'a Array, - array2: &'a Array, + array1: &'a Array2, + array2: &'a Array2, mapping: I, states: &mut [State], ) -> Result<()> where - S1: PhysicalStorage, - S2: PhysicalStorage, + S1: PhysicalStorage2, + S2: PhysicalStorage2, I: IntoIterator, - State: AggregateState<(S1::Type<'a>, S2::Type<'a>), Output>, + State: AggregateState2<(S1::Type<'a>, S2::Type<'a>), Output>, { if array1.logical_len() != array2.logical_len() { return Err(RayexecError::new(format!( @@ -77,7 +77,7 @@ impl BinaryNonNullUpdater { #[cfg(test)] mod tests { use super::*; - use crate::arrays::executor::physical_type::PhysicalI32; + use crate::arrays::executor::physical_type::PhysicalI32_2; // SUM(col) + PRODUCT(col) #[derive(Debug)] @@ -92,7 +92,7 @@ mod tests { } } - impl AggregateState<(i32, i32), i32> for TestAddSumAndProductState { + impl AggregateState2<(i32, i32), i32> for TestAddSumAndProductState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.sum += other.sum; self.product *= other.product; @@ -113,8 +113,8 @@ mod tests { #[test] fn binary_primitive_single_state() { let mut states = [TestAddSumAndProductState::default()]; - let array1 = Array::from_iter([1, 2, 3, 4, 5]); - let array2 = Array::from_iter([6, 7, 8, 9, 10]); + let array1 = Array2::from_iter([1, 2, 3, 4, 5]); + let array2 = Array2::from_iter([6, 7, 8, 9, 10]); let mapping = [ RowToStateMapping { @@ -131,7 +131,7 @@ mod tests { }, ]; - BinaryNonNullUpdater::update::( + BinaryNonNullUpdater2::update::( &array1, &array2, mapping, diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs index cfacfefd9..f8e399a04 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs @@ -11,14 +11,14 @@ use rayexec_error::Result; pub use unary::*; use super::builder::{ArrayBuilder, ArrayDataBuffer}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; /// State for a single group's aggregate. /// /// An example state for SUM would be a struct that takes a running sum from /// values provided in `update`. -pub trait AggregateState: Debug { +pub trait AggregateState2: Debug { /// Merge other state into this state. fn merge(&mut self, other: &mut Self) -> Result<()>; @@ -39,9 +39,9 @@ pub struct RowToStateMapping { } #[derive(Debug, Clone, Copy)] -pub struct StateCombiner; +pub struct StateCombiner2; -impl StateCombiner { +impl StateCombiner2 { /// Combine states, merging states from `consume` into `targets`. /// /// `mapping` provides a mapping of consume states to the target index. 
The @@ -53,7 +53,7 @@ impl StateCombiner { targets: &mut [State], ) -> Result<()> where - State: AggregateState, + State: AggregateState2, { for mapping in mapping { let target = &mut targets[mapping.to_state]; @@ -72,12 +72,12 @@ impl StateFinalizer { pub fn finalize<'a, State, I, B, Input, Output>( states: I, mut builder: ArrayBuilder, - ) -> Result + ) -> Result where B: ArrayDataBuffer, I: IntoIterator, I::IntoIter: ExactSizeIterator, - State: AggregateState + 'a, + State: AggregateState2 + 'a, Output: Borrow, { let states = states.into_iter(); @@ -99,7 +99,7 @@ impl StateFinalizer { Some(validities.into()) }; - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: validities, diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs index 512f4e1e2..d94e56fec 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs @@ -1,25 +1,25 @@ use rayexec_error::Result; -use super::{AggregateState, RowToStateMapping}; -use crate::arrays::array::Array; -use crate::arrays::executor::physical_type::PhysicalStorage; +use super::{AggregateState2, RowToStateMapping}; +use crate::arrays::array::Array2; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; /// Updates aggregate states for an aggregate that accepts one input. #[derive(Debug, Clone, Copy)] -pub struct UnaryNonNullUpdater; +pub struct UnaryNonNullUpdater2; -impl UnaryNonNullUpdater { +impl UnaryNonNullUpdater2 { pub fn update<'a, S, I, State, Output>( - array: &'a Array, + array: &'a Array2, mapping: I, states: &mut [State], ) -> Result<()> where - S: PhysicalStorage, + S: PhysicalStorage2, I: IntoIterator, - State: AggregateState, Output>, + State: AggregateState2, Output>, { let selection = array.selection_vector(); @@ -60,14 +60,14 @@ impl UnaryNonNullUpdater { #[cfg(test)] mod tests { use super::*; - use crate::arrays::executor::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::executor::physical_type::{PhysicalI32_2, PhysicalUtf8_2}; #[derive(Debug, Default)] struct TestSumState { val: i32, } - impl AggregateState for TestSumState { + impl AggregateState2 for TestSumState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.val += other.val; Ok(()) @@ -86,7 +86,7 @@ mod tests { #[test] fn unary_primitive_single_state() { let mut states = [TestSumState::default()]; - let array = Array::from_iter([1, 2, 3, 4, 5]); + let array = Array2::from_iter([1, 2, 3, 4, 5]); let mapping = [ RowToStateMapping { from_row: 1, @@ -102,7 +102,8 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states).unwrap(); + UnaryNonNullUpdater2::update::(&array, mapping, &mut states) + .unwrap(); assert_eq!(11, states[0].val); } @@ -110,7 +111,7 @@ mod tests { #[test] fn unary_primitive_single_state_skip_null() { let mut states = [TestSumState::default()]; - let array = Array::from_iter([Some(1), Some(2), Some(3), None, Some(5)]); + let array = Array2::from_iter([Some(1), Some(2), Some(3), None, Some(5)]); let mapping = [ RowToStateMapping { from_row: 1, @@ -126,7 +127,8 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states).unwrap(); + UnaryNonNullUpdater2::update::(&array, mapping, &mut states) + .unwrap(); assert_eq!(7, states[0].val); } @@ -134,7 +136,7 @@ mod tests { #[test] fn 
unary_primitive_multiple_state() { let mut states = [TestSumState::default(), TestSumState::default()]; - let array = Array::from_iter([1, 2, 3, 4, 5]); + let array = Array2::from_iter([1, 2, 3, 4, 5]); let mapping = [ RowToStateMapping { from_row: 1, @@ -154,7 +156,8 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states).unwrap(); + UnaryNonNullUpdater2::update::(&array, mapping, &mut states) + .unwrap(); assert_eq!(5, states[0].val); assert_eq!(7, states[1].val); @@ -165,7 +168,7 @@ mod tests { buf: String, } - impl AggregateState<&str, String> for TestStringAgg { + impl AggregateState2<&str, String> for TestStringAgg { fn merge(&mut self, other: &mut Self) -> Result<()> { self.buf.push_str(&other.buf); Ok(()) @@ -185,7 +188,7 @@ mod tests { fn unary_str_single_state() { // Test just checks to ensure working with varlen is sane. let mut states = [TestStringAgg::default()]; - let array = Array::from_iter(["aa", "bbb", "cccc"]); + let array = Array2::from_iter(["aa", "bbb", "cccc"]); let mapping = [ RowToStateMapping { from_row: 0, @@ -201,7 +204,8 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states).unwrap(); + UnaryNonNullUpdater2::update::(&array, mapping, &mut states) + .unwrap(); assert_eq!("aabbbcccc", &states[0].buf); } diff --git a/crates/rayexec_execution/src/arrays/executor/builder.rs b/crates/rayexec_execution/src/arrays/executor/builder.rs index a634dc552..57c46a2f7 100644 --- a/crates/rayexec_execution/src/arrays/executor/builder.rs +++ b/crates/rayexec_execution/src/arrays/executor/builder.rs @@ -2,7 +2,7 @@ use std::marker::PhantomData; use std::sync::Arc; use super::physical_type::{AsBytes, VarlenType}; -use crate::arrays::array::{ArrayData, BinaryData}; +use crate::arrays::array::{ArrayData2, BinaryData}; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use crate::arrays::storage::{ @@ -59,7 +59,7 @@ pub trait ArrayDataBuffer { fn put(&mut self, idx: usize, val: &Self::Type); /// Convert the buffer into array data. 
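// A short sketch of the flow this trait describes, using the `PrimitiveBuffer`
// builder exercised by the tests in this diff: fill a fixed-length buffer, then
// hand it off as `ArrayData2`.
let mut buffer = PrimitiveBuffer::<i32>::with_len(3);
buffer.put(0, &10);
buffer.put(1, &20);
buffer.put(2, &30);
let data: ArrayData2 = buffer.into_data(); // becomes ArrayData2::Int32(...)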
- fn into_data(self) -> ArrayData; + fn into_data(self) -> ArrayData2; } #[derive(Debug)] @@ -99,8 +99,8 @@ impl ArrayDataBuffer for BooleanBuffer { self.values.set_unchecked(idx, *val) } - fn into_data(self) -> ArrayData { - ArrayData::Boolean(Arc::new(BooleanStorage(self.values))) + fn into_data(self) -> ArrayData2 { + ArrayData2::Boolean(Arc::new(BooleanStorage(self.values))) } } @@ -125,7 +125,7 @@ impl ArrayDataBuffer for PrimitiveBuffer where T: Copy, Vec: Into>, - ArrayData: From>, + ArrayData2: From>, { type Type = T; @@ -137,7 +137,7 @@ where self.values[idx] = *val } - fn into_data(self) -> ArrayData { + fn into_data(self) -> ArrayData2 { PrimitiveStorage::from(self.values).into() } } @@ -241,13 +241,13 @@ where } } - fn into_data(self) -> ArrayData { + fn into_data(self) -> ArrayData2 { let storage = GermanVarlenStorage { metadata: self.metadata.into(), data: self.data.into(), }; - ArrayData::Binary(BinaryData::German(Arc::new(storage))) + ArrayData2::Binary(BinaryData::German(Arc::new(storage))) } } diff --git a/crates/rayexec_execution/src/arrays/executor/physical_type.rs b/crates/rayexec_execution/src/arrays/executor/physical_type.rs index 516b9831f..c25a05926 100644 --- a/crates/rayexec_execution/src/arrays/executor/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/executor/physical_type.rs @@ -5,7 +5,7 @@ use rayexec_error::{RayexecError, Result, ResultExt}; use rayexec_proto::ProtoConv; use super::builder::{ArrayDataBuffer, BooleanBuffer, GermanVarlenBuffer, PrimitiveBuffer}; -use crate::arrays::array::{Array, ArrayData, BinaryData}; +use crate::arrays::array::{Array2, ArrayData2, BinaryData}; use crate::arrays::scalar::interval::Interval; use crate::arrays::storage::{ AddressableStorage, @@ -20,7 +20,7 @@ use crate::arrays::storage::{ }; #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum PhysicalType { +pub enum PhysicalType2 { UntypedNull, Boolean, Int8, @@ -42,8 +42,8 @@ pub enum PhysicalType { List, } -impl PhysicalType { - pub fn zeroed_array_data(&self, len: usize) -> ArrayData { +impl PhysicalType2 { + pub fn zeroed_array_data(&self, len: usize) -> ArrayData2 { match self { Self::UntypedNull => UntypedNullStorage(len).into(), Self::Boolean => BooleanBuffer::with_len(len).into_data(), @@ -65,14 +65,14 @@ impl PhysicalType { Self::Utf8 => GermanVarlenBuffer::::with_len(len).into_data(), Self::List => ListStorage { metadata: vec![ListItemMetadata::default(); len].into(), - array: Array::new_untyped_null_array(0), + array: Array2::new_untyped_null_array(0), } .into(), } } } -impl ProtoConv for PhysicalType { +impl ProtoConv for PhysicalType2 { type ProtoType = rayexec_proto::generated::physical_type::PhysicalType; fn to_proto(&self) -> Result { @@ -177,14 +177,19 @@ impl VarlenType for [u8] { /// /// Contains a lifetime to enable tying the returned storage to the provided /// array data. -pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { +pub trait PhysicalStorage2: Debug + Sync + Send + Clone + Copy + 'static { + // /// The type that's stored in the primary buffer. + // /// + // /// This should be small and fixed sized. + // type PrimaryBufferType: Sized + Debug + Default + Sync + Send + Clone + Copy; + /// The type that gets returned from the underlying array storage. type Type<'a>: Sync + Send; /// The type of the underlying array storage. type Storage<'a>: AddressableStorage>; /// Gets the storage for the array that we can access directly. 
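// Hedged sketch of the accessor described above: one of the `*_2` marker types
// defined below pulls its typed storage out of an `ArrayData2`, erroring on a
// variant mismatch.
let array = Array2::from_iter([1i32, 2, 3]);
let storage = PhysicalI32_2::get_storage(array.array_data()).unwrap();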
- fn get_storage(data: &ArrayData) -> Result>; + fn get_storage(data: &ArrayData2) -> Result>; } /// Type that's able to be used for any physical type. @@ -195,11 +200,11 @@ pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { #[derive(Debug, Clone, Copy)] pub struct PhysicalAny; -impl PhysicalStorage for PhysicalAny { +impl PhysicalStorage2 for PhysicalAny { type Type<'a> = (); type Storage<'a> = UnitStorage; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { Ok(UnitStorage(data.len())) } } @@ -227,255 +232,255 @@ impl AddressableStorage for UnitStorage { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalUntypedNull; +pub struct PhysicalUntypedNull_2; -impl PhysicalStorage for PhysicalUntypedNull { +impl PhysicalStorage2 for PhysicalUntypedNull_2 { type Type<'a> = UntypedNull; type Storage<'a> = UntypedNullStorage; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UntypedNull(s) => Ok(*s), + ArrayData2::UntypedNull(s) => Ok(*s), _ => Err(RayexecError::new("invalid storage")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalBool; +pub struct PhysicalBool_2; -impl PhysicalStorage for PhysicalBool { +impl PhysicalStorage2 for PhysicalBool_2 { type Type<'a> = bool; type Storage<'a> = BooleanStorageRef<'a>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Boolean(storage) => Ok(storage.as_boolean_storage_ref()), + ArrayData2::Boolean(storage) => Ok(storage.as_boolean_storage_ref()), _ => Err(RayexecError::new("invalid storage, expected boolean")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalI8; +pub struct PhysicalI8_2; -impl PhysicalStorage for PhysicalI8 { +impl PhysicalStorage2 for PhysicalI8_2 { type Type<'a> = i8; type Storage<'a> = PrimitiveStorageSlice<'a, i8>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Int8(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Int8(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected int8")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalI16; +pub struct PhysicalI16_2; -impl PhysicalStorage for PhysicalI16 { +impl PhysicalStorage2 for PhysicalI16_2 { type Type<'a> = i16; type Storage<'a> = PrimitiveStorageSlice<'a, i16>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Int16(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Int16(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected int16")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalI32; +pub struct PhysicalI32_2; -impl PhysicalStorage for PhysicalI32 { +impl PhysicalStorage2 for PhysicalI32_2 { type Type<'a> = i32; type Storage<'a> = PrimitiveStorageSlice<'a, i32>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Int32(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Int32(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected int32")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalI64; +pub struct PhysicalI64_2; -impl PhysicalStorage for PhysicalI64 { +impl PhysicalStorage2 for PhysicalI64_2 { type 
Type<'a> = i64; type Storage<'a> = PrimitiveStorageSlice<'a, i64>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Int64(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Int64(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected int64")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalI128; +pub struct PhysicalI128_2; -impl PhysicalStorage for PhysicalI128 { +impl PhysicalStorage2 for PhysicalI128_2 { type Type<'a> = i128; type Storage<'a> = PrimitiveStorageSlice<'a, i128>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Int128(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Int128(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected int128")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalU8; +pub struct PhysicalU8_2; -impl PhysicalStorage for PhysicalU8 { +impl PhysicalStorage2 for PhysicalU8_2 { type Type<'a> = u8; type Storage<'a> = PrimitiveStorageSlice<'a, u8>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UInt8(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::UInt8(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected u8")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalU16; +pub struct PhysicalU16_2; -impl PhysicalStorage for PhysicalU16 { +impl PhysicalStorage2 for PhysicalU16_2 { type Type<'a> = u16; type Storage<'a> = PrimitiveStorageSlice<'a, u16>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UInt16(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::UInt16(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected u16")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalU32; +pub struct PhysicalU32_2; -impl PhysicalStorage for PhysicalU32 { +impl PhysicalStorage2 for PhysicalU32_2 { type Type<'a> = u32; type Storage<'a> = PrimitiveStorageSlice<'a, u32>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UInt32(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::UInt32(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected u32")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalU64; +pub struct PhysicalU64_2; -impl PhysicalStorage for PhysicalU64 { +impl PhysicalStorage2 for PhysicalU64_2 { type Type<'a> = u64; type Storage<'a> = PrimitiveStorageSlice<'a, u64>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UInt64(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::UInt64(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected u64")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalU128; +pub struct PhysicalU128_2; -impl PhysicalStorage for PhysicalU128 { +impl PhysicalStorage2 for PhysicalU128_2 { type Type<'a> = u128; type Storage<'a> = PrimitiveStorageSlice<'a, u128>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { 
match data { - ArrayData::UInt128(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::UInt128(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected u128")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalF16; +pub struct PhysicalF16_2; -impl PhysicalStorage for PhysicalF16 { +impl PhysicalStorage2 for PhysicalF16_2 { type Type<'a> = f16; type Storage<'a> = PrimitiveStorageSlice<'a, f16>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Float16(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Float16(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected f32")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalF32; +pub struct PhysicalF32_2; -impl PhysicalStorage for PhysicalF32 { +impl PhysicalStorage2 for PhysicalF32_2 { type Type<'a> = f32; type Storage<'a> = PrimitiveStorageSlice<'a, f32>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Float32(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Float32(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected f32")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalF64; +pub struct PhysicalF64_2; -impl PhysicalStorage for PhysicalF64 { +impl PhysicalStorage2 for PhysicalF64_2 { type Type<'a> = f64; type Storage<'a> = PrimitiveStorageSlice<'a, f64>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Float64(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Float64(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected f64")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalInterval; +pub struct PhysicalInterval_2; -impl PhysicalStorage for PhysicalInterval { +impl PhysicalStorage2 for PhysicalInterval_2 { type Type<'a> = Interval; type Storage<'a> = PrimitiveStorageSlice<'a, Interval>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Interval(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Interval(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected interval")), } } } #[derive(Debug, Clone, Copy)] -pub struct PhysicalBinary; +pub struct PhysicalBinary_2; -impl PhysicalStorage for PhysicalBinary { +impl PhysicalStorage2 for PhysicalBinary_2 { type Type<'a> = &'a [u8]; type Storage<'a> = BinaryDataStorage<'a>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Binary(binary) => match binary { + ArrayData2::Binary(binary) => match binary { BinaryData::Binary(b) => { Ok(BinaryDataStorage::Binary(b.as_contiguous_storage_slice())) } @@ -490,15 +495,15 @@ impl PhysicalStorage for PhysicalBinary { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalUtf8; +pub struct PhysicalUtf8_2; -impl PhysicalStorage for PhysicalUtf8 { +impl PhysicalStorage2 for PhysicalUtf8_2 { type Type<'a> = &'a str; type Storage<'a> = StrDataStorage<'a>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Binary(binary) => match binary { + 
ArrayData2::Binary(binary) => match binary { BinaryData::Binary(b) => { Ok(BinaryDataStorage::Binary(b.as_contiguous_storage_slice()).into()) } @@ -584,15 +589,15 @@ impl<'a> From> for StrDataStorage<'a> { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalList; +pub struct PhysicalList_2; -impl PhysicalStorage for PhysicalList { +impl PhysicalStorage2 for PhysicalList_2 { type Type<'a> = ListItemMetadata; type Storage<'a> = PrimitiveStorageSlice<'a, ListItemMetadata>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::List(storage) => Ok(storage.metadata.as_primitive_storage_slice()), + ArrayData2::List(storage) => Ok(storage.metadata.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected list")), } } diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs index f289147c9..5d8ea7566 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs @@ -1,28 +1,28 @@ use rayexec_error::Result; use super::check_validity; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::validate_logical_len; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; #[derive(Debug, Clone, Copy)] -pub struct BinaryExecutor; +pub struct BinaryExecutor2; -impl BinaryExecutor { +impl BinaryExecutor2 { pub fn execute<'a, S1, S2, B, Op>( - array1: &'a Array, - array2: &'a Array, + array1: &'a Array2, + array2: &'a Array2, builder: ArrayBuilder, mut op: Op, - ) -> Result + ) -> Result where Op: FnMut(S1::Type<'a>, S2::Type<'a>, &mut OutputBuffer), - S1: PhysicalStorage, - S2: PhysicalStorage, + S1: PhysicalStorage2, + S2: PhysicalStorage2, B: ArrayDataBuffer, { let len = validate_logical_len(&builder.buffer, array1)?; @@ -81,7 +81,7 @@ impl BinaryExecutor { let data = output_buffer.buffer.into_data(); - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: out_validity, @@ -97,20 +97,20 @@ mod tests { use super::*; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{GermanVarlenBuffer, PrimitiveBuffer}; - use crate::arrays::executor::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::executor::physical_type::{PhysicalI32_2, PhysicalUtf8_2}; use crate::arrays::scalar::ScalarValue; #[test] fn binary_simple_add() { - let left = Array::from_iter([1, 2, 3]); - let right = Array::from_iter([4, 5, 6]); + let left = Array2::from_iter([1, 2, 3]); + let right = Array2::from_iter([4, 5, 6]); let builder = ArrayBuilder { datatype: DataType::Int32, buffer: PrimitiveBuffer::::with_len(3), }; - let got = BinaryExecutor::execute::( + let got = BinaryExecutor2::execute::( &left, &right, builder, @@ -125,8 +125,8 @@ mod tests { #[test] fn binary_string_repeat() { - let left = Array::from_iter([1, 2, 3]); - let right = Array::from_iter(["hello", "world", "goodbye!"]); + let left = Array2::from_iter([1, 2, 3]); + let right = Array2::from_iter(["hello", "world", "goodbye!"]); let builder = ArrayBuilder { datatype: DataType::Utf8, @@ -134,7 +134,7 @@ mod tests { }; let mut string_buf = String::new(); 
- let got = BinaryExecutor::execute::( + let got = BinaryExecutor2::execute::( &left, &right, builder, @@ -162,13 +162,13 @@ mod tests { #[test] fn binary_add_with_invalid() { // Make left constant null. - let mut left = Array::from_iter([1]); + let mut left = Array2::from_iter([1]); left.put_selection(SelectionVector::repeated(3, 0)); left.set_physical_validity(0, false); - let right = Array::from_iter([2, 3, 4]); + let right = Array2::from_iter([2, 3, 4]); - let got = BinaryExecutor::execute::( + let got = BinaryExecutor2::execute::( &left, &right, ArrayBuilder { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs index 8029004ba..c97608c68 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs @@ -2,7 +2,7 @@ use std::borrow::Borrow; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ @@ -13,28 +13,28 @@ use crate::arrays::executor::builder::{ PrimitiveBuffer, }; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, - PhysicalList, - PhysicalStorage, - PhysicalType, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUtf8, + PhysicalBinary_2, + PhysicalBool_2, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, + PhysicalList_2, + PhysicalStorage2, + PhysicalType2, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUtf8_2, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::selection; use crate::arrays::storage::{ AddressableStorage, @@ -81,9 +81,9 @@ where /// /// `fill_map` is an iterator of mappings that map indices from `array` to /// where they should be placed in the buffer. - pub fn fill<'a, S, I>(&mut self, array: &'a Array, fill_map: I) -> Result<()> + pub fn fill<'a, S, I>(&mut self, array: &'a Array2, fill_map: I) -> Result<()> where - S: PhysicalStorage, + S: PhysicalStorage2, I: IntoIterator, S::Type<'a>: Borrow<::Type>, { @@ -118,14 +118,14 @@ where Ok(()) } - pub fn finish(self) -> Array { + pub fn finish(self) -> Array2 { let validity = if self.validity.is_all_true() { None } else { Some(self.validity.into()) }; - Array { + Array2 { datatype: self.builder.datatype, selection: None, validity, @@ -135,7 +135,7 @@ where } /// Concatenate multiple arrays into a single array. -pub fn concat(arrays: &[&Array]) -> Result { +pub fn concat(arrays: &[&Array2]) -> Result { let total_len: usize = arrays.iter().map(|a| a.logical_len()).sum(); concat_with_exact_total_len(arrays, total_len) } @@ -146,147 +146,147 @@ pub fn concat(arrays: &[&Array]) -> Result { /// /// This function exists so that we can compute the total length once for a set /// of batches that we're concatenating instead of once per array. 
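// Usage sketch for the public entry point above (mirroring the `concat_2` and
// `interleave_2` tests later in this file):
let a = Array2::from_iter([4, 5, 6]);
let b = Array2::from_iter([7, 8]);
let appended = concat(&[&a, &b]).unwrap();                      // logical length 5
// `interleave` instead picks individual rows; the pairs are assumed here to be
// (array index, row index), matching how the tests drive it.
let picked = interleave(&[&a, &b], &[(0, 1), (1, 0)]).unwrap();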
-pub(crate) fn concat_with_exact_total_len(arrays: &[&Array], total_len: usize) -> Result { +pub(crate) fn concat_with_exact_total_len(arrays: &[&Array2], total_len: usize) -> Result { let datatype = match arrays.first() { Some(arr) => arr.datatype(), None => return Err(RayexecError::new("Cannot concat zero arrays")), }; - match datatype.physical_type()? { - PhysicalType::UntypedNull => Ok(Array { + match datatype.physical_type2()? { + PhysicalType2::UntypedNull => Ok(Array2 { datatype: datatype.clone(), selection: None, validity: None, data: UntypedNullStorage(total_len).into(), }), - PhysicalType::Boolean => { + PhysicalType2::Boolean => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: BooleanBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Int128 => { + PhysicalType2::Int128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { let state = 
FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Interval => { + PhysicalType2::Interval => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::Binary => { + PhysicalType2::Binary => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::<[u8]>::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } - PhysicalType::List => concat_lists(datatype.clone(), arrays, total_len), + PhysicalType2::List => concat_lists(datatype.clone(), arrays, total_len), } } -fn concat_lists(datatype: DataType, arrays: &[&Array], total_len: usize) -> Result { +fn concat_lists(datatype: DataType, arrays: &[&Array2], total_len: usize) -> Result { let inner_arrays = arrays .iter() .map(|arr| match arr.array_data() { - ArrayData::List(list) => { + ArrayData2::List(list) => { if list.array.has_selection() { return Err(RayexecError::new("List child array has selection")); } @@ -308,18 +308,21 @@ fn concat_lists(datatype: DataType, arrays: &[&Array], total_len: usize) -> Resu let mut acc_rows = 0; for (array, child_array) in arrays.iter().zip(inner_arrays) { - UnaryExecutor::for_each::(array, |_row_num, metadata| match metadata { - Some(metadata) => { - metadatas.push(ListItemMetadata { - offset: metadata.offset + acc_rows, - len: metadata.len, - }); - } - None => { - metadatas.push(ListItemMetadata::default()); - validity.set_unchecked(metadatas.len() - 1, false); - } - })?; + UnaryExecutor2::for_each::( + array, + |_row_num, metadata| match metadata { + Some(metadata) => { + metadatas.push(ListItemMetadata { + offset: metadata.offset + acc_rows, + len: metadata.len, + }); + } + None => { + metadatas.push(ListItemMetadata::default()); + validity.set_unchecked(metadatas.len() - 1, false); + } + }, + )?; acc_rows += child_array.logical_len() as i32; } @@ -329,7 +332,7 @@ fn concat_lists(datatype: DataType, arrays: &[&Array], total_len: usize) -> Resu array: concatenated, }; - Ok(Array { + Ok(Array2 { datatype, selection: None, validity: Some(validity.into()), @@ -338,11 +341,11 @@ fn concat_lists(datatype: DataType, arrays: &[&Array], total_len: usize) -> Resu } fn concat_with_fill_state<'a, S, B>( - arrays: &'a [&Array], + arrays: &'a [&Array2], mut fill_state: FillState, -) -> Result +) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, S::Type<'a>: Borrow<::Type>, { @@ -370,139 
+373,139 @@ where /// array. /// /// Indices may be specified more than once. -pub fn interleave(arrays: &[&Array], indices: &[(usize, usize)]) -> Result { +pub fn interleave(arrays: &[&Array2], indices: &[(usize, usize)]) -> Result { let datatype = match arrays.first() { Some(arr) => arr.datatype(), None => return Err(RayexecError::new("Cannot interleave zero arrays")), }; - match datatype.physical_type()? { - PhysicalType::UntypedNull => Ok(Array { + match datatype.physical_type2()? { + PhysicalType2::UntypedNull => Ok(Array2 { datatype: datatype.clone(), selection: None, validity: None, data: UntypedNullStorage(indices.len()).into(), }), - PhysicalType::Boolean => { + PhysicalType2::Boolean => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: BooleanBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Int128 => { + PhysicalType2::Int128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, 
indices, state) } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Interval => { + PhysicalType2::Interval => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Binary => { + PhysicalType2::Binary => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::<[u8]>::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::List => { + PhysicalType2::List => { // TODO: Also doable Err(RayexecError::new( "interleaving list arrays not yet supported", @@ -512,12 +515,12 @@ pub fn interleave(arrays: &[&Array], indices: &[(usize, usize)]) -> Result( - arrays: &'a [&Array], + arrays: &'a [&Array2], indices: &[(usize, usize)], mut fill_state: FillState, -) -> Result +) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, S::Type<'a>: Borrow<::Type>, { @@ -550,7 +553,7 @@ mod tests { use super::*; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::PrimitiveBuffer; - use crate::arrays::executor::physical_type::PhysicalI32; + use crate::arrays::executor::physical_type::PhysicalI32_2; use crate::arrays::scalar::ScalarValue; #[test] @@ -560,14 +563,14 @@ mod tests { buffer: PrimitiveBuffer::::with_len(3), }); - let arr = Array::from_iter([4, 5, 6]); + let arr = Array2::from_iter([4, 5, 6]); let mapping = [ FillMapping { from: 0, to: 0 }, FillMapping { from: 1, to: 1 }, FillMapping { from: 2, to: 2 }, ]; - state.fill::(&arr, mapping).unwrap(); + state.fill::(&arr, mapping).unwrap(); let got = state.finish(); @@ -583,14 +586,14 @@ mod tests { buffer: PrimitiveBuffer::::with_len(3), }); - let arr = Array::from_iter([4, 5, 6]); + let arr = Array2::from_iter([4, 5, 6]); let mapping = [ FillMapping { from: 1, to: 0 }, FillMapping { from: 1, to: 1 }, FillMapping { from: 1, to: 2 }, ]; - state.fill::(&arr, 
mapping).unwrap(); + state.fill::(&arr, mapping).unwrap(); let got = state.finish(); @@ -606,14 +609,14 @@ mod tests { buffer: PrimitiveBuffer::::with_len(3), }); - let arr = Array::from_iter([4, 5, 6]); + let arr = Array2::from_iter([4, 5, 6]); let mapping = [ FillMapping { from: 0, to: 1 }, FillMapping { from: 1, to: 2 }, FillMapping { from: 2, to: 0 }, ]; - state.fill::(&arr, mapping).unwrap(); + state.fill::(&arr, mapping).unwrap(); let got = state.finish(); @@ -629,21 +632,21 @@ mod tests { buffer: PrimitiveBuffer::::with_len(6), }); - let arr1 = Array::from_iter([4, 5, 6]); + let arr1 = Array2::from_iter([4, 5, 6]); let mapping1 = [ FillMapping { from: 0, to: 2 }, FillMapping { from: 1, to: 4 }, FillMapping { from: 2, to: 0 }, ]; - state.fill::(&arr1, mapping1).unwrap(); + state.fill::(&arr1, mapping1).unwrap(); - let arr2 = Array::from_iter([7, 8, 9]); + let arr2 = Array2::from_iter([7, 8, 9]); let mapping2 = [ FillMapping { from: 0, to: 1 }, FillMapping { from: 1, to: 3 }, FillMapping { from: 2, to: 5 }, ]; - state.fill::(&arr2, mapping2).unwrap(); + state.fill::(&arr2, mapping2).unwrap(); let got = state.finish(); @@ -657,8 +660,8 @@ mod tests { #[test] fn interleave_2() { - let arr1 = Array::from_iter([4, 5, 6]); - let arr2 = Array::from_iter([7, 8, 9]); + let arr1 = Array2::from_iter([4, 5, 6]); + let arr2 = Array2::from_iter([7, 8, 9]); let indices = [(0, 1), (0, 2), (1, 0), (1, 1), (0, 0), (1, 2)]; @@ -674,8 +677,8 @@ mod tests { #[test] fn interleave_2_repeated() { - let arr1 = Array::from_iter([4, 5]); - let arr2 = Array::from_iter([7, 8]); + let arr1 = Array2::from_iter([4, 5]); + let arr2 = Array2::from_iter([7, 8]); let indices = [(0, 1), (1, 1), (0, 1), (1, 1)]; @@ -689,8 +692,8 @@ mod tests { #[test] fn concat_2() { - let arr1 = Array::from_iter([4, 5, 6]); - let arr2 = Array::from_iter([7, 8]); + let arr1 = Array2::from_iter([4, 5, 6]); + let arr2 = Array2::from_iter([7, 8]); let got = concat(&[&arr1, &arr2]).unwrap(); diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs index 548bcb850..32fb4485f 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs @@ -2,28 +2,28 @@ use ahash::RandomState; use half::f16; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, - PhysicalList, - PhysicalStorage, - PhysicalType, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUntypedNull, - PhysicalUtf8, + PhysicalBinary_2, + PhysicalBool_2, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, + PhysicalList_2, + PhysicalStorage2, + PhysicalType2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUntypedNull_2, + PhysicalUtf8_2, }; use crate::arrays::scalar::interval::Interval; use crate::arrays::selection; @@ -38,63 +38,63 @@ pub struct HashExecutor; impl HashExecutor { /// Hashes the given array values, combining them with the existing hashes /// in `hashes`. 
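// Sketch of the two hashing entry points documented here, assuming one u64 slot
// per row: the first key column seeds the buffer via `hash_no_combine`, and
// subsequent key columns fold into it via `hash_combine`.
let keys_a = Array2::from_iter([1, 2, 3]);
let keys_b = Array2::from_iter(["x", "y", "z"]);
let mut hashes = vec![0u64; keys_a.logical_len()];
HashExecutor::hash_no_combine(&keys_a, &mut hashes).unwrap();
HashExecutor::hash_combine(&keys_b, &mut hashes).unwrap();
// `hash_many` runs the same seed-then-combine loop over a slice of arrays.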
- pub fn hash_combine(array: &Array, hashes: &mut [u64]) -> Result<()> { + pub fn hash_combine(array: &Array2, hashes: &mut [u64]) -> Result<()> { match array.physical_type() { - PhysicalType::UntypedNull => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UntypedNull => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Boolean => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Boolean => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int8 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Int8 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int16 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Int16 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int32 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Int32 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int64 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Int64 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int128 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Int128 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt8 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UInt8 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt16 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UInt16 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt32 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UInt32 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt64 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UInt64 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt128 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UInt128 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float16 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Float16 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float32 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Float32 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float64 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Float64 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Binary => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Binary => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Utf8 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Utf8 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Interval => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Interval => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::List => Self::hash_list::(array, hashes)?, + PhysicalType2::List => Self::hash_list::(array, hashes)?, } Ok(()) @@ -102,69 +102,69 @@ impl HashExecutor { /// Hash the given array and write the values into `hashes`, overwriting any /// existing values. - pub fn hash_no_combine(array: &Array, hashes: &mut [u64]) -> Result<()> { + pub fn hash_no_combine(array: &Array2, hashes: &mut [u64]) -> Result<()> { match array.physical_type() { - PhysicalType::UntypedNull => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UntypedNull => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Boolean => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Boolean => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int8 => { - Self::hash_one_inner::(array, hashes)? 
+ PhysicalType2::Int8 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int16 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Int16 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int32 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Int32 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int64 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Int64 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int128 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Int128 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt8 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UInt8 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt16 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UInt16 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt32 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UInt32 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt64 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UInt64 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt128 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::UInt128 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float16 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Float16 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float32 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Float32 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float64 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Float64 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Binary => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Binary => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Utf8 => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Utf8 => { + Self::hash_one_inner::(array, hashes)? } - PhysicalType::Interval => { - Self::hash_one_inner::(array, hashes)? + PhysicalType2::Interval => { + Self::hash_one_inner::(array, hashes)? 
} - PhysicalType::List => Self::hash_list::(array, hashes)?, + PhysicalType2::List => Self::hash_list::(array, hashes)?, } Ok(()) } - pub fn hash_many<'b>(arrays: &[Array], hashes: &'b mut [u64]) -> Result<&'b mut [u64]> { + pub fn hash_many<'b>(arrays: &[Array2], hashes: &'b mut [u64]) -> Result<&'b mut [u64]> { for (idx, array) in arrays.iter().enumerate() { let combine_hash = idx > 0; @@ -178,9 +178,9 @@ impl HashExecutor { Ok(hashes) } - fn hash_one_inner<'a, 'b, S, H>(array: &'a Array, hashes: &'b mut [u64]) -> Result<()> + fn hash_one_inner<'a, 'b, S, H>(array: &'a Array2, hashes: &'b mut [u64]) -> Result<()> where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: HashValue, H: SetHash, { @@ -215,12 +215,12 @@ impl HashExecutor { Ok(()) } - fn hash_list(array: &Array, hashes: &mut [u64]) -> Result<()> + fn hash_list(array: &Array2, hashes: &mut [u64]) -> Result<()> where H: SetHash, { let inner = match array.array_data() { - ArrayData::List(list) => &list.array, + ArrayData2::List(list) => &list.array, other => { return Err(RayexecError::new(format!( "Unexpected array data for list hashing: {:?}", @@ -233,7 +233,7 @@ impl HashExecutor { let mut list_hashes_buf = vec![0; inner.logical_len()]; Self::hash_no_combine(inner, &mut list_hashes_buf)?; - let metadata = PhysicalList::get_storage(&array.data)?; + let metadata = PhysicalList_2::get_storage(&array.data)?; let selection = array.selection_vector(); match array.validity() { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs index f1b0ff63d..ec53a50de 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs @@ -1,9 +1,9 @@ use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer}; -use crate::arrays::executor::physical_type::{PhysicalList, PhysicalStorage}; +use crate::arrays::executor::physical_type::{PhysicalList_2, PhysicalStorage2}; use crate::arrays::executor::scalar::{ can_skip_validity_check, check_validity, @@ -12,7 +12,7 @@ use crate::arrays::executor::scalar::{ use crate::arrays::selection::{self, SelectionVector}; use crate::arrays::storage::{AddressableStorage, ListItemMetadata}; -pub trait BinaryListReducer { +pub trait BinaryListReducer2 { fn new(left_len: i32, right_len: i32) -> Self; fn put_values(&mut self, v1: T, v2: T); fn finish(self) -> O; @@ -36,13 +36,13 @@ impl { /// Execute a reducer on two list arrays. 
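// Hypothetical reducer against the trait above (the struct and its arithmetic
// are illustrative only, not part of this change): consume two equal-length
// lists pairwise and produce the sum of element-wise products.
#[derive(Debug, Default)]
struct DotProductReducer {
    sum: i64,
}

impl BinaryListReducer2<i64, i64> for DotProductReducer {
    fn new(_left_len: i32, _right_len: i32) -> Self {
        Self::default()
    }

    fn put_values(&mut self, v1: i64, v2: i64) {
        self.sum += v1 * v2;
    }

    fn finish(self) -> i64 {
        self.sum
    }
}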
pub fn binary_reduce<'a, S, B, R>( - array1: &'a Array, - array2: &'a Array, + array1: &'a Array2, + array2: &'a Array2, mut builder: ArrayBuilder, - ) -> Result + ) -> Result where - R: BinaryListReducer, B::Type>, - S: PhysicalStorage, + R: BinaryListReducer2, B::Type>, + S: PhysicalStorage2, B: ArrayDataBuffer, ::Type: Sized, { @@ -56,8 +56,8 @@ impl let validity2 = array2.validity(); if can_skip_validity_check([validity1, validity2]) { - let metadata1 = PhysicalList::get_storage(array1.array_data())?; - let metadata2 = PhysicalList::get_storage(array2.array_data())?; + let metadata1 = PhysicalList_2::get_storage(array1.array_data())?; + let metadata2 = PhysicalList_2::get_storage(array2.array_data())?; let (values1, inner_validity1) = get_inner_array_storage::(array1)?; let (values2, inner_validity2) = get_inner_array_storage::(array2)?; @@ -95,7 +95,7 @@ impl builder.buffer.put(idx, &out); } - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: None, @@ -139,7 +139,7 @@ impl builder.buffer.put(idx, &out); } - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: None, @@ -168,12 +168,12 @@ impl /// Gets the inner array storage. Checks to ensure the inner array does not /// contain NULLs. -fn get_inner_array_storage(array: &Array) -> Result<(S::Storage<'_>, Option<&Bitmap>)> +fn get_inner_array_storage(array: &Array2) -> Result<(S::Storage<'_>, Option<&Bitmap>)> where - S: PhysicalStorage, + S: PhysicalStorage2, { match array.array_data() { - ArrayData::List(d) => { + ArrayData2::List(d) => { let storage = S::get_storage(d.array.array_data())?; let validity = d.array.validity(); Ok((storage, validity)) @@ -182,9 +182,9 @@ where } } -fn get_inner_array_selection(array: &Array) -> Result> { +fn get_inner_array_selection(array: &Array2) -> Result> { match array.array_data() { - ArrayData::List(d) => Ok(d.array.selection_vector()), + ArrayData2::List(d) => Ok(d.array.selection_vector()), _ => Err(RayexecError::new("Expected list array data")), } } diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/mod.rs b/crates/rayexec_execution/src/arrays/executor/scalar/mod.rs index e2477f180..96c2b07f6 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/mod.rs @@ -35,7 +35,7 @@ pub use fill::*; use rayexec_error::{RayexecError, Result}; use super::builder::ArrayDataBuffer; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; #[inline] @@ -64,7 +64,7 @@ where /// array matches the logical length of some other array. /// /// Returns the logical length. 
-pub(crate) fn validate_logical_len(buffer: &B, array: &Array) -> Result +pub(crate) fn validate_logical_len(buffer: &B, array: &Array2) -> Result where B: ArrayDataBuffer, { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/select.rs b/crates/rayexec_execution/src/arrays/executor/scalar/select.rs index 7e7e37e2d..d7cd0ef61 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/select.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/select.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; -use crate::arrays::array::Array; -use crate::arrays::executor::physical_type::{PhysicalBool, PhysicalStorage}; +use crate::arrays::array::Array2; +use crate::arrays::executor::physical_type::{PhysicalBool_2, PhysicalStorage2}; use crate::arrays::selection::{self, SelectionVector}; use crate::arrays::storage::AddressableStorage; @@ -12,14 +12,14 @@ impl SelectExecutor { /// Writes row selections to `output_sel`. /// /// Errors if the provided array isn't a boolean array. - pub fn select(bool_array: &Array, output_sel: &mut SelectionVector) -> Result<()> { + pub fn select(bool_array: &Array2, output_sel: &mut SelectionVector) -> Result<()> { output_sel.clear(); let selection = bool_array.selection_vector(); let len = bool_array.logical_len(); match bool_array.validity() { Some(validity) => { - let values = PhysicalBool::get_storage(&bool_array.data)?; + let values = PhysicalBool_2::get_storage(&bool_array.data)?; for idx in 0..len { let sel = selection::get(selection, idx); @@ -35,7 +35,7 @@ impl SelectExecutor { } } None => { - let values = PhysicalBool::get_storage(&bool_array.data)?; + let values = PhysicalBool_2::get_storage(&bool_array.data)?; for idx in 0..len { let sel = selection::get(selection, idx); @@ -58,7 +58,7 @@ mod tests { #[test] fn select_simple() { - let arr = Array::from_iter([false, true, true, false, true]); + let arr = Array2::from_iter([false, true, true, false, true]); let mut selection = SelectionVector::with_capacity(5); SelectExecutor::select(&arr, &mut selection).unwrap(); @@ -69,7 +69,7 @@ mod tests { #[test] fn select_with_nulls() { - let arr = Array::from_iter([Some(false), Some(true), None, Some(false), Some(true)]); + let arr = Array2::from_iter([Some(false), Some(true), None, Some(false), Some(true)]); let mut selection = SelectionVector::with_capacity(5); SelectExecutor::select(&arr, &mut selection).unwrap(); @@ -80,7 +80,7 @@ mod tests { #[test] fn select_with_selection() { - let mut arr = Array::from_iter([Some(false), Some(true), None, Some(false), Some(true)]); + let mut arr = Array2::from_iter([Some(false), Some(true), None, Some(false), Some(true)]); // => [NULL, false, true] arr.select_mut(SelectionVector::from_iter([2, 3, 4])); @@ -93,7 +93,7 @@ mod tests { #[test] fn select_wrong_type() { - let arr = Array::from_iter([1, 2, 3, 4, 5]); + let arr = Array2::from_iter([1, 2, 3, 4, 5]); let mut selection = SelectionVector::with_capacity(5); SelectExecutor::select(&arr, &mut selection).unwrap_err(); diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs index cdde8aee2..69fb87d77 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs @@ -3,30 +3,30 @@ use std::fmt::Debug; use rayexec_error::Result; use super::check_validity; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use 
crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::validate_logical_len; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; #[derive(Debug, Clone, Copy)] -pub struct TernaryExecutor; +pub struct TernaryExecutor2; -impl TernaryExecutor { +impl TernaryExecutor2 { pub fn execute<'a, S1, S2, S3, B, Op>( - array1: &'a Array, - array2: &'a Array, - array3: &'a Array, + array1: &'a Array2, + array2: &'a Array2, + array3: &'a Array2, builder: ArrayBuilder, mut op: Op, - ) -> Result + ) -> Result where Op: FnMut(S1::Type<'a>, S2::Type<'a>, S3::Type<'a>, &mut OutputBuffer), - S1: PhysicalStorage, - S2: PhysicalStorage, - S3: PhysicalStorage, + S1: PhysicalStorage2, + S2: PhysicalStorage2, + S3: PhysicalStorage2, B: ArrayDataBuffer, { let len = validate_logical_len(&builder.buffer, array1)?; @@ -97,7 +97,7 @@ impl TernaryExecutor { let data = output_buffer.buffer.into_data(); - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: out_validity, diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs index 778ff7838..4e9b04b9b 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs @@ -1,26 +1,26 @@ use rayexec_error::Result; use super::validate_logical_len; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; #[derive(Debug, Clone)] -pub struct UnaryExecutor; +pub struct UnaryExecutor2; -impl UnaryExecutor { +impl UnaryExecutor2 { /// Executes `op` on every non-null input. pub fn execute<'a, S, B, Op>( - array: &'a Array, + array: &'a Array2, builder: ArrayBuilder, mut op: Op, - ) -> Result + ) -> Result where Op: FnMut(S::Type<'a>, &mut OutputBuffer), - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, { let len = validate_logical_len(&builder.buffer, array)?; @@ -67,7 +67,7 @@ impl UnaryExecutor { let data = output_buffer.buffer.into_data(); - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: out_validity, @@ -80,10 +80,10 @@ impl UnaryExecutor { /// /// `op` is called for each logical entry in the array with the index and /// either Some(val) if the value is valid, or None if it's not. - pub fn for_each<'a, S, Op>(array: &'a Array, mut op: Op) -> Result<()> + pub fn for_each<'a, S, Op>(array: &'a Array2, mut op: Op) -> Result<()> where Op: FnMut(usize, Option>), - S: PhysicalStorage, + S: PhysicalStorage2, { let selection = array.selection_vector(); let len = array.logical_len(); @@ -119,9 +119,9 @@ impl UnaryExecutor { /// Gets the value some index in the array. /// /// Returns Some if the value is valid, None otherwise. 
- pub fn value_at(array: &Array, idx: usize) -> Result>> + pub fn value_at(array: &Array2, idx: usize) -> Result>> where - S: PhysicalStorage, + S: PhysicalStorage2, { let selection = array.selection_vector(); @@ -154,18 +154,18 @@ mod tests { use super::*; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{GermanVarlenBuffer, PrimitiveBuffer}; - use crate::arrays::executor::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::executor::physical_type::{PhysicalI32_2, PhysicalUtf8_2}; use crate::arrays::scalar::ScalarValue; #[test] fn int32_inc_by_2() { - let array = Array::from_iter([1, 2, 3]); + let array = Array2::from_iter([1, 2, 3]); let builder = ArrayBuilder { datatype: DataType::Int32, buffer: PrimitiveBuffer::::with_len(3), }; - let got = UnaryExecutor::execute::(&array, builder, |v, buf| { + let got = UnaryExecutor2::execute::(&array, builder, |v, buf| { buf.put(&(v + 2)) }) .unwrap(); @@ -179,7 +179,7 @@ mod tests { fn string_double_named_func() { // Example with defined function, and allocating a new string every time. - let array = Array::from_iter(["a", "bb", "ccc", "dddd"]); + let array = Array2::from_iter(["a", "bb", "ccc", "dddd"]); let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(4), @@ -194,8 +194,9 @@ mod tests { buf.put(&double) } - let got = UnaryExecutor::execute::(&array, builder, my_string_double) - .unwrap(); + let got = + UnaryExecutor2::execute::(&array, builder, my_string_double) + .unwrap(); assert_eq!(ScalarValue::from("aa"), got.physical_scalar(0).unwrap()); assert_eq!(ScalarValue::from("bbbb"), got.physical_scalar(1).unwrap()); @@ -210,7 +211,7 @@ mod tests { fn string_double_closure() { // Example with closure that reuses a string. - let array = Array::from_iter(["a", "bb", "ccc", "dddd"]); + let array = Array2::from_iter(["a", "bb", "ccc", "dddd"]); let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(4), @@ -227,8 +228,9 @@ mod tests { buf.put(buffer.as_str()) }; - let got = UnaryExecutor::execute::(&array, builder, my_string_double) - .unwrap(); + let got = + UnaryExecutor2::execute::(&array, builder, my_string_double) + .unwrap(); assert_eq!(ScalarValue::from("aa"), got.physical_scalar(0).unwrap()); assert_eq!(ScalarValue::from("bbbb"), got.physical_scalar(1).unwrap()); @@ -243,7 +245,7 @@ mod tests { fn string_trunc_closure() { // Example with closure returning referencing to input. - let array = Array::from_iter(["a", "bb", "ccc", "dddd"]); + let array = Array2::from_iter(["a", "bb", "ccc", "dddd"]); let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(4), @@ -254,8 +256,9 @@ mod tests { buf.put(s.get(0..len).unwrap_or("")) }; - let got = UnaryExecutor::execute::(&array, builder, my_string_truncate) - .unwrap(); + let got = + UnaryExecutor2::execute::(&array, builder, my_string_truncate) + .unwrap(); assert_eq!(ScalarValue::from("a"), got.physical_scalar(0).unwrap()); assert_eq!(ScalarValue::from("bb"), got.physical_scalar(1).unwrap()); @@ -268,7 +271,7 @@ mod tests { // Example with selection vector whose logical length is greater than // the underlying physical data len. 
- let mut array = Array::from_iter(["a", "bb", "ccc", "dddd"]); + let mut array = Array2::from_iter(["a", "bb", "ccc", "dddd"]); let mut selection = SelectionVector::with_range(0..5); selection.set_unchecked(0, 3); selection.set_unchecked(1, 3); @@ -288,7 +291,7 @@ mod tests { }; let got = - UnaryExecutor::execute::(&array, builder, my_string_uppercase) + UnaryExecutor2::execute::(&array, builder, my_string_uppercase) .unwrap(); assert_eq!(ScalarValue::from("DDDD"), got.physical_scalar(0).unwrap()); diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs index 7a7b01083..3b0362b6a 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs @@ -1,26 +1,26 @@ use rayexec_error::{RayexecError, Result}; use super::check_validity; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::validate_logical_len; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; #[derive(Debug, Clone, Copy)] -pub struct UniformExecutor; +pub struct UniformExecutor2; -impl UniformExecutor { +impl UniformExecutor2 { pub fn execute<'a, S, B, Op>( - arrays: &[&'a Array], + arrays: &[&'a Array2], builder: ArrayBuilder, mut op: Op, - ) -> Result + ) -> Result where Op: FnMut(&[S::Type<'a>], &mut OutputBuffer), - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, { let len = match arrays.first() { @@ -95,7 +95,7 @@ impl UniformExecutor { let data = output_buffer.buffer.into_data(); - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: out_validity, @@ -111,14 +111,14 @@ mod tests { use super::*; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::GermanVarlenBuffer; - use crate::arrays::executor::physical_type::PhysicalUtf8; + use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::scalar::ScalarValue; #[test] fn uniform_string_concat_row_wise() { - let first = Array::from_iter(["a", "b", "c"]); - let second = Array::from_iter(["1", "2", "3"]); - let third = Array::from_iter(["dog", "cat", "horse"]); + let first = Array2::from_iter(["a", "b", "c"]); + let second = Array2::from_iter(["1", "2", "3"]); + let third = Array2::from_iter(["dog", "cat", "horse"]); let builder = ArrayBuilder { datatype: DataType::Utf8, @@ -127,7 +127,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor2::execute::( &[&first, &second, &third], builder, |inputs, buf| { @@ -150,10 +150,10 @@ mod tests { #[test] fn uniform_string_concat_row_wise_with_invalid() { - let first = Array::from_iter(["a", "b", "c"]); - let mut second = Array::from_iter(["1", "2", "3"]); + let first = Array2::from_iter(["a", "b", "c"]); + let mut second = Array2::from_iter(["1", "2", "3"]); second.set_physical_validity(1, false); // "2" => NULL - let third = Array::from_iter(["dog", "cat", "horse"]); + let third = Array2::from_iter(["dog", "cat", "horse"]); let builder = ArrayBuilder { datatype: DataType::Utf8, @@ -162,7 +162,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = 
UniformExecutor2::execute::( &[&first, &second, &third], builder, |inputs, buf| { @@ -182,11 +182,11 @@ mod tests { #[test] fn uniform_string_concat_row_wise_with_invalid_and_reordered() { - let first = Array::from_iter(["a", "b", "c"]); - let mut second = Array::from_iter(["1", "2", "3"]); + let first = Array2::from_iter(["a", "b", "c"]); + let mut second = Array2::from_iter(["1", "2", "3"]); second.select_mut(SelectionVector::from_iter([1, 0, 2])); // ["1", "2", "3"] => ["2", "1", "3"] second.set_physical_validity(1, false); // "2" => NULL, referencing physical index - let third = Array::from_iter(["dog", "cat", "horse"]); + let third = Array2::from_iter(["dog", "cat", "horse"]); let builder = ArrayBuilder { datatype: DataType::Utf8, @@ -195,7 +195,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor2::execute::( &[&first, &second, &third], builder, |inputs, buf| { diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs new file mode 100644 index 000000000..139cb5652 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs @@ -0,0 +1,124 @@ +use rayexec_error::Result; +use stdutil::iter::IntoExactSizeIterator; + +use super::AggregateState; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::{Addressable, PhysicalStorage}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct BinaryNonNullUpdater; + +impl BinaryNonNullUpdater { + pub fn update( + array1: &Array, + array2: &Array, + selection: impl IntoExactSizeIterator, + mapping: impl IntoExactSizeIterator, + states: &mut [State], + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + Output: ?Sized, + for<'a> State: AggregateState<(&'a S1::StorageType, &'a S2::StorageType), Output>, + { + // TODO: Dictionary + + // TODO: Length check. 
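        // `selection` picks which rows of the input arrays to read, and
        // `mapping` gives the index of the state each selected row updates
        // (the two are zipped pairwise below). For example, selection [1, 3, 4]
        // with mapping [0, 0, 0] folds input rows 1, 3 and 4 into `states[0]`,
        // which is exactly what the test at the bottom of this file does.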
+ + let input1 = S1::get_addressable(array1.data())?; + let input2 = S2::get_addressable(array2.data())?; + + let validity1 = array1.validity(); + let validity2 = array2.validity(); + + if validity1.all_valid() && validity2.all_valid() { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + let val1 = input1.get(input_idx).unwrap(); + let val2 = input2.get(input_idx).unwrap(); + + let state = &mut states[state_idx]; + + state.update((val1, val2))?; + } + } else { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + if !validity1.is_valid(input_idx) || !validity2.is_valid(input_idx) { + continue; + } + + let val1 = input1.get(input_idx).unwrap(); + let val2 = input2.get(input_idx).unwrap(); + + let state = &mut states[state_idx]; + + state.update((val1, val2))?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalI32}; + use crate::arrays::executor_exp::PutBuffer; + + // SUM(col) + PRODUCT(col) + #[derive(Debug)] + struct TestAddSumAndProductState { + sum: i32, + product: i32, + } + + impl Default for TestAddSumAndProductState { + fn default() -> Self { + TestAddSumAndProductState { sum: 0, product: 1 } + } + } + + impl AggregateState<(&i32, &i32), i32> for TestAddSumAndProductState { + fn merge(&mut self, other: &mut Self) -> Result<()> { + self.sum += other.sum; + self.product *= other.product; + Ok(()) + } + + fn update(&mut self, (&i1, &i2): (&i32, &i32)) -> Result<()> { + self.sum += i1; + self.product *= i2; + Ok(()) + } + + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + output.put(&(self.sum + self.product)); + Ok(()) + } + } + + #[test] + fn binary_primitive_single_state() { + let mut states = [TestAddSumAndProductState::default()]; + let array1 = Array::try_from_iter([1, 2, 3, 4, 5]).unwrap(); + let array2 = Array::try_from_iter([6, 7, 8, 9, 10]).unwrap(); + + BinaryNonNullUpdater::update::( + &array1, + &array2, + [1, 3, 4], + [0, 0, 0], + &mut states, + ) + .unwrap(); + + assert_eq!(11, states[0].sum); + assert_eq!(630, states[0].product); + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs new file mode 100644 index 000000000..443ad1874 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs @@ -0,0 +1,53 @@ +pub mod binary; +pub mod unary; + +use std::fmt::Debug; + +use rayexec_error::Result; + +use super::PutBuffer; +use crate::arrays::buffer::physical_type::AddressableMut; + +/// State for a single group's aggregate. +/// +/// An example state for SUM would be a struct that takes a running sum from +/// values provided in `update`. +pub trait AggregateState: Debug { + /// Merge other state into this state. + fn merge(&mut self, other: &mut Self) -> Result<()>; + + /// Update this state with some input. + fn update(&mut self, input: Input) -> Result<()>; + + /// Produce a single value from the state, along with a bool indicating if + /// the value is valid. + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut; +} + +#[derive(Debug, Clone, Copy)] +pub struct StateCombiner; + +impl StateCombiner { + /// Combine states, merging states from `consume` into `targets`. + /// + /// `mapping` provides (from, to) mappings between `consume` and `targets`. 
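For orientation, a rough usage sketch of `combine` (not part of the patch; it borrows the shape of the `TestSumState` used in the unary executor tests later in this diff, and assumes the elided generic parameters line up with the doc comment above):

    // Two partial SUM states produced by different partitions.
    let mut partials = vec![TestSumState { val: 4 }, TestSumState { val: 6 }];
    // Final states, one per group.
    let mut finals = vec![TestSumState::default(), TestSumState::default()];

    // (from, to): merge partials[0] into finals[0], partials[1] into finals[1].
    // Depending on the elided generics, a turbofish may be needed here.
    StateCombiner::combine(&mut partials, [(0, 0), (1, 1)], &mut finals).unwrap();

    assert_eq!(4, finals[0].val);
    assert_eq!(6, finals[1].val);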
+ pub fn combine( + consume: &mut [State], + mapping: impl IntoIterator, + targets: &mut [State], + ) -> Result<()> + where + State: AggregateState, + Output: ?Sized, + { + for (from, to) in mapping { + let consume = &mut consume[from]; + let target = &mut targets[to]; + target.merge(consume)?; + } + + Ok(()) + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs new file mode 100644 index 000000000..2c09667d4 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs @@ -0,0 +1,239 @@ +use rayexec_error::Result; +use stdutil::iter::IntoExactSizeIterator; + +use super::AggregateState; +use crate::arrays::array::exp::Array; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{Addressable, PhysicalStorage}; + +#[derive(Debug, Clone, Copy)] +pub struct UnaryNonNullUpdater; + +impl UnaryNonNullUpdater { + pub fn update( + array: &Array, + selection: impl IntoExactSizeIterator, + mapping: impl IntoExactSizeIterator, + states: &mut [State], + ) -> Result<()> + where + S: PhysicalStorage, + Output: ?Sized, + for<'a> State: AggregateState<&'a S::StorageType, Output>, + { + if array.is_dictionary() { + let flat = array.flat_view()?; + return Self::update_flat::(flat, selection, mapping, states); + } + + // TODO: Length check. + + let input = S::get_addressable(array.data())?; + let validity = array.validity(); + + if validity.all_valid() { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + let val = input.get(input_idx).unwrap(); + let state = &mut states[state_idx]; + state.update(val)?; + } + } else { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + if !validity.is_valid(input_idx) { + continue; + } + + let val = input.get(input_idx).unwrap(); + let state = &mut states[state_idx]; + state.update(val)?; + } + } + + Ok(()) + } + + pub fn update_flat( + array: FlatArrayView<'_>, + selection: impl IntoExactSizeIterator, + mapping: impl IntoExactSizeIterator, + states: &mut [State], + ) -> Result<()> + where + S: PhysicalStorage, + Output: ?Sized, + for<'b> State: AggregateState<&'b S::StorageType, Output>, + { + let input = S::get_addressable(array.array_buffer)?; + let validity = &array.validity; + + if validity.all_valid() { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + let selected_idx = array.selection.get(input_idx).unwrap(); + + let val = input.get(selected_idx).unwrap(); + let state = &mut states[state_idx]; + state.update(val)?; + } + } else { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + let selected_idx = array.selection.get(input_idx).unwrap(); + + if !validity.is_valid(selected_idx) { + continue; + } + + let val = input.get(selected_idx).unwrap(); + let state = &mut states[state_idx]; + state.update(val)?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalI32, PhysicalUtf8}; + use crate::arrays::executor_exp::PutBuffer; + + #[derive(Debug, Default)] + struct TestSumState { + val: i32, + } + + impl AggregateState<&i32, i32> for TestSumState { + fn merge(&mut self, other: &mut Self) -> Result<()> { + self.val += other.val; + Ok(()) + } + + fn update(&mut self, &input: &i32) -> Result<()> 
{ + self.val += input; + Ok(()) + } + + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + output.put(&self.val); + Ok(()) + } + } + + #[test] + fn unary_primitive_single_state() { + let mut states = [TestSumState::default()]; + let array = Array::try_from_iter([1, 2, 3, 4, 5]).unwrap(); + + UnaryNonNullUpdater::update::( + &array, + [0, 1, 2, 4], + [0, 0, 0, 0], + &mut states, + ) + .unwrap(); + + assert_eq!(11, states[0].val); + } + + #[test] + fn unary_primitive_single_state_dictionary() { + let mut states = [TestSumState::default()]; + let mut array = Array::try_from_iter([1, 2, 3, 4, 5]).unwrap(); + // '[1, 5, 5, 5, 5, 2, 2]' + array + .select(&NopBufferManager, [0, 4, 4, 4, 4, 1, 1]) + .unwrap(); + + UnaryNonNullUpdater::update::( + &array, + [0, 1, 2, 4], // Select from the resulting dictionary. + [0, 0, 0, 0], + &mut states, + ) + .unwrap(); + + assert_eq!(16, states[0].val); + } + + #[test] + fn unary_primitive_single_state_skip_null() { + let mut states = [TestSumState::default()]; + let array = Array::try_from_iter([None, Some(2), Some(3), Some(4), Some(5)]).unwrap(); + + UnaryNonNullUpdater::update::( + &array, + [0, 1, 2, 4], + [0, 0, 0, 0], + &mut states, + ) + .unwrap(); + + assert_eq!(10, states[0].val); + } + + #[test] + fn unary_primitive_multiple_states() { + let mut states = [TestSumState::default(), TestSumState::default()]; + let array = Array::try_from_iter([1, 2, 3, 4, 5]).unwrap(); + + UnaryNonNullUpdater::update::( + &array, + [0, 1, 2, 4, 0, 3, 3], + [0, 0, 0, 0, 1, 1, 0], + &mut states, + ) + .unwrap(); + + assert_eq!(15, states[0].val); + assert_eq!(5, states[1].val); + } + + #[derive(Debug, Default)] + struct TestStringAgg { + val: String, + } + + impl AggregateState<&str, str> for TestStringAgg { + fn merge(&mut self, other: &mut Self) -> Result<()> { + self.val.push_str(&other.val); + Ok(()) + } + + fn update(&mut self, input: &str) -> Result<()> { + self.val.push_str(input); + Ok(()) + } + + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + output.put(&self.val); + Ok(()) + } + } + + #[test] + fn unary_string_single_state() { + // Test just checks to ensure working with varlen is sane. + let mut states = [TestStringAgg::default()]; + let array = Array::try_from_iter(["aa", "bbb", "cccc"]).unwrap(); + + UnaryNonNullUpdater::update::( + &array, + [0, 1, 2], + [0, 0, 0], + &mut states, + ) + .unwrap(); + + assert_eq!("aabbbcccc", &states[0].val); + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/mod.rs new file mode 100644 index 000000000..ab1049476 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/mod.rs @@ -0,0 +1,59 @@ +pub mod aggregate; +pub mod scalar; + +use rayexec_error::Result; + +use super::array::exp::Array; +use super::array::validity::Validity; +use super::buffer::physical_type::AddressableMut; +use super::buffer::ArrayBuffer; + +/// Wrapper around an array buffer and validity buffer that will be used to +/// construct a full array. +#[derive(Debug)] +pub struct OutBuffer<'a> { + pub buffer: &'a mut ArrayBuffer, + pub validity: &'a mut Validity, +} + +impl<'a> OutBuffer<'a> { + pub fn from_array(array: &'a mut Array) -> Result { + Ok(OutBuffer { + buffer: array.data.try_as_mut()?, + validity: &mut array.validity, + }) + } +} + +/// Helper for assigning a value to a location in a buffer. 
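As a sketch of the intended flow (mirroring the executors added later in this patch), an operation closure receives a `PutBuffer` positioned at the current output row and either writes a value or marks the row NULL. The divide-like op below is purely illustrative:

    // Passed as the `op` argument to e.g. BinaryExecutor::execute.
    |&a: &i32, &b: &i32, buf| {
        if b == 0 {
            buf.put_null(); // marks this output row invalid
        } else {
            buf.put(&(a / b)); // writes the value at this row's index
        }
    }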
+#[derive(Debug)] +pub struct PutBuffer<'a, M> +where + M: AddressableMut, +{ + pub(crate) idx: usize, + pub(crate) buffer: &'a mut M, + pub(crate) validity: &'a mut Validity, +} + +impl<'a, M> PutBuffer<'a, M> +where + M: AddressableMut, +{ + pub(crate) fn new(idx: usize, buffer: &'a mut M, validity: &'a mut Validity) -> Self { + debug_assert_eq!(buffer.len(), validity.len()); + PutBuffer { + idx, + buffer, + validity, + } + } + + pub fn put(self, val: &M::T) { + self.buffer.put(self.idx, val) + } + + pub fn put_null(self) { + self.validity.set_invalid(self.idx) + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs new file mode 100644 index 000000000..3ddbdbc75 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs @@ -0,0 +1,282 @@ +use stdutil::iter::IntoExactSizeIterator; +use rayexec_error::Result; + +use crate::arrays::array::exp::Array; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{Addressable, MutablePhysicalStorage, PhysicalStorage}; +use crate::arrays::executor_exp::{OutBuffer, PutBuffer}; + +#[derive(Debug, Clone)] +pub struct BinaryExecutor; + +impl BinaryExecutor { + pub fn execute( + array1: &Array, + sel1: impl IntoExactSizeIterator, + array2: &Array, + sel2: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + O: MutablePhysicalStorage, + for<'a> Op: FnMut(&S1::StorageType, &S2::StorageType, PutBuffer>), + { + if array1.is_dictionary() || array2.is_dictionary() { + let view1 = FlatArrayView::from_array(array1)?; + let view2 = FlatArrayView::from_array(array2)?; + + return Self::execute_flat::(view1, sel1, view2, sel2, out, op); + } + + // TODO: length validation + + let input1 = S1::get_addressable(array1.data())?; + let input2 = S2::get_addressable(array2.data())?; + + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity1 = array1.validity(); + let validity2 = array2.validity(); + + if validity1.all_valid() && validity2.all_valid() { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + let val1 = input1.get(input1_idx).unwrap(); + let val2 = input2.get(input2_idx).unwrap(); + + op( + val1, + val2, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + if validity1.is_valid(input1_idx) && validity2.is_valid(input2_idx) { + let val1 = input1.get(input1_idx).unwrap(); + let val2 = input2.get(input2_idx).unwrap(); + + op( + val1, + val2, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } + + pub fn execute_flat<'a, S1, S2, O, Op>( + array1: FlatArrayView<'a>, + sel1: impl IntoExactSizeIterator, + array2: FlatArrayView<'a>, + sel2: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + O: MutablePhysicalStorage, + for<'b> Op: FnMut(&S1::StorageType, &S2::StorageType, PutBuffer>), + { + // TODO: length validation + + let input1 = S1::get_addressable(array1.array_buffer)?; + let input2 = S2::get_addressable(array2.array_buffer)?; + + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity1 = &array1.validity; + let validity2 = &array2.validity; + + 
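        // Fast path: when both inputs are fully valid, per-row validity checks
        // can be skipped; otherwise any row with an invalid input is marked
        // invalid in the output.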
if validity1.all_valid() && validity2.all_valid() { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + let sel1 = array1.selection.get(input1_idx).unwrap(); + let sel2 = array2.selection.get(input2_idx).unwrap(); + + let val1 = input1.get(sel1).unwrap(); + let val2 = input2.get(sel2).unwrap(); + + op( + val1, + val2, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + let sel1 = array1.selection.get(input1_idx).unwrap(); + let sel2 = array2.selection.get(input2_idx).unwrap(); + + if validity1.is_valid(sel1) && validity2.is_valid(sel2) { + let val1 = input1.get(sel1).unwrap(); + let val2 = input2.get(sel2).unwrap(); + + op( + val1, + val2, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::array::validity::Validity; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::buffer::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::buffer::string_view::StringViewHeap; + use crate::arrays::buffer::{ArrayBuffer, SecondaryBuffer}; + + #[test] + fn binary_simple_add() { + let left = Array::try_from_iter([1, 2, 3]).unwrap(); + let right = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + let mut validity = Validity::new_all_valid(3); + + BinaryExecutor::execute::( + &left, + 0..3, + &right, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&a, &b, buf| buf.put(&(a + b)), + ) + .unwrap(); + assert!(validity.all_valid()); + + let out_slice = out.try_as_slice::().unwrap(); + assert_eq!(&[5, 7, 9], out_slice); + } + + #[test] + fn binary_simple_add_with_selection() { + let mut left = Array::try_from_iter([2]).unwrap(); + // [2, 2, 2] + left.select(&NopBufferManager, [0, 0, 0]).unwrap(); + + let right = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + let mut validity = Validity::new_all_valid(3); + + BinaryExecutor::execute::( + &left, + 0..3, + &right, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&a, &b, buf| buf.put(&(a + b)), + ) + .unwrap(); + assert!(validity.all_valid()); + + let out_slice = out.try_as_slice::().unwrap(); + assert_eq!(&[6, 7, 8], out_slice); + } + + #[test] + fn binary_string_repeat() { + let left = Array::try_from_iter([1, 2, 3]).unwrap(); + let right = Array::try_from_iter(["hello", "world", "goodbye!"]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + out.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + let mut validity = Validity::new_all_valid(3); + + let mut string_buf = String::new(); + BinaryExecutor::execute::( + &left, + 0..3, + &right, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&repeat, s, buf| { + string_buf.clear(); + for _ in 0..repeat { + string_buf.push_str(s); + } + buf.put(&string_buf); + }, + ) + .unwrap(); + assert!(validity.all_valid()); + + let out = out.try_as_string_view_addressable().unwrap(); + assert_eq!("hello", out.get(0).unwrap()); + assert_eq!("worldworld", out.get(1).unwrap()); + 
assert_eq!("goodbye!goodbye!goodbye!", out.get(2).unwrap()); + } + + #[test] + fn binary_add_with_invalid() { + let left = Array::try_from_iter([Some(1), None, Some(3)]).unwrap(); + let right = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + let mut validity = Validity::new_all_valid(3); + + BinaryExecutor::execute::( + &left, + 0..3, + &right, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&a, &b, buf| buf.put(&(a + b)), + ) + .unwrap(); + + let out_slice = out.try_as_slice::().unwrap(); + + assert!(validity.is_valid(0)); + assert_eq!(5, out_slice[0]); + + assert!(!validity.is_valid(1)); + + assert!(validity.is_valid(2)); + assert_eq!(9, out_slice[2]); + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs new file mode 100644 index 000000000..c552c0161 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs @@ -0,0 +1,151 @@ +use stdutil::iter::IntoExactSizeIterator; +use rayexec_error::{RayexecError, Result}; + +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::{ + Addressable, + AddressableMut, + MutablePhysicalStorage, + PhysicalList, + PhysicalStorage, +}; +use crate::arrays::buffer::SecondaryBuffer; +use crate::arrays::executor_exp::OutBuffer; + +pub trait BinaryReducer: Default { + /// Put two values from each list into the reducer. + fn put_values(&mut self, v1: T1, v2: T2); + /// Produce the final value from the reducer. + fn finish(self) -> O; +} + +#[derive(Debug, Clone)] +pub struct BinaryListReducer; + +impl BinaryListReducer { + /// Iterate two list arrays, reducing lists from each array. + /// + /// List reduction requires that if both lists for a given row are non-null, + /// then both lists must be the same length and not contain nulls. + /// + /// If either list is null, the output row will be set to null (same as + /// other executor logic). + /// + /// `R` is used to create a new reducer for each pair of lists. + /// + /// `S1` and `S2` should be for the inner type within the list. + pub fn reduce( + array1: &Array, + sel1: impl IntoExactSizeIterator, + array2: &Array, + sel2: impl IntoExactSizeIterator, + out: OutBuffer, + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + O: MutablePhysicalStorage, + O::StorageType: Sized, + for<'a> R: BinaryReducer<&'a S1::StorageType, &'a S2::StorageType, O::StorageType>, + { + if array1.is_dictionary() || array2.is_dictionary() { + // TODO + } + + let inner1 = match array1.data().get_secondary() { + SecondaryBuffer::List(list) => &list.child, + _ => return Err(RayexecError::new("Array 1 not a list array")), + }; + + let inner2 = match array2.data().get_secondary() { + SecondaryBuffer::List(list) => &list.child, + _ => return Err(RayexecError::new("Array 2 not a list array")), + }; + + if !inner1.validity().all_valid() || !inner2.validity().all_valid() { + // TODO: This can be more selective. Rows that don't conform + // could be skipped with the selections. 
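        // For reference, a `BinaryReducer` for this API can be as small as the
        // following sketch (illustrative only; a dot product over two float
        // lists, assuming PhysicalF64's storage type is f64):
        //
        //     #[derive(Default)]
        //     struct DotProduct(f64);
        //
        //     impl BinaryReducer<&f64, &f64, f64> for DotProduct {
        //         fn put_values(&mut self, v1: &f64, v2: &f64) {
        //             self.0 += v1 * v2;
        //         }
        //         fn finish(self) -> f64 {
        //             self.0
        //         }
        //     }
        //
        // `DotProduct` would then be supplied as the `R` type parameter to
        // `reduce`.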
+ return Err(RayexecError::new( + "List reduction requires all values be non-null", + )); + } + + let metadata1 = PhysicalList::get_addressable(array1.data())?; + let metadata2 = PhysicalList::get_addressable(array2.data())?; + + let validity1 = array1.validity(); + let validity2 = array2.validity(); + + let mut output = O::get_addressable_mut(out.buffer)?; + + let input1 = S1::get_addressable(inner1.data())?; + let input2 = S2::get_addressable(inner2.data())?; + + if validity1.all_valid() && validity2.all_valid() { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + let meta1 = metadata1.get(input1_idx).unwrap(); + let meta2 = metadata2.get(input2_idx).unwrap(); + + if meta1.len != meta2.len { + return Err(RayexecError::new( + "List reduction requires lists be the same length", + ) + .with_field("len1", meta1.len) + .with_field("len2", meta2.len)); + } + + let mut reducer = R::default(); + + for offset in 0..meta1.len { + let idx1 = meta1.offset + offset; + let idx2 = meta2.offset + offset; + + let v1 = input1.get(idx1 as usize).unwrap(); + let v2 = input2.get(idx2 as usize).unwrap(); + + reducer.put_values(v1, v2); + } + + output.put(output_idx, &reducer.finish()); + } + } else { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + if !validity1.is_valid(input1_idx) || !validity2.is_valid(input2_idx) { + out.validity.set_invalid(output_idx); + continue; + } + + let meta1 = metadata1.get(input1_idx).unwrap(); + let meta2 = metadata2.get(input2_idx).unwrap(); + + if meta1.len != meta2.len { + return Err(RayexecError::new( + "List reduction requires lists be the same length", + ) + .with_field("len1", meta1.len) + .with_field("len2", meta2.len)); + } + + let mut reducer = R::default(); + + for offset in 0..meta1.len { + let idx1 = meta1.offset + offset; + let idx2 = meta2.offset + offset; + + let v1 = input1.get(idx1 as usize).unwrap(); + let v2 = input2.get(idx2 as usize).unwrap(); + + reducer.put_values(v1, v2); + } + + output.put(output_idx, &reducer.finish()); + } + } + + Ok(()) + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs new file mode 100644 index 000000000..286006da0 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs @@ -0,0 +1,5 @@ +pub mod binary; +pub mod list_reduce; +pub mod ternary; +pub mod unary; +pub mod uniform; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs new file mode 100644 index 000000000..49599c903 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs @@ -0,0 +1,306 @@ +use stdutil::iter::IntoExactSizeIterator; +use rayexec_error::Result; + +use crate::arrays::array::exp::Array; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{Addressable, MutablePhysicalStorage, PhysicalStorage}; +use crate::arrays::executor_exp::{OutBuffer, PutBuffer}; + +#[derive(Debug, Clone)] +pub struct TernaryExecutor; + +impl TernaryExecutor { + pub fn execute( + array1: &Array, + sel1: impl IntoExactSizeIterator, + array2: &Array, + sel2: impl IntoExactSizeIterator, + array3: &Array, + sel3: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + S3: PhysicalStorage, + O: MutablePhysicalStorage, + 
for<'a> Op: FnMut( + &S1::StorageType, + &S2::StorageType, + &S3::StorageType, + PutBuffer>, + ), + { + if array1.is_dictionary() || array2.is_dictionary() || array3.is_dictionary() { + let flat1 = array1.flat_view()?; + let flat2 = array2.flat_view()?; + let flat3 = array3.flat_view()?; + + return Self::execute_flat::( + flat1, sel1, flat2, sel2, flat3, sel3, out, op, + ); + } + + // TODO: length validation. + + let input1 = S1::get_addressable(array1.data())?; + let input2 = S2::get_addressable(array2.data())?; + let input3 = S3::get_addressable(array3.data())?; + + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity1 = array1.validity(); + let validity2 = array2.validity(); + let validity3 = array3.validity(); + + if validity1.all_valid() && validity2.all_valid() && validity3.all_valid() { + for (output_idx, (input1_idx, (input2_idx, input3_idx))) in sel1 + .into_iter() + .zip(sel2.into_iter().zip(sel3.into_iter())) + .enumerate() + { + let val1 = input1.get(input1_idx).unwrap(); + let val2 = input2.get(input2_idx).unwrap(); + let val3 = input3.get(input3_idx).unwrap(); + + op( + val1, + val2, + val3, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, (input1_idx, (input2_idx, input3_idx))) in sel1 + .into_iter() + .zip(sel2.into_iter().zip(sel3.into_iter())) + .enumerate() + { + if validity1.is_valid(input1_idx) + && validity2.is_valid(input2_idx) + && validity3.is_valid(input3_idx) + { + let val1 = input1.get(input1_idx).unwrap(); + let val2 = input2.get(input2_idx).unwrap(); + let val3 = input3.get(input3_idx).unwrap(); + + op( + val1, + val2, + val3, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } + + pub fn execute_flat<'a, S1, S2, S3, O, Op>( + array1: FlatArrayView<'a>, + sel1: impl IntoExactSizeIterator, + array2: FlatArrayView<'a>, + sel2: impl IntoExactSizeIterator, + array3: FlatArrayView<'a>, + sel3: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + S3: PhysicalStorage, + O: MutablePhysicalStorage, + for<'b> Op: FnMut( + &S1::StorageType, + &S2::StorageType, + &S3::StorageType, + PutBuffer>, + ), + { + // TODO: length validation. 
+ + let input1 = S1::get_addressable(array1.array_buffer)?; + let input2 = S2::get_addressable(array2.array_buffer)?; + let input3 = S3::get_addressable(array3.array_buffer)?; + + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity1 = &array1.validity; + let validity2 = &array2.validity; + let validity3 = &array3.validity; + + if validity1.all_valid() && validity2.all_valid() && validity3.all_valid() { + for (output_idx, (input1_idx, (input2_idx, input3_idx))) in sel1 + .into_iter() + .zip(sel2.into_iter().zip(sel3.into_iter())) + .enumerate() + { + let sel1 = array1.selection.get(input1_idx).unwrap(); + let sel2 = array2.selection.get(input2_idx).unwrap(); + let sel3 = array3.selection.get(input3_idx).unwrap(); + + let val1 = input1.get(sel1).unwrap(); + let val2 = input2.get(sel2).unwrap(); + let val3 = input3.get(sel3).unwrap(); + + op( + val1, + val2, + val3, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, (input1_idx, (input2_idx, input3_idx))) in sel1 + .into_iter() + .zip(sel2.into_iter().zip(sel3.into_iter())) + .enumerate() + { + let sel1 = array1.selection.get(input1_idx).unwrap(); + let sel2 = array2.selection.get(input2_idx).unwrap(); + let sel3 = array3.selection.get(input3_idx).unwrap(); + + if validity1.is_valid(sel1) && validity2.is_valid(sel2) && validity3.is_valid(sel3) + { + let val1 = input1.get(sel1).unwrap(); + let val2 = input2.get(sel2).unwrap(); + let val3 = input3.get(sel3).unwrap(); + + op( + val1, + val2, + val3, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::buffer::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; + + #[test] + fn ternary_left_prepend_simple() { + let strings = Array::try_from_iter(["a", "b", "c"]).unwrap(); + let count = Array::try_from_iter([1, 2, 3]).unwrap(); + let pad = Array::try_from_iter(["<", ".", "!"]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 3).unwrap(); + + let mut str_buf = String::new(); + + TernaryExecutor::execute::( + &strings, + 0..3, + &count, + 0..3, + &pad, + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |s, &count, pad, buf| { + str_buf.clear(); + for _ in 0..count { + str_buf.push_str(pad); + } + str_buf.push_str(s); + + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter(["( + &strings, + 0..3, + &count, + 0..3, + &pad, + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |s, &count, pad, buf| { + str_buf.clear(); + for _ in 0..count { + str_buf.push_str(pad); + } + str_buf.push_str(s); + + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter([None, None, Some("!!!c")]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn ternary_left_prepend_dictionary() { + let strings = Array::try_from_iter(["a", "b", "c"]).unwrap(); + let count = Array::try_from_iter([1, 2, 3]).unwrap(); + let mut pad = Array::try_from_iter(["<", ".", "!"]).unwrap(); + // '[".", ".", "<"]' + pad.select(&NopBufferManager, [1, 1, 0]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 3).unwrap(); + + let mut str_buf = String::new(); + + TernaryExecutor::execute::( + &strings, + 0..3, + &count, + 
0..3, + &pad, + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |s, &count, pad, buf| { + str_buf.clear(); + for _ in 0..count { + str_buf.push_str(pad); + } + str_buf.push_str(s); + + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter([".a", "..b", "<<( + array: &Array, + selection: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S: PhysicalStorage, + O: MutablePhysicalStorage, + for<'a> Op: FnMut(&S::StorageType, PutBuffer>), + { + if array.is_dictionary() { + let view = array.flat_view()?; + return Self::execute_flat::(view, selection, out, op); + } + + let input = S::get_addressable(array.data())?; + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity = array.validity(); + + if validity.all_valid() { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + op( + input.get(input_idx).unwrap(), + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + if validity.is_valid(input_idx) { + op( + input.get(input_idx).unwrap(), + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } + + pub fn execute_flat( + array: FlatArrayView<'_>, + selection: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S: PhysicalStorage, + O: MutablePhysicalStorage, + for<'b> Op: FnMut(&S::StorageType, PutBuffer>), + { + let input = S::get_addressable(array.array_buffer)?; + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity = array.validity; + + if validity.all_valid() { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + let selected_idx = array.selection.get(input_idx).unwrap(); + + op( + input.get(selected_idx).unwrap(), + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + let selected_idx = array.selection.get(input_idx).unwrap(); + + if validity.is_valid(selected_idx) { + op( + input.get(selected_idx).unwrap(), + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } + + /// Executes an operation in place. + /// + /// Note that changing the lengths for variable length data is not yet + /// supported, as the length change won't persist since the metadata isn't + /// being changed. + pub fn execute_in_place( + array: &mut Array, + selection: impl IntoExactSizeIterator, + mut op: Op, + ) -> Result<()> + where + S: MutablePhysicalStorage, + Op: FnMut(&mut S::StorageType), + { + let validity = &array.validity; + let mut input = S::get_addressable_mut(array.data.try_as_mut()?)?; + + if validity.all_valid() { + for idx in selection.into_iter() { + op(input.get_mut(idx).unwrap()); + } + } else { + for idx in selection.into_iter() { + if validity.is_valid(idx) { + op(input.get_mut(idx).unwrap()); + } + } + } + + Ok(()) + } + + /// Iterate over all values in a flat array view, calling `op` for each row. + /// + /// Valid values are represented with Some, invalid values are represented + /// with None. + /// + /// Note this should really only be used for tests. 
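A usage sketch for `for_each_flat` (test-only, per the note above; the `::<PhysicalI32, _>` turbofish is an assumption, since the generic parameters are elided in this diff):

    let array = Array::try_from_iter([Some(1), None, Some(3)]).unwrap();

    UnaryExecutor::for_each_flat::<PhysicalI32, _>(
        array.flat_view().unwrap(),
        0..3,
        |idx, v| println!("row {idx}: {v:?}"),
    )
    .unwrap();
    // Prints: row 0: Some(1), row 1: None, row 2: Some(3)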
+ pub fn for_each_flat( + array: FlatArrayView<'_>, + selection: impl IntoExactSizeIterator, + mut op: Op, + ) -> Result<()> + where + S: PhysicalStorage, + Op: FnMut(usize, Option<&S::StorageType>), + { + let input = S::get_addressable(array.array_buffer)?; + let validity = array.validity; + + // TODO: `op` should be called with input_idx? + if validity.all_valid() { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + let selected_idx = array.selection.get(input_idx).unwrap(); + let v = input.get(selected_idx).unwrap(); + + op(output_idx, Some(v)) + } + } else { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + let selected_idx = array.selection.get(input_idx).unwrap(); + + if validity.is_valid(selected_idx) { + let v = input.get(selected_idx).unwrap(); + op(output_idx, Some(v)); + } else { + op(output_idx, None); + } + } + } + + Ok(()) + } + + pub fn select( + array: &Array, + selection: impl IntoExactSizeIterator, + true_indices: &mut Vec, + ) -> Result<()> { + let flat = array.flat_view()?; + + let bools = PhysicalBool::get_addressable(&flat.array_buffer)?; + let validity = flat.validity; + + if validity.all_valid() { + for input_idx in selection.into_iter() { + let selected_idx = flat.selection.get(input_idx).unwrap(); + let v = *bools.get(selected_idx).unwrap(); + + if v { + true_indices.push(input_idx); + } + } + } else { + for input_idx in selection.into_iter() { + let selected_idx = flat.selection.get(input_idx).unwrap(); + + if validity.is_valid(selected_idx) { + let v = *bools.get(selected_idx).unwrap(); + if v { + true_indices.push(input_idx); + } + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::array::validity::Validity; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::buffer::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::buffer::string_view::{StringViewAddressableMut, StringViewHeap}; + use crate::arrays::buffer::{ArrayBuffer, SecondaryBuffer}; + + #[test] + fn int32_inc_by_2() { + let array = Array::try_from_iter([1, 2, 3]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + let mut validity = Validity::new_all_valid(3); + + UnaryExecutor::execute::( + &array, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&v, buf| buf.put(&(v + 2)), + ) + .unwrap(); + assert!(validity.all_valid()); + + let out_slice = out.try_as_slice::().unwrap(); + assert_eq!(&[3, 4, 5], out_slice); + } + + #[test] + fn int32_inc_by_2_using_flat_view() { + let array = Array::try_from_iter([1, 2, 3]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + let mut validity = Validity::new_all_valid(3); + + let flat = FlatArrayView::from_array(&array).unwrap(); + + UnaryExecutor::execute_flat::( + flat, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&v, buf| buf.put(&(v + 2)), + ) + .unwrap(); + assert!(validity.all_valid()); + + let out_slice = out.try_as_slice::().unwrap(); + assert_eq!(&[3, 4, 5], out_slice); + } + + #[test] + fn int32_inc_by_2_in_place() { + let mut array = Array::try_from_iter([1, 2, 3]).unwrap(); + + UnaryExecutor::execute_in_place::(&mut array, 0..3, |v| *v += 2).unwrap(); + + let arr_slice = array.data().try_as_slice::().unwrap(); + assert_eq!(&[3, 4, 5], arr_slice); + } + + #[test] + fn string_double_named_func() { + // Example with defined 
function, and allocating a new string every time. + let array = Array::try_from_iter([ + "a", + "bb", + "ccc", + "dddd", + "heapafter", // Inlined, will be moved to heap after doubling. + "alongerstringdontinline", + ]) + .unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 6).unwrap(); + out.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + + let mut validity = Validity::new_all_valid(6); + + fn my_string_double(s: &str, buf: PutBuffer) { + let mut double = s.to_string(); + double.push_str(s); + buf.put(&double); + } + + UnaryExecutor::execute::( + &array, + 0..6, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + my_string_double, + ) + .unwrap(); + assert!(validity.all_valid()); + + let out = out.try_as_string_view_addressable().unwrap(); + + assert_eq!("aa", out.get(0).unwrap()); + assert_eq!("bbbb", out.get(1).unwrap()); + assert_eq!("cccccc", out.get(2).unwrap()); + assert_eq!("dddddddd", out.get(3).unwrap()); + assert_eq!("heapafterheapafter", out.get(4).unwrap()); + assert_eq!( + "alongerstringdontinlinealongerstringdontinline", + out.get(5).unwrap() + ); + } + + #[test] + fn string_double_closure_reused_buf() { + // Same thing, but with closure reusing a string buffer. + let array = Array::try_from_iter([ + "a", + "bb", + "ccc", + "dddd", + "heapafter", // Inlined, will be moved to heap after doubling. + "alongerstringdontinline", + ]) + .unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 6).unwrap(); + out.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + + let mut validity = Validity::new_all_valid(6); + + let mut string_buf = String::new(); + + UnaryExecutor::execute::( + &array, + 0..6, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |s, buf| { + string_buf.clear(); + + string_buf.push_str(s); + string_buf.push_str(s); + + buf.put(&string_buf); + }, + ) + .unwrap(); + assert!(validity.all_valid()); + + let out = out.try_as_string_view_addressable().unwrap(); + + assert_eq!("aa", out.get(0).unwrap()); + assert_eq!("bbbb", out.get(1).unwrap()); + assert_eq!("cccccc", out.get(2).unwrap()); + assert_eq!("dddddddd", out.get(3).unwrap()); + assert_eq!("heapafterheapafter", out.get(4).unwrap()); + assert_eq!( + "alongerstringdontinlinealongerstringdontinline", + out.get(5).unwrap() + ); + } + + #[test] + fn string_uppercase_in_place() { + let mut array = Array::try_from_iter(["a", "bb", "ccc"]).unwrap(); + + UnaryExecutor::execute_in_place::(&mut array, 0..3, |v| { + v.make_ascii_uppercase() + }) + .unwrap(); + + let out = array.data().try_as_string_view_addressable().unwrap(); + + assert_eq!("A", out.get(0).unwrap()); + assert_eq!("BB", out.get(1).unwrap()); + assert_eq!("CCC", out.get(2).unwrap()); + } + + #[test] + fn int32_inc_by_2_with_dict() { + let mut array = Array::try_from_iter([1, 2, 3]).unwrap(); + // [3, 3, 2, 1, 1, 3] + array.select(&NopBufferManager, [2, 2, 1, 0, 0, 2]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 6).unwrap(); + let mut validity = Validity::new_all_valid(6); + + UnaryExecutor::execute::( + &array, + 0..6, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&v, buf| buf.put(&(v + 2)), + ) + .unwrap(); + assert!(validity.all_valid()); + + let out_slice = out.try_as_slice::().unwrap(); + assert_eq!(&[5, 5, 4, 3, 3, 5], out_slice); + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs 
b/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs new file mode 100644 index 000000000..8770acda3 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs @@ -0,0 +1,272 @@ +use stdutil::iter::IntoExactSizeIterator; +use rayexec_error::Result; + +use crate::arrays::array::exp::Array; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{Addressable, MutablePhysicalStorage, PhysicalStorage}; +use crate::arrays::executor_exp::{OutBuffer, PutBuffer}; + +#[derive(Debug, Clone)] +pub struct UniformExecutor; + +impl UniformExecutor { + /// Executes an operation across uniform array types. + /// + /// The selection applies to all arrays. + pub fn execute( + arrays: &[Array], + sel: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S: PhysicalStorage, + O: MutablePhysicalStorage, + for<'a> Op: FnMut(&[&S::StorageType], PutBuffer>), + { + if arrays.iter().any(|arr| arr.is_dictionary()) { + let flats = arrays + .iter() + .map(|arr| arr.flat_view()) + .collect::>>()?; + + return Self::execute_flat::(&flats, sel, out, op); + } + + let inputs = arrays + .iter() + .map(|arr| S::get_addressable(arr.data())) + .collect::>>()?; + + let all_valid = arrays.iter().all(|arr| arr.validity().all_valid()); + + let mut output = O::get_addressable_mut(out.buffer)?; + + let mut op_inputs = Vec::with_capacity(arrays.len()); + + if all_valid { + for (output_idx, input_idx) in sel.into_iter().enumerate() { + op_inputs.clear(); + for input in &inputs { + op_inputs.push(input.get(input_idx).unwrap()); + } + + op( + &op_inputs, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + let validities: Vec<_> = arrays.iter().map(|arr| arr.validity()).collect(); + + for (output_idx, input_idx) in sel.into_iter().enumerate() { + let all_valid = validities.iter().all(|v| v.is_valid(input_idx)); + + if all_valid { + op_inputs.clear(); + for input in &inputs { + op_inputs.push(input.get(input_idx).unwrap()); + } + + op( + &op_inputs, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } + + pub fn execute_flat( + arrays: &[FlatArrayView], + sel: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S: PhysicalStorage, + O: MutablePhysicalStorage, + for<'a> Op: FnMut(&[&S::StorageType], PutBuffer>), + { + // TODO: length check + + let inputs = arrays + .iter() + .map(|arr| S::get_addressable(arr.array_buffer)) + .collect::>>()?; + + let all_valid = arrays.iter().all(|arr| arr.validity.all_valid()); + + let mut output = O::get_addressable_mut(out.buffer)?; + + let mut op_inputs = Vec::with_capacity(arrays.len()); + + if all_valid { + for (output_idx, input_idx) in sel.into_iter().enumerate() { + op_inputs.clear(); + for (input, array) in inputs.iter().zip(arrays) { + let sel_idx = array.selection.get(input_idx).unwrap(); + op_inputs.push(input.get(sel_idx).unwrap()); + } + + op( + &op_inputs, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, input_idx) in sel.into_iter().enumerate() { + let mut all_valid = true; + + for array in arrays { + let sel_idx = array.selection.get(input_idx).unwrap(); + all_valid = all_valid && array.validity.is_valid(sel_idx); + } + + if all_valid { + for (input, array) in inputs.iter().zip(arrays) { + let sel_idx = array.selection.get(input_idx).unwrap(); + 
op_inputs.push(input.get(sel_idx).unwrap()); + } + + op( + &op_inputs, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; + + #[test] + fn uniform_and_simple() { + let a = Array::try_from_iter([true, true, true]).unwrap(); + let b = Array::try_from_iter([true, true, false]).unwrap(); + let c = Array::try_from_iter([true, false, false]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + + UniformExecutor::execute::( + &[a, b, c], + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |bools, buf| { + let v = bools.iter().all(|b| **b); + buf.put(&v); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter([true, false, false]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn uniform_string_concat_row_wise() { + let a = Array::try_from_iter(["a", "b", "c"]).unwrap(); + let b = Array::try_from_iter(["1", "2", "3"]).unwrap(); + let c = Array::try_from_iter(["dog", "cat", "horse"]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 3).unwrap(); + + let mut str_buf = String::new(); + + UniformExecutor::execute::( + &[a, b, c], + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |strings, buf| { + str_buf.clear(); + for s in strings { + str_buf.push_str(s); + } + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter(["a1dog", "b2cat", "c3horse"]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn uniform_string_concat_row_wise_with_invalid() { + let a = Array::try_from_iter([Some("a"), Some("b"), None]).unwrap(); + let b = Array::try_from_iter(["1", "2", "3"]).unwrap(); + let c = Array::try_from_iter([Some("dog"), None, Some("horse")]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 3).unwrap(); + + let mut str_buf = String::new(); + + UniformExecutor::execute::( + &[a, b, c], + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |strings, buf| { + str_buf.clear(); + for s in strings { + str_buf.push_str(s); + } + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter([Some("a1dog"), None, None]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn uniform_string_concat_row_wise_with_dictionary() { + let a = Array::try_from_iter(["a", "b", "c"]).unwrap(); + let b = Array::try_from_iter(["1", "2", "3"]).unwrap(); + let mut c = Array::try_from_iter(["dog", "cat", "horse"]).unwrap(); + // '["horse", "horse", "dog"] + c.select(&NopBufferManager, [2, 2, 0]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 3).unwrap(); + + let mut str_buf = String::new(); + + UniformExecutor::execute::( + &[a, b, c], + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |strings, buf| { + str_buf.clear(); + for s in strings { + str_buf.push_str(s); + } + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter(["a1horse", "b2horse", "c3dog"]).unwrap(); + + assert_arrays_eq(&expected, &out); + } +} diff --git a/crates/rayexec_execution/src/arrays/format/mod.rs b/crates/rayexec_execution/src/arrays/format/mod.rs index 106625af5..0f384c254 100644 --- 
a/crates/rayexec_execution/src/arrays/format/mod.rs +++ b/crates/rayexec_execution/src/arrays/format/mod.rs @@ -5,7 +5,7 @@ use std::fmt; use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::scalar::ScalarValue; /// Formatting options for arrays and scalars. @@ -57,7 +57,7 @@ impl<'a> Formatter<'a> { /// Returns `None` if the idx is out of bounds. pub fn format_array_value<'b>( &self, - array: &'b Array, + array: &'b Array2, idx: usize, ) -> Result> { let scalar = array.logical_value(idx)?; diff --git a/crates/rayexec_execution/src/arrays/format/pretty/table.rs b/crates/rayexec_execution/src/arrays/format/pretty/table.rs index 3896f95a2..24d8789cb 100644 --- a/crates/rayexec_execution/src/arrays/format/pretty/table.rs +++ b/crates/rayexec_execution/src/arrays/format/pretty/table.rs @@ -7,8 +7,8 @@ use textwrap::core::display_width; use textwrap::{fill_inplace, wrap}; use super::display::{table_width, Alignment, PrettyFooter, PrettyHeader, PrettyValues}; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::arrays::datatype::DataType; use crate::arrays::field::Schema; use crate::arrays::format::{FormatOptions, Formatter}; @@ -21,7 +21,7 @@ const DEFAULT_MAX_ROWS: usize = 50; pub fn pretty_format_batches( schema: &Schema, - batches: &[Batch], + batches: &[Batch2], max_width: usize, max_rows: Option, ) -> Result { @@ -40,7 +40,7 @@ impl PrettyTable { /// Try to create a new pretty-formatted table. pub fn try_new( schema: &Schema, - batches: &[Batch], + batches: &[Batch2], max_width: usize, max_rows: Option, ) -> Result { @@ -248,7 +248,7 @@ impl PrettyTable { } fn column_values_for_batch( - batch: &Batch, + batch: &Batch2, format: &TableFormat, range: Range, ) -> Result<(Vec, usize)> { @@ -552,7 +552,7 @@ impl ColumnValues { /// If the upper bound in the range exceeds the length of the array, it'll /// be clamped to the length of the array. 
pub fn try_from_array( - array: &Array, + array: &Array2, range: Option>, max_width: Option, ) -> Result { @@ -841,9 +841,9 @@ mod tests { Field::new("b", DataType::Int32, true), ]); - let batch = Batch::try_new(vec![ - Array::from_iter([Some("a"), Some("b"), None, Some("d")]), - Array::from_iter([Some(1), None, Some(10), Some(100)]), + let batch = Batch2::try_new(vec![ + Array2::from_iter([Some("a"), Some("b"), None, Some("d")]), + Array2::from_iter([Some(1), None, Some(10), Some(100)]), ]) .unwrap(); @@ -873,10 +873,10 @@ mod tests { Field::new("c3", DataType::Utf8, true), ]); - let batch = Batch::try_new(vec![ - Array::from_iter([Some("a\nb"), Some("c"), Some("d")]), - Array::from_iter([Some(1), Some(10), Some(100)]), - Array::from_iter([Some("Mario"), Some("Yoshi"), Some("Luigi\nPeach")]), + let batch = Batch2::try_new(vec![ + Array2::from_iter([Some("a\nb"), Some("c"), Some("d")]), + Array2::from_iter([Some(1), Some(10), Some(100)]), + Array2::from_iter([Some("Mario"), Some("Yoshi"), Some("Luigi\nPeach")]), ]) .unwrap(); @@ -906,9 +906,9 @@ mod tests { Field::new("b", DataType::Int32, true), ]); - let batch = Batch::try_new(vec![ - Array::from_iter([Some("a")]), - Array::from_iter([Some(1)]), + let batch = Batch2::try_new(vec![ + Array2::from_iter([Some("a")]), + Array2::from_iter([Some(1)]), ]) .unwrap(); @@ -940,7 +940,7 @@ mod tests { ]); let create_batch = |s, n| { - Batch::try_new([Array::from_iter([Some(s)]), Array::from_iter([Some(n)])]).unwrap() + Batch2::try_new([Array2::from_iter([Some(s)]), Array2::from_iter([Some(n)])]).unwrap() }; let batches = vec![ @@ -983,8 +983,11 @@ mod tests { let a_vals: Vec<_> = (0..10).map(|v| v.to_string()).collect(); let b_vals: Vec<_> = (0..10).map(Some).collect(); - let batches = - vec![Batch::try_new(vec![Array::from_iter(a_vals), Array::from_iter(b_vals)]).unwrap()]; + let batches = vec![Batch2::try_new(vec![ + Array2::from_iter(a_vals), + Array2::from_iter(b_vals), + ]) + .unwrap()]; let table = pretty_format_batches(&schema, &batches, 80, Some(4)).unwrap(); @@ -1017,8 +1020,11 @@ mod tests { let a_vals: Vec<_> = (0..10).map(|v| Some(v.to_string())).collect(); let b_vals: Vec<_> = (0..10).map(Some).collect(); - let batches = - vec![Batch::try_new(vec![Array::from_iter(a_vals), Array::from_iter(b_vals)]).unwrap()]; + let batches = vec![Batch2::try_new(vec![ + Array2::from_iter(a_vals), + Array2::from_iter(b_vals), + ]) + .unwrap()]; let table = pretty_format_batches(&schema, &batches, 80, Some(3)).unwrap(); @@ -1050,11 +1056,11 @@ mod tests { ]); let create_batch = |a, b, c, d| { - Batch::try_new(vec![ - Array::from_iter([Some(a)]), - Array::from_iter([Some(b)]), - Array::from_iter([Some(c)]), - Array::from_iter([Some(d)]), + Batch2::try_new(vec![ + Array2::from_iter([Some(a)]), + Array2::from_iter([Some(b)]), + Array2::from_iter([Some(c)]), + Array2::from_iter([Some(d)]), ]) .unwrap() }; @@ -1100,11 +1106,11 @@ mod tests { ]); let create_batch = |a, b, c, d| { - Batch::try_new(vec![ - Array::from_iter([Some(a)]), - Array::from_iter([Some(b)]), - Array::from_iter([Some(c)]), - Array::from_iter([Some(d)]), + Batch2::try_new(vec![ + Array2::from_iter([Some(a)]), + Array2::from_iter([Some(b)]), + Array2::from_iter([Some(c)]), + Array2::from_iter([Some(d)]), ]) .unwrap() }; @@ -1144,10 +1150,10 @@ mod tests { ]); let create_batch = |a, b, c| { - Batch::try_new(vec![ - Array::from_iter([Some(a)]), - Array::from_iter([Some(b)]), - Array::from_iter([Some(c)]), + Batch2::try_new(vec![ + Array2::from_iter([Some(a)]), + Array2::from_iter([Some(b)]), 
+ Array2::from_iter([Some(c)]), ]) .unwrap() }; @@ -1187,11 +1193,11 @@ mod tests { ]); let create_batch = |a, b, c, d| { - Batch::try_new(vec![ - Array::from_iter([Some(a)]), - Array::from_iter([Some(b)]), - Array::from_iter([Some(c)]), - Array::from_iter([Some(d)]), + Batch2::try_new(vec![ + Array2::from_iter([Some(a)]), + Array2::from_iter([Some(b)]), + Array2::from_iter([Some(c)]), + Array2::from_iter([Some(d)]), ]) .unwrap() }; @@ -1262,23 +1268,23 @@ mod tests { ]); // First record should be printed. - let first = Batch::try_new(vec![ - Array::from_iter([Some("1"), Some("2")]), - Array::from_iter([Some(1), Some(2)]), + let first = Batch2::try_new(vec![ + Array2::from_iter([Some("1"), Some("2")]), + Array2::from_iter([Some(1), Some(2)]), ]) .unwrap(); // Nothing in this batch should be printed. - let middle = Batch::try_new(vec![ - Array::from_iter([Some("3"), Some("4")]), - Array::from_iter([Some(3), Some(4)]), + let middle = Batch2::try_new(vec![ + Array2::from_iter([Some("3"), Some("4")]), + Array2::from_iter([Some(3), Some(4)]), ]) .unwrap(); // Last record should be printed. - let last = Batch::try_new(vec![ - Array::from_iter([Some("5"), Some("6")]), - Array::from_iter([Some(5), Some(6)]), + let last = Batch2::try_new(vec![ + Array2::from_iter([Some("5"), Some("6")]), + Array2::from_iter([Some(5), Some(6)]), ]) .unwrap(); diff --git a/crates/rayexec_execution/src/arrays/format/ugly.rs b/crates/rayexec_execution/src/arrays/format/ugly.rs index f52d5565a..097902afe 100644 --- a/crates/rayexec_execution/src/arrays/format/ugly.rs +++ b/crates/rayexec_execution/src/arrays/format/ugly.rs @@ -2,13 +2,13 @@ use std::fmt::Write as _; use rayexec_error::Result; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::field::Schema; use crate::arrays::format::{FormatOptions, Formatter}; pub fn ugly_format_no_schema<'a, I>(batches: I) -> Result where - I: IntoIterator, + I: IntoIterator, { const OPTS: FormatOptions = FormatOptions::new(); let formatter = Formatter::new(OPTS); @@ -40,7 +40,7 @@ where pub fn ugly_format<'a, I>(schema: &Schema, batches: I) -> Result where - I: IntoIterator, + I: IntoIterator, { const OPTS: FormatOptions = FormatOptions::new(); let formatter = Formatter::new(OPTS); diff --git a/crates/rayexec_execution/src/arrays/mod.rs b/crates/rayexec_execution/src/arrays/mod.rs index d35785fe0..2d84ca46e 100644 --- a/crates/rayexec_execution/src/arrays/mod.rs +++ b/crates/rayexec_execution/src/arrays/mod.rs @@ -1,9 +1,12 @@ pub mod array; pub mod batch; +pub mod batch_exp; pub mod bitmap; +pub mod buffer; pub mod compute; pub mod datatype; pub mod executor; +pub mod executor_exp; pub mod field; pub mod format; pub mod row; diff --git a/crates/rayexec_execution/src/arrays/row/encoding.rs b/crates/rayexec_execution/src/arrays/row/encoding.rs index 67d059e0f..a84148ef6 100644 --- a/crates/rayexec_execution/src/arrays/row/encoding.rs +++ b/crates/rayexec_execution/src/arrays/row/encoding.rs @@ -1,28 +1,28 @@ use half::f16; use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData, BinaryData}; +use crate::arrays::array::{Array2, ArrayData2, BinaryData}; use crate::arrays::executor::physical_type::{ AsBytes, - PhysicalBinary, - PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, - PhysicalStorage, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, + PhysicalBinary_2, + 
PhysicalBool_2, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, + PhysicalStorage2, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::scalar::interval::Interval; /// Binary-encoded rows suitable for comparisons. @@ -159,7 +159,7 @@ pub struct ComparableRowEncoder { } impl ComparableRowEncoder { - pub fn encode(&self, columns: &[&Array]) -> Result { + pub fn encode(&self, columns: &[&Array2]) -> Result { if columns.len() != self.columns.len() { return Err(RayexecError::new("Column mismatch")); } @@ -192,58 +192,58 @@ impl ComparableRowEncoder { let mut row_offset = *offsets.last().unwrap(); for (arr, cmp_col) in columns.iter().zip(self.columns.iter()) { row_offset = match arr.array_data() { - ArrayData::UntypedNull(_) => { + ArrayData2::UntypedNull(_) => { Self::encode_untyped_null(cmp_col, data, row_offset)? } - ArrayData::Boolean(_) => Self::encode_primitive::( + ArrayData2::Boolean(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Int8(_) => Self::encode_primitive::( + ArrayData2::Int8(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Int16(_) => Self::encode_primitive::( + ArrayData2::Int16(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Int32(_) => Self::encode_primitive::( + ArrayData2::Int32(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Int64(_) => Self::encode_primitive::( + ArrayData2::Int64(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Int128(_) => Self::encode_primitive::( + ArrayData2::Int128(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::UInt8(_) => Self::encode_primitive::( + ArrayData2::UInt8(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::UInt16(_) => Self::encode_primitive::( + ArrayData2::UInt16(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::UInt32(_) => Self::encode_primitive::( + ArrayData2::UInt32(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::UInt64(_) => Self::encode_primitive::( + ArrayData2::UInt64(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::UInt128(_) => Self::encode_primitive::( + ArrayData2::UInt128(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Float16(_) => Self::encode_primitive::( + ArrayData2::Float16(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Float32(_) => Self::encode_primitive::( + ArrayData2::Float32(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Float64(_) => Self::encode_primitive::( + ArrayData2::Float64(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Interval(_) => Self::encode_primitive::( + ArrayData2::Interval(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Binary(_) => Self::encode_varlen::( + ArrayData2::Binary(_) => Self::encode_varlen::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::List(_) => not_implemented!("Row encode list"), + 
ArrayData2::List(_) => not_implemented!("Row encode list"), }; } @@ -255,32 +255,32 @@ impl ComparableRowEncoder { /// Compute the size of the data buffer we'll need for storing all encoded /// rows. - fn compute_data_size(&self, columns: &[&Array]) -> Result { + fn compute_data_size(&self, columns: &[&Array2]) -> Result { let mut size = 0; for arr in columns { let mut arr_size = match arr.array_data() { - ArrayData::UntypedNull(_) => 0, // Nulls will be encoded in the "validity" portion of the row. - ArrayData::Boolean(d) => d.len() * std::mem::size_of::(), // Note this will expand the 1 bit bools to bytes. - ArrayData::Int8(d) => d.data_size_bytes(), - ArrayData::Int16(d) => d.data_size_bytes(), - ArrayData::Int32(d) => d.data_size_bytes(), - ArrayData::Int64(d) => d.data_size_bytes(), - ArrayData::Int128(d) => d.data_size_bytes(), - ArrayData::UInt8(d) => d.data_size_bytes(), - ArrayData::UInt16(d) => d.data_size_bytes(), - ArrayData::UInt32(d) => d.data_size_bytes(), - ArrayData::UInt64(d) => d.data_size_bytes(), - ArrayData::UInt128(d) => d.data_size_bytes(), - ArrayData::Float16(d) => d.data_size_bytes(), - ArrayData::Float32(d) => d.data_size_bytes(), - ArrayData::Float64(d) => d.data_size_bytes(), - ArrayData::Interval(d) => d.data_size_bytes(), - ArrayData::Binary(d) => match d { + ArrayData2::UntypedNull(_) => 0, // Nulls will be encoded in the "validity" portion of the row. + ArrayData2::Boolean(d) => d.len() * std::mem::size_of::(), // Note this will expand the 1 bit bools to bytes. + ArrayData2::Int8(d) => d.data_size_bytes(), + ArrayData2::Int16(d) => d.data_size_bytes(), + ArrayData2::Int32(d) => d.data_size_bytes(), + ArrayData2::Int64(d) => d.data_size_bytes(), + ArrayData2::Int128(d) => d.data_size_bytes(), + ArrayData2::UInt8(d) => d.data_size_bytes(), + ArrayData2::UInt16(d) => d.data_size_bytes(), + ArrayData2::UInt32(d) => d.data_size_bytes(), + ArrayData2::UInt64(d) => d.data_size_bytes(), + ArrayData2::UInt128(d) => d.data_size_bytes(), + ArrayData2::Float16(d) => d.data_size_bytes(), + ArrayData2::Float32(d) => d.data_size_bytes(), + ArrayData2::Float64(d) => d.data_size_bytes(), + ArrayData2::Interval(d) => d.data_size_bytes(), + ArrayData2::Binary(d) => match d { BinaryData::Binary(d) => d.data_size_bytes(), BinaryData::LargeBinary(d) => d.data_size_bytes(), BinaryData::German(d) => d.data_size_bytes(), }, - ArrayData::List(_) => not_implemented!("Row encode list"), + ArrayData2::List(_) => not_implemented!("Row encode list"), }; // Account for validities. @@ -301,19 +301,19 @@ impl ComparableRowEncoder { /// This should return the new offset to write to for the next value. fn encode_varlen<'a, S>( col: &ComparableColumn, - arr: &'a Array, + arr: &'a Array2, row: usize, buf: &mut [u8], start: usize, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: ComparableEncode + AsBytes, { let null_byte = col.null_byte(); let valid_byte = col.valid_byte(); - match UnaryExecutor::value_at::(arr, row)? { + match UnaryExecutor2::value_at::(arr, row)? { Some(val) => { buf[start] = valid_byte; let end = start + 1 + val.as_bytes().len(); @@ -342,19 +342,19 @@ impl ComparableRowEncoder { /// This should return the new offset to write to for the next value. 
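    ///
    /// A non-null value is encoded as the column's `valid_byte` followed by the
    /// comparably-encoded value bytes; a null is marked with the column's
    /// `null_byte`.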
fn encode_primitive<'a, S>( col: &ComparableColumn, - arr: &'a Array, + arr: &'a Array2, row: usize, buf: &mut [u8], start: usize, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: ComparableEncode, { let null_byte = col.null_byte(); let valid_byte = col.valid_byte(); - match UnaryExecutor::value_at::(arr, row)? { + match UnaryExecutor2::value_at::(arr, row)? { Some(val) => { buf[start] = valid_byte; let end = start + 1 + std::mem::size_of::>(); @@ -482,8 +482,8 @@ mod tests { #[test] fn simple_primitive_cmp_between_cols_asc() { - let col1 = Array::from_iter([-1, 0, 1]); - let col2 = Array::from_iter([1, 0, -1]); + let col1 = Array2::from_iter([-1, 0, 1]); + let col2 = Array2::from_iter([1, 0, -1]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -508,8 +508,8 @@ mod tests { #[test] fn simple_primitive_cmp_between_cols_desc() { - let col1 = Array::from_iter([-1, 0, 1]); - let col2 = Array::from_iter([1, 0, -1]); + let col1 = Array2::from_iter([-1, 0, 1]); + let col2 = Array2::from_iter([1, 0, -1]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -535,8 +535,8 @@ mod tests { #[test] fn simple_varlen_cmp_between_cols_asc() { - let col1 = Array::from_iter(["a", "aa", "bb"]); - let col2 = Array::from_iter(["aa", "a", "bb"]); + let col1 = Array2::from_iter(["a", "aa", "bb"]); + let col2 = Array2::from_iter(["aa", "a", "bb"]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -561,8 +561,8 @@ mod tests { #[test] fn primitive_nulls_last_asc() { - let col1 = Array::from_iter([Some(-1), None, Some(1), Some(2)]); - let col2 = Array::from_iter([Some(1), Some(0), Some(-1), None]); + let col1 = Array2::from_iter([Some(-1), None, Some(1), Some(2)]); + let col2 = Array2::from_iter([Some(1), Some(0), Some(-1), None]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -585,8 +585,8 @@ mod tests { #[test] fn primitive_nulls_last_desc() { - let col1 = Array::from_iter([Some(-1), None, Some(1), Some(2)]); - let col2 = Array::from_iter([Some(1), Some(0), Some(-1), None]); + let col1 = Array2::from_iter([Some(-1), None, Some(1), Some(2)]); + let col2 = Array2::from_iter([Some(1), Some(0), Some(-1), None]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -609,8 +609,8 @@ mod tests { #[test] fn primitive_nulls_first_asc() { - let col1 = Array::from_iter([Some(-1), None, Some(1), Some(2)]); - let col2 = Array::from_iter([Some(1), Some(0), Some(-1), None]); + let col1 = Array2::from_iter([Some(-1), None, Some(1), Some(2)]); + let col2 = Array2::from_iter([Some(1), Some(0), Some(-1), None]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -633,8 +633,8 @@ mod tests { #[test] fn primitive_nulls_first_desc() { - let col1 = Array::from_iter([Some(-1), None, Some(1), Some(2)]); - let col2 = Array::from_iter([Some(1), Some(0), Some(-1), None]); + let col1 = Array2::from_iter([Some(-1), None, Some(1), Some(2)]); + let col2 = Array2::from_iter([Some(1), Some(0), Some(-1), None]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { diff --git a/crates/rayexec_execution/src/arrays/row/mod.rs b/crates/rayexec_execution/src/arrays/row/mod.rs index ed1ce3d72..81563cd58 100644 --- a/crates/rayexec_execution/src/arrays/row/mod.rs +++ b/crates/rayexec_execution/src/arrays/row/mod.rs @@ -2,7 +2,7 @@ pub mod encoding; use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use 
crate::arrays::scalar::ScalarValue; /// Scalar representation of a single row. @@ -23,7 +23,7 @@ impl<'a> ScalarRow<'a> { } /// Create a new row representation backed by data from arrays. - pub fn try_new_from_arrays(arrays: &[&'a Array], row: usize) -> Result> { + pub fn try_new_from_arrays(arrays: &[&'a Array2], row: usize) -> Result> { let vals = arrays .iter() .map(|a| a.logical_value(row)) diff --git a/crates/rayexec_execution/src/arrays/scalar/decimal.rs b/crates/rayexec_execution/src/arrays/scalar/decimal.rs index 041a7aca5..c23abf276 100644 --- a/crates/rayexec_execution/src/arrays/scalar/decimal.rs +++ b/crates/rayexec_execution/src/arrays/scalar/decimal.rs @@ -5,7 +5,8 @@ use rayexec_error::{RayexecError, Result, ResultExt}; use rayexec_proto::ProtoConv; use serde::{Deserialize, Serialize}; -use crate::arrays::executor::physical_type::{PhysicalI128, PhysicalI64, PhysicalStorage}; +use crate::arrays::buffer::physical_type::{MutablePhysicalStorage, PhysicalI128, PhysicalI64}; +use crate::arrays::executor::physical_type::{PhysicalI128_2, PhysicalI64_2, PhysicalStorage2}; pub trait DecimalPrimitive: PrimInt + FromPrimitive + Signed + Default + Debug + Display + Sync + Send @@ -30,11 +31,18 @@ impl DecimalPrimitive for i128 { pub trait DecimalType: Debug + Sync + Send + Copy + 'static where - for<'a> Self::Storage: PhysicalStorage = Self::Primitive>, + for<'a> Self::Storage2: PhysicalStorage2 = Self::Primitive>, { /// The underlying primitive type storing the decimal's value. type Primitive: DecimalPrimitive; - type Storage: PhysicalStorage; + + type Storage: MutablePhysicalStorage< + PrimaryBufferType = Self::Primitive, + StorageType = Self::Primitive, + >; + + // TODO: Remove + type Storage2: PhysicalStorage2; /// Max precision for this decimal type. const MAX_PRECISION: u8; @@ -72,6 +80,7 @@ pub struct Decimal64Type; impl DecimalType for Decimal64Type { type Primitive = i64; type Storage = PhysicalI64; + type Storage2 = PhysicalI64_2; const MAX_PRECISION: u8 = 18; // Note that changing this would require changing some of the date functions // since they assume this is 3. @@ -84,6 +93,7 @@ pub struct Decimal128Type; impl DecimalType for Decimal128Type { type Primitive = i128; type Storage = PhysicalI128; + type Storage2 = PhysicalI128_2; const MAX_PRECISION: u8 = 38; const DEFAULT_SCALE: i8 = 9; } diff --git a/crates/rayexec_execution/src/arrays/scalar/mod.rs b/crates/rayexec_execution/src/arrays/scalar/mod.rs index fb0741a82..74341e3cc 100644 --- a/crates/rayexec_execution/src/arrays/scalar/mod.rs +++ b/crates/rayexec_execution/src/arrays/scalar/mod.rs @@ -14,7 +14,7 @@ use rayexec_proto::ProtoConv; use serde::{Deserialize, Serialize}; use timestamp::TimestampScalar; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::bitmap::Bitmap; use crate::arrays::compute::cast::format::{ BoolFormatter, @@ -204,9 +204,9 @@ impl ScalarValue<'_> { } /// Create an array of size `n` using the scalar value. 
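    ///
    /// For non-null scalars the value is stored once and repeated logically via a
    /// selection vector rather than materializing `n` copies.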
- pub fn as_array(&self, n: usize) -> Result { - let data: ArrayData = match self { - Self::Null => return Ok(Array::new_untyped_null_array(n)), + pub fn as_array(&self, n: usize) -> Result { + let data: ArrayData2 = match self { + Self::Null => return Ok(Array2::new_untyped_null_array(n)), Self::Boolean(v) => BooleanStorage(Bitmap::new_with_val(*v, 1)).into(), Self::Float16(v) => PrimitiveStorage::from(vec![*v]).into(), Self::Float32(v) => PrimitiveStorage::from(vec![*v]).into(), @@ -235,7 +235,7 @@ impl ScalarValue<'_> { ListStorage { metadata: vec![metadata].into(), - array: Array::new_untyped_null_array(0), + array: Array2::new_untyped_null_array(0), } .into() } else { @@ -261,7 +261,7 @@ impl ScalarValue<'_> { other => not_implemented!("{other:?} to array"), // Struct, List }; - let mut array = Array::new_with_array_data(self.datatype(), data); + let mut array = Array2::new_with_array_data(self.datatype(), data); array.selection = Some(SelectionVector::repeated(n, 0).into()); Ok(array) diff --git a/crates/rayexec_execution/src/arrays/storage/list.rs b/crates/rayexec_execution/src/arrays/storage/list.rs index f2107de67..bc35ac4f7 100644 --- a/crates/rayexec_execution/src/arrays/storage/list.rs +++ b/crates/rayexec_execution/src/arrays/storage/list.rs @@ -1,7 +1,7 @@ use rayexec_error::{RayexecError, Result}; use super::PrimitiveStorage; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] pub struct ListItemMetadata { @@ -12,13 +12,13 @@ pub struct ListItemMetadata { #[derive(Debug, PartialEq)] pub struct ListStorage { pub(crate) metadata: PrimitiveStorage, - pub(crate) array: Array, + pub(crate) array: Array2, } impl ListStorage { pub fn try_new( metadata: impl Into>, - array: Array, + array: Array2, ) -> Result { let metadata = metadata.into(); @@ -41,14 +41,14 @@ impl ListStorage { Ok(ListStorage { metadata, array }) } - pub fn empty_list(array: Array) -> Self { + pub fn empty_list(array: Array2) -> Self { ListStorage { metadata: vec![ListItemMetadata { offset: 0, len: 0 }].into(), array, } } - pub fn single_list(array: Array) -> Self { + pub fn single_list(array: Array2) -> Self { let len = array.logical_len(); ListStorage { @@ -61,7 +61,7 @@ impl ListStorage { } } - pub fn inner_array(&self) -> &Array { + pub fn inner_array(&self) -> &Array2 { &self.array } diff --git a/crates/rayexec_execution/src/arrays/testutil.rs b/crates/rayexec_execution/src/arrays/testutil.rs index e2d84d947..7b6b8fb64 100644 --- a/crates/rayexec_execution/src/arrays/testutil.rs +++ b/crates/rayexec_execution/src/arrays/testutil.rs @@ -5,63 +5,296 @@ //! //! Should not be used outside of tests. -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use std::collections::BTreeMap; +use std::fmt::Debug; -/// Asserts that two arrays are logically equal. 
-pub fn assert_arrays_eq(a: &Array, b: &Array) { - assert_eq!(a.datatype(), b.datatype(), "data types differ"); - assert_eq!(a.logical_len(), b.logical_len(), "logical lengths differ"); +use stdutil::iter::IntoExactSizeIterator; - for row_idx in 0..a.logical_len() { - let a_val = a.logical_value(row_idx).unwrap(); - let b_val = b.logical_value(row_idx).unwrap(); +use super::array::exp::Array; +use super::batch_exp::Batch; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{ + PhysicalBool, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalList, + PhysicalStorage, + PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUtf8, +}; +use crate::arrays::buffer::SecondaryBuffer; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; - assert_eq!(a_val, b_val); +/// Assert two arrays are logically equal. +/// +/// This will assume that the array's capacity is the array's logical length. +#[track_caller] +pub fn assert_arrays_eq(array1: &Array, array2: &Array) { + assert_eq!( + array1.capacity(), + array2.capacity(), + "array capacities differ" + ); + + let sel = 0..array1.capacity(); + + assert_arrays_eq_sel(array1, sel.clone(), array2, sel) +} + +/// Asserts that two arrays are logically equal for the first `count` rows. +/// +/// This will check valid and invalid values. Assertion error messages will +/// print out Some/None to represent valid/invalid. +#[track_caller] +pub fn assert_arrays_eq_sel( + array1: &Array, + sel1: impl IntoExactSizeIterator, + array2: &Array, + sel2: impl IntoExactSizeIterator, +) { + assert_eq!(array1.datatype, array2.datatype); + + let flat1 = array1.flat_view().unwrap(); + let flat2 = array2.flat_view().unwrap(); + + match array1.datatype.physical_type() { + PhysicalType::Boolean => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::Int8 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::Int16 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::Int32 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::Int64 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::Int128 => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::UInt8 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::UInt16 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::UInt32 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::UInt64 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::UInt128 => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::Float16 => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::Float32 => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::Float64 => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::Utf8 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::List => { + assert_arrays_eq_sel_list_inner(flat1, sel1, flat2, sel2); + } + other => unimplemented!("{other:?}"), + } +} + +fn assert_arrays_eq_sel_list_inner( + flat1: FlatArrayView, + sel1: impl IntoExactSizeIterator, + flat2: FlatArrayView, + sel2: impl IntoExactSizeIterator, +) { + let inner1 = match flat1.array_buffer.get_secondary() { + SecondaryBuffer::List(list) => 
&list.child, + _ => panic!("Missing child for array 1"), + }; + + let inner2 = match flat2.array_buffer.get_secondary() { + SecondaryBuffer::List(list) => &list.child, + _ => panic!("Missing child for array 2"), + }; + + let metas1 = PhysicalList::get_addressable(flat1.array_buffer).unwrap(); + let metas2 = PhysicalList::get_addressable(flat2.array_buffer).unwrap(); + + let sel1 = sel1.into_iter(); + let sel2 = sel2.into_iter(); + assert_eq!(sel1.len(), sel2.len()); + + for (row_idx, (idx1, idx2)) in sel1.zip(sel2).enumerate() { + let idx1 = flat1.selection.get(idx1).unwrap(); + let idx2 = flat1.selection.get(idx2).unwrap(); + + assert_eq!( + flat1.validity.is_valid(idx1), + flat2.validity.is_valid(idx2), + "validity mismatch for row {row_idx}" + ); + + let m1 = metas1.get(idx1).unwrap(); + let m2 = metas2.get(idx2).unwrap(); + + let sel1 = (m1.offset as usize)..((m1.offset + m1.len) as usize); + let sel2 = (m2.offset as usize)..((m2.offset + m2.len) as usize); + + assert_arrays_eq_sel(inner1, sel1, inner2, sel2); } } -/// Asserts that two batches are logically equal. -pub fn assert_batches_eq(a: &Batch, b: &Batch) { - assert_eq!(a.num_rows(), b.num_rows(), "num rows differ"); - assert_eq!(a.num_columns(), b.num_columns(), "num columns differ"); +fn assert_arrays_eq_sel_inner( + flat1: FlatArrayView, + sel1: impl IntoExactSizeIterator, + flat2: FlatArrayView, + sel2: impl IntoExactSizeIterator, +) where + S: PhysicalStorage, + S::StorageType: ToOwned, +{ + let mut out = BTreeMap::new(); - for col_idx in 0..a.num_columns() { - let a_col = a.column(col_idx).unwrap(); - let b_col = b.column(col_idx).unwrap(); + UnaryExecutor::for_each_flat::(flat1, sel1, |idx, v| { + out.insert(idx, v.map(|v| v.to_owned())); + }) + .unwrap(); - assert_arrays_eq(a_col, b_col); + UnaryExecutor::for_each_flat::(flat2, sel2, |idx, v| match out.remove(&idx) { + Some(existing) => { + let v = v.map(|v| v.to_owned()); + assert_eq!(existing, v, "values differ at index {idx}"); + } + None => panic!("missing value for index in array 1 {idx}"), + }) + .unwrap(); + + if !out.is_empty() { + panic!("extra entries in array 1: {:?}", out); + } +} + +/// Asserts two batches are logically equal. 
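+///
+/// Both batches must have the same number of arrays and the same logical row
+/// count; each pair of arrays is then compared over `0..num_rows`.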
+#[track_caller] +pub fn assert_batches_eq(batch1: &Batch, batch2: &Batch) { + let arrays1 = batch1.arrays(); + let arrays2 = batch2.arrays(); + + assert_eq!( + arrays1.len(), + arrays2.len(), + "batches have different number of arrays" + ); + assert_eq!( + batch1.num_rows(), + batch2.num_rows(), + "batches have different number of rows" + ); + + for (array1, array2) in arrays1.iter().zip(arrays2) { + let sel = 0..batch1.num_rows(); + assert_arrays_eq_sel(array1, sel.clone(), array2, sel); } } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; - use crate::arrays::selection::SelectionVector; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + + #[test] + fn assert_i32_arrays_eq_simple() { + let array1 = Array::try_from_iter([4, 5, 6]).unwrap(); + let array2 = Array::try_from_iter([4, 5, 6]).unwrap(); + + assert_arrays_eq(&array1, &array2); + } #[test] - fn arrays_eq() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([1, 2, 3]); + fn assert_i32_arrays_eq_with_dictionary() { + let array1 = Array::try_from_iter([5, 4, 4]).unwrap(); + let mut array2 = Array::try_from_iter([4, 5]).unwrap(); + array2.select(&NopBufferManager, [1, 0, 0]).unwrap(); + + assert_arrays_eq(&array1, &array2); + } + + #[test] + fn assert_i32_arrays_eq_with_invalid() { + let mut array1 = Array::try_from_iter([4, 5, 6]).unwrap(); + array1.validity.set_invalid(1); + + let mut array2 = Array::try_from_iter([4, 8, 6]).unwrap(); + array2.validity.set_invalid(1); - assert_arrays_eq(&a, &b); + assert_arrays_eq(&array1, &array2); } #[test] - fn arrays_eq_with_selection() { - let a = Array::from_iter([2, 2, 2]); - let mut b = Array::from_iter([2]); - b.select_mut(SelectionVector::repeated(3, 0)); + fn assert_batches_eq_simple() { + let batch1 = Batch::try_from_arrays( + [ + Array::try_from_iter([4, 5, 6]).unwrap(), + Array::try_from_iter(["a", "b", "c"]).unwrap(), + ], + true, + ) + .unwrap(); + let batch2 = Batch::try_from_arrays( + [ + Array::try_from_iter([4, 5, 6]).unwrap(), + Array::try_from_iter(["a", "b", "c"]).unwrap(), + ], + true, + ) + .unwrap(); + + assert_batches_eq(&batch1, &batch2); + } + + #[test] + fn assert_batches_eq_logical_row_count() { + let mut batch1 = Batch::try_from_arrays( + [ + Array::try_from_iter([4, 5, 6, 7, 8]).unwrap(), + Array::try_from_iter(["a", "b", "c", "d", "e"]).unwrap(), + ], + false, + ) + .unwrap(); + batch1.set_num_rows(3).unwrap(); + + let batch2 = Batch::try_from_arrays( + [ + Array::try_from_iter([4, 5, 6]).unwrap(), + Array::try_from_iter(["a", "b", "c"]).unwrap(), + ], + true, + ) + .unwrap(); + + assert_batches_eq(&batch1, &batch2); + } + + #[test] + #[should_panic] + fn assert_i32_arrays_eq_not_eq() { + let array1 = Array::try_from_iter([4, 5, 6]).unwrap(); + let array2 = Array::try_from_iter([4, 5, 7]).unwrap(); - assert_arrays_eq(&a, &b); + assert_arrays_eq(&array1, &array2); } #[test] #[should_panic] - fn arrays_not_eq() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter(["a", "b", "c"]); + fn assert_i32_arrays_different_lengths() { + let array1 = Array::try_from_iter([4, 5, 6]).unwrap(); + let array2 = Array::try_from_iter([4, 5]).unwrap(); - assert_arrays_eq(&a, &b); + assert_arrays_eq(&array1, &array2); } } diff --git a/crates/rayexec_execution/src/engine/result.rs b/crates/rayexec_execution/src/engine/result.rs index 2b5fef6d7..45a0ab6b0 100644 --- a/crates/rayexec_execution/src/engine/result.rs +++ b/crates/rayexec_execution/src/engine/result.rs @@ -9,7 +9,7 @@ use 
rayexec_error::{RayexecError, Result}; use tracing::warn; use super::profiler::PlanningProfileData; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::field::Schema; use crate::database::DatabaseContext; use crate::execution::operators::sink::{PartitionSink, SinkOperation}; @@ -52,7 +52,7 @@ pub struct ResultStream { } impl Stream for ResultStream { - type Item = Result; + type Item = Result; fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let mut inner = self.inner.lock(); @@ -117,7 +117,7 @@ pub struct ResultPartitionSink { } impl PartitionSink for ResultPartitionSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(PushFuture { batch: Some(batch), inner: self.inner.clone(), @@ -157,7 +157,7 @@ impl ErrorSink for ResultErrorSink { /// This lets us inject an error into the stream that arises outside of stream. #[derive(Debug)] struct InnerState { - batch: Option, + batch: Option, error: Option, finished: bool, push_waker: Option, @@ -165,7 +165,7 @@ struct InnerState { } struct PushFuture { - batch: Option, + batch: Option, inner: Arc>, } diff --git a/crates/rayexec_execution/src/engine/server_state.rs b/crates/rayexec_execution/src/engine/server_state.rs index b0b3d3228..c8c7c9bfc 100644 --- a/crates/rayexec_execution/src/engine/server_state.rs +++ b/crates/rayexec_execution/src/engine/server_state.rs @@ -4,7 +4,7 @@ use dashmap::DashMap; use rayexec_error::{not_implemented, RayexecError, Result}; use uuid::Uuid; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::field::{Field, Schema}; use crate::config::execution::{ExecutablePlanConfig, IntermediatePlanConfig}; use crate::config::session::SessionConfig; @@ -184,7 +184,7 @@ where Ok(()) } - pub fn push_batch_for_stream(&self, stream_id: StreamId, batch: Batch) -> Result<()> { + pub fn push_batch_for_stream(&self, stream_id: StreamId, batch: Batch2) -> Result<()> { self.buffers.push_batch_for_stream(&stream_id, batch) } diff --git a/crates/rayexec_execution/src/execution/computed_batch.rs b/crates/rayexec_execution/src/execution/computed_batch.rs index 2086c4800..4d60ad7e5 100644 --- a/crates/rayexec_execution/src/execution/computed_batch.rs +++ b/crates/rayexec_execution/src/execution/computed_batch.rs @@ -2,18 +2,18 @@ use std::collections::VecDeque; use rayexec_error::Result; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; /// Computed batch results from an operator. #[derive(Debug, PartialEq)] pub enum ComputedBatches { /// A single batch was computed. - Single(Batch), + Single(Batch2), /// Multiple batches were computed. /// /// These should be ordered by which batch should be pushed to next operator /// first. - Multi(VecDeque), + Multi(VecDeque), /// No batches computed. None, // TODO: Spill references @@ -25,7 +25,7 @@ impl ComputedBatches { /// This will filter out any batches that have no rows. pub fn new(batches: I) -> Self where - I: IntoIterator, + I: IntoIterator, I::IntoIter: ExactSizeIterator, { let mut iter = batches.into_iter(); @@ -83,7 +83,7 @@ impl ComputedBatches { /// Tries to get the next batch from this collection, returning None when no /// batches remain. 
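    ///
    /// Batches are returned in the order they should be pushed to the next
    /// operator; popping from `Single` leaves the collection as `None`.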
- pub fn try_pop_front(&mut self) -> Result> { + pub fn try_pop_front(&mut self) -> Result> { match self { Self::Single(_) => { let orig = std::mem::replace(self, Self::None); @@ -100,8 +100,8 @@ impl ComputedBatches { } } -impl From for ComputedBatches { - fn from(value: Batch) -> Self { +impl From for ComputedBatches { + fn from(value: Batch2) -> Self { Self::Single(value) } } diff --git a/crates/rayexec_execution/src/execution/executable/pipeline.rs b/crates/rayexec_execution/src/execution/executable/pipeline.rs index de96dd8fd..533233e48 100644 --- a/crates/rayexec_execution/src/execution/executable/pipeline.rs +++ b/crates/rayexec_execution/src/execution/executable/pipeline.rs @@ -6,16 +6,16 @@ use rayexec_error::{RayexecError, Result}; use tracing::trace; use super::profiler::OperatorProfileData; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::execution::computed_batch::ComputedBatches; use crate::execution::operators::{ ExecutableOperator, OperatorState, PartitionState, PhysicalOperator, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::runtime::time::{RuntimeInstant, Timer}; @@ -274,7 +274,7 @@ pub enum PipelinePartitionState { operator_idx: usize, }, /// Need to push to an operator. - PushTo { batch: Batch, operator_idx: usize }, + PushTo { batch: Batch2, operator_idx: usize }, /// Need to finalize a push to an operator. FinalizePush { operator_idx: usize }, /// Pipeline is completed. @@ -346,7 +346,7 @@ impl ExecutablePartitionPipeline { // Otherwise do a normal pull. let timer = Timer::::start(); - let poll_pull = operator.physical.poll_pull( + let poll_pull = operator.physical.poll_pull2( cx, &mut operator.partition_state, &operator.operator_state, @@ -355,7 +355,7 @@ impl ExecutablePartitionPipeline { operator.profile_data.elapsed += elapsed; match poll_pull { - Ok(PollPull::Computed(mut computed)) => { + Ok(PollPull2::Computed(mut computed)) => { operator.profile_data.rows_emitted += computed.total_num_rows(); // TODO: We should have something to indicate materialized vs not. let batch = match computed.try_pop_front()? { @@ -385,10 +385,10 @@ impl ExecutablePartitionPipeline { }; continue; } - Ok(PollPull::Pending) => { + Ok(PollPull2::Pending) => { return Poll::Pending; } - Ok(PollPull::Exhausted) => { + Ok(PollPull2::Exhausted) => { // Finalize the next operator to indicate that it // will no longer be receiving batch inputs. *state = PipelinePartitionState::FinalizePush { @@ -416,7 +416,7 @@ impl ExecutablePartitionPipeline { .expect("next operator to exist"); let timer = Timer::::start(); - let poll_finalize = next_operator.physical.poll_finalize_push( + let poll_finalize = next_operator.physical.poll_finalize_push2( cx, &mut next_operator.partition_state, &next_operator.operator_state, @@ -425,7 +425,7 @@ impl ExecutablePartitionPipeline { next_operator.profile_data.elapsed += elapsed; match poll_finalize { - Ok(PollFinalize::Finalized) => { + Ok(PollFinalize2::Finalized) => { if self.pull_start.pull_start == self.operators.len() - 1 { // This partition pipeline has been completely exhausted, and // we've just finalized the "sink" operator. We're done. @@ -437,7 +437,7 @@ impl ExecutablePartitionPipeline { // next non-exhausted operator. 
*state = self.pull_start.next_start_state()?; } - Ok(PollFinalize::Pending) => return Poll::Pending, + Ok(PollFinalize2::Pending) => return Poll::Pending, Err(e) => { // Erroring on finalize is not recoverable. *state = PipelinePartitionState::Completed; @@ -450,7 +450,7 @@ impl ExecutablePartitionPipeline { operator_idx, } => { // To satisfy ownership. State will be updated anyways. - let batch = std::mem::replace(batch, Batch::empty()); + let batch = std::mem::replace(batch, Batch2::empty()); let operator = self .operators @@ -460,7 +460,7 @@ impl ExecutablePartitionPipeline { operator.profile_data.rows_read += batch.num_rows(); let timer = Timer::::start(); - let poll_push = operator.physical.poll_push( + let poll_push = operator.physical.poll_push2( cx, &mut operator.partition_state, &operator.operator_state, @@ -470,7 +470,7 @@ impl ExecutablePartitionPipeline { operator.profile_data.elapsed += elapsed; match poll_push { - Ok(PollPush::Pushed) => { + Ok(PollPush2::Pushed) => { // We successfully pushed to the operator. // // If we pushed to last operator (the 'sink'), we @@ -489,7 +489,7 @@ impl ExecutablePartitionPipeline { } continue; } - Ok(PollPush::Pending(batch)) => { + Ok(PollPush2::Pending(batch)) => { // Operator not ready to accept input. // // Waker has been registered, and this pipeline will @@ -502,7 +502,7 @@ impl ExecutablePartitionPipeline { }; return Poll::Pending; } - Ok(PollPush::Break) => { + Ok(PollPush2::Break) => { // Operator has received everything it needs. Set // the pipeline to start pulling from the operator, // even if the operator we're currently pull from @@ -520,7 +520,7 @@ impl ExecutablePartitionPipeline { }; continue; } - Ok(PollPush::NeedsMore) => { + Ok(PollPush2::NeedsMore) => { // Operator accepted input, but needs more input // before it will produce output. 
// diff --git a/crates/rayexec_execution/src/execution/executable/planner.rs b/crates/rayexec_execution/src/execution/executable/planner.rs index e26b4d20c..fbd177dcf 100644 --- a/crates/rayexec_execution/src/execution/executable/planner.rs +++ b/crates/rayexec_execution/src/execution/executable/planner.rs @@ -22,7 +22,7 @@ use crate::execution::operators::sink::{SinkOperation, SinkOperator}; use crate::execution::operators::source::{SourceOperation, SourceOperator}; use crate::execution::operators::{ ExecutableOperator, - InputOutputStates, + InputOutputStates2, OperatorState, PartitionState, PhysicalOperator, @@ -363,9 +363,9 @@ impl PendingQuery { } let operator = Arc::new(PhysicalOperator::ResultSink(SinkOperator::new(sink))); - let states = operator.create_states(context, vec![partitions])?; + let states = operator.create_states2(context, vec![partitions])?; let partition_states = match states.partition_states { - InputOutputStates::OneToOne { partition_states } => partition_states, + InputOutputStates2::OneToOne { partition_states } => partition_states, _ => return Err(RayexecError::new("invalid partition states for query sink")), }; @@ -429,9 +429,9 @@ impl PendingQuery { } }; - let states = operator.create_states(context, vec![partitions])?; + let states = operator.create_states2(context, vec![partitions])?; let partition_states = match states.partition_states { - InputOutputStates::OneToOne { partition_states } => partition_states, + InputOutputStates2::OneToOne { partition_states } => partition_states, _ => return Err(RayexecError::new("invalid partition states")), }; @@ -549,9 +549,9 @@ impl PendingQuery { } }; - let states = operator.create_states(context, vec![partitions])?; + let states = operator.create_states2(context, vec![partitions])?; let partition_states = match states.partition_states { - InputOutputStates::OneToOne { partition_states } => partition_states, + InputOutputStates2::OneToOne { partition_states } => partition_states, _ => { return Err(RayexecError::new( "Invalid partition states for query source", @@ -610,10 +610,10 @@ impl PendingQuery { ) -> Result { let rr_operator = Arc::new(PhysicalOperator::RoundRobin(PhysicalRoundRobinRepartition)); let states = rr_operator - .create_states(context, vec![pipeline.num_partitions(), output_partitions])?; + .create_states2(context, vec![pipeline.num_partitions(), output_partitions])?; let (push_states, pull_states) = match states.partition_states { - InputOutputStates::SeparateInputOutput { + InputOutputStates2::SeparateInputOutput { push_states, pull_states, } => (push_states, pull_states), @@ -696,17 +696,19 @@ impl PendingOperatorWithState { .unwrap_or(config.partitions); // TODO: How to get other input partitions. 
- let states = operator.operator.create_states(context, vec![partitions])?; + let states = operator + .operator + .create_states2(context, vec![partitions])?; Ok(match states.partition_states { - InputOutputStates::OneToOne { partition_states } => PendingOperatorWithState { + InputOutputStates2::OneToOne { partition_states } => PendingOperatorWithState { operator: operator.operator, operator_state: states.operator_state, input_states: vec![Some(partition_states)], pull_states: VecDeque::new(), trunk_idx: 0, }, - InputOutputStates::NaryInputSingleOutput { + InputOutputStates2::NaryInputSingleOutput { partition_states, pull_states, } => { @@ -719,7 +721,7 @@ impl PendingOperatorWithState { trunk_idx: pull_states, } } - InputOutputStates::SeparateInputOutput { + InputOutputStates2::SeparateInputOutput { push_states, pull_states, } => PendingOperatorWithState { diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs index 4f9e88d37..ecc45944c 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs @@ -5,7 +5,7 @@ use rayexec_error::{RayexecError, Result, ResultExt}; use super::{IntermediatePipelineBuildState, Materializations, PipelineIdGen}; use crate::execution::intermediate::pipeline::IntermediateOperator; use crate::execution::operators::hash_aggregate::PhysicalHashAggregate; -use crate::execution::operators::project::{PhysicalProject, ProjectOperation}; +use crate::execution::operators::project::{PhysicalProject2, ProjectOperation}; use crate::execution::operators::ungrouped_aggregate::PhysicalUngroupedAggregate; use crate::execution::operators::PhysicalOperator; use crate::expr::physical::column_expr::PhysicalColumnExpr; @@ -41,7 +41,6 @@ impl IntermediatePipelineBuildState<'_> { } }; - let start_col_index = preproject_exprs.len(); for arg in &agg.agg.inputs { let scalar = self .expr_planner @@ -49,13 +48,19 @@ impl IntermediatePipelineBuildState<'_> { .context("Failed to plan expressions for aggregate pre-projection")?; preproject_exprs.push(scalar); } - let end_col_index = preproject_exprs.len(); + + let columns = preproject_exprs + .iter() + .enumerate() + .map(|(idx, expr)| PhysicalColumnExpr { + idx, + datatype: expr.datatype(), + }) + .collect(); let phys_agg = PhysicalAggregateExpression { function: agg.agg, - columns: (start_col_index..end_col_index) - .map(|idx| PhysicalColumnExpr { idx }) - .collect(), + columns, is_distinct: agg.distinct, }; @@ -77,7 +82,7 @@ impl IntermediatePipelineBuildState<'_> { self.push_intermediate_operator( IntermediateOperator { - operator: Arc::new(PhysicalOperator::Project(PhysicalProject { + operator: Arc::new(PhysicalOperator::Project(PhysicalProject2 { operation: ProjectOperation::new(preproject_exprs), })), partitioning_requirement: None, diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs index 3525b942d..9a03c18d0 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs @@ -3,8 +3,8 @@ use std::sync::Arc; use rayexec_error::{RayexecError, Result}; use super::{InProgressPipeline, IntermediatePipelineBuildState, PipelineIdGen}; -use crate::arrays::array::Array; -use 
crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::values::PhysicalValues; use crate::execution::operators::PhysicalOperator; @@ -23,10 +23,10 @@ impl IntermediatePipelineBuildState<'_> { return Err(RayexecError::new("Expected in progress to be None")); } - let names = Array::from_iter(describe.node.schema.iter().map(|f| f.name.as_str())); + let names = Array2::from_iter(describe.node.schema.iter().map(|f| f.name.as_str())); let datatypes = - Array::from_iter(describe.node.schema.iter().map(|f| f.datatype.to_string())); - let batch = Batch::try_new(vec![names, datatypes])?; + Array2::from_iter(describe.node.schema.iter().map(|f| f.datatype.to_string())); + let batch = Batch2::try_new(vec![names, datatypes])?; let operator = IntermediateOperator { operator: Arc::new(PhysicalOperator::Values(PhysicalValues::new(vec![batch]))), diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_distinct.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_distinct.rs index 3845bb7a5..9678e120f 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_distinct.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_distinct.rs @@ -6,7 +6,7 @@ use rayexec_error::Result; use super::{IntermediatePipelineBuildState, Materializations, PipelineIdGen}; use crate::execution::intermediate::pipeline::IntermediateOperator; use crate::execution::operators::hash_aggregate::PhysicalHashAggregate; -use crate::execution::operators::project::{PhysicalProject, ProjectOperation}; +use crate::execution::operators::project::{PhysicalProject2, ProjectOperation}; use crate::execution::operators::PhysicalOperator; use crate::logical::logical_distinct::LogicalDistinct; use crate::logical::operator::{LogicalNode, Node}; @@ -35,7 +35,7 @@ impl IntermediatePipelineBuildState<'_> { self.push_intermediate_operator( IntermediateOperator { - operator: Arc::new(PhysicalOperator::Project(PhysicalProject { + operator: Arc::new(PhysicalOperator::Project(PhysicalProject2 { operation: ProjectOperation::new(group_exprs), })), partitioning_requirement: None, diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs index 23dbc22e6..eebfbf13e 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs @@ -4,8 +4,8 @@ use rayexec_error::{not_implemented, RayexecError, Result}; use tracing::error; use super::{InProgressPipeline, IntermediatePipelineBuildState, Materializations, PipelineIdGen}; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::values::PhysicalValues; use crate::execution::operators::PhysicalOperator; @@ -81,9 +81,9 @@ impl IntermediatePipelineBuildState<'_> { } let physical = Arc::new(PhysicalOperator::Values(PhysicalValues::new(vec![ - Batch::try_new([ - Array::from_iter(type_strings), - Array::from_iter(plan_strings), + Batch2::try_new([ + Array2::from_iter(type_strings), + Array2::from_iter(plan_strings), ])?, ]))); diff --git 
a/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs index af92d317e..c5b968492 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs @@ -3,8 +3,8 @@ use std::sync::Arc; use rayexec_error::{not_implemented, RayexecError, Result, ResultExt}; use super::{InProgressPipeline, IntermediatePipelineBuildState, PipelineIdGen}; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::scan::PhysicalScan; use crate::execution::operators::table_function::PhysicalTableFunction; @@ -77,15 +77,15 @@ impl IntermediatePipelineBuildState<'_> { &self, projections: Projections, rows: Vec>, - ) -> Result> { + ) -> Result> { if self.in_progress.is_some() { return Err(RayexecError::new("Expected in progress to be None")); } // TODO: This could probably be simplified. - let mut row_arrs: Vec> = Vec::new(); // Row oriented. - let dummy_batch = Batch::empty_with_num_rows(1); + let mut row_arrs: Vec> = Vec::new(); // Row oriented. + let dummy_batch = Batch2::empty_with_num_rows(1); // Convert expressions into arrays of one element each. for row_exprs in rows { @@ -96,7 +96,7 @@ impl IntermediatePipelineBuildState<'_> { let arrs = exprs .into_iter() .map(|expr| { - let arr = expr.eval(&dummy_batch)?; + let arr = expr.eval2(&dummy_batch)?; Ok(arr.into_owned()) }) .collect::>>()?; @@ -106,7 +106,7 @@ impl IntermediatePipelineBuildState<'_> { let batches = row_arrs .into_iter() .map(|cols| { - let batch = Batch::try_new(cols)?; + let batch = Batch2::try_new(cols)?; // TODO: Got lazy, we can just avoid evaluating the expressions above. 
match &projections.column_indices { diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs index 5eb96bc88..3c348b4e1 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs @@ -3,8 +3,8 @@ use std::sync::Arc; use rayexec_error::{RayexecError, Result}; use super::{InProgressPipeline, IntermediatePipelineBuildState, PipelineIdGen}; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::values::PhysicalValues; use crate::execution::operators::PhysicalOperator; @@ -26,7 +26,7 @@ impl IntermediatePipelineBuildState<'_> { let operator = IntermediateOperator { operator: Arc::new(PhysicalOperator::Values(PhysicalValues::new(vec![ - Batch::try_new([Array::from_iter([show.value.to_string().as_str()])])?, + Batch2::try_new([Array2::from_iter([show.value.to_string().as_str()])])?, ]))), partitioning_requirement: Some(1), }; diff --git a/crates/rayexec_execution/src/execution/operators/analyze.rs b/crates/rayexec_execution/src/execution/operators/analyze.rs index ca4cb2deb..8fdbd2921 100644 --- a/crates/rayexec_execution/src/execution/operators/analyze.rs +++ b/crates/rayexec_execution/src/execution/operators/analyze.rs @@ -4,14 +4,14 @@ use rayexec_error::Result; use super::{ ExecutableOperator, - ExecutionStates, + ExecutionStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -20,39 +20,39 @@ use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; pub struct PhysicalAnalyze {} impl ExecutableOperator for PhysicalAnalyze { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, _partitions: Vec, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { unimplemented!() } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_pull( + fn poll_pull2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } } diff --git a/crates/rayexec_execution/src/execution/operators/batch_collection.rs b/crates/rayexec_execution/src/execution/operators/batch_collection.rs new file mode 100644 index 000000000..449aeaacb --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/batch_collection.rs @@ -0,0 +1,225 @@ +use rayexec_error::{RayexecError, Result}; + +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::datatype::DataType; + +#[derive(Debug)] +pub struct BatchCollection { + /// Datatypes of the arrays we're storing. 
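+    ///
+    /// One entry per column; every block in this collection stores its arrays in
+    /// the same order.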
+ datatypes: Vec, + /// All blocks making up this collection. + blocks: Vec, +} + +impl BatchCollection {} + +#[derive(Debug)] +pub struct BatchCollectionBlock { + /// Number of rows we're currently storing in this block. + row_count: usize, + /// Max number of rows this block store. + capacity: usize, + /// Arrays making up this block. + arrays: Vec, +} + +impl BatchCollectionBlock { + pub fn new(datatypes: &[DataType], capacity: usize) -> Result { + let arrays = datatypes + .iter() + .map(|datatype| Array::new(&NopBufferManager, datatype.clone(), capacity)) + .collect::>>()?; + + Ok(BatchCollectionBlock { + row_count: 0, + capacity, + arrays, + }) + } + + pub fn capacity(&self) -> usize { + self.capacity + } + + pub fn set_row_count(&mut self, count: usize) -> Result<()> { + if count > self.capacity { + return Err(RayexecError::new("Row count would exceed capacity")); + } + self.row_count = count; + Ok(()) + } + + pub fn row_count(&self) -> usize { + self.row_count + } + + pub fn arrays(&self) -> &[Array] { + &self.arrays + } + + pub fn has_capacity_for_rows(&self, additional: usize) -> bool { + self.row_count + additional < self.capacity + } + + /// Appends a batch to this block. + pub fn append_batch_data(&mut self, batch: &Batch) -> Result<()> { + let total_num_rows = self.row_count + batch.num_rows(); + if total_num_rows > self.capacity { + return Err( + RayexecError::new("New row count for batch block would exceed capacity") + .with_field("new_row_count", total_num_rows) + .with_field("capacity", self.capacity), + ); + } + + if self.arrays.len() != batch.arrays().len() { + return Err(RayexecError::new("Array length mismatch")); + } + + for (from, to) in batch.arrays.iter().zip(self.arrays.iter_mut()) { + // [0..batch_num_rows) => [self_row_count..) + let mapping = + (0..batch.num_rows()).zip(self.row_count..(self.row_count + batch.num_rows())); + from.copy_rows(mapping, to)?; + } + + self.row_count += batch.num_rows(); + + Ok(()) + } + + /// Copies a single row from another block. 
+ pub fn copy_row_from_other( + &mut self, + dest_row: usize, + source: &BatchCollectionBlock, + source_row: usize, + ) -> Result<()> { + if self.arrays.len() != source.arrays.len() { + return Err(RayexecError::new( + "Number of arrays in self and other differ", + )); + } + + for (from, to) in source.arrays().iter().zip(self.arrays.iter_mut()) { + let mapping = [(source_row, dest_row)]; + from.copy_rows(mapping, to)?; + } + + Ok(()) + } + + pub fn into_batch(self) -> Result { + let mut batch = Batch::try_from_arrays(self.arrays, false)?; + batch.set_num_rows(self.row_count)?; + + Ok(batch) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::testutil::assert_batches_eq; + + #[test] + fn block_append_i32() { + let mut block = BatchCollectionBlock::new(&[DataType::Int32], 4096).unwrap(); + + let array1 = Array::try_from_iter([4, 5, 6]).unwrap(); + let array2 = Array::try_from_iter([7, 8]).unwrap(); + let array3 = Array::try_from_iter([9, 10, 11]).unwrap(); + + let batch1 = Batch::try_from_arrays([array1], true).unwrap(); + let batch2 = Batch::try_from_arrays([array2], true).unwrap(); + let batch3 = Batch::try_from_arrays([array3], true).unwrap(); + + block.append_batch_data(&batch1).unwrap(); + block.append_batch_data(&batch2).unwrap(); + block.append_batch_data(&batch3).unwrap(); + + let out = block.into_batch().unwrap(); + + let expected = Batch::try_from_arrays( + [Array::try_from_iter([4, 5, 6, 7, 8, 9, 10, 11]).unwrap()], + true, + ) + .unwrap(); + + assert_batches_eq(&expected, &out); + } + + #[test] + fn block_append_i32_dictionary() { + let mut block = BatchCollectionBlock::new(&[DataType::Int32], 4096).unwrap(); + + let mut array = Array::try_from_iter([4, 5, 6]).unwrap(); + // '[4, 4, 6, 6, 5, 5]' + array.select(&NopBufferManager, [0, 0, 2, 2, 1, 1]).unwrap(); + + let batch = Batch::try_from_arrays([array], true).unwrap(); + block.append_batch_data(&batch).unwrap(); + + assert_eq!(6, block.row_count()); + + let out = block.into_batch().unwrap(); + + let expected = + Batch::try_from_arrays([Array::try_from_iter([4, 4, 6, 6, 5, 5]).unwrap()], true) + .unwrap(); + + assert_batches_eq(&expected, &out); + } + + #[test] + fn block_copy_row_i32_string() { + let mut block1 = + BatchCollectionBlock::new(&[DataType::Int32, DataType::Utf8], 4096).unwrap(); + let mut block2 = + BatchCollectionBlock::new(&[DataType::Int32, DataType::Utf8], 4096).unwrap(); + + block1 + .append_batch_data( + &Batch::try_from_arrays( + [ + Array::try_from_iter([4, 5, 6]).unwrap(), + Array::try_from_iter(["a", "b", "c"]).unwrap(), + ], + true, + ) + .unwrap(), + ) + .unwrap(); + + block2 + .append_batch_data( + &Batch::try_from_arrays( + [ + Array::try_from_iter([7, 8]).unwrap(), + Array::try_from_iter(["dog", "cat"]).unwrap(), + ], + true, + ) + .unwrap(), + ) + .unwrap(); + + block1.copy_row_from_other(1, &block2, 0).unwrap(); + + let out = block1.into_batch().unwrap(); + let expected = Batch::try_from_arrays( + [ + Array::try_from_iter([4, 7, 6]).unwrap(), + Array::try_from_iter(["a", "dog", "c"]).unwrap(), + ], + true, + ) + .unwrap(); + + assert_batches_eq(&expected, &out); + } +} diff --git a/crates/rayexec_execution/src/execution/operators/batch_resizer.rs b/crates/rayexec_execution/src/execution/operators/batch_resizer.rs index 23c63d4d3..34c798d10 100644 --- a/crates/rayexec_execution/src/execution/operators/batch_resizer.rs +++ b/crates/rayexec_execution/src/execution/operators/batch_resizer.rs @@ -6,15 +6,15 @@ use 
rayexec_error::Result; use super::util::resizer::{BatchResizer, DEFAULT_TARGET_BATCH_SIZE}; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::computed_batch::ComputedBatches; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -37,14 +37,14 @@ pub struct BatchResizerPartitionState { pub struct PhysicalBatchResizer; impl ExecutableOperator for PhysicalBatchResizer { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { - Ok(ExecutionStates { + ) -> Result { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: (0..partitions[0]) .map(|_| { PartitionState::BatchResizer(BatchResizerPartitionState { @@ -60,13 +60,13 @@ impl ExecutableOperator for PhysicalBatchResizer { }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { let state = match partition_state { PartitionState::BatchResizer(state) => state, other => panic!("invalid state: {other:?}"), @@ -80,7 +80,7 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake(); } - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } let computed = state.resizer.try_push(batch)?; @@ -92,19 +92,19 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } else { // Otherwise we need more batches. - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::BatchResizer(state) => state, other => panic!("invalid state: {other:?}"), @@ -118,7 +118,7 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake(); } - return Ok(PollFinalize::Pending); + return Ok(PollFinalize2::Pending); } state.exhausted = true; @@ -128,15 +128,15 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::BatchResizer(state) => state, other => panic!("invalid state: {other:?}"), @@ -144,7 +144,7 @@ impl ExecutableOperator for PhysicalBatchResizer { if state.buffered.is_empty() { if state.exhausted { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } // Register wakeup. 
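Illustrative sketch (not part of the patch): the batch resizer above implements a push/pull handshake, where `poll_push2` buffers input until a target batch size is reached and `poll_pull2` answers with `Pending`, `Computed`, or `Exhausted`. The toy types below reuse only the variant names that appear in this diff; `ToyResizer`, its plain `i32` rows, and the omitted waker plumbing are simplifications, not the real `ExecutableOperator` interface.

```rust
// Simplified stand-ins for the poll results used by the operator above.
enum PollPush {
    Pushed,    // Input accepted; buffered output may now be available.
    NeedsMore, // Input buffered; caller should keep pushing before pulling.
}

enum PollPull {
    Computed(Vec<i32>), // A batch of target size (or the final remainder) is ready.
    Pending,            // Nothing ready yet; the real operator registers a waker here.
    Exhausted,          // Finalized and fully drained.
}

// Toy resizer over plain i32 rows; the real operator works on Batch2 values
// and returns ComputedBatches.
struct ToyResizer {
    target: usize,
    buffered: Vec<i32>,
    finalized: bool,
}

impl ToyResizer {
    fn poll_push(&mut self, rows: &[i32]) -> PollPush {
        self.buffered.extend_from_slice(rows);
        if self.buffered.len() >= self.target {
            PollPush::Pushed
        } else {
            PollPush::NeedsMore
        }
    }

    fn poll_finalize_push(&mut self) {
        self.finalized = true;
    }

    fn poll_pull(&mut self) -> PollPull {
        let ready = self.buffered.len() >= self.target
            || (self.finalized && !self.buffered.is_empty());
        if ready {
            return PollPull::Computed(std::mem::take(&mut self.buffered));
        }
        if self.finalized {
            PollPull::Exhausted
        } else {
            PollPull::Pending
        }
    }
}
```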
@@ -153,7 +153,7 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake() } - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } let buffered = state.buffered.take(); @@ -162,7 +162,7 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake() } - Ok(PollPull::Computed(buffered)) + Ok(PollPull2::Computed(buffered)) } } diff --git a/crates/rayexec_execution/src/execution/operators/create_schema.rs b/crates/rayexec_execution/src/execution/operators/create_schema.rs index 39bbf69ff..ea68d4f1a 100644 --- a/crates/rayexec_execution/src/execution/operators/create_schema.rs +++ b/crates/rayexec_execution/src/execution/operators/create_schema.rs @@ -9,15 +9,15 @@ use rayexec_proto::ProtoConv; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; use crate::database::create::CreateSchemaInfo; use crate::database::DatabaseContext; @@ -50,11 +50,11 @@ impl PhysicalCreateSchema { } impl ExecutableOperator for PhysicalCreateSchema { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { if partitions[0] != 1 { return Err(RayexecError::new( "Create schema operator can only handle 1 partition", @@ -73,9 +73,9 @@ impl ExecutableOperator for PhysicalCreateSchema { Ok(()) }); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: vec![PartitionState::CreateSchema(CreateSchemaPartitionState { create, })], @@ -83,36 +83,36 @@ impl ExecutableOperator for PhysicalCreateSchema { }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { Err(RayexecError::new("Cannot push to physical create table")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical create table")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::CreateSchema(state) => match state.create.poll_unpin(cx) { - Poll::Ready(Ok(_)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(_)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), - Poll::Pending => Ok(PollPull::Pending), + Poll::Pending => Ok(PollPull2::Pending), }, other => panic!("invalid partition state: {other:?}"), } diff --git a/crates/rayexec_execution/src/execution/operators/create_table.rs b/crates/rayexec_execution/src/execution/operators/create_table.rs index e58aab9e7..3784096a1 100644 --- a/crates/rayexec_execution/src/execution/operators/create_table.rs +++ b/crates/rayexec_execution/src/execution/operators/create_table.rs @@ -6,7 +6,7 @@ use rayexec_proto::ProtoConv; use super::sink::{PartitionSink, SinkOperation, SinkOperator}; use super::util::barrier::PartitionBarrier; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use 
crate::database::catalog::CatalogTx; use crate::database::create::CreateTableInfo; use crate::database::DatabaseContext; @@ -119,7 +119,7 @@ struct CreateTablePartitionSink { } impl PartitionSink for CreateTablePartitionSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(async { self.create_table_if_has_fut().await?; self.wait_for_sink_if_none().await; diff --git a/crates/rayexec_execution/src/execution/operators/create_view.rs b/crates/rayexec_execution/src/execution/operators/create_view.rs index 02a595433..7a5bda5ea 100644 --- a/crates/rayexec_execution/src/execution/operators/create_view.rs +++ b/crates/rayexec_execution/src/execution/operators/create_view.rs @@ -8,15 +8,15 @@ use rayexec_error::{RayexecError, Result}; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; use crate::database::create::CreateViewInfo; use crate::database::DatabaseContext; @@ -41,11 +41,11 @@ pub struct PhysicalCreateView { } impl ExecutableOperator for PhysicalCreateView { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { if partitions[0] != 1 { return Err(RayexecError::new( "Create schema operator can only handle 1 partition", @@ -71,9 +71,9 @@ impl ExecutableOperator for PhysicalCreateView { Ok(()) }); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: vec![PartitionState::CreateView(CreateViewPartitionState { create, })], @@ -81,36 +81,36 @@ impl ExecutableOperator for PhysicalCreateView { }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { Err(RayexecError::new("Cannot push to physical create view")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical create view")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::CreateView(state) => match state.create.poll_unpin(cx) { - Poll::Ready(Ok(_)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(_)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), - Poll::Pending => Ok(PollPull::Pending), + Poll::Pending => Ok(PollPull2::Pending), }, other => panic!("invalid partition state: {other:?}"), } diff --git a/crates/rayexec_execution/src/execution/operators/drop.rs b/crates/rayexec_execution/src/execution/operators/drop.rs index cf1d78fc8..c30a5a20a 100644 --- a/crates/rayexec_execution/src/execution/operators/drop.rs +++ b/crates/rayexec_execution/src/execution/operators/drop.rs @@ -9,15 +9,15 @@ use rayexec_proto::ProtoConv; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - 
PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; use crate::database::drop::DropInfo; use crate::database::DatabaseContext; @@ -47,11 +47,11 @@ impl PhysicalDrop { } impl ExecutableOperator for PhysicalDrop { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { if partitions[0] != 1 { return Err(RayexecError::new("Drop can only handle one partition")); } @@ -68,44 +68,44 @@ impl ExecutableOperator for PhysicalDrop { Ok(()) }); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: vec![PartitionState::Drop(DropPartitionState { drop })], }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { Err(RayexecError::new("Cannot push to physical create table")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical create table")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Drop(state) => match state.drop.poll_unpin(cx) { - Poll::Ready(Ok(_)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(_)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), - Poll::Pending => Ok(PollPull::Pending), + Poll::Pending => Ok(PollPull2::Pending), }, other => panic!("invalid partition state: {other:?}"), } diff --git a/crates/rayexec_execution/src/execution/operators/empty.rs b/crates/rayexec_execution/src/execution/operators/empty.rs index 44ebb489e..e8adec2bc 100644 --- a/crates/rayexec_execution/src/execution/operators/empty.rs +++ b/crates/rayexec_execution/src/execution/operators/empty.rs @@ -5,16 +5,16 @@ use rayexec_error::{RayexecError, Result}; use super::{ ExecutableOperator, - ExecutionStates, + ExecutionStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; -use crate::execution::operators::InputOutputStates; +use crate::execution::operators::InputOutputStates2; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::proto::DatabaseProtoConv; @@ -29,14 +29,14 @@ pub struct EmptyPartitionState { pub struct PhysicalEmpty; impl ExecutableOperator for PhysicalEmpty { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { - Ok(ExecutionStates { + ) -> Result { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: (0..partitions[0]) .map(|_| PartitionState::Empty(EmptyPartitionState { finished: false })) .collect(), @@ -44,38 +44,38 @@ impl ExecutableOperator for PhysicalEmpty { }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut 
PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { Err(RayexecError::new("Cannot push to physical empty")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical empty")) } - fn poll_pull( + fn poll_pull2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Empty(state) => { if state.finished { - Ok(PollPull::Exhausted) + Ok(PollPull2::Exhausted) } else { state.finished = true; - Ok(PollPull::Computed(Batch::empty_with_num_rows(1).into())) + Ok(PollPull2::Computed(Batch2::empty_with_num_rows(1).into())) } } other => panic!("inner join state is not building: {other:?}"), diff --git a/crates/rayexec_execution/src/execution/operators/filter.rs b/crates/rayexec_execution/src/execution/operators/filter.rs index 3315fcbf5..fa1da0533 100644 --- a/crates/rayexec_execution/src/execution/operators/filter.rs +++ b/crates/rayexec_execution/src/execution/operators/filter.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use rayexec_error::{OptionExt, Result}; use super::simple::{SimpleOperator, StatelessOperation}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalScalarExpression; @@ -23,7 +23,7 @@ impl FilterOperation { } impl StatelessOperation for FilterOperation { - fn execute(&self, batch: Batch) -> Result { + fn execute(&self, batch: Batch2) -> Result { let selection = self.predicate.select(&batch)?; let batch = batch.select(Arc::new(selection)); // TODO: Select mut diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs index 46caf2618..c404b4179 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs @@ -2,11 +2,10 @@ use rayexec_error::Result; use super::hash_table::GroupAddress; use super::AggregateStates; -use crate::arrays::array::Array; -use crate::arrays::executor::physical_type::PhysicalType; +use crate::arrays::array::Array2; +use crate::arrays::executor::physical_type::PhysicalType2; use crate::arrays::executor::scalar::concat; use crate::execution::operators::util::resizer::DEFAULT_TARGET_BATCH_SIZE; -use crate::functions::aggregate::ChunkGroupAddressIter; /// Holds a chunk of value for the aggregate hash table. #[derive(Debug)] @@ -18,7 +17,7 @@ pub struct GroupChunk { /// All row hashes. pub hashes: Vec, /// Arrays making up the group values. - pub arrays: Vec, + pub arrays: Vec, /// Aggregate states we're keeping track of. pub aggregate_states: Vec, } @@ -27,7 +26,7 @@ impl GroupChunk { pub fn can_append( &self, new_groups: usize, - group_vals: impl ExactSizeIterator, + group_vals: impl ExactSizeIterator, ) -> bool { if self.num_groups + new_groups > DEFAULT_TARGET_BATCH_SIZE { return false; @@ -50,7 +49,7 @@ impl GroupChunk { /// states. 
pub fn append_group_values( &mut self, - group_vals: impl ExactSizeIterator, + group_vals: impl ExactSizeIterator, hashes: impl ExactSizeIterator, ) -> Result<()> { debug_assert_eq!(self.arrays.len(), group_vals.len()); @@ -69,7 +68,7 @@ impl GroupChunk { self.hashes.extend(hashes); for states in &mut self.aggregate_states { - states.states.new_states(new_groups); + states.states.new_groups(new_groups); } self.num_groups += new_groups; @@ -82,7 +81,7 @@ impl GroupChunk { /// `addrs` contains a list of group addresses we'll be using to map input /// rows to the state index. If and address is for a different chunk, that /// row will be skipped. - pub fn update_states(&mut self, inputs: &[Array], addrs: &[GroupAddress]) -> Result<()> { + pub fn update_states(&mut self, inputs: &[Array2], addrs: &[GroupAddress]) -> Result<()> { for agg_states in &mut self.aggregate_states { let input_cols: Vec<_> = agg_states .col_selection @@ -91,10 +90,11 @@ impl GroupChunk { .filter_map(|(selected, arr)| if selected { Some(arr) } else { None }) .collect(); - agg_states.states.update_states( - &input_cols, - ChunkGroupAddressIter::new(self.chunk_idx, addrs), - )?; + unimplemented!() + // agg_states.states.update_states2( + // &input_cols, + // ChunkGroupAddressIter::new(self.chunk_idx, addrs), + // )?; } Ok(()) @@ -110,10 +110,11 @@ impl GroupChunk { let own_state = &mut self.aggregate_states[agg_idx]; let other_state = &mut other.aggregate_states[agg_idx]; - own_state.states.combine( - &mut other_state.states, - ChunkGroupAddressIter::new(self.chunk_idx, addrs), - )?; + unimplemented!() + // own_state.states.combine( + // &mut other_state.states, + // ChunkGroupAddressIter::new(self.chunk_idx, addrs), + // )?; } Ok(()) diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs index 74c781e5d..2446ed345 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs @@ -4,35 +4,35 @@ use rayexec_error::{not_implemented, Result}; use super::chunk::GroupChunk; use super::hash_table::GroupAddress; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, - PhysicalStorage, - PhysicalType, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUntypedNull, - PhysicalUtf8, + PhysicalBinary_2, + PhysicalBool_2, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, + PhysicalStorage2, + PhysicalType2, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUntypedNull_2, + PhysicalUtf8_2, }; use crate::arrays::executor::scalar::{can_skip_validity_check, check_validity}; use crate::arrays::selection::{self, SelectionVector}; use crate::arrays::storage::AddressableStorage; pub fn group_values_eq( - inputs: &[Array], + inputs: &[Array2], input_sel: &SelectionVector, chunks: &[GroupChunk], addresses: &[GroupAddress], @@ -69,8 +69,8 @@ pub fn group_values_eq( } fn compare_group_rows_eq( - arrays1: &[Array], - arrays2: &[Array], + arrays1: &[Array2], + arrays2: &[Array2], rows1: I1, rows2: I2, not_eq_rows: &mut 
BTreeSet, @@ -96,69 +96,73 @@ where } match array1.physical_type() { - PhysicalType::UntypedNull => compare_rows_eq::( + PhysicalType2::UntypedNull => compare_rows_eq::( array1, array2, rows1, rows2, not_eq_rows, )?, - PhysicalType::Boolean => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::Boolean => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Int8 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::Int8 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Int16 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::Int16 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Int32 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::Int32 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Int64 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::Int64 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Int128 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::Int128 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::UInt8 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::UInt8 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::UInt16 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::UInt16 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::UInt32 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::UInt32 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::UInt64 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::UInt64 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::UInt128 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::UInt128 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Float16 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::Float16 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Float32 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::Float32 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Float64 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::Float64 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Interval => compare_rows_eq::( + PhysicalType2::Interval => compare_rows_eq::( array1, array2, rows1, rows2, not_eq_rows, )?, - PhysicalType::Binary => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? - } - PhysicalType::Utf8 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + PhysicalType2::Binary => compare_rows_eq::( + array1, + array2, + rows1, + rows2, + not_eq_rows, + )?, + PhysicalType2::Utf8 => { + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? 
} - PhysicalType::List => { + PhysicalType2::List => { not_implemented!("Row compare list") } } @@ -173,14 +177,14 @@ where /// When a row is not equal, the row from the `rows1` iter will be inserted into /// `not_eq_rows`. fn compare_rows_eq<'a, S, I1, I2>( - array1: &'a Array, - array2: &'a Array, + array1: &'a Array2, + array2: &'a Array2, rows1: I1, rows2: I2, not_eq_rows: &mut BTreeSet, ) -> Result<()> where - S: PhysicalStorage, + S: PhysicalStorage2, as AddressableStorage>::T: PartialEq, I1: Iterator, I2: Iterator, diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs index ff21e4169..787a3a3b3 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs @@ -1,14 +1,10 @@ -use std::sync::Arc; use rayexec_error::Result; use super::hash_table::HashTable; -use crate::arrays::array::Array; -use crate::arrays::executor::scalar::HashExecutor; -use crate::arrays::selection::SelectionVector; -use crate::execution::operators::hash_aggregate::hash_table::GroupAddress; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; use crate::functions::aggregate::states::{AggregateGroupStates, OpaqueStatesMut}; -use crate::functions::aggregate::ChunkGroupAddressIter; /// And implementation of GroupedStates that buffers inputs to an aggregate in a /// hash table to ensure the aggregate is computed with distinct values. @@ -17,6 +13,8 @@ use crate::functions::aggregate::ChunkGroupAddressIter; pub struct DistinctGroupedStates { /// Distinct inputs per group. distinct_inputs: Vec>, + /// Index to begin draining at. + drain_idx: usize, /// The underlying states. /// /// These won't be initialized until we've received all distinct input. @@ -31,6 +29,7 @@ impl DistinctGroupedStates { distinct_inputs: Vec::new(), states, hash_buf: Vec::new(), + drain_idx: 0, } } } @@ -40,117 +39,131 @@ impl AggregateGroupStates for DistinctGroupedStates { OpaqueStatesMut(&mut self.distinct_inputs) } - fn new_states(&mut self, count: usize) { + fn new_groups(&mut self, count: usize) { // Hash tables created with empty aggregates. self.distinct_inputs .extend((0..count).map(|_| Some(HashTable::new(16, Vec::new())))); } - fn num_states(&self) -> usize { + fn num_groups(&self) -> usize { self.distinct_inputs.len() } - fn update_states(&mut self, inputs: &[&Array], mapping: ChunkGroupAddressIter) -> Result<()> { - // TODO: Would be cool not needing to do this. - let mappings: Vec<_> = mapping.collect(); - - // For each group we're tracking, select the rows from the input and - // insert into the group specific hash table. - for state_idx in 0..self.distinct_inputs.len() { - let row_sel = Arc::new(SelectionVector::from_iter(mappings.iter().filter_map( - |row_mapping| { - if row_mapping.to_state == state_idx { - Some(row_mapping.from_row) - } else { - None - } - }, - ))); - - let inputs: Vec<_> = inputs - .iter() - .map(|&arr| { - let mut arr = arr.clone(); - arr.select_mut(row_sel.clone()); - arr - }) - .collect(); - - let len = match inputs.first() { - Some(arr) => arr.logical_len(), - None => return Ok(()), - }; - - self.hash_buf.clear(); - self.hash_buf.resize(len, 0); - - HashExecutor::hash_many(&inputs, &mut self.hash_buf)?; - - // Insert into hash map with empty inputs. 
- self.distinct_inputs[state_idx] - .as_mut() - .expect("hash table to exist") - .insert(&inputs, &self.hash_buf, &[])?; - } - - Ok(()) + fn update_group_states( + &mut self, + inputs: &[Array], + selection: Selection, + mapping: &[usize], + ) -> Result<()> { + unimplemented!() } + // fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { + // // TODO: Would be cool not needing to do this. + // let mappings: Vec<_> = mapping.collect(); + + // // For each group we're tracking, select the rows from the input and + // // insert into the group specific hash table. + // for state_idx in 0..self.distinct_inputs.len() { + // let row_sel = Arc::new(SelectionVector::from_iter(mappings.iter().filter_map( + // |&(from, to)| { + // if to == state_idx { + // Some(from) + // } else { + // None + // } + // }, + // ))); + + // let inputs: Vec<_> = inputs + // .iter() + // .map(|&arr| { + // let mut arr = arr.clone(); + // arr.select_mut(row_sel.clone()); + // arr + // }) + // .collect(); + + // let len = match inputs.first() { + // Some(arr) => arr.logical_len(), + // None => return Ok(()), + // }; + + // self.hash_buf.clear(); + // self.hash_buf.resize(len, 0); + + // HashExecutor::hash_many(&inputs, &mut self.hash_buf)?; + + // // Insert into hash map with empty inputs. + // self.distinct_inputs[state_idx] + // .as_mut() + // .expect("hash table to exist") + // .insert(&inputs, &self.hash_buf, &[])?; + // } + + // Ok(()) + // } + fn combine( &mut self, consume: &mut Box, - mapping: ChunkGroupAddressIter, + selection: Selection, + mapping: &[usize], ) -> Result<()> { let other_distinct_inputs = consume .opaque_states_mut() .downcast::>>()?; - for mapping in mapping { - let target = self.distinct_inputs[mapping.to_state].as_mut().unwrap(); - let consume = other_distinct_inputs[mapping.from_row].as_mut().unwrap(); + for (from, to) in selection.iter().zip(mapping.iter().copied()) { + let consume = other_distinct_inputs[from].as_mut().unwrap(); + let target = self.distinct_inputs[to].as_mut().unwrap(); target.merge(consume)?; } Ok(()) } - fn finalize(&mut self) -> Result { - // And now we actually create the states we need. - self.states.new_states(self.distinct_inputs.len()); - - let mut addresses_buf = Vec::new(); - - for (group_idx, hash_table) in self.distinct_inputs.iter_mut().enumerate() { - // Drain the hash table and inserting them into the newly created - // states. - let drain = hash_table.take().unwrap().into_drain(); - - for result in drain { - let batch = result?; - let len = batch.num_rows(); - // TODO: Prune group id column? - let arrays = batch.into_arrays(); - - // TODO: Bit jank, but works. We just assume we're working with - // chunk 0 always. - // - // I would like to have `GroupStates` be able to accept any - // iterator that produce row mappings, but can't really do that - // with dynamic dispatch. - addresses_buf.clear(); - addresses_buf.extend((0..len).map(|_| GroupAddress { - chunk_idx: 0, - row_idx: group_idx as u16, - })); - - let chunk_iter = ChunkGroupAddressIter::new(0, &addresses_buf); - - let inputs: Vec<_> = arrays.iter().collect(); // TODO - self.states.update_states(&inputs, chunk_iter)?; - } - } - - // Now we can actually drain the states. - self.states.finalize() + // fn finalize2(&mut self) -> Result { + // // And now we actually create the states we need. 
+ // self.states.new_groups(self.distinct_inputs.len()); + + // let mut addresses_buf = Vec::new(); + + // for (group_idx, hash_table) in self.distinct_inputs.iter_mut().enumerate() { + // // Drain the hash table and inserting them into the newly created + // // states. + // let drain = hash_table.take().unwrap().into_drain(); + + // for result in drain { + // let batch = result?; + // let len = batch.num_rows(); + // // TODO: Prune group id column? + // let arrays = batch.into_arrays(); + + // // TODO: Bit jank, but works. We just assume we're working with + // // chunk 0 always. + // // + // // I would like to have `GroupStates` be able to accept any + // // iterator that produce row mappings, but can't really do that + // // with dynamic dispatch. + // addresses_buf.clear(); + // addresses_buf.extend((0..len).map(|_| GroupAddress { + // chunk_idx: 0, + // row_idx: group_idx as u16, + // })); + + // let chunk_iter = ChunkGroupAddressIter::new(0, &addresses_buf); + + // let inputs: Vec<_> = arrays.iter().collect(); // TODO + // self.states.update_states2(&inputs, chunk_iter)?; + // } + // } + + // // Now we can actually drain the states. + // self.states.finalize2() + // } + + fn drain(&mut self, output: &mut Array) -> Result { + unimplemented!() } } diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs index af73e0466..c71829984 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use super::hash_table::HashTable; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; /// Drains a hash table. /// @@ -15,7 +15,7 @@ pub struct HashTableDrain { } impl HashTableDrain { - fn next_inner(&mut self) -> Result> { + fn next_inner(&mut self) -> Result> { if self.drain_idx >= self.table.chunks.len() { return Ok(None); } @@ -23,22 +23,23 @@ impl HashTableDrain { let chunk = &mut self.table.chunks[self.drain_idx]; self.drain_idx += 1; - // Computed aggregate columns. - let results = chunk - .aggregate_states - .iter_mut() - .map(|s| s.states.finalize()) - .collect::>>()?; + unimplemented!() + // // Computed aggregate columns. + // let results = chunk + // .aggregate_states + // .iter_mut() + // .map(|s| s.states.finalize2()) + // .collect::>>()?; - // Chunk arrays includes the GROUP ID column (last). - let batch = Batch::try_new(results.into_iter().chain(chunk.arrays.drain(..)))?; + // // Chunk arrays includes the GROUP ID column (last). 
+ // let batch = Batch2::try_new(results.into_iter().chain(chunk.arrays.drain(..)))?; - Ok(Some(batch)) + // Ok(Some(batch)) } } impl Iterator for HashTableDrain { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { self.next_inner().transpose() diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs index f6ce18a95..43b2e655d 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs @@ -8,7 +8,7 @@ use super::compare::group_values_eq; use super::drain::HashTableDrain; use super::entry::EntryKey; use super::Aggregate; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::selection::SelectionVector; const LOAD_FACTOR: f64 = 0.7; @@ -86,7 +86,7 @@ impl HashTable { self.entries.len() } - pub fn insert(&mut self, groups: &[Array], hashes: &[u64], inputs: &[Array]) -> Result<()> { + pub fn insert(&mut self, groups: &[Array2], hashes: &[u64], inputs: &[Array2]) -> Result<()> { // Find and create groups as needed. self.find_or_create_groups(groups, hashes)?; @@ -155,7 +155,7 @@ impl HashTable { } } - fn find_or_create_groups(&mut self, groups: &[Array], hashes: &[u64]) -> Result<()> { + fn find_or_create_groups(&mut self, groups: &[Array2], hashes: &[u64]) -> Result<()> { let num_inputs = hashes.len(); // Resize addresses, this will be where we store all the group @@ -308,7 +308,7 @@ impl HashTable { // Initialize the states. for state in &mut states { - state.states.new_states(num_new_groups); + state.states.new_groups(num_new_groups); } let chunk = GroupChunk { @@ -519,8 +519,8 @@ mod tests { #[test] fn insert_simple() { - let groups = [Array::from_iter(["g1", "g2", "g1"])]; - let inputs = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups = [Array2::from_iter(["g1", "g2", "g1"])]; + let inputs = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let hashes = [4, 5, 4]; // Hashes for group values. @@ -535,12 +535,12 @@ mod tests { fn insert_chunk_append() { // Assumes knowledge of internals. - let groups1 = [Array::from_iter(["g1", "g2", "g1"])]; - let inputs1 = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups1 = [Array2::from_iter(["g1", "g2", "g1"])]; + let inputs1 = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let hashes1 = [4, 5, 4]; - let groups2 = [Array::from_iter(["g1", "g2", "g3"])]; - let inputs2 = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups2 = [Array2::from_iter(["g1", "g2", "g3"])]; + let inputs2 = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let hashes2 = [4, 5, 6]; let agg = make_planned_aggregate([("g", DataType::Utf8), ("i", DataType::Int32)], 1); @@ -554,8 +554,8 @@ mod tests { #[test] fn insert_hash_collision() { - let groups = [Array::from_iter(["g1", "g2", "g1"])]; - let inputs = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups = [Array2::from_iter(["g1", "g2", "g1"])]; + let inputs = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let hashes = [4, 4, 4]; @@ -570,8 +570,8 @@ mod tests { fn insert_require_resize() { // 17 unique groups (> initial 16 capacity) - let groups = [Array::from_iter(0..17)]; - let inputs = [Array::from_iter(0_i64..17_i64)]; + let groups = [Array2::from_iter(0..17)]; + let inputs = [Array2::from_iter(0_i64..17_i64)]; let hashes = vec![44; 17]; // All hashes collide. 
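Illustrative sketch (not part of the patch): the tests above call `insert(&groups, &hashes, &inputs)` with parallel arrays and expect rows whose group values are equal to collapse into a single group, even when every hash collides. Ignoring the chunked `GroupChunk`/`GroupAddress` layout and the pluggable aggregate states, that contract reduces to the flat `HashMap` sketch below; `group_sum` and the summing aggregate are invented for the example.

```rust
use std::collections::HashMap;

// Conceptual model only: key each row by (hash, group value) and fold its
// input into a per-group accumulator. The real table buckets by hash first
// and falls back to comparing group values when hashes collide.
fn group_sum(groups: &[&str], hashes: &[u64], inputs: &[i64]) -> HashMap<(u64, String), i64> {
    let mut table: HashMap<(u64, String), i64> = HashMap::new();
    for ((group, hash), input) in groups.iter().zip(hashes).zip(inputs) {
        *table.entry((*hash, group.to_string())).or_insert(0) += *input;
    }
    table
}

fn main() {
    // Mirrors the `insert_simple` data: groups ["g1", "g2", "g1"] with hashes
    // [4, 5, 4] and inputs [1, 2, 3] collapse into two groups.
    let table = group_sum(&["g1", "g2", "g1"], &[4, 5, 4], &[1, 2, 3]);
    assert_eq!(table.len(), 2);
    assert_eq!(table[&(4u64, "g1".to_string())], 4);
}
```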
@@ -587,8 +587,8 @@ mod tests { // 33 unique groups, more than twice initial capacity. Caught bug where // resize by doubling didn't increase capacity enough. - let groups = [Array::from_iter(0..33)]; - let inputs = [Array::from_iter(0_i64..33_i64)]; + let groups = [Array2::from_iter(0..33)]; + let inputs = [Array2::from_iter(0_i64..33_i64)]; let hashes = vec![44; 33]; // All hashes collide. @@ -601,8 +601,8 @@ mod tests { #[test] fn merge_simple() { - let groups1 = [Array::from_iter(["g1", "g2", "g1"])]; - let inputs1 = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups1 = [Array2::from_iter(["g1", "g2", "g1"])]; + let inputs1 = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let agg = make_planned_aggregate([("g", DataType::Utf8), ("i", DataType::Int32)], 1); @@ -610,8 +610,8 @@ mod tests { let mut t1 = make_hash_table(agg.clone()); t1.insert(&groups1, &hashes, &inputs1).unwrap(); - let groups2 = [Array::from_iter(["g3", "g2", "g1"])]; - let inputs2 = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups2 = [Array2::from_iter(["g3", "g2", "g1"])]; + let inputs2 = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let hashes = vec![6, 5, 4]; @@ -627,8 +627,8 @@ mod tests { fn merge_non_empty_then_merge_empty() { // Tests that we properly resize internal buffers to account for merging // in empty hash tables after already merging in non-empty hash tables. - let groups1 = [Array::from_iter(["g1", "g2", "g1"])]; - let inputs1 = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups1 = [Array2::from_iter(["g1", "g2", "g1"])]; + let inputs1 = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let agg = make_planned_aggregate([("g", DataType::Utf8), ("i", DataType::Int32)], 1); @@ -638,8 +638,8 @@ mod tests { t1.insert(&groups1, &hashes, &inputs1).unwrap(); // Second hash table, not empty - let groups2 = [Array::from_iter(["g1", "g2"])]; - let inputs2 = [Array::from_iter::<[i64; 2]>([4, 5])]; + let groups2 = [Array2::from_iter(["g1", "g2"])]; + let inputs2 = [Array2::from_iter::<[i64; 2]>([4, 5])]; let hashes = vec![4, 5]; let mut t2 = make_hash_table(agg.clone()); t2.insert(&groups2, &hashes, &inputs2).unwrap(); diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs index b6c7ea605..738b51ef9 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs @@ -15,14 +15,14 @@ use hash_table::HashTable; use parking_lot::Mutex; use rayexec_error::{RayexecError, Result}; -use super::{ExecutionStates, InputOutputStates, PollFinalize}; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use super::{ExecutionStates2, InputOutputStates2, PollFinalize2}; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalU64; -use crate::arrays::executor::scalar::{HashExecutor, UnaryExecutor}; +use crate::arrays::executor::physical_type::PhysicalU64_2; +use crate::arrays::executor::scalar::{HashExecutor, UnaryExecutor2}; use crate::arrays::scalar::ScalarValue; use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; @@ -32,8 +32,8 @@ use crate::execution::operators::{ ExecutableOperator, OperatorState, PartitionState, - PollPull, - PollPush, + PollPull2, + PollPush2, }; use 
crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalAggregateExpression; @@ -223,11 +223,11 @@ impl PhysicalHashAggregate { } impl ExecutableOperator for PhysicalHashAggregate { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let num_partitions = partitions[0]; // Create column selection bitmaps for each aggregate expression. These @@ -287,19 +287,19 @@ impl ExecutableOperator for PhysicalHashAggregate { partition_states.push(partition_state); } - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(operator_state), - partition_states: InputOutputStates::OneToOne { partition_states }, + partition_states: InputOutputStates2::OneToOne { partition_states }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { let state = match partition_state { PartitionState::HashAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -310,7 +310,7 @@ impl ExecutableOperator for PhysicalHashAggregate { self.insert_batch_agg_hash_table(state, batch)?; // Aggregates don't produce anything until it's been finalized. - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } HashAggregatePartitionState::Producing { .. } => Err(RayexecError::new( "Attempted to push to partition that should be producing batches", @@ -318,12 +318,12 @@ impl ExecutableOperator for PhysicalHashAggregate { } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::HashAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -368,7 +368,7 @@ impl ExecutableOperator for PhysicalHashAggregate { } } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } HashAggregatePartitionState::Producing { .. } => Err(RayexecError::new( "Attempted to finalize a partition that's producing output", @@ -376,12 +376,12 @@ impl ExecutableOperator for PhysicalHashAggregate { } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::HashAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -402,7 +402,7 @@ impl ExecutableOperator for PhysicalHashAggregate { // Still need to wait for some input partitions to complete. Store our // waker and come back later. shared_state.pull_waker = Some(cx.waker().clone()); - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } // Othewise let's build the final table. 
Note that @@ -422,7 +422,7 @@ impl ExecutableOperator for PhysicalHashAggregate { let batch = match state.hashtable_drain.as_mut().unwrap().next() { Some(Ok(batch)) => batch, Some(Err(e)) => return Err(e), - None => return Ok(PollPull::Exhausted), + None => return Ok(PollPull2::Exhausted), }; // Prune off GROUP ID column, generate appropriate GROUPING @@ -440,7 +440,7 @@ impl ExecutableOperator for PhysicalHashAggregate { buffer: PrimitiveBuffer::with_len(group_ids.logical_len()), }; - let array = UnaryExecutor::execute::( + let array = UnaryExecutor2::execute::( &group_ids, builder, |id, buf| { @@ -466,14 +466,14 @@ impl ExecutableOperator for PhysicalHashAggregate { arrays.push(array); } - let batch = Batch::try_new(arrays)?; + let batch = Batch2::try_new(arrays)?; - Ok(PollPull::Computed(ComputedBatches::Single(batch))) + Ok(PollPull2::Computed(ComputedBatches::Single(batch))) } HashAggregatePartitionState::Aggregating(state) => { let mut shared = operator_state.output_states[state.partition_idx].lock(); shared.pull_waker = Some(cx.waker().clone()); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } @@ -484,7 +484,7 @@ impl PhysicalHashAggregate { fn insert_batch_agg_hash_table( &self, state: &mut AggregatingPartitionState, - batch: Batch, + batch: Batch2, ) -> Result<()> { if batch.num_rows() == 0 { return Ok(()); @@ -514,7 +514,7 @@ impl PhysicalHashAggregate { state.hash_buf.resize(num_rows, 0); state.partitions_idx_buf.resize(num_rows, 0); - let mut masked_grouping_columns: Vec = Vec::with_capacity(grouping_columns.len()); + let mut masked_grouping_columns: Vec = Vec::with_capacity(grouping_columns.len()); // Reused to select hashes per partition. let mut partition_hashes = Vec::new(); @@ -527,7 +527,7 @@ impl PhysicalHashAggregate { for (col_idx, col_is_null) in null_mask.iter().enumerate() { if col_is_null { // Create column with all nulls but retain the datatype. - let null_col = Array::new_typed_null_array( + let null_col = Array2::new_typed_null_array( grouping_columns[col_idx].datatype().clone(), num_rows, )?; diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs b/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs index 355b64274..b16ef7cea 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs @@ -3,8 +3,8 @@ use std::sync::Arc; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::arrays::executor::scalar::SelectExecutor; use crate::arrays::selection::SelectionVector; use crate::expr::physical::PhysicalScalarExpression; @@ -45,7 +45,7 @@ impl fmt::Display for HashJoinCondition { #[derive(Debug)] pub struct LeftPrecomputedJoinCondition { /// Precomputed results for left batches. - pub left_precomputed: Vec, + pub left_precomputed: Vec, pub left: PhysicalScalarExpression, pub right: PhysicalScalarExpression, pub function: PlannedScalarFunction, @@ -77,9 +77,9 @@ pub struct LeftPrecomputedJoinConditions { impl LeftPrecomputedJoinConditions { /// Compute the left side of the condition using the provided batch as /// input. 
- pub fn precompute_for_left_batch(&mut self, left: &Batch) -> Result<()> { + pub fn precompute_for_left_batch(&mut self, left: &Batch2) -> Result<()> { for condition in &mut self.conditions { - let precomputed = condition.left.eval(left)?; + let precomputed = condition.left.eval2(left)?; condition.left_precomputed.push(precomputed.into_owned()) } @@ -96,7 +96,7 @@ impl LeftPrecomputedJoinConditions { left_batch_idx: usize, left_row_sel: SelectionVector, right_row_sel: SelectionVector, - right: &Batch, + right: &Batch2, ) -> Result<(SelectionVector, SelectionVector)> { assert_eq!(left_row_sel.num_rows(), right_row_sel.num_rows()); @@ -121,20 +121,20 @@ impl LeftPrecomputedJoinConditions { left_precomputed.select_mut(left_row_sel.clone()); // Eval the right side. - let right_arr = condition.right.eval(&selected_right)?; + let right_arr = condition.right.eval2(&selected_right)?; // Compute join condition result. let result = condition .function .function_impl - .execute(&[&left_precomputed, right_arr.as_ref()])?; + .execute2(&[&left_precomputed, right_arr.as_ref()])?; results.push(result); } // AND the results. let refs: Vec<_> = results.iter().collect(); - let out = AndImpl.execute(&refs)?; + let out = AndImpl.execute2(&refs)?; // Generate a selection for the left and right selections. let mut select_the_selection = SelectionVector::with_capacity(out.logical_len()); diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/global_hash_table.rs b/crates/rayexec_execution/src/execution/operators/hash_join/global_hash_table.rs index a034c9964..6d0507f65 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/global_hash_table.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/global_hash_table.rs @@ -11,7 +11,7 @@ use super::condition::{ LeftPrecomputedJoinConditions, }; use super::partition_hash_table::{PartitionHashTable, RowKey}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::datatype::DataType; use crate::arrays::selection::SelectionVector; use crate::execution::operators::util::outer_join_tracker::{ @@ -26,7 +26,7 @@ use crate::execution::operators::util::outer_join_tracker::{ /// side. pub struct GlobalHashTable { /// All collected batches. - batches: Vec, + batches: Vec, /// Conditions we're joining on. conditions: LeftPrecomputedJoinConditions, /// Hash table pointing to a row. @@ -119,17 +119,17 @@ impl GlobalHashTable { } } - pub fn collected_batches(&self) -> &[Batch] { + pub fn collected_batches(&self) -> &[Batch2] { &self.batches } /// Probe the table. pub fn probe( &self, - right: &Batch, + right: &Batch2, hashes: &[u64], mut left_outer_tracker: Option<&mut LeftOuterJoinTracker>, - ) -> Result> { + ) -> Result> { // Track per-batch row indices that match the input columns. // // The value is a vec of (left_idx, right_idx) pairs pointing to rows in @@ -228,7 +228,7 @@ impl GlobalHashTable { let right_cols = right.select(Arc::new(right_row_sel)).into_arrays(); // Create final batch. 
- let batch = Batch::try_new(left_cols.into_iter().chain(right_cols))?; + let batch = Batch2::try_new(left_cols.into_iter().chain(right_cols))?; batches.push(batch); } diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs index f9cf3d145..5b385d0c2 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs @@ -16,15 +16,15 @@ use super::util::outer_join_tracker::{LeftOuterJoinDrainState, LeftOuterJoinTrac use super::{ ComputedBatches, ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::datatype::DataType; use crate::arrays::executor::scalar::HashExecutor; use crate::database::DatabaseContext; @@ -32,7 +32,7 @@ use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::logical::logical_join::JoinType; #[derive(Debug)] -pub struct HashJoinBuildPartitionState { +pub struct HashJoinBuildPartitionState2 { /// Hash table this partition will be writing to. /// /// Optional to enable moving from the local to global state once this @@ -43,7 +43,7 @@ pub struct HashJoinBuildPartitionState { } #[derive(Debug)] -pub struct HashJoinProbePartitionState { +pub struct HashJoinProbePartitionState2 { /// Index of this partition. partition_idx: usize, /// The final output table. If None, the global state should be checked to @@ -188,11 +188,11 @@ impl PhysicalHashJoin { } impl ExecutableOperator for PhysicalHashJoin { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { // TODO: Determine if this is what we want. 
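Illustrative sketch (not part of the patch): behind the renamed states, the hash join above keeps the familiar two phases, with the build side (`HashJoinBuild2`) inserted into a hash table keyed on the equality columns and the probe side (`HashJoinProbe2`) hashed and matched against it. The single-threaded `hash_join` below shows only that basic inner equi-join flow over plain tuples; the key/payload layout is invented, and none of the partitioning, waker coordination, or outer/mark/semi tracking from the real operator is modeled.

```rust
use std::collections::HashMap;

// Build phase: index the left (build) side by join key.
// Probe phase: stream the right (probe) side and emit matching pairs.
fn hash_join<L: Clone, R: Clone>(left: &[(i64, L)], right: &[(i64, R)]) -> Vec<(L, R)> {
    let mut table: HashMap<i64, Vec<&L>> = HashMap::new();
    for (key, payload) in left {
        table.entry(*key).or_default().push(payload);
    }

    let mut out = Vec::new();
    for (key, payload) in right {
        if let Some(matches) = table.get(key) {
            for l in matches {
                out.push(((*l).clone(), payload.clone()));
            }
        }
    }
    out
}
```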
let build_partitions = partitions[0]; let probe_partitions = partitions[0]; @@ -214,7 +214,7 @@ impl ExecutableOperator for PhysicalHashJoin { let build_states: Vec<_> = (0..build_partitions) .map(|_| { - PartitionState::HashJoinBuild(HashJoinBuildPartitionState { + PartitionState::HashJoinBuild2(HashJoinBuildPartitionState2 { local_hashtable: Some(PartitionHashTable::new(&self.conditions)), hash_buf: Vec::new(), }) @@ -223,7 +223,7 @@ impl ExecutableOperator for PhysicalHashJoin { let probe_states: Vec<_> = (0..probe_partitions) .map(|idx| { - PartitionState::HashJoinProbe(HashJoinProbePartitionState { + PartitionState::HashJoinProbe2(HashJoinProbePartitionState2 { partition_idx: idx, global: None, hash_buf: Vec::new(), @@ -237,33 +237,33 @@ impl ExecutableOperator for PhysicalHashJoin { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::HashJoin(operator_state)), - partition_states: InputOutputStates::NaryInputSingleOutput { + partition_states: InputOutputStates2::NaryInputSingleOutput { partition_states: vec![build_states, probe_states], pull_states: Self::PROBE_SIDE_INPUT_INDEX, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { match partition_state { - PartitionState::HashJoinBuild(state) => { + PartitionState::HashJoinBuild2(state) => { self.insert_into_local_table(state, batch)?; - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } - PartitionState::HashJoinProbe(state) => { + PartitionState::HashJoinProbe2(state) => { // If we have pending output, we need to wait for that to get // pulled before trying to compute additional batches. if !state.buffered_output.is_empty() { state.push_waker = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } let operator_state = match operator_state { @@ -280,7 +280,7 @@ impl ExecutableOperator for PhysicalHashJoin { // waker to come back later. if shared.build_inputs_remaining != 0 { shared.probe_push_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } let global = match shared.global_hash_table.as_ref() { @@ -290,7 +290,7 @@ impl ExecutableOperator for PhysicalHashJoin { // thread. Come back when it's ready. shared.probe_push_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } }; @@ -314,7 +314,7 @@ impl ExecutableOperator for PhysicalHashJoin { state.hash_buf.resize(batch.num_rows(), 0); for (idx, equality) in self.equalities.iter().enumerate() { - let result = equality.right.eval(&batch)?; + let result = equality.right.eval2(&batch)?; if idx == 0 { HashExecutor::hash_no_combine(&result, &mut state.hash_buf)?; @@ -334,27 +334,27 @@ impl ExecutableOperator for PhysicalHashJoin { state.buffered_output = ComputedBatches::new(batches); if state.buffered_output.is_empty() { // No batches joined, keep pushing to this operator. 
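Aside (not part of the patch): both the build and probe paths hash the equality key columns the same way, with the first column seeding the per-row hash and every further column combined into it, so equal key tuples hash identically on both sides. A self-contained sketch of that pattern; hash_one and combine stand in for the crate's HashExecutor and are not its actual hashing scheme:

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

fn hash_one<T: Hash>(value: &T) -> u64 {
    let mut hasher = DefaultHasher::new();
    value.hash(&mut hasher);
    hasher.finish()
}

fn combine(existing: u64, new: u64) -> u64 {
    // Any reasonable mixing function works for the sketch.
    existing ^ new.wrapping_mul(0x9E37_79B9_7F4A_7C15)
}

fn hash_rows(key_columns: &[Vec<i64>]) -> Vec<u64> {
    let num_rows = key_columns[0].len();
    let mut hashes = vec![0u64; num_rows];

    for (col_idx, column) in key_columns.iter().enumerate() {
        for (row, value) in column.iter().enumerate() {
            let h = hash_one(value);
            // First column initializes, later columns combine.
            hashes[row] = if col_idx == 0 { h } else { combine(hashes[row], h) };
        }
    }

    hashes
}

fn main() {
    let a = hash_rows(&[vec![1, 2], vec![10, 20]]);
    let b = hash_rows(&[vec![1, 2], vec![10, 20]]);
    assert_eq!(a, b); // Same keys hash identically on build and probe sides.
}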
- return Ok(PollPush::NeedsMore); + return Ok(PollPush2::NeedsMore); } if let Some(waker) = state.pull_waker.take() { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { - PartitionState::HashJoinBuild(state) => { + PartitionState::HashJoinBuild2(state) => { let mut shared = match operator_state { OperatorState::HashJoin(state) => state.inner.lock(), other => panic!("invalid operator state: {other:?}"), @@ -413,9 +413,9 @@ impl ExecutableOperator for PhysicalHashJoin { } } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - PartitionState::HashJoinProbe(state) => { + PartitionState::HashJoinProbe2(state) => { let mut shared = match operator_state { OperatorState::HashJoin(state) => state.inner.lock(), other => panic!("invalid operator state: {other:?}"), @@ -429,7 +429,7 @@ impl ExecutableOperator for PhysicalHashJoin { // left side. if shared.build_inputs_remaining != 0 { shared.probe_push_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollFinalize::Pending); + return Ok(PollFinalize2::Pending); } // It's possible for this partition not have this if we pushed @@ -443,7 +443,7 @@ impl ExecutableOperator for PhysicalHashJoin { } None => { shared.probe_push_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollFinalize::Pending); + return Ok(PollFinalize2::Pending); } } @@ -485,21 +485,21 @@ impl ExecutableOperator for PhysicalHashJoin { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { - PartitionState::HashJoinProbe(state) => state, - PartitionState::HashJoinBuild(_) => { + PartitionState::HashJoinProbe2(state) => state, + PartitionState::HashJoinBuild2(_) => { // We should only be pulling with the "probe" state. The "build" // state acts as a sink into the operator. panic!("should not pull with a build state") @@ -515,7 +515,7 @@ impl ExecutableOperator for PhysicalHashJoin { waker.wake(); } - Ok(PollPull::Computed(computed)) + Ok(PollPull2::Computed(computed)) } else { // No batches computed, check if we're done. if state.input_finished { @@ -530,7 +530,7 @@ impl ExecutableOperator for PhysicalHashJoin { if shared.probe_inputs_remaining != 0 { // Global state does not yet have all inputs. Need to wait. shared.probe_drain_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } let start_idx = state.partition_idx; @@ -565,26 +565,26 @@ impl ExecutableOperator for PhysicalHashJoin { if matches!(self.join_type, JoinType::LeftMark { .. }) { // Mark drain match drain_state.drain_mark_next()? { - Some(batch) => return Ok(PollPull::Computed(batch.into())), - None => return Ok(PollPull::Exhausted), + Some(batch) => return Ok(PollPull2::Computed(batch.into())), + None => return Ok(PollPull2::Exhausted), } } else if matches!(self.join_type, JoinType::Semi) { // Semi drain match drain_state.drain_semi_next()? 
{ - Some(batch) => return Ok(PollPull::Computed(batch.into())), - None => return Ok(PollPull::Exhausted), + Some(batch) => return Ok(PollPull2::Computed(batch.into())), + None => return Ok(PollPull2::Exhausted), } } else { // Normal left drain match drain_state.drain_next()? { - Some(batch) => return Ok(PollPull::Computed(batch.into())), - None => return Ok(PollPull::Exhausted), + Some(batch) => return Ok(PollPull2::Computed(batch.into())), + None => return Ok(PollPull2::Exhausted), } } } // We're done. - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } // No batch available, come back later. @@ -595,7 +595,7 @@ impl ExecutableOperator for PhysicalHashJoin { waker.wake(); } - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } @@ -604,8 +604,8 @@ impl PhysicalHashJoin { /// Inserts a batch into a partition-local hash table. fn insert_into_local_table( &self, - state: &mut HashJoinBuildPartitionState, - batch: Batch, + state: &mut HashJoinBuildPartitionState2, + batch: Batch2, ) -> Result<()> { // Compute left hashes on equality conditions. @@ -613,7 +613,7 @@ impl PhysicalHashJoin { state.hash_buf.resize(batch.num_rows(), 0); for (idx, equality) in self.equalities.iter().enumerate() { - let result = equality.left.eval(&batch)?; + let result = equality.left.eval2(&batch)?; if idx == 0 { HashExecutor::hash_no_combine(&result, &mut state.hash_buf)?; diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/partition_hash_table.rs b/crates/rayexec_execution/src/execution/operators/hash_join/partition_hash_table.rs index 22b42f751..a6cfd1d50 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/partition_hash_table.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/partition_hash_table.rs @@ -4,7 +4,7 @@ use hashbrown::raw::RawTable; use rayexec_error::Result; use super::condition::{HashJoinCondition, LeftPrecomputedJoinConditions}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; /// Points to a row in the hash table. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -17,7 +17,7 @@ pub struct RowKey { pub struct PartitionHashTable { /// All collected batches. - pub batches: Vec, + pub batches: Vec, /// Conditions we're joining on. pub conditions: LeftPrecomputedJoinConditions, /// Hash table pointing to a row. @@ -39,7 +39,7 @@ impl PartitionHashTable { /// /// `hash_indices` indicates which columns in the batch was used to compute /// the hashes. 
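Aside (not part of the patch): the partition-local table maps a row hash to a RowKey, i.e. a (batch index, row index) pair pointing into the collected batches. A simplified stand-in using std's HashMap in place of the raw hashbrown table:

use std::collections::HashMap;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct RowKey {
    batch_idx: usize,
    row_idx: usize,
}

#[derive(Default)]
struct LocalTable {
    // Real code keys a raw table on the hash directly; a HashMap of
    // hash -> row keys is enough to show the shape.
    entries: HashMap<u64, Vec<RowKey>>,
}

impl LocalTable {
    fn insert_batch(&mut self, batch_idx: usize, hashes: &[u64]) {
        for (row_idx, &hash) in hashes.iter().enumerate() {
            self.entries
                .entry(hash)
                .or_default()
                .push(RowKey { batch_idx, row_idx });
        }
    }
}

fn main() {
    let mut table = LocalTable::default();
    table.insert_batch(0, &[42, 7, 42]);
    // Two rows in batch 0 share hash 42 and become probe candidates.
    assert_eq!(table.entries[&42].len(), 2);
}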
- pub fn insert_batch(&mut self, batch: Batch, hashes: &[u64]) -> Result<()> { + pub fn insert_batch(&mut self, batch: Batch2, hashes: &[u64]) -> Result<()> { assert_eq!(batch.num_rows(), hashes.len()); self.conditions.precompute_for_left_batch(&batch)?; diff --git a/crates/rayexec_execution/src/execution/operators/limit.rs b/crates/rayexec_execution/src/execution/operators/limit.rs index ee0fca6ff..3f6fe68df 100644 --- a/crates/rayexec_execution/src/execution/operators/limit.rs +++ b/crates/rayexec_execution/src/execution/operators/limit.rs @@ -5,15 +5,15 @@ use rayexec_error::Result; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::proto::DatabaseProtoConv; @@ -29,7 +29,7 @@ pub struct LimitPartitionState { remaining_count: usize, /// A buffered batch. - buffer: Option, + buffer: Option, /// Waker on pull side if no batch is ready. pull_waker: Option, @@ -62,16 +62,16 @@ impl PhysicalLimit { } impl ExecutableOperator for PhysicalLimit { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let partitions = partitions[0]; - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: (0..partitions) .map(|_| { PartitionState::Limit(LimitPartitionState { @@ -88,13 +88,13 @@ impl ExecutableOperator for PhysicalLimit { }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { let state = match partition_state { PartitionState::Limit(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -102,7 +102,7 @@ impl ExecutableOperator for PhysicalLimit { if state.buffer.is_some() { state.push_waker = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } let batch = if state.remaining_offset > 0 { @@ -110,7 +110,7 @@ impl ExecutableOperator for PhysicalLimit { // batch, and keep asking for more input. if state.remaining_offset >= batch.num_rows() { state.remaining_offset -= batch.num_rows(); - return Ok(PollPush::NeedsMore); + return Ok(PollPush2::NeedsMore); } // Otherwise we have to slice the batch at the offset point. @@ -148,18 +148,18 @@ impl ExecutableOperator for PhysicalLimit { // instead the partition pipeline will immediately start to pull // from this operator. 
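Aside (not part of the patch): the limit operator tracks a remaining offset and a remaining count, skipping whole batches while the offset is unconsumed and signalling Break once the count is exhausted. A small sketch of that bookkeeping over Vec-based batches; this is a simplified model, not the operator itself:

struct LimitState {
    remaining_offset: usize,
    remaining_count: usize,
}

enum Push {
    NeedsMore,              // Whole batch consumed by the offset.
    Emit(Vec<i32>),         // Rows to pass through; more input is welcome.
    EmitAndBreak(Vec<i32>), // Limit reached; no more input needed.
}

fn push_batch(state: &mut LimitState, batch: Vec<i32>) -> Push {
    if state.remaining_offset >= batch.len() {
        state.remaining_offset -= batch.len();
        return Push::NeedsMore;
    }

    let start = state.remaining_offset;
    state.remaining_offset = 0;

    let available = batch.len() - start;
    if available >= state.remaining_count {
        let out = batch[start..start + state.remaining_count].to_vec();
        state.remaining_count = 0;
        Push::EmitAndBreak(out)
    } else {
        state.remaining_count -= available;
        Push::Emit(batch[start..].to_vec())
    }
}

fn main() {
    // LIMIT 3 OFFSET 2 over batches [1,2,3,4] and [5,6,7].
    let mut state = LimitState { remaining_offset: 2, remaining_count: 3 };
    match push_batch(&mut state, vec![1, 2, 3, 4]) {
        Push::Emit(rows) => assert_eq!(rows, vec![3, 4]),
        _ => unreachable!(),
    }
    match push_batch(&mut state, vec![5, 6, 7]) {
        Push::EmitAndBreak(rows) => assert_eq!(rows, vec![5]),
        _ => unreachable!(),
    }
}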
state.finished = true; - Ok(PollPush::Break) + Ok(PollPush2::Break) } else { - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Limit(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -170,31 +170,31 @@ impl ExecutableOperator for PhysicalLimit { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Limit(state) => state, other => panic!("invalid partition state: {other:?}"), }; match state.buffer.take() { - Some(batch) => Ok(PollPull::Computed(batch.into())), + Some(batch) => Ok(PollPull2::Computed(batch.into())), None => { if state.finished { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } state.pull_waker = Some(cx.waker().clone()); if let Some(waker) = state.push_waker.take() { waker.wake(); } - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } @@ -234,20 +234,20 @@ mod tests { use super::*; use crate::arrays::scalar::ScalarValue; - use crate::execution::operators::test_util::{ + use crate::execution::operators::testutil::db_context::test_database_context; + use crate::execution::operators::testutil::{ logical_value, make_i32_batch, - test_database_context, unwrap_poll_pull_batch, TestWakerContext, }; fn create_states(operator: &PhysicalLimit, partitions: usize) -> Vec { let context = test_database_context(); - let states = operator.create_states(&context, vec![partitions]).unwrap(); + let states = operator.create_states2(&context, vec![partitions]).unwrap(); match states.partition_states { - InputOutputStates::OneToOne { partition_states } => partition_states, + InputOutputStates2::OneToOne { partition_states } => partition_states, other => panic!("invalid states: {other:?}"), } } @@ -268,7 +268,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut partition_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); // Push our first batch. let push_cx = TestWakerContext::new(); @@ -280,7 +280,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); // Pull side should have been woken. assert_eq!(1, pull_cx.wake_count()); @@ -300,7 +300,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Break, poll_push); + assert_eq!(PollPush2::Break, poll_push); // We did _not_ store a new pull waker, the current count for the pull // waker should still be one. @@ -340,7 +340,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); let pull_cx = TestWakerContext::new(); let poll_pull = pull_cx @@ -360,7 +360,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Break, poll_push); + assert_eq!(PollPush2::Break, poll_push); // Pull part of next batch. let poll_pull = pull_cx @@ -395,7 +395,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::NeedsMore, poll_push); + assert_eq!(PollPush2::NeedsMore, poll_push); // Keep pushing... 
let poll_push = push_cx @@ -406,7 +406,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Break, poll_push); + assert_eq!(PollPush2::Break, poll_push); let pull_cx = TestWakerContext::new(); let poll_pull = pull_cx @@ -435,7 +435,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Break, poll_push); + assert_eq!(PollPush2::Break, poll_push); let pull_cx = TestWakerContext::new(); let poll_pull = pull_cx @@ -446,6 +446,6 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut partition_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Exhausted, poll_pull); + assert_eq!(PollPull2::Exhausted, poll_pull); } } diff --git a/crates/rayexec_execution/src/execution/operators/materialize.rs b/crates/rayexec_execution/src/execution/operators/materialize.rs index d4e7510f1..0c8f04d0b 100644 --- a/crates/rayexec_execution/src/execution/operators/materialize.rs +++ b/crates/rayexec_execution/src/execution/operators/materialize.rs @@ -5,7 +5,7 @@ use rayexec_error::{RayexecError, Result}; use super::sink::{PartitionSink, SinkOperation}; use super::source::{PartitionSource, SourceOperation}; use super::util::broadcast::{BroadcastChannel, BroadcastReceiver}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::logical::binder::bind_context::MaterializationRef; @@ -131,7 +131,7 @@ pub struct MaterializedDataPartitionSource { } impl PartitionSource for MaterializedDataPartitionSource { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { let fut = self.recv.recv(); Box::pin(async move { Ok(fut.await) }) } @@ -143,7 +143,7 @@ pub struct MaterializedDataPartitionSink { } impl PartitionSink for MaterializedDataPartitionSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(async { self.sender.send(batch); Ok(()) diff --git a/crates/rayexec_execution/src/execution/operators/mod.rs b/crates/rayexec_execution/src/execution/operators/mod.rs index 1ceb06bcb..7ec92eac5 100644 --- a/crates/rayexec_execution/src/execution/operators/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/mod.rs @@ -30,10 +30,15 @@ pub mod unnest; pub mod values; pub mod window; +pub mod batch_collection; +pub mod physical_filter; +pub mod physical_hash_join; +pub mod physical_project; + pub(crate) mod util; #[cfg(test)] -mod test_util; +mod testutil; use std::fmt::Debug; use std::sync::Arc; @@ -49,17 +54,19 @@ use empty::PhysicalEmpty; use filter::{FilterOperation, PhysicalFilter}; use hash_aggregate::PhysicalHashAggregate; use hash_join::{ - HashJoinBuildPartitionState, + HashJoinBuildPartitionState2, HashJoinOperatorState, - HashJoinProbePartitionState, + HashJoinProbePartitionState2, PhysicalHashJoin, }; use insert::PhysicalInsert; use limit::PhysicalLimit; use materialize::{MaterializeSourceOperation, MaterializedSinkOperation}; use nl_join::PhysicalNestedLoopJoin; -use project::{PhysicalProject, ProjectOperation}; -use rayexec_error::{not_implemented, OptionExt, Result}; +use physical_filter::FilterPartitionState; +use physical_project::ProjectPartitionState; +use project::{PhysicalProject2, ProjectOperation}; +use rayexec_error::{not_implemented, OptionExt, RayexecError, Result}; use round_robin::PhysicalRoundRobinRepartition; use scan::{PhysicalScan, ScanPartitionState}; use 
simple::SimpleOperator; @@ -101,7 +108,8 @@ use self::sort::gather_sort::{ use self::sort::scatter_sort::ScatterSortPartitionState; use self::values::ValuesPartitionState; use super::computed_batch::ComputedBatches; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; +use crate::arrays::batch_exp::Batch; use crate::database::DatabaseContext; use crate::engine::result::ResultSink; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -111,12 +119,15 @@ use crate::proto::DatabaseProtoConv; // Current size: 264 bytes #[derive(Debug)] pub enum PartitionState { + Project(ProjectPartitionState), + Filter(FilterPartitionState), + HashAggregate(HashAggregatePartitionState), UngroupedAggregate(UngroupedAggregatePartitionState), NestedLoopJoinBuild(NestedLoopJoinBuildPartitionState), NestedLoopJoinProbe(NestedLoopJoinProbePartitionState), - HashJoinBuild(HashJoinBuildPartitionState), - HashJoinProbe(HashJoinProbePartitionState), + HashJoinBuild2(HashJoinBuildPartitionState2), + HashJoinProbe2(HashJoinProbePartitionState2), Values(ValuesPartitionState), Sink(SinkPartitionState), Source(SourcePartitionState), @@ -156,13 +167,113 @@ pub enum OperatorState { None, } +/// Poll result for operator execution. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PollExecute { + /// Operator accepted input and wrote its output to the output batch. + /// + /// The next poll should be with a new input batch. + Ready, + /// Push pending. Waker stored, re-execute with the exact same state. + Pending, + /// Operator accepted as much input at can handle. Don't provide any + /// additional input. + Break, + /// Operator needs more input before it'll produce any meaningful output. + NeedsMore, + /// Operator has more output. Call again with the same input batch. + HasMore, + /// No more output. + Exhausted, +} + +/// Poll result for operator finalization. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PollFinalize { + /// Operator finalized, execution of this operator finished. + /// + /// `poll_execute` will not be called after this is returned. + Finalized, + /// This operator needs to be drained. + /// + /// `poll_execute` will be called with empty input batches until the + /// opperator indicates it's been exhausted. + NeedsDrain, + /// Finalize pending, re-execute with the same state. + Pending, +} + +#[derive(Debug)] +pub enum PartitionAndOperatorStates { + /// Operators that have a single input/output. + Branchless { + /// Global operator state. + operator_state: OperatorState, + /// State per-partition. + partition_states: Vec, + }, + /// Operators that produce 1 or more output branches. + /// + /// Mostly for materializations. + BranchingOutput { + /// Global operator state. + operator_state: OperatorState, + /// Single set of input states. + inputs_states: Vec, + /// Multiple sets of output states. + output_states: Vec>, + }, + /// Operators that have two children, with this operator acting as the + /// "sink" for one child. + /// + /// For joins, the build side is the terminating input, while the probe side + /// is non-terminating. + TerminatingInput { + /// Global operator state. + operator_state: OperatorState, + /// States for the input that is non-terminating. + nonterminating_states: Vec, + /// States for the input that is terminated by this operator. 
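Aside (not part of the patch): the new PollExecute results describe a push-based execute loop in which Ready means the output batch was filled and the next input can be supplied, HasMore means the operator should be re-polled with the same input, NeedsMore means nothing was produced for this input, and Exhausted ends execution. A rough sketch of how a driver might interpret a subset of these results, using simplified stand-in types rather than the crate's pipeline executor:

// Simplified stand-ins; Vec<i32> plays the role of a batch.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PollExecute {
    Ready,
    NeedsMore,
    HasMore,
    Exhausted,
}

trait SimpleOperator {
    fn poll_execute(&mut self, input: &[i32], output: &mut Vec<i32>) -> PollExecute;
}

fn run<O: SimpleOperator>(op: &mut O, inputs: &[Vec<i32>]) -> Vec<Vec<i32>> {
    let mut emitted = Vec::new();
    'outer: for input in inputs {
        loop {
            let mut output = Vec::new();
            match op.poll_execute(input, &mut output) {
                PollExecute::Ready => {
                    emitted.push(output);
                    break; // Move on to the next input batch.
                }
                PollExecute::HasMore => emitted.push(output), // Re-poll same input.
                PollExecute::NeedsMore => break,              // Nothing to emit yet.
                PollExecute::Exhausted => break 'outer,
            }
        }
    }
    emitted
}

struct PassThrough;

impl SimpleOperator for PassThrough {
    fn poll_execute(&mut self, input: &[i32], output: &mut Vec<i32>) -> PollExecute {
        output.extend_from_slice(input);
        PollExecute::Ready
    }
}

fn main() {
    let mut op = PassThrough;
    let out = run(&mut op, &[vec![1, 2], vec![3]]);
    assert_eq!(out, vec![vec![1, 2], vec![3]]);
}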
+ terminating_states: Vec, + }, +} + +impl PartitionAndOperatorStates { + pub fn branchless_into_states(self) -> Result<(OperatorState, Vec)> { + match self { + Self::Branchless { + operator_state, + partition_states, + } => Ok((operator_state, partition_states)), + Self::BranchingOutput { .. } => Err(RayexecError::new( + "Expected branchless states, got branching output", + )), + Self::TerminatingInput { .. } => Err(RayexecError::new( + "Expected branchless states, got terminating input", + )), + } + } +} + +#[derive(Debug)] +pub struct ExecuteInOutState<'a> { + /// Input batch being pushed to the operator. + /// + /// May be None for operators that are only producing output. + input: Option<&'a mut Batch>, + /// Output batch the operator should write to. + /// + /// May be None for operators that only consume batches. + output: Option<&'a mut Batch>, +} + /// Result of a push to an operator. /// /// An operator may not be ready to accept input either because it's waiting on /// something else to complete (e.g. the right side of a join needs to the left /// side to complete first) or some internal buffer is full. #[derive(Debug, PartialEq)] -pub enum PollPush { +pub enum PollPush2 { /// Batch was successfully pushed. Pushed, @@ -170,7 +281,7 @@ pub enum PollPush { /// /// A waker will be registered for a later wakeup. This same batch should be /// pushed at that time. - Pending(Batch), + Pending(Batch2), /// This operator requires no more input. /// @@ -184,7 +295,7 @@ pub enum PollPush { /// Result of a pull from a Source. #[derive(Debug, PartialEq)] -pub enum PollPull { +pub enum PollPull2 { /// Successfully received computed results. Computed(ComputedBatches), @@ -199,14 +310,14 @@ pub enum PollPull { } #[derive(Debug, PartialEq)] -pub enum PollFinalize { +pub enum PollFinalize2 { Finalized, Pending, } /// Describes the relationships of partition states for operators. #[derive(Debug)] -pub enum InputOutputStates { +pub enum InputOutputStates2 { /// Input and output partition states have a one-to-one mapping. /// /// The states used for pushing to an operator are the same states used to @@ -262,54 +373,90 @@ pub enum InputOutputStates { /// States generates from an operator to use during execution. #[derive(Debug)] -pub struct ExecutionStates { +pub struct ExecutionStates2 { /// Global operator state. pub operator_state: Arc, /// Partition states for the operator. - pub partition_states: InputOutputStates, + pub partition_states: InputOutputStates2, } pub trait ExecutableOperator: Sync + Send + Debug + Explainable { + fn create_states( + &self, + context: &DatabaseContext, + batch_size: usize, + partitions: usize, + ) -> Result { + unimplemented!() + } + + fn poll_execute( + &self, + cx: &mut Context, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + inout: ExecuteInOutState, + ) -> Result { + unimplemented!() + } + + fn poll_finalize( + &self, + cx: &mut Context, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + ) -> Result { + unimplemented!() + } + /// Create execution states for this operator. /// /// `input_partitions` is the partitioning for each input that will be /// pushing batches through this operator. /// /// Joins are assumed to have two inputs. - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, _partitions: Vec, - ) -> Result; + ) -> Result { + unimplemented!() + } /// Try to push a batch for this partition. 
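Aside (not part of the patch): ExecuteInOutState passes both batches as Options because not every operator has both sides, a source only writes output and a sink only reads input. A minimal stand-alone illustration of that shape, with Vec<i32> standing in for a batch and a String error in place of RayexecError:

struct InOut<'a> {
    // May be None for operators that only produce output.
    input: Option<&'a mut Vec<i32>>,
    // May be None for operators that only consume batches.
    output: Option<&'a mut Vec<i32>>,
}

fn copy_through(inout: InOut<'_>) -> Result<(), String> {
    let input = inout.input.ok_or("operator requires an input batch")?;
    let output = inout.output.ok_or("operator requires an output batch")?;
    output.clear();
    output.extend_from_slice(input.as_slice());
    Ok(())
}

fn main() {
    let mut input = vec![1, 2, 3];
    let mut output = Vec::new();
    copy_through(InOut {
        input: Some(&mut input),
        output: Some(&mut output),
    })
    .unwrap();
    assert_eq!(output, vec![1, 2, 3]);
}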
- fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, - ) -> Result; + batch: Batch2, + ) -> Result { + unimplemented!() + } /// Finalize pushing to partition. /// /// This indicates the operator will receive no more input for a given /// partition, allowing the operator to execution some finalization logic. - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result; + ) -> Result { + unimplemented!() + } /// Try to pull a batch for this partition. - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result; + ) -> Result { + unimplemented!() + } } // 144 bytes @@ -348,173 +495,173 @@ pub enum PhysicalOperator { } impl ExecutableOperator for PhysicalOperator { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { match self { - Self::HashAggregate(op) => op.create_states(context, partitions), - Self::UngroupedAggregate(op) => op.create_states(context, partitions), - Self::Window(op) => op.create_states(context, partitions), - Self::NestedLoopJoin(op) => op.create_states(context, partitions), - Self::HashJoin(op) => op.create_states(context, partitions), - Self::Values(op) => op.create_states(context, partitions), - Self::ResultSink(op) => op.create_states(context, partitions), - Self::DynSink(op) => op.create_states(context, partitions), - Self::DynSource(op) => op.create_states(context, partitions), - Self::MaterializedSink(op) => op.create_states(context, partitions), - Self::MaterializedSource(op) => op.create_states(context, partitions), - Self::RoundRobin(op) => op.create_states(context, partitions), - Self::MergeSorted(op) => op.create_states(context, partitions), - Self::LocalSort(op) => op.create_states(context, partitions), - Self::Limit(op) => op.create_states(context, partitions), - Self::Union(op) => op.create_states(context, partitions), - Self::Filter(op) => op.create_states(context, partitions), - Self::Project(op) => op.create_states(context, partitions), - Self::Unnest(op) => op.create_states(context, partitions), - Self::Scan(op) => op.create_states(context, partitions), - Self::TableFunction(op) => op.create_states(context, partitions), - Self::TableInOut(op) => op.create_states(context, partitions), - Self::Insert(op) => op.create_states(context, partitions), - Self::CopyTo(op) => op.create_states(context, partitions), - Self::CreateTable(op) => op.create_states(context, partitions), - Self::CreateSchema(op) => op.create_states(context, partitions), - Self::CreateView(op) => op.create_states(context, partitions), - Self::Drop(op) => op.create_states(context, partitions), - Self::Empty(op) => op.create_states(context, partitions), - Self::BatchResizer(op) => op.create_states(context, partitions), + Self::HashAggregate(op) => op.create_states2(context, partitions), + Self::UngroupedAggregate(op) => op.create_states2(context, partitions), + Self::Window(op) => op.create_states2(context, partitions), + Self::NestedLoopJoin(op) => op.create_states2(context, partitions), + Self::HashJoin(op) => op.create_states2(context, partitions), + Self::Values(op) => op.create_states2(context, partitions), + Self::ResultSink(op) => op.create_states2(context, partitions), + Self::DynSink(op) => op.create_states2(context, partitions), + 
Self::DynSource(op) => op.create_states2(context, partitions), + Self::MaterializedSink(op) => op.create_states2(context, partitions), + Self::MaterializedSource(op) => op.create_states2(context, partitions), + Self::RoundRobin(op) => op.create_states2(context, partitions), + Self::MergeSorted(op) => op.create_states2(context, partitions), + Self::LocalSort(op) => op.create_states2(context, partitions), + Self::Limit(op) => op.create_states2(context, partitions), + Self::Union(op) => op.create_states2(context, partitions), + Self::Filter(op) => op.create_states2(context, partitions), + Self::Project(op) => op.create_states2(context, partitions), + Self::Unnest(op) => op.create_states2(context, partitions), + Self::Scan(op) => op.create_states2(context, partitions), + Self::TableFunction(op) => op.create_states2(context, partitions), + Self::TableInOut(op) => op.create_states2(context, partitions), + Self::Insert(op) => op.create_states2(context, partitions), + Self::CopyTo(op) => op.create_states2(context, partitions), + Self::CreateTable(op) => op.create_states2(context, partitions), + Self::CreateSchema(op) => op.create_states2(context, partitions), + Self::CreateView(op) => op.create_states2(context, partitions), + Self::Drop(op) => op.create_states2(context, partitions), + Self::Empty(op) => op.create_states2(context, partitions), + Self::BatchResizer(op) => op.create_states2(context, partitions), } } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { match self { - Self::HashAggregate(op) => op.poll_push(cx, partition_state, operator_state, batch), + Self::HashAggregate(op) => op.poll_push2(cx, partition_state, operator_state, batch), Self::UngroupedAggregate(op) => { - op.poll_push(cx, partition_state, operator_state, batch) + op.poll_push2(cx, partition_state, operator_state, batch) } - Self::Window(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::NestedLoopJoin(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::HashJoin(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Values(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::ResultSink(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::DynSink(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::DynSource(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::MaterializedSink(op) => op.poll_push(cx, partition_state, operator_state, batch), + Self::Window(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::NestedLoopJoin(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::HashJoin(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Values(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::ResultSink(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::DynSink(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::DynSource(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::MaterializedSink(op) => op.poll_push2(cx, partition_state, operator_state, batch), Self::MaterializedSource(op) => { - op.poll_push(cx, partition_state, operator_state, batch) + op.poll_push2(cx, partition_state, operator_state, batch) } - Self::RoundRobin(op) => op.poll_push(cx, partition_state, operator_state, batch), - 
Self::MergeSorted(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::LocalSort(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Limit(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Union(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Filter(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Project(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Unnest(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Scan(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::TableFunction(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::TableInOut(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Insert(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::CopyTo(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::CreateTable(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::CreateSchema(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::CreateView(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Drop(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Empty(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::BatchResizer(op) => op.poll_push(cx, partition_state, operator_state, batch), + Self::RoundRobin(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::MergeSorted(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::LocalSort(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Limit(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Union(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Filter(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Project(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Unnest(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Scan(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::TableFunction(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::TableInOut(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Insert(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::CopyTo(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::CreateTable(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::CreateSchema(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::CreateView(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Drop(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Empty(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::BatchResizer(op) => op.poll_push2(cx, partition_state, operator_state, batch), } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match self { - Self::HashAggregate(op) => op.poll_finalize_push(cx, partition_state, operator_state), + Self::HashAggregate(op) => op.poll_finalize_push2(cx, partition_state, operator_state), Self::UngroupedAggregate(op) => { - op.poll_finalize_push(cx, partition_state, operator_state) + op.poll_finalize_push2(cx, 
partition_state, operator_state) } - Self::Window(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::NestedLoopJoin(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::HashJoin(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Values(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::ResultSink(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::DynSink(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::DynSource(op) => op.poll_finalize_push(cx, partition_state, operator_state), + Self::Window(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::NestedLoopJoin(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::HashJoin(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Values(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::ResultSink(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::DynSink(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::DynSource(op) => op.poll_finalize_push2(cx, partition_state, operator_state), Self::MaterializedSink(op) => { - op.poll_finalize_push(cx, partition_state, operator_state) + op.poll_finalize_push2(cx, partition_state, operator_state) } Self::MaterializedSource(op) => { - op.poll_finalize_push(cx, partition_state, operator_state) + op.poll_finalize_push2(cx, partition_state, operator_state) } - Self::RoundRobin(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::MergeSorted(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::LocalSort(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Limit(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Union(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Filter(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Project(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Unnest(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Scan(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::TableFunction(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::TableInOut(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Insert(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::CopyTo(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::CreateTable(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::CreateSchema(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::CreateView(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Drop(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Empty(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::BatchResizer(op) => op.poll_finalize_push(cx, partition_state, operator_state), + Self::RoundRobin(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::MergeSorted(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::LocalSort(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Limit(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Union(op) => op.poll_finalize_push2(cx, partition_state, 
operator_state), + Self::Filter(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Project(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Unnest(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Scan(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::TableFunction(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::TableInOut(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Insert(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::CopyTo(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::CreateTable(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::CreateSchema(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::CreateView(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Drop(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Empty(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::BatchResizer(op) => op.poll_finalize_push2(cx, partition_state, operator_state), } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match self { - Self::HashAggregate(op) => op.poll_pull(cx, partition_state, operator_state), - Self::UngroupedAggregate(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Window(op) => op.poll_pull(cx, partition_state, operator_state), - Self::NestedLoopJoin(op) => op.poll_pull(cx, partition_state, operator_state), - Self::HashJoin(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Values(op) => op.poll_pull(cx, partition_state, operator_state), - Self::ResultSink(op) => op.poll_pull(cx, partition_state, operator_state), - Self::DynSink(op) => op.poll_pull(cx, partition_state, operator_state), - Self::DynSource(op) => op.poll_pull(cx, partition_state, operator_state), - Self::MaterializedSink(op) => op.poll_pull(cx, partition_state, operator_state), - Self::MaterializedSource(op) => op.poll_pull(cx, partition_state, operator_state), - Self::RoundRobin(op) => op.poll_pull(cx, partition_state, operator_state), - Self::MergeSorted(op) => op.poll_pull(cx, partition_state, operator_state), - Self::LocalSort(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Limit(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Union(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Filter(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Project(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Unnest(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Scan(op) => op.poll_pull(cx, partition_state, operator_state), - Self::TableFunction(op) => op.poll_pull(cx, partition_state, operator_state), - Self::TableInOut(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Insert(op) => op.poll_pull(cx, partition_state, operator_state), - Self::CopyTo(op) => op.poll_pull(cx, partition_state, operator_state), - Self::CreateTable(op) => op.poll_pull(cx, partition_state, operator_state), - Self::CreateSchema(op) => op.poll_pull(cx, partition_state, operator_state), - Self::CreateView(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Drop(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Empty(op) => 
op.poll_pull(cx, partition_state, operator_state), - Self::BatchResizer(op) => op.poll_pull(cx, partition_state, operator_state), + Self::HashAggregate(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::UngroupedAggregate(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Window(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::NestedLoopJoin(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::HashJoin(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Values(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::ResultSink(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::DynSink(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::DynSource(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::MaterializedSink(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::MaterializedSource(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::RoundRobin(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::MergeSorted(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::LocalSort(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Limit(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Union(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Filter(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Project(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Unnest(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Scan(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::TableFunction(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::TableInOut(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Insert(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::CopyTo(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::CreateTable(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::CreateSchema(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::CreateView(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Drop(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Empty(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::BatchResizer(op) => op.poll_pull2(cx, partition_state, operator_state), } } } @@ -604,7 +751,7 @@ impl DatabaseProtoConv for PhysicalOperator { PhysicalOperator::Filter(PhysicalFilter::from_proto_ctx(op, context)?) } Value::Project(op) => { - PhysicalOperator::Project(PhysicalProject::from_proto_ctx(op, context)?) + PhysicalOperator::Project(PhysicalProject2::from_proto_ctx(op, context)?) } Value::Insert(op) => { PhysicalOperator::Insert(PhysicalInsert::from_proto_ctx(op, context)?) 
diff --git a/crates/rayexec_execution/src/execution/operators/nl_join.rs b/crates/rayexec_execution/src/execution/operators/nl_join.rs index aeb4af246..bdef053f8 100644 --- a/crates/rayexec_execution/src/execution/operators/nl_join.rs +++ b/crates/rayexec_execution/src/execution/operators/nl_join.rs @@ -6,18 +6,18 @@ use rayexec_error::Result; use super::util::outer_join_tracker::LeftOuterJoinTracker; use super::ComputedBatches; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; use crate::execution::operators::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalScalarExpression; @@ -30,7 +30,7 @@ pub struct NestedLoopJoinBuildPartitionState { /// All batches on the build side for a single partition. /// /// For hash joins, this would be a partition-local hash map. - batches: Vec, + batches: Vec, } /// Partition-local state on the probe side. @@ -47,7 +47,7 @@ pub struct NestedLoopJoinProbePartitionState { /// All batches from all partitions received on the build side. /// /// Store in the probe side local state to avoid needing to lock. - all_batches: Arc>, + all_batches: Arc>, /// Bool for determining if `all_batches` has been populated from the global /// operator state. @@ -125,7 +125,7 @@ enum SharedOperatorState { Building { /// Build sides partitions write their batches here once they're done /// building. - batches: Vec, + batches: Vec, /// Number of partitions we're still waiting to complete on the build /// side. @@ -143,7 +143,7 @@ enum SharedOperatorState { /// Build is complete, we're now in the probing phase. Probing { /// All batches from all partitions. - batches: Arc>, + batches: Arc>, /// Union of all bitmaps across all partitions. /// @@ -209,11 +209,11 @@ impl PhysicalNestedLoopJoin { } impl ExecutableOperator for PhysicalNestedLoopJoin { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { // TODO: Allow different number of partitions on left & right? let num_partitions = partitions[0]; @@ -231,28 +231,28 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::NestedLoopJoin( NestedLoopJoinOperatorState::new(num_partitions, num_partitions), )), - partition_states: InputOutputStates::NaryInputSingleOutput { + partition_states: InputOutputStates2::NaryInputSingleOutput { partition_states: vec![left_states, right_states], pull_states: 1, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { match partition_state { PartitionState::NestedLoopJoinBuild(state) => { state.batches.push(batch); - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } PartitionState::NestedLoopJoinProbe(state) => { // Check that the partition-local state has a reference to the @@ -273,7 +273,7 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { // ourselves for a later wakeup when the build is // complete. 
probe_side_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } SharedOperatorState::Probing { batches, @@ -298,7 +298,7 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { // it's empty. if !state.buffered.is_empty() { state.push_waker = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } // Do the join. @@ -318,7 +318,7 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { state.buffered = ComputedBatches::new(batches); if state.buffered.is_empty() { // Nothing produces, signal to push more. - return Ok(PollPush::NeedsMore); + return Ok(PollPush2::NeedsMore); } // We have stuff in the buffer, wake up the puller. @@ -326,18 +326,18 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::NestedLoopJoinBuild(state) => { let operator_state = match operator_state { @@ -365,7 +365,7 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { } // And we're done. - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } other => panic!("inner join state is not building: {other:?}"), } @@ -375,32 +375,32 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { if let Some(waker) = state.pull_waker.take() { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::NestedLoopJoinProbe(state) => { let computed = state.buffered.take(); if computed.has_batches() { - Ok(PollPull::Computed(computed)) + Ok(PollPull2::Computed(computed)) } else if state.input_finished { - Ok(PollPull::Exhausted) + Ok(PollPull2::Exhausted) } else { // We just gotta wait for more input. if let Some(waker) = state.push_waker.take() { waker.wake(); } state.pull_waker = Some(cx.waker().clone()); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } PartitionState::NestedLoopJoinBuild(_) => { @@ -421,12 +421,12 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { /// result. fn cross_join( left_batch_idx: usize, - left: &Batch, - right: &Batch, + left: &Batch2, + right: &Batch2, filter_expr: Option<&PhysicalScalarExpression>, mut left_outer_tracker: Option<&mut LeftOuterJoinTracker>, _right_join: bool, -) -> Result> { +) -> Result> { let mut batches = Vec::with_capacity(left.num_rows() * right.num_rows()); // For each row in the left batch, join the entirety of right. @@ -439,7 +439,7 @@ fn cross_join( // Columns from the right, all rows. let right_columns = right.clone().into_arrays(); - let mut output = Batch::try_new(left_columns.into_iter().chain(right_columns))?; + let mut output = Batch2::try_new(left_columns.into_iter().chain(right_columns))?; // If we have a filter, apply it to the output batch. 
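Aside (not part of the patch): cross_join repeats each left row once per right row and chains the repeated left columns with the right columns to build the output batch. The same idea over plain Vec columns, as a sketch only; Batch2 and Array2 are not used here:

// Joins a single left row against every row of the right side by repeating
// each left value right_rows times and appending the right columns as-is.
fn cross_join_row(left_row: &[i32], right_columns: &[Vec<i32>]) -> Vec<Vec<i32>> {
    let right_rows = right_columns.first().map(|c| c.len()).unwrap_or(0);

    // Left columns: each left value repeated for every right row.
    let mut output: Vec<Vec<i32>> = left_row
        .iter()
        .map(|&value| vec![value; right_rows])
        .collect();

    // Right columns: appended unchanged.
    output.extend(right_columns.iter().cloned());
    output
}

fn main() {
    // Left row (a=1, b=2) joined against a two-row right batch.
    let out = cross_join_row(&[1, 2], &[vec![10, 20], vec![30, 40]]);
    assert_eq!(
        out,
        vec![vec![1, 1], vec![2, 2], vec![10, 20], vec![30, 40]]
    );
}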
if let Some(filter_expr) = &filter_expr { diff --git a/crates/rayexec_execution/src/execution/operators/physical_filter.rs b/crates/rayexec_execution/src/execution/operators/physical_filter.rs new file mode 100644 index 000000000..b94f0b969 --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/physical_filter.rs @@ -0,0 +1,215 @@ +use std::task::Context; + +use rayexec_error::{OptionExt, Result}; + +use super::{ + ExecutableOperator, + ExecuteInOutState, + OperatorState, + PartitionAndOperatorStates, + PartitionState, + PollExecute, + PollFinalize, +}; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::datatype::DataType; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::database::DatabaseContext; +use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; +use crate::expr::physical::evaluator::ExpressionEvaluator; +use crate::expr::physical::PhysicalScalarExpression; + +#[derive(Debug)] +pub struct PhysicalFilter { + pub(crate) predicate: PhysicalScalarExpression, +} + +#[derive(Debug)] +pub struct FilterPartitionState { + evaluator: ExpressionEvaluator, + /// Boolean array for holding the output of the filter expression. + output: Array, + /// Selected indices buffer. + selection: Vec, +} + +impl ExecutableOperator for PhysicalFilter { + fn create_states( + &self, + _context: &DatabaseContext, + batch_size: usize, + partitions: usize, + ) -> Result { + let partition_states = (0..partitions) + .map(|_| { + Ok(PartitionState::Filter(FilterPartitionState { + evaluator: ExpressionEvaluator::try_new( + vec![self.predicate.clone()], + batch_size, + )?, + output: Array::new(&NopBufferManager, DataType::Boolean, batch_size)?, + selection: Vec::with_capacity(batch_size), + })) + }) + .collect::>>()?; + + Ok(PartitionAndOperatorStates::Branchless { + operator_state: OperatorState::None, + partition_states, + }) + } + + fn poll_execute( + &self, + _cx: &mut Context, + partition_state: &mut PartitionState, + _operator_state: &OperatorState, + inout: ExecuteInOutState, + ) -> Result { + let state = match partition_state { + PartitionState::Filter(state) => state, + other => panic!("invalid state: {other:?}"), + }; + + let input = inout.input.required("batch input")?; + let output = inout.output.required("batch output")?; + + state.output.reset_for_write(&NopBufferManager)?; + state + .evaluator + .eval_single_expression(input, input.selection(), &mut state.output)?; + + state.selection.clear(); + UnaryExecutor::select( + &state.output, + Selection::linear(input.num_rows()), + &mut state.selection, + )?; + + output.clone_from(&NopBufferManager, input)?; + + if state.selection.len() != output.num_rows() { + // Only add selection if we're actually omitting rows. 
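Aside (not part of the patch): the filter's poll_execute evaluates the predicate into a boolean array, collects the selected row indices, and only applies the selection to the output when some rows are actually dropped. The same flow reduced to plain Vecs; this is illustrative, not the crate's executor code:

// Keeps only the rows whose mask entry is true, skipping the gather when
// every row passes.
fn apply_filter(mask: &[bool], column: &mut Vec<i32>) {
    let selection: Vec<usize> = mask
        .iter()
        .enumerate()
        .filter_map(|(idx, &keep)| keep.then_some(idx))
        .collect();

    if selection.len() == column.len() {
        // Every row passed; no selection needed.
        return;
    }

    *column = selection.iter().map(|&idx| column[idx]).collect();
}

fn main() {
    let mut column = vec![8, 9, 7, 6];
    apply_filter(&[true, false, true, true], &mut column);
    assert_eq!(column, vec![8, 7, 6]);
}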
+ output.select(&NopBufferManager, &state.selection)?; + } + + Ok(PollExecute::Ready) + } + + fn poll_finalize( + &self, + _cx: &mut Context, + _partition_state: &mut PartitionState, + _operator_state: &OperatorState, + ) -> Result { + Ok(PollFinalize::Finalized) + } +} + +impl Explainable for PhysicalFilter { + fn explain_entry(&self, _conf: ExplainConfig) -> ExplainEntry { + ExplainEntry::new("Filter").with_value("predicate", &self.predicate) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::array::exp::Array; + use crate::arrays::batch_exp::Batch; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_batches_eq; + use crate::execution::operators::testutil::db_context::test_database_context; + use crate::execution::operators::testutil::wrapper::OperatorWrapper; + use crate::expr::physical::column_expr::PhysicalColumnExpr; + + #[test] + fn filter_simple() { + let operator = PhysicalFilter { + predicate: PhysicalScalarExpression::Column(PhysicalColumnExpr { + datatype: DataType::Boolean, + idx: 0, + }), + }; + + let states = operator + .create_states(&test_database_context(), 4, 1) + .unwrap(); + let (operator_state, mut partition_states) = states.branchless_into_states().unwrap(); + let wrapper = OperatorWrapper::new(operator); + + let mut out = Batch::try_from_arrays( + [ + Array::new(&NopBufferManager, DataType::Boolean, 4).unwrap(), + Array::new(&NopBufferManager, DataType::Int32, 4).unwrap(), + ], + false, + ) + .unwrap(); + + let mut in1 = Batch::try_from_arrays( + [ + Array::try_from_iter([true, false, true, true]).unwrap(), + Array::try_from_iter([8, 9, 7, 6]).unwrap(), + ], + true, + ) + .unwrap(); + + wrapper + .poll_execute( + &mut partition_states[0], + &operator_state, + ExecuteInOutState { + input: Some(&mut in1), + output: Some(&mut out), + }, + ) + .unwrap(); + + let expected1 = Batch::try_from_arrays( + [ + Array::try_from_iter([true, true, true]).unwrap(), + Array::try_from_iter([8, 7, 6]).unwrap(), + ], + true, + ) + .unwrap(); + assert_batches_eq(&expected1, &out); + + let mut in2 = Batch::try_from_arrays( + [ + Array::try_from_iter([true, false, false, false]).unwrap(), + Array::try_from_iter([4, 3, 2, 1]).unwrap(), + ], + true, + ) + .unwrap(); + + wrapper + .poll_execute( + &mut partition_states[0], + &operator_state, + ExecuteInOutState { + input: Some(&mut in2), + output: Some(&mut out), + }, + ) + .unwrap(); + + let expected2 = Batch::try_from_arrays( + [ + Array::try_from_iter([true]).unwrap(), + Array::try_from_iter([4]).unwrap(), + ], + true, + ) + .unwrap(); + assert_batches_eq(&expected2, &out); + } +} diff --git a/crates/rayexec_execution/src/execution/operators/physical_hash_join/mod.rs b/crates/rayexec_execution/src/execution/operators/physical_hash_join/mod.rs new file mode 100644 index 000000000..8f6f029d9 --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/physical_hash_join/mod.rs @@ -0,0 +1,129 @@ +use std::task::{Context, Waker}; + +use parking_lot::Mutex; + +use super::{ExecutableOperator, PartitionAndOperatorStates, PartitionState}; +use crate::arrays::datatype::DataType; +use crate::database::DatabaseContext; + +#[derive(Debug)] +pub enum HashJoinBuildPartitionState { + /// Partition is building. + Building(InProgressBuildState), + /// Partition finished building. 
+ Finished, +} + +#[derive(Debug)] +pub struct InProgressBuildState { + // build_data: HashedBlockCollection, +} + +#[derive(Debug)] +pub enum HashJoinProbePartitionState { + /// Partition waiting for build side to complete. + Waiting(usize), + /// Partition is probing. + Probing(ProbeState), + /// Left-join drain state. + Draining(DrainState), + /// Probing finished. + Finished, +} + +#[derive(Debug)] +pub struct ProbeState {} + +#[derive(Debug)] +pub struct DrainState {} + +#[derive(Debug)] +pub struct HashJoinOperatorState { + inner: Mutex, +} + +#[derive(Debug)] +struct HashJoinOperatorStateInner { + /// Wakers from the probe side that are waiting for the build side to + /// complete. + /// + /// Keyed by probe-side partition index. + build_waiting_probers: Vec>, +} + +#[derive(Debug)] +pub struct PhysicalHashJoin { + /// Data types from the left (build) side of the join. + left_types: Vec, + /// Data types from the right (probe) side of the join. + right_types: Vec, +} + +impl ExecutableOperator for PhysicalHashJoin { + fn create_states( + &self, + context: &DatabaseContext, + batch_size: usize, + partitions: usize, + ) -> Result { + unimplemented!() + } + + fn poll_execute( + &self, + cx: &mut Context, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + inout: ExecuteInOutState, + ) -> Result { + match partition_state { + PartitionState::HashJoinBuild2(state) => { + let state = match state { + HashJoinBuildPartitionState::Building(state) => state, + HashJoinBuildPartitionState::Finished => return Ok(PollExecute::Exhausted), // TODO: Probably should error instead. + }; + + let batch = inout.input.required("input batch required")?; + state + .build_data + .push_batch(&NopBufferManager, &self.left_types, batch)?; + + Ok(PollExecute::NeedsMore) + } + PartitionState::HashJoinProbe2(state) => { + match state { + HashJoinProbePartitionState::Waiting(probe_idx) => { + // Still waiting for build side to complete, just need + // to register a waker. 
+ + let mut operator_state = match operator_state { + OperatorState::HashJoin(state) => state.inner.lock(), + other => panic!("invalid operator state: {other:?}"), + }; + + operator_state.build_waiting_probers[*probe_idx] = Some(cx.waker().clone()); + + Ok(PollExecute::Pending) + } + _ => unimplemented!(), + } + } + other => panic!("invalid partition state: {other:?}"), + } + } + + fn poll_finalize( + &self, + cx: &mut Context, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + ) -> Result { + unimplemented!() + } +} + +impl Explainable for PhysicalHashJoin { + fn explain_entry(&self, conf: ExplainConfig) -> ExplainEntry { + unimplemented!() + } +} diff --git a/crates/rayexec_execution/src/execution/operators/physical_project.rs b/crates/rayexec_execution/src/execution/operators/physical_project.rs new file mode 100644 index 000000000..48caf8ed1 --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/physical_project.rs @@ -0,0 +1,191 @@ +use std::task::Context; + +use rayexec_error::{OptionExt, Result}; + +use super::{ + ExecutableOperator, + ExecuteInOutState, + OperatorState, + PartitionAndOperatorStates, + PartitionState, + PollExecute, + PollFinalize, +}; +use crate::database::DatabaseContext; +use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; +use crate::expr::physical::evaluator::ExpressionEvaluator; +use crate::expr::physical::PhysicalScalarExpression; + +#[derive(Debug)] +pub struct PhysicalProject { + pub(crate) projections: Vec, +} + +#[derive(Debug)] +pub struct ProjectPartitionState { + evaluator: ExpressionEvaluator, +} + +impl ExecutableOperator for PhysicalProject { + fn create_states( + &self, + _context: &DatabaseContext, + batch_size: usize, + partitions: usize, + ) -> Result { + let partition_states = (0..partitions) + .map(|_| { + Ok(PartitionState::Project(ProjectPartitionState { + evaluator: ExpressionEvaluator::try_new(self.projections.clone(), batch_size)?, + })) + }) + .collect::>>()?; + + Ok(PartitionAndOperatorStates::Branchless { + operator_state: OperatorState::None, + partition_states, + }) + } + + fn poll_execute( + &self, + _cx: &mut Context, + partition_state: &mut PartitionState, + _operator_state: &OperatorState, + inout: ExecuteInOutState, + ) -> Result { + let state = match partition_state { + PartitionState::Project(state) => state, + other => panic!("invalid state: {other:?}"), + }; + + let input = inout.input.required("batch input")?; + let output = inout.output.required("batch output")?; + + let sel = input.selection(); + state.evaluator.eval_batch(input, sel, output)?; + + Ok(PollExecute::Ready) + } + + fn poll_finalize( + &self, + _cx: &mut Context, + _partition_state: &mut PartitionState, + _operator_state: &OperatorState, + ) -> Result { + Ok(PollFinalize::Finalized) + } +} + +impl Explainable for PhysicalProject { + fn explain_entry(&self, conf: ExplainConfig) -> ExplainEntry { + unimplemented!() + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::array::exp::Array; + use crate::arrays::batch_exp::Batch; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_batches_eq; + use crate::execution::operators::testutil::db_context::test_database_context; + use crate::execution::operators::testutil::wrapper::OperatorWrapper; + use crate::expr::physical::column_expr::PhysicalColumnExpr; + use 
crate::expr::physical::literal_expr::PhysicalLiteralExpr; + + #[test] + fn project_simple() { + let projections = vec![ + PhysicalScalarExpression::Column(PhysicalColumnExpr { + datatype: DataType::Int32, + idx: 1, + }), + PhysicalScalarExpression::Literal(PhysicalLiteralExpr { + literal: "lit".into(), + }), + ]; + + let operator = PhysicalProject { projections }; + let states = operator + .create_states(&test_database_context(), 4, 1) + .unwrap(); + let (operator_state, mut partition_states) = states.branchless_into_states().unwrap(); + + let wrapper = OperatorWrapper::new(operator); + + let mut out = Batch::try_from_arrays( + [ + Array::new(&NopBufferManager, DataType::Int32, 4).unwrap(), + Array::new(&NopBufferManager, DataType::Utf8, 4).unwrap(), + ], + false, + ) + .unwrap(); + + let mut in1 = Batch::try_from_arrays( + [ + Array::try_from_iter([true, false, true, true]).unwrap(), + Array::try_from_iter([8, 9, 7, 6]).unwrap(), + ], + true, + ) + .unwrap(); + + wrapper + .poll_execute( + &mut partition_states[0], + &operator_state, + ExecuteInOutState { + input: Some(&mut in1), + output: Some(&mut out), + }, + ) + .unwrap(); + + let expected1 = Batch::try_from_arrays( + [ + Array::try_from_iter([8, 9, 7, 6]).unwrap(), + Array::try_from_iter(["lit", "lit", "lit", "lit"]).unwrap(), + ], + true, + ) + .unwrap(); + assert_batches_eq(&expected1, &out); + + let mut in2 = Batch::try_from_arrays( + [ + Array::try_from_iter([true, false, true, true]).unwrap(), + Array::try_from_iter([Some(4), Some(5), None, Some(7)]).unwrap(), + ], + true, + ) + .unwrap(); + + wrapper + .poll_execute( + &mut partition_states[0], + &operator_state, + ExecuteInOutState { + input: Some(&mut in2), + output: Some(&mut out), + }, + ) + .unwrap(); + + let expected2 = Batch::try_from_arrays( + [ + Array::try_from_iter([Some(4), Some(5), None, Some(7)]).unwrap(), + Array::try_from_iter(["lit", "lit", "lit", "lit"]).unwrap(), + ], + true, + ) + .unwrap(); + assert_batches_eq(&expected2, &out); + } +} diff --git a/crates/rayexec_execution/src/execution/operators/project.rs b/crates/rayexec_execution/src/execution/operators/project.rs index 6bd1bc271..49b4abbd8 100644 --- a/crates/rayexec_execution/src/execution/operators/project.rs +++ b/crates/rayexec_execution/src/execution/operators/project.rs @@ -1,13 +1,13 @@ use rayexec_error::Result; use super::simple::{SimpleOperator, StatelessOperation}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalScalarExpression; use crate::proto::DatabaseProtoConv; -pub type PhysicalProject = SimpleOperator; +pub type PhysicalProject2 = SimpleOperator; #[derive(Debug)] pub struct ProjectOperation { @@ -21,17 +21,17 @@ impl ProjectOperation { } impl StatelessOperation for ProjectOperation { - fn execute(&self, batch: Batch) -> Result { + fn execute(&self, batch: Batch2) -> Result { let arrs = self .exprs .iter() .map(|expr| { - let arr = expr.eval(&batch)?; + let arr = expr.eval2(&batch)?; Ok(arr.into_owned()) }) .collect::>>()?; - Batch::try_new(arrs) + Batch2::try_new(arrs) } } @@ -41,7 +41,7 @@ impl Explainable for ProjectOperation { } } -impl DatabaseProtoConv for PhysicalProject { +impl DatabaseProtoConv for PhysicalProject2 { type ProtoType = rayexec_proto::generated::execution::PhysicalProject; fn to_proto_ctx(&self, context: &DatabaseContext) -> Result { diff --git 
a/crates/rayexec_execution/src/execution/operators/round_robin.rs b/crates/rayexec_execution/src/execution/operators/round_robin.rs index a3fb83271..4e09bda61 100644 --- a/crates/rayexec_execution/src/execution/operators/round_robin.rs +++ b/crates/rayexec_execution/src/execution/operators/round_robin.rs @@ -7,15 +7,15 @@ use std::task::{Context, Waker}; use parking_lot::Mutex; use rayexec_error::{RayexecError, Result}; -use super::{ExecutionStates, InputOutputStates, PollFinalize}; -use crate::arrays::batch::Batch; +use super::{ExecutionStates2, InputOutputStates2, PollFinalize2}; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::{ ExecutableOperator, OperatorState, PartitionState, - PollPull, - PollPush, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -58,11 +58,11 @@ pub struct RoundRobinOperatorState { pub struct PhysicalRoundRobinRepartition; impl ExecutableOperator for PhysicalRoundRobinRepartition { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { if partitions.len() != 2 { return Err(RayexecError::new( "Round robin expects two values (input, output) in partition vec", @@ -114,22 +114,22 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { .map(|buffer| PartitionState::RoundRobinPull(RoundRobinPullPartitionState { buffer })) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::RoundRobin(operator_state)), - partition_states: InputOutputStates::SeparateInputOutput { + partition_states: InputOutputStates2::SeparateInputOutput { push_states, pull_states, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { let state = match partition_state { PartitionState::RoundRobinPush(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -146,7 +146,7 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { // wakeup when there's room. if output.batches.len() >= state.max_buffer_capacity { output.send_wakers[state.own_idx] = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } // Otherwise push our batch. @@ -160,15 +160,15 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { // call to `poll_push`.
state.push_to = (state.push_to + 1) % state.output_buffers.len(); - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let operator_state = match operator_state { OperatorState::RoundRobin(state) => state, other => panic!("invalid operator state: {other:?}"), @@ -199,15 +199,15 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { } } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::RoundRobinPull(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -218,11 +218,11 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { match inner.batches.pop_front() { Some(batch) => { inner.wake_n_senders(1); - Ok(PollPull::Computed(batch.into())) + Ok(PollPull2::Computed(batch.into())) } None => { if inner.exhausted { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } // Register ourselves for wakeup. inner.recv_waker = Some(cx.waker().clone()); @@ -230,7 +230,7 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { // Try to wake up any pushers to fill up the buffer. inner.wake_all_senders(); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } @@ -247,7 +247,7 @@ struct BatchBufferInner { /// Batches buffer. /// /// Should be bounded to some capacity. - batches: VecDeque, + batches: VecDeque, /// Waker on the receiving side of the buffer. recv_waker: Option, diff --git a/crates/rayexec_execution/src/execution/operators/scan.rs b/crates/rayexec_execution/src/execution/operators/scan.rs index b58989bc9..c963f565f 100644 --- a/crates/rayexec_execution/src/execution/operators/scan.rs +++ b/crates/rayexec_execution/src/execution/operators/scan.rs @@ -9,15 +9,15 @@ use rayexec_error::{RayexecError, Result}; use super::util::futures::make_static; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; use crate::database::catalog_entry::CatalogEntry; use crate::database::DatabaseContext; @@ -28,7 +28,7 @@ use crate::storage::table_storage::{DataTableScan, Projections}; pub struct ScanPartitionState { scan: Box, /// In progress pull we're working on. - future: Option>>>, + future: Option>>>, } impl fmt::Debug for ScanPartitionState { @@ -62,11 +62,11 @@ impl PhysicalScan { } impl ExecutableOperator for PhysicalScan { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { // TODO: Placeholder for now. Transaction info should probably go on the // operator. 
let _tx = CatalogTx::new(); @@ -86,63 +86,63 @@ impl ExecutableOperator for PhysicalScan { .map(|scan| PartitionState::Scan(ScanPartitionState { scan, future: None })) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { Err(RayexecError::new("Cannot push to physical scan")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical scan")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Scan(state) => { if let Some(future) = &mut state.future { match future.poll_unpin(cx) { Poll::Ready(Ok(Some(batch))) => { state.future = None; // Future complete, next pull with create a new one. - return Ok(PollPull::Computed(batch.into())); + return Ok(PollPull2::Computed(batch.into())); } - Poll::Ready(Ok(None)) => return Ok(PollPull::Exhausted), + Poll::Ready(Ok(None)) => return Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => return Err(e), - Poll::Pending => return Ok(PollPull::Pending), + Poll::Pending => return Ok(PollPull2::Pending), } } let mut future = state.scan.pull(); match future.poll_unpin(cx) { - Poll::Ready(Ok(Some(batch))) => Ok(PollPull::Computed(batch.into())), - Poll::Ready(Ok(None)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(Some(batch))) => Ok(PollPull2::Computed(batch.into())), + Poll::Ready(Ok(None)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), Poll::Pending => { // SAFETY: Scan lives on the partition state and // outlives this future. state.future = Some(unsafe { make_static(future) }); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/simple.rs b/crates/rayexec_execution/src/execution/operators/simple.rs index 1a34ba2b9..5bb08dc36 100644 --- a/crates/rayexec_execution/src/execution/operators/simple.rs +++ b/crates/rayexec_execution/src/execution/operators/simple.rs @@ -6,22 +6,22 @@ use rayexec_error::Result; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; #[derive(Debug)] pub struct SimplePartitionState { /// A batch that's waiting to be pulled. - buffered: Option, + buffered: Option, /// Waker on the pull side. /// @@ -58,7 +58,7 @@ impl SimplePartitionState { /// A stateless operation on a batch. 
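/// /// Implementations map each input batch to exactly one output batch and keep no state between calls.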
pub trait StatelessOperation: Sync + Send + Debug + Explainable { - fn execute(&self, batch: Batch) -> Result; + fn execute(&self, batch: Batch2) -> Result; } /// A simple operator is an operator that wraps a function that requires no @@ -77,14 +77,14 @@ impl SimpleOperator { } impl ExecutableOperator for SimpleOperator { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { - Ok(ExecutionStates { + ) -> Result { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: (0..partitions[0]) .map(|_| PartitionState::Simple(SimplePartitionState::new())) .collect(), @@ -92,13 +92,13 @@ impl ExecutableOperator for SimpleOperator { }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { let state = match partition_state { PartitionState::Simple(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -110,7 +110,7 @@ impl ExecutableOperator for SimpleOperator { if let Some(waker) = state.pull_waker.take() { waker.wake(); } - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } // Otherwise we're good to go. @@ -121,15 +121,15 @@ impl ExecutableOperator for SimpleOperator { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Simple(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -141,15 +141,15 @@ impl ExecutableOperator for SimpleOperator { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Simple(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -160,18 +160,18 @@ impl ExecutableOperator for SimpleOperator { if let Some(waker) = state.push_waker.take() { waker.wake(); } - Ok(PollPull::Computed(out.into())) + Ok(PollPull2::Computed(out.into())) } None => { if state.exhausted { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } state.pull_waker = Some(cx.waker().clone()); if let Some(waker) = state.push_waker.take() { waker.wake(); } - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/sink.rs b/crates/rayexec_execution/src/execution/operators/sink.rs index 098dcf968..be36d824b 100644 --- a/crates/rayexec_execution/src/execution/operators/sink.rs +++ b/crates/rayexec_execution/src/execution/operators/sink.rs @@ -10,16 +10,16 @@ use rayexec_error::{RayexecError, Result}; use super::util::futures::make_static; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use 
crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -64,7 +64,7 @@ pub trait PartitionSink: Debug + Send { /// Push a batch to the sink. /// /// Batches are pushed in the order they're received in. - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>>; + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>>; /// Finalize the sink. /// @@ -141,11 +141,11 @@ impl SinkOperator { } impl ExecutableOperator for SinkOperator { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let partitions = partitions[0]; let states: Vec<_> = self @@ -164,7 +164,7 @@ impl ExecutableOperator for SinkOperator { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::Sink(SinkOperatorState { inner: Mutex::new(SinkOperatorStateInner { global_row_count: 0, @@ -172,19 +172,19 @@ impl ExecutableOperator for SinkOperator { partitions_remaining: partitions, }), })), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { match partition_state { PartitionState::Sink(state) => match state { SinkPartitionState::Writing { inner, future } => { @@ -199,7 +199,7 @@ impl ExecutableOperator for SinkOperator { *future = None; } Poll::Ready(Err(e)) => return Err(e), - Poll::Pending => return Ok(PollPush::Pending(batch)), + Poll::Pending => return Ok(PollPush2::Pending(batch)), } } @@ -207,7 +207,7 @@ impl ExecutableOperator for SinkOperator { // necessary, but it makes me a feel a bit better than the // hacky stuff is localized to just here. if batch.num_rows() == 0 { - return Ok(PollPush::NeedsMore); + return Ok(PollPush2::NeedsMore); } let inner = inner.as_mut().unwrap(); @@ -217,7 +217,7 @@ impl ExecutableOperator for SinkOperator { match push_future.poll_unpin(cx) { Poll::Ready(Ok(_)) => { // Future completed, need more batches. - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } Poll::Ready(Err(e)) => Err(e), Poll::Pending => { @@ -233,7 +233,7 @@ impl ExecutableOperator for SinkOperator { // // I think we'll want to do a similar thing for inserts so that // we can implement them as "just" async functions. 
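// The pushed batch has already been moved into the stored future, so an empty batch stands in for the value handed back with Pending.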
- Ok(PollPush::Pending(Batch::empty())) + Ok(PollPush2::Pending(Batch2::empty())) } } } @@ -245,12 +245,12 @@ impl ExecutableOperator for SinkOperator { } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Sink(state) => match state { SinkPartitionState::Writing { inner, future } => { @@ -262,7 +262,7 @@ impl ExecutableOperator for SinkOperator { *future = None; } Poll::Ready(Err(e)) => return Err(e), - Poll::Pending => return Ok(PollFinalize::Pending), + Poll::Pending => return Ok(PollFinalize2::Pending), } } @@ -288,7 +288,7 @@ impl ExecutableOperator for SinkOperator { *state = SinkPartitionState::Finished; - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } Poll::Ready(Err(e)) => Err(e), Poll::Pending => { @@ -302,7 +302,7 @@ impl ExecutableOperator for SinkOperator { future, }; - Ok(PollFinalize::Pending) + Ok(PollFinalize2::Pending) } } } @@ -326,10 +326,10 @@ impl ExecutableOperator for SinkOperator { *state = SinkPartitionState::Finished; - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } Poll::Ready(Err(e)) => Err(e), - Poll::Pending => Ok(PollFinalize::Pending), + Poll::Pending => Ok(PollFinalize2::Pending), } } other => Err(RayexecError::new(format!( @@ -340,12 +340,12 @@ impl ExecutableOperator for SinkOperator { } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Sink(state) => match state { SinkPartitionState::Writing { inner, .. } @@ -353,7 +353,7 @@ impl ExecutableOperator for SinkOperator { if let Some(inner) = inner.as_mut() { inner.pull_waker = Some(cx.waker().clone()); } - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } SinkPartitionState::Finished => { let mut shared = match operator_state { @@ -362,7 +362,7 @@ impl ExecutableOperator for SinkOperator { }; if shared.global_row_count_returned { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } if shared.partitions_remaining == 0 { @@ -370,12 +370,12 @@ impl ExecutableOperator for SinkOperator { let row_count = shared.global_row_count as u64; - let row_count_batch = Batch::try_new([Array::from_iter([row_count])])?; + let row_count_batch = Batch2::try_new([Array2::from_iter([row_count])])?; - return Ok(PollPull::Computed(row_count_batch.into())); + return Ok(PollPull2::Computed(row_count_batch.into())); } - Ok(PollPull::Exhausted) + Ok(PollPull2::Exhausted) } }, other => panic!("invalid partition state: {other:?}"), diff --git a/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs b/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs index e1968400c..cf624a08a 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs @@ -7,18 +7,18 @@ use rayexec_error::Result; use super::util::merger::{KWayMerger, MergeResult}; use super::util::sort_keys::SortKeysExtractor; use super::util::sorted_batch::{PhysicallySortedBatch, SortedKeysIter}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::sort::util::merger::IterState; use crate::execution::operators::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, 
OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalSortExpression; @@ -192,11 +192,11 @@ impl PhysicalGatherSort { } impl ExecutableOperator for PhysicalGatherSort { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let input_partitions = partitions[0]; let operator_state = OperatorState::GatherSort(GatherSortOperatorState { @@ -230,22 +230,22 @@ impl ExecutableOperator for PhysicalGatherSort { }, )]; - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(operator_state), - partition_states: InputOutputStates::SeparateInputOutput { + partition_states: InputOutputStates2::SeparateInputOutput { push_states, pull_states, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { let state = match partition_state { PartitionState::GatherSortPush(state) => state, PartitionState::GatherSortPull(_) => { @@ -262,7 +262,7 @@ impl ExecutableOperator for PhysicalGatherSort { if shared.batches[state.partition_idx].is_some() { // Can't push, global state already has a batch for this partition. shared.push_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } let keys = state.extractor.sort_keys(&batch)?; @@ -283,15 +283,15 @@ impl ExecutableOperator for PhysicalGatherSort { // matter where the operator is in the pipeline. // // Changing this to NeedsMore wouldn't change behavior. - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::GatherSortPush(state) => state, PartitionState::GatherSortPull(_) => { @@ -314,15 +314,15 @@ impl ExecutableOperator for PhysicalGatherSort { } } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::GatherSortPull(state) => state, PartitionState::GatherSortPush(_) => { @@ -350,7 +350,7 @@ impl ExecutableOperator for PhysicalGatherSort { // Not finished initializing, still waiting on some input. // // `try_finish_initialize` registers a waker for us. - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } } } @@ -370,7 +370,7 @@ impl ExecutableOperator for PhysicalGatherSort { )?; if !input_pushed { // `try_push_input_batch_to_merger` registers a waker for us. - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } // Input no longer required, we've either pushed the batch @@ -386,7 +386,7 @@ impl ExecutableOperator for PhysicalGatherSort { loop { // TODO: Configurable batch size. match merger.try_merge(1024)? 
{ - MergeResult::Batch(batch) => return Ok(PollPull::Computed(batch.into())), + MergeResult::Batch(batch) => return Ok(PollPull2::Computed(batch.into())), MergeResult::NeedsInput(input_idx) => { let pushed = Self::try_push_input_batch_to_merger( cx, @@ -409,10 +409,10 @@ impl ExecutableOperator for PhysicalGatherSort { // call to `poll_pull` ensures that we // get that input. *input_required = Some(input_idx); - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } } - MergeResult::Exhausted => return Ok(PollPull::Exhausted), + MergeResult::Exhausted => return Ok(PollPull2::Exhausted), } } } @@ -613,7 +613,8 @@ mod tests { use std::sync::Arc; use super::*; - use crate::execution::operators::test_util::{ + use crate::arrays::datatype::DataType; + use crate::execution::operators::testutil::{ make_i32_batch, unwrap_poll_pull_batch, TestWakerContext, @@ -629,7 +630,10 @@ mod tests { ]; let operator = Arc::new(PhysicalGatherSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: true, nulls_first: true, }])); @@ -649,7 +653,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); // Push our first batch. let push_cx = TestWakerContext::new(); @@ -661,7 +665,7 @@ mod tests { p0_inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); // Kind of an implementation detail, but the puller is waiting on // partition 0 to push. Multiple partitions would trigger this wakeup @@ -680,17 +684,17 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); let poll_push = push_cx .poll_push(&operator, &mut push_states[0], &operator_state, p1_input) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); } // Partition input is finished. operator - .poll_finalize_push(&mut push_cx.context(), &mut push_states[0], &operator_state) + .poll_finalize_push2(&mut push_cx.context(), &mut push_states[0], &operator_state) .unwrap(); // Now we can pull the sorted result. @@ -704,7 +708,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Exhausted, poll_pull); + assert_eq!(PollPull2::Exhausted, poll_pull); } #[test] @@ -721,7 +725,10 @@ mod tests { ]; let operator = Arc::new(PhysicalGatherSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: true, nulls_first: true, }])); @@ -741,7 +748,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); // Push batch for partition 0. let p0_push_cx = TestWakerContext::new(); @@ -753,7 +760,7 @@ mod tests { p0_inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); // Triggers pull wake up. 
assert_eq!(1, pull_cx.wake_count()); @@ -762,7 +769,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); // Push batch for partition 1. let p1_push_cx = TestWakerContext::new(); @@ -774,7 +781,7 @@ mod tests { p1_inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); // Also triggers wake up. assert_eq!(1, pull_cx.wake_count()); @@ -783,7 +790,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); // Push the rest of the batches. // @@ -793,29 +800,29 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); let poll_push = p0_push_cx .poll_push(&operator, &mut push_states[0], &operator_state, p0_input) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); let poll_push = p1_push_cx .poll_push(&operator, &mut push_states[1], &operator_state, p1_input) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); } // Partition inputs is finished. operator - .poll_finalize_push( + .poll_finalize_push2( &mut p0_push_cx.context(), &mut push_states[0], &operator_state, ) .unwrap(); operator - .poll_finalize_push( + .poll_finalize_push2( &mut p1_push_cx.context(), &mut push_states[1], &operator_state, @@ -833,6 +840,6 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Exhausted, poll_pull); + assert_eq!(PollPull2::Exhausted, poll_pull); } } diff --git a/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs b/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs index 3428ec7ec..76ae8d739 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs @@ -6,18 +6,18 @@ use rayexec_error::Result; use super::util::merger::{IterState, KWayMerger, MergeResult}; use super::util::sort_keys::SortKeysExtractor; use super::util::sorted_batch::{IndexSortedBatch, SortedIndicesIter}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::util::resizer::DEFAULT_TARGET_BATCH_SIZE; use crate::execution::operators::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalSortExpression; @@ -63,11 +63,11 @@ impl PhysicalScatterSort { } impl ExecutableOperator for PhysicalScatterSort { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let partitions = partitions[0]; let extractor = SortKeysExtractor::new(&self.exprs); @@ -83,21 +83,21 @@ impl ExecutableOperator for PhysicalScatterSort { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: 
InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { let state = match partition_state { PartitionState::ScatterSort(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -107,7 +107,7 @@ impl ExecutableOperator for PhysicalScatterSort { ScatterSortPartitionState::Consuming(state) => { self.insert_batch_for_comparison(state, batch)?; - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } ScatterSortPartitionState::Producing { .. } => { panic!("attempted to push to partition that's already producing data") @@ -115,12 +115,12 @@ } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::ScatterSort(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -154,7 +154,7 @@ // Update partition state to "producing" using the merger. *state = ScatterSortPartitionState::Producing(ProducingPartitionState { merger }); - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } ScatterSortPartitionState::Producing { .. } => { panic!("attempted to finalize partition that's already producing data") @@ -162,12 +162,12 @@ } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let mut state = match partition_state { PartitionState::ScatterSort(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -177,17 +177,17 @@ ScatterSortPartitionState::Consuming(state) => { // Partition still collecting data to sort. state.pull_waker = Some(cx.waker().clone()); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } ScatterSortPartitionState::Producing(state) => { loop { // TODO: Configurable batch size. match state.merger.try_merge(DEFAULT_TARGET_BATCH_SIZE)? 
{ MergeResult::Batch(batch) => { - return Ok(PollPull::Computed(batch.into())); + return Ok(PollPull2::Computed(batch.into())); } MergeResult::Exhausted => { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } MergeResult::NeedsInput(idx) => { // We're merging all batch in this partition, and @@ -207,7 +207,7 @@ impl PhysicalScatterSort { fn insert_batch_for_comparison( &self, state: &mut ConsumingPartitionState, - batch: Batch, + batch: Batch2, ) -> Result<()> { let keys = state.extractor.sort_keys(&batch)?; @@ -263,9 +263,10 @@ mod tests { use std::sync::Arc; use super::*; - use crate::execution::operators::test_util::{ + use crate::arrays::datatype::DataType; + use crate::execution::operators::testutil::db_context::test_database_context; + use crate::execution::operators::testutil::{ make_i32_batch, - test_database_context, unwrap_poll_pull_batch, TestWakerContext, }; @@ -273,10 +274,10 @@ mod tests { fn create_states(operator: &PhysicalScatterSort, partitions: usize) -> Vec { let context = test_database_context(); - let states = operator.create_states(&context, vec![partitions]).unwrap(); + let states = operator.create_states2(&context, vec![partitions]).unwrap(); match states.partition_states { - InputOutputStates::OneToOne { partition_states } => partition_states, + InputOutputStates2::OneToOne { partition_states } => partition_states, other => panic!("unexpected states: {other:?}"), } } @@ -290,7 +291,10 @@ mod tests { ]; let operator = Arc::new(PhysicalScatterSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: true, nulls_first: true, }])); @@ -303,10 +307,10 @@ mod tests { let poll_push = push_cx .poll_push(&operator, &mut partition_states[0], &operator_state, input) .unwrap(); - assert_eq!(PollPush::NeedsMore, poll_push); + assert_eq!(PollPush2::NeedsMore, poll_push); } operator - .poll_finalize_push( + .poll_finalize_push2( &mut push_cx.context(), &mut partition_states[0], &operator_state, @@ -332,7 +336,10 @@ mod tests { ]; let operator = Arc::new(PhysicalScatterSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: false, nulls_first: true, }])); @@ -345,10 +352,10 @@ mod tests { let poll_push = push_cx .poll_push(&operator, &mut partition_states[0], &operator_state, input) .unwrap(); - assert_eq!(PollPush::NeedsMore, poll_push); + assert_eq!(PollPush2::NeedsMore, poll_push); } operator - .poll_finalize_push( + .poll_finalize_push2( &mut push_cx.context(), &mut partition_states[0], &operator_state, @@ -378,7 +385,10 @@ mod tests { ]; let operator = Arc::new(PhysicalScatterSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: true, nulls_first: true, }])); @@ -391,10 +401,10 @@ mod tests { let poll_push = push_cx .poll_push(&operator, &mut partition_states[0], &operator_state, input) .unwrap(); - assert_eq!(PollPush::NeedsMore, poll_push); + assert_eq!(PollPush2::NeedsMore, poll_push); } operator - .poll_finalize_push( + .poll_finalize_push2( &mut push_cx.context(), &mut partition_states[0], &operator_state, @@ -431,7 +441,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut partition_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Exhausted, poll_pull); + assert_eq!(PollPull2::Exhausted, poll_pull); } #[test] 
@@ -443,7 +453,10 @@ mod tests { ]; let operator = Arc::new(PhysicalScatterSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: true, nulls_first: true, }])); @@ -456,10 +469,10 @@ mod tests { let poll_push = push_cx .poll_push(&operator, &mut partition_states[0], &operator_state, input) .unwrap(); - assert_eq!(PollPush::NeedsMore, poll_push); + assert_eq!(PollPush2::NeedsMore, poll_push); } operator - .poll_finalize_push( + .poll_finalize_push2( &mut push_cx.context(), &mut partition_states[0], &operator_state, @@ -491,6 +504,6 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut partition_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Exhausted, poll_pull); + assert_eq!(PollPull2::Exhausted, poll_pull); } } diff --git a/crates/rayexec_execution/src/execution/operators/sort/top_k.rs b/crates/rayexec_execution/src/execution/operators/sort/top_k.rs index 69e05dab8..21b7f18bd 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/top_k.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/top_k.rs @@ -2,16 +2,16 @@ use std::task::Context; use rayexec_error::Result; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::{ ExecutableOperator, - ExecutionStates, + ExecutionStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -25,39 +25,39 @@ pub struct TopKOperatorState {} pub struct PhysicalTopK {} impl ExecutableOperator for PhysicalTopK { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, _partitions: Vec, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { unimplemented!() } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_pull( + fn poll_pull2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } } diff --git a/crates/rayexec_execution/src/execution/operators/sort/util/accumulator.rs b/crates/rayexec_execution/src/execution/operators/sort/util/accumulator.rs index d28aa3917..a467a6e5e 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/util/accumulator.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/util/accumulator.rs @@ -1,6 +1,6 @@ use rayexec_error::{RayexecError, Result}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::executor::scalar::interleave; /// Tracks the state per input into the merge. @@ -15,7 +15,7 @@ struct InputState { #[derive(Debug)] pub struct IndicesAccumulator { /// Batches we're using for the build. - batches: Vec<(usize, Batch)>, + batches: Vec<(usize, Batch2)>, /// States for each input we're reading from. states: Vec, /// Interleave indices referencing the stored batches. 
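As an aside, the interleave-index idea used by this accumulator can be sketched with plain vectors standing in for the crate's array and batch types; `merge_by_indices` below is a hypothetical helper for illustration only, not part of this patch.

fn merge_by_indices(batches: &[Vec<i32>], indices: &[(usize, usize)]) -> Vec<i32> {
    // Each (batch_idx, row_idx) pair selects one stored row, in output order.
    // The real accumulator does the same per column via an interleave kernel.
    indices
        .iter()
        .map(|&(batch_idx, row_idx)| batches[batch_idx][row_idx])
        .collect()
}

fn main() {
    // Two already-sorted inputs, emitted in the order a k-way merge would pick them.
    let batches = vec![vec![1, 3, 5], vec![2, 4, 6]];
    let indices = [(0, 0), (1, 0), (0, 1), (1, 1), (0, 2), (1, 2)];
    assert_eq!(merge_by_indices(&batches, &indices), vec![1, 2, 3, 4, 5, 6]);
}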
@@ -37,7 +37,7 @@ impl IndicesAccumulator { /// /// The inputs's state will be updated to point to the beginning of this /// batch (making any previous batches pushed for this input unreachable). - pub fn push_input_batch(&mut self, input: usize, batch: Batch) { + pub fn push_input_batch(&mut self, input: usize, batch: Batch2) { let idx = self.batches.len(); self.batches.push((input, batch)); self.states[input] = InputState { batch_idx: idx }; @@ -57,7 +57,7 @@ impl IndicesAccumulator { /// Build a batch from the accumulated interleave indices. /// /// Internally drops batches that will no longer be part of the output. - pub fn build(&mut self) -> Result> { + pub fn build(&mut self) -> Result> { if self.indices.is_empty() { return Ok(None); } @@ -78,7 +78,7 @@ impl IndicesAccumulator { .collect::>>()?; self.indices.clear(); - let batch = Batch::try_new(merged)?; + let batch = Batch2::try_new(merged)?; // Drops batches that are no longer reachable (won't be contributing to // the output). diff --git a/crates/rayexec_execution/src/execution/operators/sort/util/merger.rs b/crates/rayexec_execution/src/execution/operators/sort/util/merger.rs index b327100a5..dff52f606 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/util/merger.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/util/merger.rs @@ -5,14 +5,14 @@ use rayexec_error::{RayexecError, Result}; use super::accumulator::IndicesAccumulator; use super::sorted_batch::RowReference; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; #[derive(Debug)] pub enum MergeResult { /// We have a merged batch. /// /// Nothing else needed before the next call to `try_merge`. - Batch(Batch), + Batch(Batch2), /// Need to push a new batch for the input at the given index. /// @@ -73,7 +73,7 @@ where /// The initial heap will be created from the first element of each /// iterator. If an input is never expected to produce references, its iter /// state should be Finished and the batch should be None. - pub fn try_new(inputs: Vec<(Option, IterState)>) -> Result { + pub fn try_new(inputs: Vec<(Option, IterState)>) -> Result { let mut heap = BinaryHeap::new(); let mut iters = Vec::with_capacity(inputs.len()); let mut acc = IndicesAccumulator::new(inputs.len()); @@ -128,7 +128,7 @@ where } /// Push a batch and iterator for an input. - pub fn push_batch_for_input(&mut self, input: usize, batch: Batch, mut iter: I) -> Result<()> { + pub fn push_batch_for_input(&mut self, input: usize, batch: Batch2, mut iter: I) -> Result<()> { assert!(self.needs_input); self.needs_input = false; diff --git a/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs b/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs index b89ff7ad3..66bf2c419 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs @@ -1,7 +1,7 @@ use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::arrays::row::encoding::{ComparableColumn, ComparableRowEncoder, ComparableRows}; use crate::expr::physical::PhysicalSortExpression; @@ -31,14 +31,14 @@ impl SortKeysExtractor { } /// Get the sort keys for the batch as rows. 
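/// /// The keys come back as comparable rows, so the merge logic can order rows without re-evaluating the sort expressions.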
- pub fn sort_keys(&self, batch: &Batch) -> Result { + pub fn sort_keys(&self, batch: &Batch2) -> Result { let cols = self.sort_columns(batch)?; let rows = self.encoder.encode(&cols)?; Ok(rows) } /// Get the columns that make up the sort keys. - pub fn sort_columns<'a>(&self, batch: &'a Batch) -> Result> { + pub fn sort_columns<'a>(&self, batch: &'a Batch2) -> Result> { let sort_cols = self .order_by .iter() diff --git a/crates/rayexec_execution/src/execution/operators/sort/util/sorted_batch.rs b/crates/rayexec_execution/src/execution/operators/sort/util/sorted_batch.rs index 6853283a7..95bf8b395 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/util/sorted_batch.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/util/sorted_batch.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use std::fmt; use std::sync::Arc; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::row::encoding::{ComparableRow, ComparableRows}; /// A batch that's been physically sorted. @@ -12,14 +12,14 @@ use crate::arrays::row::encoding::{ComparableRow, ComparableRows}; #[derive(Debug)] pub struct PhysicallySortedBatch { /// The sorted batch. - pub batch: Batch, + pub batch: Batch2, /// The sorted keys. pub keys: ComparableRows, } impl PhysicallySortedBatch { - pub fn into_batch_and_iter(self) -> (Batch, SortedKeysIter) { + pub fn into_batch_and_iter(self) -> (Batch2, SortedKeysIter) { let iter = SortedKeysIter { row_idx: 0, keys: Arc::new(self.keys), @@ -71,11 +71,11 @@ pub struct IndexSortedBatch { /// Unsorted keys for the batch. pub keys: ComparableRows, /// The original unsorted batch. - pub batch: Batch, + pub batch: Batch2, } impl IndexSortedBatch { - pub fn into_batch_and_iter(self) -> (Batch, SortedIndicesIter) { + pub fn into_batch_and_iter(self) -> (Batch2, SortedIndicesIter) { let iter = SortedIndicesIter { indices: self.sort_indices, idx: 0, diff --git a/crates/rayexec_execution/src/execution/operators/source.rs b/crates/rayexec_execution/src/execution/operators/source.rs index 5328111bd..d40dce9a7 100644 --- a/crates/rayexec_execution/src/execution/operators/source.rs +++ b/crates/rayexec_execution/src/execution/operators/source.rs @@ -10,15 +10,15 @@ use rayexec_error::{RayexecError, Result}; use super::util::futures::make_static; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -55,13 +55,13 @@ pub trait PartitionSource: Debug + Send { /// Pull the next batch from the source. /// /// Returns None when there are no batches remaining in the source. - fn pull(&mut self) -> BoxFuture<'_, Result>>; + fn pull(&mut self) -> BoxFuture<'_, Result>>; } pub struct SourcePartitionState { source: Box, /// In progress pull we're working on. 
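/// /// Stored when a pull returns pending so the same future can be polled again on the next call.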
- future: Option>>>, + future: Option>>>, } impl fmt::Debug for SourcePartitionState { @@ -85,11 +85,11 @@ impl SourceOperator { } impl ExecutableOperator for SourceOperator { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let states = self .source .create_partition_sources(partitions[0]) @@ -102,63 +102,63 @@ impl ExecutableOperator for SourceOperator { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { Err(RayexecError::new("Cannot push to physical scan")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical scan")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Source(state) => { if let Some(future) = &mut state.future { match future.poll_unpin(cx) { Poll::Ready(Ok(Some(batch))) => { state.future = None; // Future complete, next pull with create a new one. - return Ok(PollPull::Computed(batch.into())); + return Ok(PollPull2::Computed(batch.into())); } - Poll::Ready(Ok(None)) => return Ok(PollPull::Exhausted), + Poll::Ready(Ok(None)) => return Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => return Err(e), - Poll::Pending => return Ok(PollPull::Pending), + Poll::Pending => return Ok(PollPull2::Pending), } } let mut future = state.source.pull(); match future.poll_unpin(cx) { - Poll::Ready(Ok(Some(batch))) => Ok(PollPull::Computed(batch.into())), - Poll::Ready(Ok(None)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(Some(batch))) => Ok(PollPull2::Computed(batch.into())), + Poll::Ready(Ok(None)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), Poll::Pending => { // SAFETY: Source lives on the partition state and // outlives this future. 
state.future = Some(unsafe { make_static(future) }); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/table_function.rs b/crates/rayexec_execution/src/execution/operators/table_function.rs index 17f4a3eb1..93668870c 100644 --- a/crates/rayexec_execution/src/execution/operators/table_function.rs +++ b/crates/rayexec_execution/src/execution/operators/table_function.rs @@ -9,15 +9,15 @@ use rayexec_error::{RayexecError, Result}; use super::util::futures::make_static; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::functions::table::{PlannedTableFunction, TableFunctionImpl}; @@ -27,7 +27,7 @@ use crate::storage::table_storage::{DataTableScan, Projections}; pub struct TableFunctionPartitionState { scan_state: Box, /// In progress pull we're working on. - future: Option>>>, + future: Option>>>, } impl fmt::Debug for TableFunctionPartitionState { @@ -53,11 +53,11 @@ impl PhysicalTableFunction { } impl ExecutableOperator for PhysicalTableFunction { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let scan_func = match &self.function.function_impl { TableFunctionImpl::Scan(scan) => scan, _ => { @@ -81,64 +81,64 @@ impl ExecutableOperator for PhysicalTableFunction { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { // Could UNNEST be implemented as a table function? Err(RayexecError::new("Cannot push to physical table function")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical table function")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::TableFunction(state) => { if let Some(future) = &mut state.future { match future.poll_unpin(cx) { Poll::Ready(Ok(Some(batch))) => { state.future = None; // Future complete, next pull with create a new one. 
- return Ok(PollPull::Computed(batch.into())); + return Ok(PollPull2::Computed(batch.into())); } - Poll::Ready(Ok(None)) => return Ok(PollPull::Exhausted), + Poll::Ready(Ok(None)) => return Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => return Err(e), - Poll::Pending => return Ok(PollPull::Pending), + Poll::Pending => return Ok(PollPull2::Pending), } } let mut future = state.scan_state.pull(); match future.poll_unpin(cx) { - Poll::Ready(Ok(Some(batch))) => Ok(PollPull::Computed(batch.into())), - Poll::Ready(Ok(None)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(Some(batch))) => Ok(PollPull2::Computed(batch.into())), + Poll::Ready(Ok(None)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), Poll::Pending => { // SAFETY: Scan lives on the partition state and // outlives this future. state.future = Some(unsafe { make_static(future) }); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/table_inout.rs b/crates/rayexec_execution/src/execution/operators/table_inout.rs index e047eb941..fb513988a 100644 --- a/crates/rayexec_execution/src/execution/operators/table_inout.rs +++ b/crates/rayexec_execution/src/execution/operators/table_inout.rs @@ -5,16 +5,16 @@ use rayexec_error::{RayexecError, Result}; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -25,7 +25,7 @@ use crate::functions::table::{inout, PlannedTableFunction, TableFunctionImpl}; pub struct TableInOutPartitionState { function_state: Box, /// Additional outputs that will be included on the output batch. - additional_outputs: Vec, + additional_outputs: Vec, } #[derive(Debug)] @@ -39,11 +39,11 @@ pub struct PhysicalTableInOut { } impl ExecutableOperator for PhysicalTableInOut { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let partitions = partitions[0]; let states = match &self.function.function_impl { @@ -66,21 +66,21 @@ impl ExecutableOperator for PhysicalTableInOut { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { let state = match partition_state { PartitionState::TableInOut(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -93,12 +93,12 @@ impl ExecutableOperator for PhysicalTableInOut { .function_inputs .iter() .map(|expr| { - let arr = expr.eval(&batch)?; + let arr = expr.eval2(&batch)?; Ok(arr.into_owned()) }) .collect::>>()?; - let inputs = Batch::try_new(inputs)?; + let inputs = Batch2::try_new(inputs)?; // Try to push first to avoid overwriting any buffered additional // outputs. 
@@ -108,7 +108,7 @@ impl ExecutableOperator for PhysicalTableInOut { // TODO: Remove needing to do this, the clones should be cheap, but the // expression execution is wasteful. match state.function_state.poll_push(cx, inputs)? { - PollPush::Pending(_) => Ok(PollPush::Pending(orig)), + PollPush2::Pending(_) => Ok(PollPush2::Pending(orig)), other => { // Batch was pushed to the function state, compute additional // outputs. @@ -116,7 +116,7 @@ impl ExecutableOperator for PhysicalTableInOut { .projected_outputs .iter() .map(|expr| { - let arr = expr.eval(&batch)?; + let arr = expr.eval2(&batch)?; Ok(arr.into_owned()) }) .collect::>>()?; @@ -128,12 +128,12 @@ impl ExecutableOperator for PhysicalTableInOut { } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::TableInOut(state) => state, other => panic!("invalid state: {other:?}"), @@ -142,12 +142,12 @@ impl ExecutableOperator for PhysicalTableInOut { state.function_state.poll_finalize_push(cx) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::TableInOut(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -175,12 +175,12 @@ impl ExecutableOperator for PhysicalTableInOut { arrays.push(additional); } - let new_batch = Batch::try_new(arrays)?; + let new_batch = Batch2::try_new(arrays)?; - Ok(PollPull::Computed(new_batch.into())) + Ok(PollPull2::Computed(new_batch.into())) } - inout::InOutPollPull::Pending => Ok(PollPull::Pending), - inout::InOutPollPull::Exhausted => Ok(PollPull::Exhausted), + inout::InOutPollPull::Pending => Ok(PollPull2::Pending), + inout::InOutPollPull::Exhausted => Ok(PollPull2::Exhausted), } } } diff --git a/crates/rayexec_execution/src/execution/operators/testutil/db_context.rs b/crates/rayexec_execution/src/execution/operators/testutil/db_context.rs new file mode 100644 index 000000000..0f6cf425c --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/testutil/db_context.rs @@ -0,0 +1,12 @@ +use std::sync::Arc; + +use crate::database::system::new_system_catalog; +use crate::database::DatabaseContext; +use crate::datasource::DataSourceRegistry; + +pub fn test_database_context() -> DatabaseContext { + DatabaseContext::new(Arc::new( + new_system_catalog(&DataSourceRegistry::default()).unwrap(), + )) + .unwrap() +} diff --git a/crates/rayexec_execution/src/execution/operators/test_util.rs b/crates/rayexec_execution/src/execution/operators/testutil/mod.rs similarity index 77% rename from crates/rayexec_execution/src/execution/operators/test_util.rs rename to crates/rayexec_execution/src/execution/operators/testutil/mod.rs index 65ef3bdde..29c6757bc 100644 --- a/crates/rayexec_execution/src/execution/operators/test_util.rs +++ b/crates/rayexec_execution/src/execution/operators/testutil/mod.rs @@ -1,4 +1,7 @@ //! Utilities for testing operator implementations. 
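// Illustrative sketch (not part of the patch): how a test might use the new
// `test_database_context` helper split out into testutil/db_context.rs above.
// The module path and test name are assumptions for the example only.
#[cfg(test)]
mod db_context_usage_example {
    use crate::execution::operators::testutil::db_context::test_database_context;

    #[test]
    fn builds_default_context() {
        // Constructing the context exercises system catalog creation against
        // the default (empty) datasource registry.
        let _ctx = test_database_context();
    }
}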
+pub mod db_context; +pub mod wrapper; + use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::task::{Context, Wake, Waker}; @@ -10,23 +13,16 @@ use super::{ ExecutableOperator, OperatorState, PartitionState, - PollPull, - PollPush, + PollPull2, + PollPush2, }; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::arrays::scalar::ScalarValue; use crate::database::system::new_system_catalog; use crate::database::DatabaseContext; use crate::datasource::DataSourceRegistry; -pub fn test_database_context() -> DatabaseContext { - DatabaseContext::new(Arc::new( - new_system_catalog(&DataSourceRegistry::default()).unwrap(), - )) - .unwrap() -} - /// Test context containg a waker implementation that counts the number of times /// it's woken. /// @@ -72,9 +68,9 @@ impl TestWakerContext { operator: impl AsRef, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: impl Into, - ) -> Result { - operator.as_ref().poll_push( + batch: impl Into, + ) -> Result { + operator.as_ref().poll_push2( &mut self.context(), partition_state, operator_state, @@ -87,10 +83,10 @@ impl TestWakerContext { operator: impl AsRef, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { operator .as_ref() - .poll_pull(&mut self.context(), partition_state, operator_state) + .poll_pull2(&mut self.context(), partition_state, operator_state) } } @@ -101,18 +97,18 @@ impl Wake for TestWakerInner { } /// Unwraps a batch from the PollPull::Batch variant. -pub fn unwrap_poll_pull_batch(poll: PollPull) -> Batch { +pub fn unwrap_poll_pull_batch(poll: PollPull2) -> Batch2 { match poll { - PollPull::Computed(ComputedBatches::Single(batch)) => batch, + PollPull2::Computed(ComputedBatches::Single(batch)) => batch, other => panic!("unexpected poll pull: {other:?}"), } } -pub fn logical_value(batch: &Batch, column: usize, row: usize) -> ScalarValue { +pub fn logical_value(batch: &Batch2, column: usize, row: usize) -> ScalarValue { batch.column(column).unwrap().logical_value(row).unwrap() } /// Makes a batch with a single column i32 values provided by the iterator. -pub fn make_i32_batch(iter: impl IntoIterator) -> Batch { - Batch::try_new(vec![Array::from_iter(iter.into_iter())]).unwrap() +pub fn make_i32_batch(iter: impl IntoIterator) -> Batch2 { + Batch2::try_new(vec![Array2::from_iter(iter.into_iter())]).unwrap() } diff --git a/crates/rayexec_execution/src/execution/operators/testutil/wrapper.rs b/crates/rayexec_execution/src/execution/operators/testutil/wrapper.rs new file mode 100644 index 000000000..e83dff465 --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/testutil/wrapper.rs @@ -0,0 +1,74 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::task::{Context, Wake, Waker}; + +use rayexec_error::Result; + +use crate::execution::operators::{ + ExecutableOperator, + ExecuteInOutState, + OperatorState, + PartitionState, + PollExecute, + PollFinalize, +}; + +#[derive(Debug, Default)] +pub struct CountingWaker { + count: AtomicUsize, +} + +impl CountingWaker { + pub fn wake_count(&self) -> usize { + self.count.load(Ordering::SeqCst) + } +} + +impl Wake for CountingWaker { + fn wake(self: Arc) { + self.count.fetch_add(1, Ordering::SeqCst); + } +} + +/// Wrapper around an operator that uses a stub waker that tracks how many times +/// it's woken. 
+#[derive(Debug)] +pub struct OperatorWrapper { + pub waker: Arc, + pub operator: O, +} + +impl OperatorWrapper +where + O: ExecutableOperator, +{ + pub fn new(operator: O) -> Self { + OperatorWrapper { + waker: Arc::new(CountingWaker::default()), + operator, + } + } + + pub fn poll_execute( + &self, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + inout: ExecuteInOutState, + ) -> Result { + let waker = Waker::from(self.waker.clone()); + let mut cx = Context::from_waker(&waker); + self.operator + .poll_execute(&mut cx, partition_state, operator_state, inout) + } + + pub fn poll_finalize( + &self, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + ) -> Result { + let waker = Waker::from(self.waker.clone()); + let mut cx = Context::from_waker(&waker); + self.operator + .poll_finalize(&mut cx, partition_state, operator_state) + } +} diff --git a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs index 34efcad03..a286a39bc 100644 --- a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs +++ b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs @@ -9,20 +9,20 @@ use super::hash_aggregate::distinct::DistinctGroupedStates; use super::hash_aggregate::hash_table::GroupAddress; use super::{ ExecutableOperator, - ExecutionStates, + ExecutionStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::array::selection::Selection; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; -use crate::execution::operators::InputOutputStates; +use crate::execution::operators::InputOutputStates2; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalAggregateExpression; use crate::functions::aggregate::states::AggregateGroupStates; -use crate::functions::aggregate::ChunkGroupAddressIter; use crate::proto::DatabaseProtoConv; #[derive(Debug)] @@ -46,7 +46,7 @@ pub enum UngroupedAggregatePartitionState { /// /// Currently only one partition will actually produce output. The rest /// will be empty. 
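// Illustrative sketch (not part of the patch): driving an operator through the
// new OperatorWrapper test utility above. Constructing the operator, its
// partition/operator states, and the ExecuteInOutState is left to the caller
// since those are operator-specific; the Result<PollExecute> return type is
// inferred from the wrapper's imports.
use rayexec_error::Result;

use crate::execution::operators::testutil::wrapper::OperatorWrapper;
use crate::execution::operators::{
    ExecutableOperator,
    ExecuteInOutState,
    OperatorState,
    PartitionState,
    PollExecute,
};

fn drive_once<O: ExecutableOperator>(
    operator: O,
    partition_state: &mut PartitionState,
    operator_state: &OperatorState,
    inout: ExecuteInOutState,
) -> Result<PollExecute> {
    let wrapper = OperatorWrapper::new(operator);
    let poll = wrapper.poll_execute(partition_state, operator_state, inout)?;
    // The counting waker lets tests assert how many wakeups an operator
    // triggered; a purely synchronous poll normally triggers none.
    assert_eq!(0, wrapper.waker.wake_count());
    Ok(poll)
}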
- batches: Vec, + batches: Vec, }, } @@ -93,7 +93,7 @@ impl PhysicalUngroupedAggregate { } else { agg.function.function_impl.new_states() }; - state.new_states(1); + state.new_groups(1); states.push(state); } @@ -102,11 +102,11 @@ impl PhysicalUngroupedAggregate { } impl ExecutableOperator for PhysicalUngroupedAggregate { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let num_partitions = partitions[0]; let inner = OperatorStateInner { @@ -129,26 +129,26 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { }) .collect::>>()?; - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::UngroupedAggregate(operator_state)), - partition_states: InputOutputStates::OneToOne { partition_states }, + partition_states: InputOutputStates2::OneToOne { partition_states }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { let state = match partition_state { PartitionState::UngroupedAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), }; match state { - UngroupedAggregatePartitionState::Aggregating { agg_states, .. } => { + UngroupedAggregatePartitionState::Aggregating { .. } => { // All rows map to the same group (group 0) let addrs: Vec<_> = (0..batch.num_rows()) .map(|_| GroupAddress { @@ -164,12 +164,13 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { .map(|expr| batch.column(expr.idx).expect("column to exist")) .collect(); - agg_states[agg_idx] - .update_states(&cols, ChunkGroupAddressIter::new(0, &addrs))?; + unimplemented!() + // agg_states[agg_idx] + // .update_states2(&cols, ChunkGroupAddressIter::new(0, &addrs))?; } // Keep pushing. - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } UngroupedAggregatePartitionState::Producing { .. } => Err(RayexecError::new( "Attempted to push to partition that should be producing batches", @@ -177,12 +178,12 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::UngroupedAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -201,17 +202,13 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { }; // Everything maps to the same group (group 0) - let mapping = [GroupAddress { - chunk_idx: 0, - row_idx: 0, - }]; - for (mut local_agg_state, global_agg_state) in agg_states.into_iter().zip(shared.agg_states.iter_mut()) { global_agg_state.combine( &mut local_agg_state, - ChunkGroupAddressIter::new(0, &mapping), + Selection::selection(&[0]), + &[0], )?; } @@ -219,7 +216,7 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { if shared.remaining == 0 { // This partition is the chosen one to produce the output. - let mut final_states = std::mem::take(&mut shared.agg_states); + let final_states = std::mem::take(&mut shared.agg_states); // Wake up other partitions to let them know they are not // the chosen ones. @@ -232,20 +229,21 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { // Lock no longer needed. 
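// Illustrative sketch (not part of the patch): the single-group aggregation
// shape this operator implements, reduced to a SUM over i64. Every row maps to
// group 0, each partition folds its input into a local state, local states are
// combined into the shared operator state, and the last partition to finalize
// produces the single output row.
#[derive(Default)]
struct LocalSum {
    sum: i64,
}

impl LocalSum {
    fn update(&mut self, rows: &[i64]) {
        // poll_push2: fold the incoming batch into the local state.
        self.sum += rows.iter().sum::<i64>();
    }

    fn combine_into(self, global: &mut LocalSum) {
        // poll_finalize_push2: merge the local state into the shared state.
        global.sum += self.sum;
    }
}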
std::mem::drop(shared); - let arrays = final_states - .iter_mut() - .map(|s| s.finalize()) - .collect::>>()?; + unimplemented!() + // let arrays = final_states + // .iter_mut() + // .map(|s| s.finalize2()) + // .collect::>>()?; - let batch = Batch::try_new(arrays)?; + // let batch = Batch2::try_new(arrays)?; - *state = UngroupedAggregatePartitionState::Producing { - partition_idx: *partition_idx, - batches: vec![batch], - } + // *state = UngroupedAggregatePartitionState::Producing { + // partition_idx: *partition_idx, + // batches: vec![batch], + // } } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } UngroupedAggregatePartitionState::Producing { .. } => Err(RayexecError::new( "Attempted to finalize push partition that's producing", @@ -253,12 +251,12 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::UngroupedAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -266,8 +264,8 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { match state { UngroupedAggregatePartitionState::Producing { batches, .. } => match batches.pop() { - Some(batch) => Ok(PollPull::Computed(batch.into())), - None => Ok(PollPull::Exhausted), + Some(batch) => Ok(PollPull2::Computed(batch.into())), + None => Ok(PollPull2::Exhausted), }, UngroupedAggregatePartitionState::Aggregating { partition_idx, .. } => { let mut shared = match operator_state { @@ -277,12 +275,12 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { if shared.remaining == 0 { // We weren't the chosen partition to produce output. Immediately exhausted. 
- return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } shared.pull_wakers[*partition_idx] = Some(cx.waker().clone()); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/union.rs b/crates/rayexec_execution/src/execution/operators/union.rs index c5bc79a7f..d3eba86d0 100644 --- a/crates/rayexec_execution/src/execution/operators/union.rs +++ b/crates/rayexec_execution/src/execution/operators/union.rs @@ -6,15 +6,15 @@ use rayexec_error::Result; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::proto::DatabaseProtoConv; @@ -22,7 +22,7 @@ use crate::proto::DatabaseProtoConv; #[derive(Debug)] pub struct UnionTopPartitionState { partition_idx: usize, - batch: Option, + batch: Option, finished: bool, push_waker: Option, pull_waker: Option, @@ -40,7 +40,7 @@ pub struct UnionOperatorState { #[derive(Debug)] struct SharedPartitionState { - batch: Option, + batch: Option, finished: bool, push_waker: Option, pull_waker: Option, @@ -68,11 +68,11 @@ impl PhysicalUnion { } impl ExecutableOperator for PhysicalUnion { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let num_partitions = partitions[0]; let top_states = (0..num_partitions) @@ -106,27 +106,27 @@ impl ExecutableOperator for PhysicalUnion { .collect(), }; - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::Union(operator_state)), - partition_states: InputOutputStates::NaryInputSingleOutput { + partition_states: InputOutputStates2::NaryInputSingleOutput { partition_states: vec![top_states, bottom_states], pull_states: Self::UNION_TOP_INPUT_INDEX, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { match partition_state { PartitionState::UnionTop(state) => { if state.batch.is_some() { state.push_waker = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } state.batch = Some(batch); @@ -134,7 +134,7 @@ impl ExecutableOperator for PhysicalUnion { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } PartitionState::UnionBottom(state) => { @@ -147,7 +147,7 @@ impl ExecutableOperator for PhysicalUnion { if shared.batch.is_some() { shared.push_waker = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } shared.batch = Some(batch); @@ -156,26 +156,26 @@ impl ExecutableOperator for PhysicalUnion { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::UnionTop(state) => { state.finished = true; if let Some(waker) = state.pull_waker.take() { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } 
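// Illustrative sketch (not part of the patch): the one-slot handoff that
// PhysicalUnion's partition states implement, reduced to its core. A pusher
// finding the slot occupied parks its waker and hands the item back (the
// PollPush2::Pending(batch) case); a puller takes the item and wakes the
// parked pusher, or parks its own waker when the slot is empty.
use std::task::{Context, Waker};

struct Slot<T> {
    item: Option<T>,
    push_waker: Option<Waker>,
    pull_waker: Option<Waker>,
}

impl<T> Slot<T> {
    /// Returns the item back to the caller if the slot is already full.
    fn try_push(&mut self, cx: &mut Context<'_>, item: T) -> Option<T> {
        if self.item.is_some() {
            self.push_waker = Some(cx.waker().clone());
            return Some(item);
        }
        self.item = Some(item);
        if let Some(waker) = self.pull_waker.take() {
            waker.wake();
        }
        None
    }

    /// Returns None and parks the puller's waker if the slot is empty.
    fn try_pull(&mut self, cx: &mut Context<'_>) -> Option<T> {
        match self.item.take() {
            Some(item) => {
                if let Some(waker) = self.push_waker.take() {
                    waker.wake();
                }
                Some(item)
            }
            None => {
                self.pull_waker = Some(cx.waker().clone());
                None
            }
        }
    }
}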
PartitionState::UnionBottom(state) => { @@ -191,26 +191,26 @@ impl ExecutableOperator for PhysicalUnion { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::UnionTop(state) => match state.batch.take() { Some(batch) => { if let Some(waker) = state.push_waker.take() { waker.wake(); } - Ok(PollPull::Computed(batch.into())) + Ok(PollPull2::Computed(batch.into())) } None => { let mut shared = match operator_state { @@ -225,12 +225,12 @@ impl ExecutableOperator for PhysicalUnion { if let Some(waker) = shared.push_waker.take() { waker.wake(); } - return Ok(PollPull::Computed(batch.into())); + return Ok(PollPull2::Computed(batch.into())); } // If not, check if we're finished. if shared.finished && state.finished { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } // No batches, and we're not finished. Need to wait. @@ -239,7 +239,7 @@ impl ExecutableOperator for PhysicalUnion { waker.wake(); } - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } }, other => panic!("invalid partition state: {other:?}"), diff --git a/crates/rayexec_execution/src/execution/operators/unnest.rs b/crates/rayexec_execution/src/execution/operators/unnest.rs index 9667554df..af68b9b8e 100644 --- a/crates/rayexec_execution/src/execution/operators/unnest.rs +++ b/crates/rayexec_execution/src/execution/operators/unnest.rs @@ -7,16 +7,16 @@ use rayexec_error::{not_implemented, RayexecError, Result}; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::batch::Batch; +use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ ArrayBuilder, @@ -26,27 +26,27 @@ use crate::arrays::executor::builder::{ PrimitiveBuffer, }; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalList, - PhysicalStorage, - PhysicalType, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUtf8, + PhysicalBinary_2, + PhysicalBool_2, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalList_2, + PhysicalStorage2, + PhysicalType2, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUtf8_2, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::selection::{self, SelectionVector}; use crate::arrays::storage::{AddressableStorage, ListItemMetadata}; use crate::database::DatabaseContext; @@ -56,9 +56,9 @@ use crate::expr::physical::PhysicalScalarExpression; #[derive(Debug)] pub struct UnnestPartitionState { /// Projections that need to extended to match the unnest outputs. - project_inputs: Vec, + project_inputs: Vec, /// Inputs we're processing. - unnest_inputs: Vec, + unnest_inputs: Vec, /// Number of rows in the input. 
input_num_rows: usize, /// Row we're currently unnesting. @@ -82,22 +82,22 @@ pub struct PhysicalUnnest { } impl ExecutableOperator for PhysicalUnnest { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let partitions = partitions[0]; let states: Vec<_> = (0..partitions) .map(|_| { PartitionState::Unnest(UnnestPartitionState { project_inputs: vec![ - Array::new_untyped_null_array(0); + Array2::new_untyped_null_array(0); self.project_expressions.len() ], unnest_inputs: vec![ - Array::new_untyped_null_array(0); + Array2::new_untyped_null_array(0); self.unnest_expressions.len() ], input_num_rows: 0, @@ -109,21 +109,21 @@ impl ExecutableOperator for PhysicalUnnest { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, - ) -> Result { + batch: Batch2, + ) -> Result { let state = match partition_state { PartitionState::Unnest(state) => state, other => panic!("invalid state: {other:?}"), @@ -136,16 +136,16 @@ impl ExecutableOperator for PhysicalUnnest { waker.wake(); } - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } // Compute inputs. These will be stored until we've processed all rows. for (col_idx, expr) in self.project_expressions.iter().enumerate() { - state.project_inputs[col_idx] = expr.eval(&batch)?.into_owned(); + state.project_inputs[col_idx] = expr.eval2(&batch)?.into_owned(); } for (col_idx, expr) in self.unnest_expressions.iter().enumerate() { - state.unnest_inputs[col_idx] = expr.eval(&batch)?.into_owned(); + state.unnest_inputs[col_idx] = expr.eval2(&batch)?.into_owned(); } state.input_num_rows = batch.num_rows(); @@ -155,15 +155,15 @@ impl ExecutableOperator for PhysicalUnnest { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Unnest(state) => state, other => panic!("invalid state: {other:?}"), @@ -175,15 +175,15 @@ impl ExecutableOperator for PhysicalUnnest { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Unnest(state) => state, other => panic!("invalid state: {other:?}"), @@ -191,7 +191,7 @@ impl ExecutableOperator for PhysicalUnnest { if state.current_row >= state.input_num_rows { if state.finished { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } // We're done with these inputs. Come back later. @@ -200,19 +200,19 @@ impl ExecutableOperator for PhysicalUnnest { waker.wake(); } - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } // We have input ready, get the longest list for the current row. 
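// Illustrative sketch (plain Vec types, not the patch's Array2 API): the
// per-row expansion computed below. The longest list among the unnest inputs
// for the current row sets the output row count; shorter lists are padded with
// NULLs past their end, and a NULL list row produces all NULLs.
fn unnest_row(lists: &[Option<Vec<i32>>]) -> Vec<Vec<Option<i32>>> {
    let longest = lists
        .iter()
        .map(|l| l.as_ref().map(Vec::len).unwrap_or(0))
        .max()
        .unwrap_or(0);

    lists
        .iter()
        .map(|l| {
            let mut out = vec![None; longest];
            if let Some(items) = l {
                for (idx, val) in items.iter().enumerate() {
                    out[idx] = Some(*val);
                }
            }
            out
        })
        .collect()
}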
let mut longest = 0; for input_idx in 0..state.unnest_inputs.len() { - if state.unnest_inputs[input_idx].physical_type() == PhysicalType::UntypedNull { + if state.unnest_inputs[input_idx].physical_type() == PhysicalType2::UntypedNull { // Just let other unnest expressions determine the number of // rows. continue; } - if let Some(list_meta) = UnaryExecutor::value_at::( + if let Some(list_meta) = UnaryExecutor2::value_at::( &state.unnest_inputs[input_idx], state.current_row, )? { @@ -244,13 +244,13 @@ impl ExecutableOperator for PhysicalUnnest { let arr = &state.unnest_inputs[input_idx]; match arr.physical_type() { - PhysicalType::List => { + PhysicalType2::List => { let child = match arr.array_data() { - ArrayData::List(list) => list.inner_array(), + ArrayData2::List(list) => list.inner_array(), _other => return Err(RayexecError::new("Unexpected storage type")), }; - match UnaryExecutor::value_at::(arr, state.current_row)? { + match UnaryExecutor2::value_at::(arr, state.current_row)? { Some(meta) => { // Row is a list, unnest. let out = unnest(child, longest as usize, meta)?; @@ -259,7 +259,7 @@ impl ExecutableOperator for PhysicalUnnest { None => { // Row is null, produce nulls according to longest // length. - let out = Array::new_typed_null_array( + let out = Array2::new_typed_null_array( child.datatype().clone(), longest as usize, )?; @@ -267,9 +267,9 @@ impl ExecutableOperator for PhysicalUnnest { } } } - PhysicalType::UntypedNull => { + PhysicalType2::UntypedNull => { // Just produce null array according to longest length. - let out = Array::new_untyped_null_array(longest as usize); + let out = Array2::new_untyped_null_array(longest as usize); outputs.push(out); } other => { @@ -290,9 +290,9 @@ impl ExecutableOperator for PhysicalUnnest { } } - let batch = Batch::try_new(outputs)?; + let batch = Batch2::try_new(outputs)?; - Ok(PollPull::Computed(batch.into())) + Ok(PollPull2::Computed(batch.into())) } } @@ -304,122 +304,122 @@ impl Explainable for PhysicalUnnest { } } -pub(crate) fn unnest(child: &Array, longest_len: usize, meta: ListItemMetadata) -> Result { +pub(crate) fn unnest(child: &Array2, longest_len: usize, meta: ListItemMetadata) -> Result { let datatype = child.datatype().clone(); match child.physical_type() { - PhysicalType::UntypedNull => Ok(Array::new_untyped_null_array(longest_len)), - PhysicalType::Boolean => { + PhysicalType2::UntypedNull => Ok(Array2::new_untyped_null_array(longest_len)), + PhysicalType2::Boolean => { let builder = ArrayBuilder { datatype, buffer: BooleanBuffer::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, 
meta) } - PhysicalType::Int128 => { + PhysicalType2::Int128 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { let builder = ArrayBuilder { datatype, buffer: GermanVarlenBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } - PhysicalType::Binary => { + PhysicalType2::Binary => { let builder = ArrayBuilder { datatype, buffer: GermanVarlenBuffer::<[u8]>::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } other => not_implemented!("Unnest for physical type {other:?}"), } @@ -427,11 +427,11 @@ pub(crate) fn unnest(child: &Array, longest_len: usize, meta: ListItemMetadata) fn unnest_inner<'a, S, B>( mut builder: ArrayBuilder, - child: &'a Array, + child: &'a Array2, meta: ListItemMetadata, -) -> Result +) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, S::Type<'a>: Borrow, { @@ -459,7 +459,7 @@ where builder.buffer.put(out_idx, val.borrow()); } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( builder.datatype, out_validity, builder.buffer.into_data(), @@ -482,7 +482,7 @@ where builder.buffer.put(out_idx, val.borrow()); } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( builder.datatype, out_validity, builder.buffer.into_data(), diff --git a/crates/rayexec_execution/src/execution/operators/util/broadcast.rs b/crates/rayexec_execution/src/execution/operators/util/broadcast.rs index 
beca52778..94f041c83 100644 --- a/crates/rayexec_execution/src/execution/operators/util/broadcast.rs +++ b/crates/rayexec_execution/src/execution/operators/util/broadcast.rs @@ -5,7 +5,7 @@ use std::task::{Context, Poll, Waker}; use parking_lot::Mutex; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; #[derive(Debug)] pub struct BroadcastChannel { @@ -34,7 +34,7 @@ impl BroadcastChannel { (ch, recvs) } - pub fn send(&self, batch: Batch) { + pub fn send(&self, batch: Batch2) { let mut state = self.state.lock(); let idx = state.batches.len(); @@ -102,7 +102,7 @@ struct BroadcastState { #[derive(Debug)] struct BatchState { remaining_recv: usize, - batch: Option, + batch: Option, } #[derive(Debug)] @@ -113,7 +113,7 @@ pub struct RecvFut { } impl Future for RecvFut { - type Output = Option; + type Output = Option; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let mut state = self.state.lock(); @@ -152,7 +152,7 @@ mod tests { use futures::FutureExt; use super::*; - use crate::arrays::array::Array; + use crate::arrays::array::Array2; struct NopWaker {} @@ -170,9 +170,9 @@ mod tests { } /// Create a batch with a single int64 value. - fn test_batch(n: i64) -> Batch { - let col = Array::from_iter([n]); - Batch::try_new([col]).unwrap() + fn test_batch(n: i64) -> Batch2 { + let col = Array2::from_iter([n]); + Batch2::try_new([col]).unwrap() } #[test] diff --git a/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs b/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs index 12b385ce2..153e34ea2 100644 --- a/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs +++ b/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs @@ -2,8 +2,8 @@ use std::sync::Arc; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::batch::Batch; +use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use crate::arrays::selection::SelectionVector; @@ -23,7 +23,7 @@ pub struct LeftOuterJoinTracker { } impl LeftOuterJoinTracker { - pub fn new_for_batches(batches: &[Batch]) -> Self { + pub fn new_for_batches(batches: &[Batch2]) -> Self { let bitmaps = batches .iter() .map(|b| Bitmap::new_with_all_false(b.num_rows())) @@ -57,7 +57,7 @@ impl LeftOuterJoinTracker { pub struct LeftOuterJoinDrainState { tracker: LeftOuterJoinTracker, /// All batches from the left side. - batches: Vec, + batches: Vec, /// Types for the right side of the join. Used to create the (typed) null /// columns for left rows that weren't visited. right_types: Vec, @@ -76,7 +76,7 @@ impl LeftOuterJoinDrainState { start_idx: usize, skip: usize, tracker: LeftOuterJoinTracker, - batches: Vec, + batches: Vec, right_types: Vec, ) -> Self { LeftOuterJoinDrainState { @@ -90,7 +90,7 @@ impl LeftOuterJoinDrainState { /// Drains the next batch from the left, and appends a boolean column /// representing which rows were visited. 
- pub fn drain_mark_next(&mut self) -> Result> { + pub fn drain_mark_next(&mut self) -> Result> { let batch = match self.batches.get(self.batch_idx) { Some(batch) => batch, None => return Ok(None), @@ -106,12 +106,12 @@ impl LeftOuterJoinDrainState { .columns() .iter() .cloned() - .chain([Array::new_with_array_data( + .chain([Array2::new_with_array_data( DataType::Boolean, - ArrayData::Boolean(Arc::new(bitmap.clone().into())), + ArrayData2::Boolean(Arc::new(bitmap.clone().into())), )]); - let batch = Batch::try_new(cols)?; + let batch = Batch2::try_new(cols)?; Ok(Some(batch)) } @@ -120,7 +120,7 @@ impl LeftOuterJoinDrainState { /// /// This will filter out rows that have been visited, and join the remaining /// rows will null columns on the right. - pub fn drain_next(&mut self) -> Result> { + pub fn drain_next(&mut self) -> Result> { loop { let batch = match self.batches.get(self.batch_idx) { Some(batch) => batch, @@ -152,16 +152,16 @@ impl LeftOuterJoinDrainState { let right_cols = self .right_types .iter() - .map(|datatype| Array::new_typed_null_array(datatype.clone(), num_rows)) + .map(|datatype| Array2::new_typed_null_array(datatype.clone(), num_rows)) .collect::>>()?; - let batch = Batch::try_new(left_cols.into_iter().chain(right_cols))?; + let batch = Batch2::try_new(left_cols.into_iter().chain(right_cols))?; return Ok(Some(batch)); } } - pub fn drain_semi_next(&mut self) -> Result> { + pub fn drain_semi_next(&mut self) -> Result> { loop { let batch = match self.batches.get(self.batch_idx) { Some(batch) => batch, @@ -187,10 +187,10 @@ impl LeftOuterJoinDrainState { let right_cols = self .right_types .iter() - .map(|datatype| Array::new_typed_null_array(datatype.clone(), num_rows)) + .map(|datatype| Array2::new_typed_null_array(datatype.clone(), num_rows)) .collect::>>()?; - let batch = Batch::try_new(left_cols.into_iter().chain(right_cols))?; + let batch = Batch2::try_new(left_cols.into_iter().chain(right_cols))?; return Ok(Some(batch)); } @@ -209,7 +209,7 @@ pub struct RightOuterJoinTracker { impl RightOuterJoinTracker { /// Create a new tracker for the provided batch. - pub fn new_for_batch(batch: &Batch) -> Self { + pub fn new_for_batch(batch: &Batch2) -> Self { RightOuterJoinTracker { unvisited: Bitmap::new_with_all_true(batch.num_rows()), } @@ -230,7 +230,7 @@ impl RightOuterJoinTracker { /// the batch. /// /// Returns None if all row on the right were visited. 
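// Illustrative sketch (not part of the patch): the visited-row bookkeeping
// behind LeftOuterJoinTracker, with Vec<bool> standing in for the Bitmap type
// and the method names chosen for the example. One bitmap per buffered left
// batch starts all-false; probing marks matched rows, and draining emits the
// still-unvisited rows joined against typed NULL columns on the right.
struct VisitedTracker {
    visited: Vec<Vec<bool>>,
}

impl VisitedTracker {
    fn new_for_batches(row_counts: &[usize]) -> Self {
        VisitedTracker {
            visited: row_counts.iter().map(|&n| vec![false; n]).collect(),
        }
    }

    fn mark_visited(&mut self, batch_idx: usize, row_idx: usize) {
        self.visited[batch_idx][row_idx] = true;
    }

    fn unvisited_rows(&self, batch_idx: usize) -> Vec<usize> {
        self.visited[batch_idx]
            .iter()
            .enumerate()
            .filter_map(|(row, &seen)| if seen { None } else { Some(row) })
            .collect()
    }
}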
- pub fn into_unvisited(self, left_types: &[DataType], right: &Batch) -> Result> { + pub fn into_unvisited(self, left_types: &[DataType], right: &Batch2) -> Result> { let selection = SelectionVector::from_iter(self.unvisited.index_iter()); let num_rows = selection.num_rows(); if num_rows == 0 { @@ -241,10 +241,10 @@ impl RightOuterJoinTracker { let left_null_cols = left_types .iter() - .map(|datatype| Array::new_typed_null_array(datatype.clone(), num_rows)) + .map(|datatype| Array2::new_typed_null_array(datatype.clone(), num_rows)) .collect::>>()?; - let batch = Batch::try_new(left_null_cols.into_iter().chain(right_cols))?; + let batch = Batch2::try_new(left_null_cols.into_iter().chain(right_cols))?; Ok(Some(batch)) } diff --git a/crates/rayexec_execution/src/execution/operators/util/resizer.rs b/crates/rayexec_execution/src/execution/operators/util/resizer.rs index c2a7eb615..d9e594da8 100644 --- a/crates/rayexec_execution/src/execution/operators/util/resizer.rs +++ b/crates/rayexec_execution/src/execution/operators/util/resizer.rs @@ -2,10 +2,12 @@ use std::sync::Arc; use rayexec_error::Result; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::selection::SelectionVector; use crate::execution::computed_batch::ComputedBatches; +// TODO: Delete + // TODO: Shouldn't be a const, should be determined when we create the // executable plans. pub const DEFAULT_TARGET_BATCH_SIZE: usize = 4096; @@ -16,7 +18,7 @@ pub struct BatchResizer { /// Target batch size. target: usize, /// Pending input batches. - pending: Vec, + pending: Vec, /// Current total row count for all batches. pending_row_count: usize, } @@ -36,7 +38,7 @@ impl BatchResizer { /// Typically this will return either no batches or a single batch. However /// there is a case where this can return multiple batches if 'len(input) + /// pending_row_count > target * 2' (aka very large input batch). - pub fn try_push(&mut self, batch: Batch) -> Result { + pub fn try_push(&mut self, batch: Batch2) -> Result { if batch.num_rows() == 0 { return Ok(ComputedBatches::None); } @@ -44,7 +46,7 @@ impl BatchResizer { if self.pending_row_count + batch.num_rows() == self.target { self.pending.push(batch); - let out = Batch::concat(&self.pending)?; + let out = Batch2::concat(&self.pending)?; self.pending.clear(); self.pending_row_count = 0; @@ -67,7 +69,7 @@ impl BatchResizer { self.pending.push(batch_a); // Concat current pending + batch a. 
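// Illustrative sketch (plain Vec rows, not part of the patch): the row-count
// resizing that BatchResizer::try_push implements. Input chunks are buffered
// until `target` rows are available, then emitted as full-size outputs; a very
// large input yields several full outputs, with any remainder staying pending
// until flush.
struct RowResizer {
    target: usize,
    pending: Vec<i32>,
}

impl RowResizer {
    fn try_push(&mut self, mut rows: Vec<i32>) -> Vec<Vec<i32>> {
        self.pending.append(&mut rows);
        let mut out = Vec::new();
        while self.pending.len() >= self.target {
            // Emit exactly `target` rows, keep the rest pending.
            let rest = self.pending.split_off(self.target);
            out.push(std::mem::replace(&mut self.pending, rest));
        }
        out
    }

    fn flush_remaining(&mut self) -> Option<Vec<i32>> {
        if self.pending.is_empty() {
            None
        } else {
            Some(std::mem::take(&mut self.pending))
        }
    }
}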
- let out = Batch::concat(&self.pending)?; + let out = Batch2::concat(&self.pending)?; self.pending.clear(); self.pending_row_count = 0; @@ -107,145 +109,9 @@ impl BatchResizer { return Ok(ComputedBatches::None); } - let out = Batch::concat(&self.pending)?; + let out = Batch2::concat(&self.pending)?; self.pending.clear(); self.pending_row_count = 0; Ok(ComputedBatches::Single(out)) } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::arrays::array::Array; - use crate::arrays::testutil::assert_batches_eq; - - #[test] - fn push_within_target() { - let batch1 = Batch::try_new([ - Array::from_iter([1, 2, 3]), - Array::from_iter(["a", "b", "c"]), - ]) - .unwrap(); - - let batch2 = Batch::try_new([ - Array::from_iter([4, 5, 6]), - Array::from_iter(["d", "e", "f"]), - ]) - .unwrap(); - - let mut resizer = BatchResizer::new(4); - - let out = resizer.try_push(batch1).unwrap(); - assert!(matches!(out, ComputedBatches::None)); - - let out = resizer.try_push(batch2).unwrap(); - let got = match out { - ComputedBatches::Single(batch) => batch, - other => panic!("unexpected out: {other:?}"), - }; - - let expected = Batch::try_new([ - Array::from_iter([1, 2, 3, 4]), - Array::from_iter(["a", "b", "c", "d"]), - ]) - .unwrap(); - - assert_batches_eq(&expected, &got); - - let expected_rem = - Batch::try_new([Array::from_iter([5, 6]), Array::from_iter(["e", "f"])]).unwrap(); - - let remaining = match resizer.flush_remaining().unwrap() { - ComputedBatches::Single(batch) => batch, - other => panic!("unexpected out: {other:?}"), - }; - - assert_batches_eq(&expected_rem, &remaining); - } - - #[test] - fn push_large_batch() { - // len(batch) > target && len(batch) < target * 2 - - let batch = Batch::try_new([ - Array::from_iter([1, 2, 3, 4, 5]), - Array::from_iter(["a", "b", "c", "d", "e"]), - ]) - .unwrap(); - - let mut resizer = BatchResizer::new(4); - let got = match resizer.try_push(batch).unwrap() { - ComputedBatches::Single(batch) => batch, - other => panic!("unexpected out: {other:?}"), - }; - - let expected = Batch::try_new([ - Array::from_iter([1, 2, 3, 4]), - Array::from_iter(["a", "b", "c", "d"]), - ]) - .unwrap(); - - assert_batches_eq(&expected, &got); - - let expected_rem = - Batch::try_new([Array::from_iter([5]), Array::from_iter(["e"])]).unwrap(); - - let remaining = match resizer.flush_remaining().unwrap() { - ComputedBatches::Single(batch) => batch, - other => panic!("unexpected out: {other:?}"), - }; - - assert_batches_eq(&expected_rem, &remaining); - } - - #[test] - fn push_very_large_batch() { - // len(batch) > target * 2 - - let batch = Batch::try_new([ - Array::from_iter([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), - Array::from_iter(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]), - ]) - .unwrap(); - - let mut resizer = BatchResizer::new(4); - let gots = match resizer.try_push(batch).unwrap() { - ComputedBatches::Multi(batches) => batches, - other => panic!("unexpected out: {other:?}"), - }; - - assert_eq!(2, gots.len()); - - let expected1 = Batch::try_new([ - Array::from_iter([1, 2, 3, 4]), - Array::from_iter(["a", "b", "c", "d"]), - ]) - .unwrap(); - assert_batches_eq(&expected1, &gots[0]); - - let expected2 = Batch::try_new([ - Array::from_iter([5, 6, 7, 8]), - Array::from_iter(["e", "f", "g", "h"]), - ]) - .unwrap(); - assert_batches_eq(&expected2, &gots[1]); - - let expected_rem = - Batch::try_new([Array::from_iter([9, 10]), Array::from_iter(["i", "j"])]).unwrap(); - - let remaining = match resizer.flush_remaining().unwrap() { - ComputedBatches::Single(batch) => batch, - other => 
panic!("unexpected out: {other:?}"), - }; - - assert_batches_eq(&expected_rem, &remaining); - } - - #[test] - fn flush_none() { - let mut resizer = BatchResizer::new(4); - let out = resizer.flush_remaining().unwrap(); - assert!(matches!(out, ComputedBatches::None)); - } -} diff --git a/crates/rayexec_execution/src/execution/operators/values.rs b/crates/rayexec_execution/src/execution/operators/values.rs index 4fdfa7555..f08e1cc94 100644 --- a/crates/rayexec_execution/src/execution/operators/values.rs +++ b/crates/rayexec_execution/src/execution/operators/values.rs @@ -5,41 +5,41 @@ use rayexec_error::{RayexecError, Result}; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::proto::DatabaseProtoConv; #[derive(Debug)] pub struct ValuesPartitionState { - batches: Vec, + batches: Vec, } #[derive(Debug)] pub struct PhysicalValues { - batches: Vec, + batches: Vec, } impl PhysicalValues { - pub fn new(batches: Vec) -> Self { + pub fn new(batches: Vec) -> Self { PhysicalValues { batches } } } impl ExecutableOperator for PhysicalValues { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let num_partitions = partitions[0]; let mut states: Vec<_> = (0..num_partitions) @@ -52,43 +52,43 @@ impl ExecutableOperator for PhysicalValues { states[idx % num_partitions].batches.push(batch.clone()); } - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states.into_iter().map(PartitionState::Values).collect(), }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { Err(RayexecError::new("Cannot push to Values operator")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to Values operator")) } - fn poll_pull( + fn poll_pull2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Values(state) => match state.batches.pop() { - Some(batch) => Ok(PollPull::Computed(batch.into())), - None => Ok(PollPull::Exhausted), + Some(batch) => Ok(PollPull2::Computed(batch.into())), + None => Ok(PollPull2::Exhausted), }, other => panic!("invalid partition state: {other:?}"), } diff --git a/crates/rayexec_execution/src/execution/operators/window/mod.rs b/crates/rayexec_execution/src/execution/operators/window/mod.rs index 36547a198..af935b541 100644 --- a/crates/rayexec_execution/src/execution/operators/window/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/window/mod.rs @@ -4,14 +4,14 @@ use rayexec_error::Result; use super::{ ExecutableOperator, - ExecutionStates, + ExecutionStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + 
PollPush2, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -19,39 +19,39 @@ use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; pub struct PhysicalWindow {} impl ExecutableOperator for PhysicalWindow { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, _partitions: Vec, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, - ) -> Result { + _batch: Batch2, + ) -> Result { unimplemented!() } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_pull( + fn poll_pull2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } } diff --git a/crates/rayexec_execution/src/expr/physical/case_expr.rs b/crates/rayexec_execution/src/expr/physical/case_expr.rs index bafbb5db8..21f077540 100644 --- a/crates/rayexec_execution/src/expr/physical/case_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/case_expr.rs @@ -1,15 +1,16 @@ -use std::borrow::Cow; use std::fmt; -use std::sync::Arc; use rayexec_error::Result; -use super::PhysicalScalarExpression; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; -use crate::arrays::bitmap::Bitmap; -use crate::arrays::executor::scalar::{interleave, SelectExecutor}; -use crate::arrays::selection::SelectionVector; +use super::{ExpressionState, PhysicalScalarExpression}; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::buffer::physical_type::PhysicalBool; +use crate::arrays::datatype::DataType; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::expr::physical::evaluator::ExpressionEvaluator; #[derive(Debug, Clone)] pub struct PhysicalWhenThen { @@ -23,81 +24,152 @@ impl fmt::Display for PhysicalWhenThen { } } +/// Physical expression for 'CASE .. THEN .. ELSE ..' expressions. #[derive(Debug, Clone)] pub struct PhysicalCaseExpr { pub cases: Vec, pub else_expr: Box, + pub datatype: DataType, } impl PhysicalCaseExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { - let mut arrays = Vec::new(); - let mut indices: Vec<(usize, usize)> = (0..batch.num_rows()).map(|_| (0, 0)).collect(); + pub(crate) fn create_state(&self, batch_size: usize) -> Result { + // 2 states per when/then pair, plus one for the 'else'. + let mut inputs = Vec::with_capacity(self.cases.len() * 2 + 1); + for case in &self.cases { + let when_input = case.when.create_state(batch_size)?; + inputs.push(when_input); + + let then_input = case.then.create_state(batch_size)?; + inputs.push(then_input); + } + + let else_input = self.else_expr.create_state(batch_size)?; + inputs.push(else_input); + + // 2 arrays in the buffer, one 'boolean' for conditional evaluation, one + // for the result if condition is true. 'then' and 'else' expressions + // should evaluate to the same type. 
+ let buffer = Batch::try_from_arrays( + [ + Array::new(&NopBufferManager, DataType::Boolean, batch_size)?, + Array::new(&NopBufferManager, self.else_expr.datatype(), batch_size)?, + ], + false, + )?; + + Ok(ExpressionState { buffer, inputs }) + } - // Track remaining rows we need to evaluate. + pub fn datatype(&self) -> DataType { + self.datatype.clone() + } + + pub(crate) fn eval( + &self, + input: &mut Batch, + state: &mut ExpressionState, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + // Indices where 'when' evaluated to true and the 'then' expression + // needs to be evaluated. + let mut then_selection = Vec::with_capacity(sel.len()); + // Indices where 'then' evaluated to false or null. + let mut fallthrough_selection = Vec::with_capacity(sel.len()); + + // Current selection for a single when/then pair. // - // True bits are rows we still need to consider. - let mut remaining = Bitmap::new_with_all_true(batch.num_rows()); + // Initialized to the initial selection passed in. + let mut curr_selection: Vec<_> = sel.iter().collect(); // TODO: Would be cool not needing to allocate here. - let mut trues_sel = SelectionVector::with_capacity(batch.num_rows()); + for (case_idx, case) in self.cases.iter().enumerate() { + fallthrough_selection.clear(); + then_selection.clear(); - for case in &self.cases { - // Generate selection from remaining bitmap. - let selection = Arc::new(SelectionVector::from_iter(remaining.index_iter())); - - // Get batch with only remaining rows that we should consider. - let selected_batch = batch.select(selection.clone()); - - // Execute 'when'. - let selected = case.when.eval(&selected_batch)?; - - // Determine which rows should be executed for 'then', and which we - // need to fall through on. - SelectExecutor::select(&selected, &mut trues_sel)?; - - // Select rows in batch to execute on based on 'trues'. - let execute_batch = selected_batch.select(Arc::new(trues_sel.clone())); - let output = case.then.eval(&execute_batch)?; - - // Store array for later interleaving. - let array_idx = arrays.len(); - arrays.push(output.into_owned()); - - // Figure out mapping from the 'trues' selection to the original row - // index. - // - // The selection vector locations should index into the full-length - // selection vector to get the original row index. - for (array_row_idx, selected_row_idx) in trues_sel.iter_locations().enumerate() { - // Final output row. - let output_row_idx = selection.get(selected_row_idx); - indices[output_row_idx] = (array_idx, array_row_idx); - - // Update bitmap, this row was handled. - remaining.set_unchecked(output_row_idx, false); + if curr_selection.is_empty() { + // Nothing left to do. + break; } - } - // Do all remaining rows. - if remaining.count_trues() != 0 { - let selection = Arc::new(SelectionVector::from_iter(remaining.index_iter())); - let remaining_batch = batch.select(selection.clone()); - - let output = self.else_expr.eval(&remaining_batch)?; - let array_idx = arrays.len(); - arrays.push(output.into_owned()); + // Each case has two input states, one for 'when' and one for + // 'then'. + let when_state = &mut state.inputs[case_idx * 2]; + // When array reused for each case. 
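// Illustrative sketch (plain Rust, not the Selection/Array API): the per-case
// selection split performed below. Rows whose 'when' result is true are
// selected for the 'then' evaluation; rows where the 'when' is false or NULL
// fall through to the next case, and anything left after all cases goes to the
// 'else' expression.
fn split_case(when_results: &[Option<bool>], selection: &[usize]) -> (Vec<usize>, Vec<usize>) {
    let mut then_sel = Vec::new();
    let mut fallthrough = Vec::new();
    for &row in selection {
        match when_results[row] {
            Some(true) => then_sel.push(row),
            // False and NULL are both treated as falsey.
            _ => fallthrough.push(row),
        }
    }
    (then_sel, fallthrough)
}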
+ let when_array = &mut state.buffer.arrays_mut()[0]; + when_array.reset_for_write(&NopBufferManager)?; + + // Eval 'when' + ExpressionEvaluator::eval_expression( + &case.when, + input, + when_state, + Selection::selection(&curr_selection), + when_array, + )?; + + UnaryExecutor::for_each_flat::( + when_array.flat_view()?, + Selection::selection(&curr_selection), + |idx, b| { + if let Some(&true) = b { + // 'When' expression evaluated to true, select it for + // 'then' expression eval. + then_selection.push(idx); + } else { + // Not true, need to fall through. + fallthrough_selection.push(idx); + } + }, + )?; - // Update indices. - for (array_row_idx, output_row_idx) in selection.iter_locations().enumerate() { - indices[output_row_idx] = (array_idx, array_row_idx); + if then_selection.is_empty() { + // Everything in this case's 'when' evaluated to false. + continue; } + + let then_state = &mut state.inputs[case_idx * 2 + 1]; + // Reused, assumes all 'then' expressions and the 'else' expression + // are the same type. + let then_array = &mut state.buffer.arrays_mut()[1]; + then_array.reset_for_write(&NopBufferManager)?; + + // Eval 'then' with selection from 'when'. + ExpressionEvaluator::eval_expression( + &case.then, + input, + then_state, + Selection::selection(&then_selection), + then_array, + )?; + + // Fill output array according to indices in 'when' selection. + then_array.copy_rows(then_selection.iter().copied().enumerate(), output)?; + + // Update next iteration to use fallthrough indices. + std::mem::swap(&mut fallthrough_selection, &mut curr_selection); } - // Interleave. - let refs: Vec<_> = arrays.iter().collect(); - let arr = interleave(&refs, &indices)?; + if !curr_selection.is_empty() { + // We have remaining indices that fell through all cases. Eval with + // else expression and add those in. + let else_state = state.inputs.last_mut().unwrap(); // Last state after all when/then states. + let else_array = &mut state.buffer.arrays_mut()[1]; + else_array.reset_for_write(&NopBufferManager)?; + + ExpressionEvaluator::eval_expression( + &self.else_expr, + input, + else_state, + Selection::selection(&curr_selection), + else_array, + )?; + + // And fill remaining. 
+ else_array.copy_rows(curr_selection.iter().copied().enumerate(), output)?; + } - Ok(Cow::Owned(arr)) + Ok(()) } } @@ -115,83 +187,96 @@ impl fmt::Display for PhysicalCaseExpr { #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::datatype::DataType; - use crate::arrays::scalar::ScalarValue; - use crate::expr::case_expr::{CaseExpr, WhenThen}; - use crate::expr::physical::planner::PhysicalExpressionPlanner; - use crate::expr::{self, Expression}; - use crate::functions::scalar::builtin::comparison::Eq; - use crate::functions::scalar::ScalarFunction; - use crate::logical::binder::table_list::TableList; + use crate::arrays::testutil::assert_arrays_eq; + use crate::expr::physical::column_expr::PhysicalColumnExpr; + use crate::expr::physical::literal_expr::PhysicalLiteralExpr; #[test] fn case_simple() { - let batch = Batch::try_new([ - Array::from_iter([1, 2, 3, 4]), - Array::from_iter([12, 13, 14, 15]), - ]) + // CASE a THEN b + // ELSE 48 + let expr = PhysicalCaseExpr { + cases: vec![PhysicalWhenThen { + when: PhysicalScalarExpression::Column(PhysicalColumnExpr { + idx: 0, + datatype: DataType::Boolean, + }), + then: PhysicalScalarExpression::Column(PhysicalColumnExpr { + idx: 1, + datatype: DataType::Int32, + }), + }], + else_expr: Box::new(PhysicalScalarExpression::Literal(PhysicalLiteralExpr { + literal: 48.into(), + })), + datatype: DataType::Int32, + }; + + let mut input = Batch::try_from_arrays( + [ + Array::try_from_iter([true, true, false]).unwrap(), + Array::try_from_iter([1, 2, 3]).unwrap(), + ], + true, + ) .unwrap(); - let mut table_list = TableList::empty(); - let table_ref = table_list - .push_table( - None, - vec![DataType::Int32, DataType::Int32], - vec!["a".to_string(), "b".to_string()], - ) + let mut state = expr.create_state(3).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + expr.eval(&mut input, &mut state, Selection::linear(3), &mut out) .unwrap(); - // CASE WHEN a = 2 THEN 'first_case' - // WHEN a = 3 THEN 'second_case' - // ELSE 'else' - // END - - let when_expr_0 = Expression::ScalarFunction( - Eq.plan(&table_list, vec![expr::col_ref(table_ref, 0), expr::lit(2)]) - .unwrap() - .into(), - ); - let then_expr_0 = expr::lit("first_case"); - - let when_expr_1 = Expression::ScalarFunction( - Eq.plan(&table_list, vec![expr::col_ref(table_ref, 0), expr::lit(3)]) - .unwrap() - .into(), - ); - let then_expr_1 = expr::lit("second_case"); - - let else_expr = expr::lit("else"); - - let case_expr = Expression::Case(CaseExpr { - cases: vec![ - WhenThen { - when: when_expr_0, - then: then_expr_0, - }, - WhenThen { - when: when_expr_1, - then: then_expr_1, - }, + let expected = Array::try_from_iter([1, 2, 48]).unwrap(); + assert_arrays_eq(&expected, &out); + } + + #[test] + fn case_falsey() { + // Same as above but check that 'when' treats nulls as false. 
+ + // CASE a THEN b + // ELSE 48 + let expr = PhysicalCaseExpr { + cases: vec![PhysicalWhenThen { + when: PhysicalScalarExpression::Column(PhysicalColumnExpr { + idx: 0, + datatype: DataType::Boolean, + }), + then: PhysicalScalarExpression::Column(PhysicalColumnExpr { + idx: 1, + datatype: DataType::Int32, + }), + }], + else_expr: Box::new(PhysicalScalarExpression::Literal(PhysicalLiteralExpr { + literal: 48.into(), + })), + datatype: DataType::Int32, + }; + + let mut input = Batch::try_from_arrays( + [ + Array::try_from_iter([Some(true), None, Some(false)]).unwrap(), + Array::try_from_iter([1, 2, 3]).unwrap(), ], - else_expr: Some(Box::new(else_expr)), - }); - - let planner = PhysicalExpressionPlanner::new(&table_list); - let physical_case = planner.plan_scalar(&[table_ref], &case_expr).unwrap(); - - let got = physical_case.eval(&batch).unwrap(); - - assert_eq!(ScalarValue::from("else"), got.logical_value(0).unwrap()); - assert_eq!( - ScalarValue::from("first_case"), - got.logical_value(1).unwrap() - ); - assert_eq!( - ScalarValue::from("second_case"), - got.logical_value(2).unwrap() - ); - assert_eq!(ScalarValue::from("else"), got.logical_value(3).unwrap()); + true, + ) + .unwrap(); + + let mut state = ExpressionState { + buffer: Batch::new(&NopBufferManager, [DataType::Boolean, DataType::Int32], 3).unwrap(), + inputs: vec![ExpressionState::empty(), ExpressionState::empty()], + }; + + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + expr.eval(&mut input, &mut state, Selection::linear(3), &mut out) + .unwrap(); + + let expected = Array::try_from_iter([1, 48, 48]).unwrap(); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/expr/physical/cast_expr.rs b/crates/rayexec_execution/src/expr/physical/cast_expr.rs index 081e8b023..8895c95d6 100644 --- a/crates/rayexec_execution/src/expr/physical/cast_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/cast_expr.rs @@ -1,12 +1,14 @@ -use std::borrow::Cow; use std::fmt; use rayexec_error::{OptionExt, Result}; use rayexec_proto::ProtoConv; -use super::PhysicalScalarExpression; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use super::evaluator::ExpressionEvaluator; +use super::{ExpressionState, PhysicalScalarExpression}; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::compute::cast::array::cast_array; use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::DataType; @@ -20,10 +22,53 @@ pub struct PhysicalCastExpr { } impl PhysicalCastExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { - let input = self.expr.eval(batch)?; - let out = cast_array(input.as_ref(), self.to.clone(), CastFailBehavior::Error)?; - Ok(Cow::Owned(out)) + pub(crate) fn create_state(&self, batch_size: usize) -> Result { + let inputs = vec![self.expr.create_state(batch_size)?]; + let buffer = Batch::try_from_arrays( + [Array::new( + &NopBufferManager, + self.expr.datatype(), + batch_size, + )?], + false, + )?; + + Ok(ExpressionState { buffer, inputs }) + } + + pub fn datatype(&self) -> DataType { + self.to.clone() + } + + pub(crate) fn eval( + &self, + input: &mut Batch, + state: &mut ExpressionState, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + // Eval child. 
+ let child_output = &mut state.buffer.arrays_mut()[0]; + ExpressionEvaluator::eval_expression( + &self.expr, + input, + &mut state.inputs[0], + sel, + child_output, + )?; + + // Cast child output. + // + // Note we discard the previous selection since the child would have + // written the rows starting at 0 up to selection len. + cast_array( + child_output, + Selection::linear(sel.len()), + output, + CastFailBehavior::Error, + )?; + + Ok(()) } } @@ -53,3 +98,33 @@ impl DatabaseProtoConv for PhysicalCastExpr { }) } } + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::testutil::assert_arrays_eq_sel; + use crate::expr::physical::literal_expr::PhysicalLiteralExpr; + + #[test] + fn cast_expr_literal_string_to_i32() { + let expr = PhysicalCastExpr { + to: DataType::Int32, + expr: Box::new(PhysicalScalarExpression::Literal(PhysicalLiteralExpr { + literal: "35".into(), + })), + }; + + let mut state = expr.create_state(1024).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 1024).unwrap(); + let mut input = Batch::empty_with_num_rows(3); + let sel = input.selection(); + + expr.eval(&mut input, &mut state, sel, &mut out).unwrap(); + + let expected = Array::try_from_iter([35, 35, 35]).unwrap(); + assert_arrays_eq_sel(&expected, 0..3, &out, 0..3); + } +} diff --git a/crates/rayexec_execution/src/expr/physical/column_expr.rs b/crates/rayexec_execution/src/expr/physical/column_expr.rs index 6394383cb..0a5e37e5c 100644 --- a/crates/rayexec_execution/src/expr/physical/column_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/column_expr.rs @@ -1,29 +1,46 @@ -use std::borrow::Cow; use std::fmt; -use rayexec_error::{RayexecError, Result}; +use rayexec_error::Result; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use super::evaluator::ExpressionState; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::datatype::DataType; use crate::database::DatabaseContext; use crate::proto::DatabaseProtoConv; #[derive(Debug, Clone)] pub struct PhysicalColumnExpr { + pub datatype: DataType, pub idx: usize, } impl PhysicalColumnExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { - let col = batch.column(self.idx).ok_or_else(|| { - RayexecError::new(format!( - "Tried to get column at index {} in a batch with {} columns", - self.idx, - batch.columns().len() - )) - })?; + pub(crate) fn create_state(&self, _batch_size: usize) -> Result { + Ok(ExpressionState::empty()) + } + + pub fn datatype(&self) -> DataType { + self.datatype.clone() + } + + pub(crate) fn eval( + &self, + input: &mut Batch, + _: &mut ExpressionState, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + let col = &mut input.arrays_mut()[self.idx]; + output.clone_from(&NopBufferManager, col)?; + + if !sel.is_linear() || sel.len() != input.num_rows() { + output.select(&NopBufferManager, sel.iter())?; + } - Ok(Cow::Borrowed(col)) + Ok(()) } } @@ -37,14 +54,75 @@ impl DatabaseProtoConv for PhysicalColumnExpr { type ProtoType = rayexec_proto::generated::physical_expr::PhysicalColumnExpr; fn to_proto_ctx(&self, _context: &DatabaseContext) -> Result { - Ok(Self::ProtoType { - idx: self.idx as u32, - }) + unimplemented!() + // Ok(Self::ProtoType { + // idx: self.idx as u32, + // }) + } + + fn from_proto_ctx(_proto: 
Self::ProtoType, _context: &DatabaseContext) -> Result { + unimplemented!() + // Ok(Self { + // idx: proto.idx as usize, + // }) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; + + #[test] + fn column_expr_eval() { + let mut input = Batch::try_from_arrays( + [ + Array::try_from_iter(["a", "b", "c", "d"]).unwrap(), + Array::try_from_iter([1, 2, 3, 4]).unwrap(), + ], + true, + ) + .unwrap(); + + let expr = PhysicalColumnExpr { + idx: 1, + datatype: DataType::Int32, + }; + let mut out = Array::new(&NopBufferManager, DataType::Int32, 4).unwrap(); + let sel = Selection::linear(4); + + expr.eval(&mut input, &mut ExpressionState::empty(), sel, &mut out) + .unwrap(); + + let expected = Array::try_from_iter([1, 2, 3, 4]).unwrap(); + assert_arrays_eq(&expected, &out); } - fn from_proto_ctx(proto: Self::ProtoType, _context: &DatabaseContext) -> Result { - Ok(Self { - idx: proto.idx as usize, - }) + #[test] + fn column_expr_eval_with_selection() { + let mut input = Batch::try_from_arrays( + [ + Array::try_from_iter(["a", "b", "c", "d"]).unwrap(), + Array::try_from_iter([1, 2, 3, 4]).unwrap(), + ], + true, + ) + .unwrap(); + + let expr = PhysicalColumnExpr { + idx: 1, + datatype: DataType::Int32, + }; + let mut state = expr.create_state(4).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 4).unwrap(); + let sel = Selection::selection(&[1, 3]); + + expr.eval(&mut input, &mut state, sel, &mut out).unwrap(); + + let expected = Array::try_from_iter([2, 4]).unwrap(); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/expr/physical/evaluator.rs b/crates/rayexec_execution/src/expr/physical/evaluator.rs new file mode 100644 index 000000000..4eb896769 --- /dev/null +++ b/crates/rayexec_execution/src/expr/physical/evaluator.rs @@ -0,0 +1,140 @@ +use rayexec_error::{RayexecError, Result}; + +use super::PhysicalScalarExpression; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::scalar::OwnedScalarValue; + +/// Evaluate expressions on batch inputs. +#[derive(Debug)] +pub struct ExpressionEvaluator { + expressions: Vec, + states: Vec, +} + +#[derive(Debug)] +pub(crate) struct ExpressionState { + /// Buffer for writing intermediate results. + pub(crate) buffer: Batch, + /// Child states for expressions that contain other input expressions. 
+    pub(crate) inputs: Vec<ExpressionState>,
+}
+
+impl ExpressionState {
+    pub(crate) const fn empty() -> Self {
+        ExpressionState {
+            buffer: Batch::empty(),
+            inputs: Vec::new(),
+        }
+    }
+}
+
+impl ExpressionEvaluator {
+    pub fn try_new(expressions: Vec<PhysicalScalarExpression>, batch_size: usize) -> Result<Self> {
+        let states = expressions
+            .iter()
+            .map(|expr| expr.create_state(batch_size))
+            .collect::<Result<Vec<_>>>()?;
+
+        Ok(ExpressionEvaluator {
+            expressions,
+            states,
+        })
+    }
+
+    pub fn num_expressions(&self) -> usize {
+        self.expressions.len()
+    }
+
+    pub fn try_eval_constant(&mut self) -> Result<OwnedScalarValue> {
+        if self.expressions.len() != 1 {
+            return Err(RayexecError::new(
+                "Single expression for constant eval required",
+            ));
+        }
+
+        let expr = &self.expressions[0];
+        let state = &mut self.states[0];
+
+        let mut input = Batch::empty_with_num_rows(1);
+        let mut out = Array::new(&NopBufferManager, expr.datatype(), 1)?;
+
+        Self::eval_expression(expr, &mut input, state, Selection::linear(1), &mut out)?;
+
+        let v = out.get_value(0)?;
+
+        Ok(v.into_owned())
+    }
+
+    /// Evaluate the expression on an input batch, writing the results to the
+    /// output batch.
+    ///
+    /// Output batch must contain the same number of arrays as expressions in
+    /// this evaluator. Arrays will be written to in the same order as the
+    /// expressions.
+    ///
+    /// `input` is mutable only to allow converting arrays from owned to
+    /// managed.
+    ///
+    /// `output` will have num rows set to the number of logical rows in the
+    /// selection.
+    pub fn eval_batch(
+        &mut self,
+        input: &mut Batch,
+        sel: Selection,
+        output: &mut Batch,
+    ) -> Result<()> {
+        debug_assert_eq!(self.expressions.len(), output.arrays().len());
+
+        for (idx, expr) in self.expressions.iter().enumerate() {
+            let output = &mut output.arrays_mut()[idx];
+            let state = &mut self.states[idx];
+
+            Self::eval_expression(expr, input, state, sel, output)?;
+        }
+
+        output.set_num_rows(sel.len())?;
+
+        Ok(())
+    }
+
+    pub fn eval_single_expression(
+        &mut self,
+        input: &mut Batch,
+        sel: Selection,
+        output: &mut Array,
+    ) -> Result<()> {
+        debug_assert_eq!(1, self.expressions.len());
+        Self::eval_expression(
+            &self.expressions[0],
+            input,
+            &mut self.states[0],
+            sel,
+            output,
+        )
+    }
+
+    pub(crate) fn eval_expression(
+        expr: &PhysicalScalarExpression,
+        input: &mut Batch,
+        state: &mut ExpressionState,
+        sel: Selection,
+        output: &mut Array,
+    ) -> Result<()> {
+        // TODO: Figure out how the manager will be threaded down. Might just
+        // keep it on the array/buffer/batch/something else. We might need
+        // `Arc` here, ideally the buffer reuse prevents us from
+        // needing to call into it often.
+        output.reset_for_write(&NopBufferManager)?;
+
+        match expr {
+            PhysicalScalarExpression::Column(expr) => expr.eval(input, state, sel, output),
+            PhysicalScalarExpression::Case(expr) => expr.eval(input, state, sel, output),
+            PhysicalScalarExpression::Cast(expr) => expr.eval(input, state, sel, output),
+            PhysicalScalarExpression::Literal(expr) => expr.eval(input, state, sel, output),
+            PhysicalScalarExpression::ScalarFunction(expr) => expr.eval(input, state, sel, output),
+        }
+    }
+}
diff --git a/crates/rayexec_execution/src/expr/physical/literal_expr.rs b/crates/rayexec_execution/src/expr/physical/literal_expr.rs
index 6f64288fd..96314c7d9 100644
--- a/crates/rayexec_execution/src/expr/physical/literal_expr.rs
+++ b/crates/rayexec_execution/src/expr/physical/literal_expr.rs
@@ -1,11 +1,14 @@
-use std::borrow::Cow;
 use std::fmt;

 use rayexec_error::{OptionExt, Result};
 use rayexec_proto::ProtoConv;

-use crate::arrays::array::Array;
-use crate::arrays::batch::Batch;
+use super::evaluator::ExpressionState;
+use crate::arrays::array::exp::Array;
+use crate::arrays::array::selection::Selection;
+use crate::arrays::batch_exp::Batch;
+use crate::arrays::buffer::buffer_manager::NopBufferManager;
+use crate::arrays::datatype::DataType;
 use crate::arrays::scalar::OwnedScalarValue;
 use crate::database::DatabaseContext;
 use crate::proto::DatabaseProtoConv;
@@ -16,9 +19,27 @@ pub struct PhysicalLiteralExpr {
 }

 impl PhysicalLiteralExpr {
-    pub fn eval<'a>(&self, batch: &'a Batch) -> Result> {
-        let arr = self.literal.as_array(batch.num_rows())?;
-        Ok(Cow::Owned(arr))
+    pub(crate) fn create_state(&self, _batch_size: usize) -> Result<ExpressionState> {
+        Ok(ExpressionState::empty())
+    }
+
+    pub fn datatype(&self) -> DataType {
+        self.literal.datatype()
+    }
+
+    pub(crate) fn eval(
+        &self,
+        _: &mut Batch,
+        _: &mut ExpressionState,
+        sel: Selection,
+        output: &mut Array,
+    ) -> Result<()> {
+        output.set_value(0, &self.literal)?;
+
+        // TODO: Need to be able to provide "constant" selection here.
+ output.select(&NopBufferManager, std::iter::repeat(0).take(sel.len()))?; + + Ok(()) } } @@ -43,3 +64,54 @@ impl DatabaseProtoConv for PhysicalLiteralExpr { }) } } + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; + + #[test] + fn literal_eval() { + let mut input = Batch::empty_with_num_rows(4); + + let expr = PhysicalLiteralExpr { + literal: "catdog".into(), + }; + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 4).unwrap(); + expr.eval( + &mut input, + &mut ExpressionState::empty(), + Selection::linear(4), + &mut out, + ) + .unwrap(); + + let expected = Array::try_from_iter(["catdog", "catdog", "catdog", "catdog"]).unwrap(); + assert_arrays_eq(&expected, &out); + } + + #[test] + fn literal_eval_with_selection() { + let mut input = Batch::empty_with_num_rows(4); + + let expr = PhysicalLiteralExpr { + literal: "catdog".into(), + }; + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 4).unwrap(); + expr.eval( + &mut input, + &mut ExpressionState::empty(), + Selection::selection(&[2, 3]), + &mut out, + ) + .unwrap(); + + let expected = Array::try_from_iter(["catdog", "catdog"]).unwrap(); + assert_arrays_eq(&expected, &out); + } +} diff --git a/crates/rayexec_execution/src/expr/physical/mod.rs b/crates/rayexec_execution/src/expr/physical/mod.rs index 223561832..3bd1ff7b8 100644 --- a/crates/rayexec_execution/src/expr/physical/mod.rs +++ b/crates/rayexec_execution/src/expr/physical/mod.rs @@ -1,3 +1,4 @@ +pub mod evaluator; pub mod planner; pub mod case_expr; @@ -12,12 +13,14 @@ use std::fmt; use case_expr::PhysicalCaseExpr; use cast_expr::PhysicalCastExpr; use column_expr::PhysicalColumnExpr; +use evaluator::ExpressionState; use literal_expr::PhysicalLiteralExpr; use rayexec_error::{not_implemented, OptionExt, Result}; use scalar_function_expr::PhysicalScalarFunctionExpr; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; +use crate::arrays::datatype::DataType; use crate::arrays::executor::scalar::SelectExecutor; use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; @@ -34,22 +37,43 @@ pub enum PhysicalScalarExpression { } impl PhysicalScalarExpression { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub(crate) fn create_state(&self, batch_size: usize) -> Result { match self { - Self::Case(e) => e.eval(batch), - Self::Cast(e) => e.eval(batch), - Self::Column(e) => e.eval(batch), - Self::Literal(e) => e.eval(batch), - Self::ScalarFunction(e) => e.eval(batch), + Self::Case(expr) => expr.create_state(batch_size), + Self::Cast(expr) => expr.create_state(batch_size), + Self::Column(expr) => expr.create_state(batch_size), + Self::Literal(expr) => expr.create_state(batch_size), + Self::ScalarFunction(expr) => expr.create_state(batch_size), } } + pub fn datatype(&self) -> DataType { + match self { + Self::Case(expr) => expr.datatype(), + Self::Cast(expr) => expr.datatype(), + Self::Column(expr) => expr.datatype(), + Self::Literal(expr) => expr.datatype(), + Self::ScalarFunction(expr) => expr.datatype(), + } + } + + pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { + unimplemented!() + // match self { + // Self::Case(e) => e.eval2(batch), + // Self::Cast(e) => e.eval2(batch), + // Self::Column(e) => e.eval2(batch), + // Self::Literal(e) => e.eval2(batch), + // Self::ScalarFunction(e) => e.eval2(batch), + // } + 
} + /// Produce a selection vector for the batch using this expression. /// /// The selection vector will include row indices where the expression /// evaluates to true. - pub fn select(&self, batch: &Batch) -> Result { - let selected = self.eval(batch)?; + pub fn select(&self, batch: &Batch2) -> Result { + let selected = self.eval2(batch)?; let mut selection = SelectionVector::with_capacity(selected.logical_len()); SelectExecutor::select(&selected, &mut selection)?; @@ -186,67 +210,3 @@ impl DatabaseProtoConv for PhysicalSortExpression { }) } } - -#[cfg(test)] -mod tests { - use planner::PhysicalExpressionPlanner; - - use super::*; - use crate::arrays::datatype::DataType; - use crate::expr; - use crate::logical::binder::table_list::TableList; - - #[test] - fn select_some() { - let batch = Batch::try_new([ - Array::from_iter([1, 4, 6, 9, 12]), - Array::from_iter([2, 3, 8, 9, 10]), - ]) - .unwrap(); - - let mut table_list = TableList::empty(); - let table_ref = table_list - .push_table( - None, - vec![DataType::Int32, DataType::Int32], - vec!["a".to_string(), "b".to_string()], - ) - .unwrap(); - - let expr = expr::gt(expr::col_ref(table_ref, 0), expr::col_ref(table_ref, 1)); - let planner = PhysicalExpressionPlanner::new(&table_list); - let physical = planner.plan_scalar(&[table_ref], &expr).unwrap(); - - let selection = physical.select(&batch).unwrap(); - let expected = SelectionVector::from_iter([1, 4]); - - assert_eq!(expected, selection) - } - - #[test] - fn select_none() { - let batch = Batch::try_new([ - Array::from_iter([1, 2, 6, 9, 9]), - Array::from_iter([2, 3, 8, 9, 10]), - ]) - .unwrap(); - - let mut table_list = TableList::empty(); - let table_ref = table_list - .push_table( - None, - vec![DataType::Int32, DataType::Int32], - vec!["a".to_string(), "b".to_string()], - ) - .unwrap(); - - let expr = expr::gt(expr::col_ref(table_ref, 0), expr::col_ref(table_ref, 1)); - let planner = PhysicalExpressionPlanner::new(&table_list); - let physical = planner.plan_scalar(&[table_ref], &expr).unwrap(); - - let selection = physical.select(&batch).unwrap(); - let expected = SelectionVector::empty(); - - assert_eq!(expected, selection) - } -} diff --git a/crates/rayexec_execution/src/expr/physical/planner.rs b/crates/rayexec_execution/src/expr/physical/planner.rs index cf40aed00..e98f77324 100644 --- a/crates/rayexec_execution/src/expr/physical/planner.rs +++ b/crates/rayexec_execution/src/expr/physical/planner.rs @@ -64,10 +64,15 @@ impl<'a> PhysicalExpressionPlanner<'a> { let mut offset = 0; for &table_ref in table_refs { let table = self.table_list.get(table_ref)?; + let datatype = + table.column_types.get(col.column).cloned().ok_or_else(|| { + RayexecError::new(format!("Missing column: {}", col.column)) + })?; if col.table_scope == table_ref { return Ok(PhysicalScalarExpression::Column(PhysicalColumnExpr { idx: offset + col.column, + datatype, })); } @@ -176,7 +181,7 @@ impl<'a> PhysicalExpressionPlanner<'a> { let else_expr = match &expr.else_expr { Some(else_expr) => self.plan_scalar(table_refs, else_expr)?, None => PhysicalScalarExpression::Cast(PhysicalCastExpr { - to: datatype, + to: datatype.clone(), expr: Box::new(PhysicalScalarExpression::Literal(PhysicalLiteralExpr { literal: ScalarValue::Null, })), @@ -186,6 +191,7 @@ impl<'a> PhysicalExpressionPlanner<'a> { Ok(PhysicalScalarExpression::Case(PhysicalCaseExpr { cases, else_expr: Box::new(else_expr), + datatype, })) } other => Err(RayexecError::new(format!( diff --git 
a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs index 676f6122f..3d9208666 100644 --- a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs @@ -1,13 +1,16 @@ -use std::borrow::Cow; use std::fmt; use fmtutil::IntoDisplayableSlice; use rayexec_error::Result; -use super::PhysicalScalarExpression; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use super::{ExpressionState, PhysicalScalarExpression}; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::datatype::DataType; use crate::database::DatabaseContext; +use crate::expr::physical::evaluator::ExpressionEvaluator; use crate::functions::scalar::PlannedScalarFunction; use crate::proto::DatabaseProtoConv; @@ -18,27 +21,47 @@ pub struct PhysicalScalarFunctionExpr { } impl PhysicalScalarFunctionExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub(crate) fn create_state(&self, batch_size: usize) -> Result { let inputs = self .inputs .iter() - .map(|input| input.eval(batch)) + .map(|input| input.create_state(batch_size)) .collect::>>()?; - let refs: Vec<_> = inputs.iter().map(|a| a.as_ref()).collect(); // Can I not? - let mut out = self.function.function_impl.execute(&refs)?; + let arrays = self + .inputs + .iter() + .map(|input| Array::new(&NopBufferManager, input.datatype(), batch_size)) + .collect::>>()?; - // If function is provided no input, it's expected to return an - // array of length 1. We extend the array here so that it's the - // same size as the rest. - // - // TODO: Could just extend the selection vector too. - if refs.is_empty() { - let scalar = out.logical_value(0)?; - out = scalar.as_array(batch.num_rows())?; + let buffer = Batch::try_from_arrays(arrays, false)?; + + Ok(ExpressionState { buffer, inputs }) + } + + pub fn datatype(&self) -> DataType { + self.function.return_type.clone() + } + + pub(crate) fn eval( + &self, + input: &mut Batch, + state: &mut ExpressionState, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + // Eval children. + for (child_idx, array) in state.buffer.arrays_mut().iter_mut().enumerate() { + let expr = &self.inputs[child_idx]; + let child_state = &mut state.inputs[child_idx]; + ExpressionEvaluator::eval_expression(expr, input, child_state, sel, array)?; } - Ok(Cow::Owned(out)) + // Eval function with child outputs. 
+ state.buffer.set_num_rows(sel.len())?; + self.function.function_impl.execute(&state.buffer, output)?; + + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs index 205107c09..28eaaca04 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs @@ -6,18 +6,17 @@ use num_traits::AsPrimitive; use rayexec_error::Result; use serde::{Deserialize, Serialize}; -use crate::arrays::array::Array; -use crate::arrays::bitmap::Bitmap; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalF64, PhysicalI64}; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64, PhysicalI64}; +use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_unary_aggregate_states, - primitive_finalize, + drain, + unary_update, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -134,42 +133,23 @@ where D::Primitive: Into, { fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); - - let state_finalize = move |states: &mut [AvgStateDecimal]| { - let mut builder = ArrayBuilder { - datatype: DataType::Float64, - buffer: PrimitiveBuffer::with_len(states.len()), - }; - - let mut validities = Bitmap::new_with_all_true(states.len()); - - let m = datatype.clone().try_get_decimal_type_meta()?; - let scale = f64::powi(10.0, m.scale.abs() as i32); - - for (idx, state) in states.iter_mut().enumerate() { - let ((sum, count), valid) = state.finalize()?; - - if !valid { - validities.set_unchecked(idx, false); - continue; - } - - let val = (sum as f64) / (count as f64 * scale); - builder.buffer.put(idx, &val); - } - - Ok(Array::new_with_validity_and_array_data( - builder.datatype, - validities, - builder.buffer.into_data(), - )) - }; - - new_unary_aggregate_states::( - AvgStateDecimal::::default, - state_finalize, - ) + let m = self + .datatype + .try_get_decimal_type_meta() + .unwrap_or(DecimalTypeMeta::new(D::MAX_PRECISION, D::DEFAULT_SCALE)); // TODO: Should rework to return the error instead. 
+ + let scale = f64::powi(10.0, m.scale.abs() as i32); + + Box::new(TypedAggregateGroupStates::new( + move || AvgStateDecimal:: { + scale, + sum: 0, + count: 0, + _input: PhantomData, + }, + unary_update::, + drain::, + )) } } @@ -178,10 +158,11 @@ pub struct AvgFloat64Impl; impl AggregateFunctionImpl for AvgFloat64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( AvgStateF64::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } @@ -190,38 +171,52 @@ pub struct AvgInt64Impl; impl AggregateFunctionImpl for AvgInt64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( AvgStateF64::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } -#[derive(Debug, Default)] +#[derive(Debug)] struct AvgStateDecimal { + /// Scale to use when finalizing the physical decimal value. + scale: f64, sum: i128, count: i64, _input: PhantomData, } -impl + Default + Debug> AggregateState for AvgStateDecimal { +impl AggregateState<&I, f64> for AvgStateDecimal +where + I: Into + Copy + Debug, +{ fn merge(&mut self, other: &mut Self) -> Result<()> { self.sum += other.sum; self.count += other.count; Ok(()) } - fn update(&mut self, input: I) -> Result<()> { + fn update(&mut self, &input: &I) -> Result<()> { self.sum += input.into(); self.count += 1; Ok(()) } - fn finalize(&mut self) -> Result<((i128, i64), bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.count == 0 { - return Ok(((0, 0), false)); + output.put_null(); + return Ok(()); } - Ok(((self.sum, self.count), true)) + + let val = (self.sum as f64) / (self.count as f64 * self.scale); + output.put(&val); + + Ok(()) } } @@ -232,9 +227,9 @@ struct AvgStateF64 { _input: PhantomData, } -impl AggregateState for AvgStateF64 +impl AggregateState<&I, f64> for AvgStateF64 where - I: Into + Default + Debug, + I: Into + Copy + Default + Debug, T: AsPrimitive + AddAssign + Debug + Default, { fn merge(&mut self, other: &mut Self) -> Result<()> { @@ -243,17 +238,23 @@ where Ok(()) } - fn update(&mut self, input: I) -> Result<()> { + fn update(&mut self, &input: &I) -> Result<()> { self.sum += input.into(); self.count += 1; Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.count == 0 { - return Ok((0.0, false)); + output.put_null(); + return Ok(()); } let sum: f64 = self.sum.as_(); - Ok((sum / self.count as f64, true)) + output.put(&sum); + + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs index ad557baf5..34cb40c62 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs @@ -4,14 +4,16 @@ use rayexec_error::Result; use super::covar::{CovarPopFinalize, CovarState}; use super::stddev::{StddevPopFinalize, VarianceState}; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use 
crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states, - primitive_finalize, + binary_update, + drain, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -74,10 +76,11 @@ pub struct CorrImpl; impl AggregateFunctionImpl for CorrImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( CorrelationState::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } @@ -88,7 +91,25 @@ pub struct CorrelationState { stddev_y: VarianceState, } -impl AggregateState<(f64, f64), f64> for CorrelationState { +impl CorrelationState { + pub fn finalize_value(&self) -> Option { + let cov = self.covar.finalize_value()?; + let stddev_x = self.stddev_x.finalize_value()?; + let stddev_y = self.stddev_y.finalize_value()?; + + let div = stddev_x * stddev_y; + if div == 0.0 { + // Return null, matches Postgres. + // + // Note duckdb returns NaN here. + return None; + } + + Some(cov / div) + } +} + +impl AggregateState<(&f64, &f64), f64> for CorrelationState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.covar.merge(&mut other.covar)?; self.stddev_x.merge(&mut other.stddev_x)?; @@ -96,7 +117,7 @@ impl AggregateState<(f64, f64), f64> for CorrelationState { Ok(()) } - fn update(&mut self, input: (f64, f64)) -> Result<()> { + fn update(&mut self, input: (&f64, &f64)) -> Result<()> { self.covar.update(input)?; // Note input is passed in as (y, x) @@ -106,23 +127,15 @@ impl AggregateState<(f64, f64), f64> for CorrelationState { Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { - let (cov, cov_valid) = self.covar.finalize()?; - let (stddev_x, stddev_x_valid) = self.stddev_x.finalize()?; - let (stddev_y, stddev_y_valid) = self.stddev_y.finalize()?; - - if cov_valid && stddev_x_valid && stddev_y_valid { - let div = stddev_x * stddev_y; - if div == 0.0 { - // Matches Postgres. - // - // Note duckdb returns NaN here. 
- return Ok((0.0, false)); - } - Ok((cov / div, true)) - } else { - Ok((0.0, false)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match self.finalize_value() { + Some(val) => output.put(&val), + None => output.put_null(), } + Ok(()) } } @@ -133,9 +146,9 @@ mod tests { #[test] fn correlation_state_single_input() { let mut state = CorrelationState::default(); - state.update((1.0, 1.0)).unwrap(); + state.update((&1.0, &1.0)).unwrap(); - let (_v, valid) = state.finalize().unwrap(); - assert!(!valid); + let v = state.finalize_value(); + assert_eq!(None, v); } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs index 68761f921..25ff22b55 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs @@ -1,13 +1,41 @@ -use rayexec_error::Result; +use std::marker::PhantomData; +use rayexec_error::{not_implemented, Result}; + +use crate::arrays::buffer::physical_type::{ + AddressableMut, + PhysicalBinary, + PhysicalBool, + PhysicalDictionary, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalInterval, + PhysicalList, + PhysicalStorage, + PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUntypedNull, + PhysicalUtf8, +}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalAny; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::{self, Expression}; use crate::functions::aggregate::states::{ - new_unary_aggregate_states, - primitive_finalize, + drain, + unary_update, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -28,7 +56,7 @@ impl Count { function: Box::new(*self), return_type: DataType::Int64, inputs: vec![expr::lit(true)], - function_impl: Box::new(CountNonNullImpl), + function_impl: Box::new(CountNonNullImpl::::new()), } } } @@ -56,50 +84,96 @@ impl FunctionInfo for Count { impl AggregateFunction for Count { fn plan( &self, - _table_list: &TableList, + table_list: &TableList, inputs: Vec, ) -> Result { plan_check_num_args(self, &inputs, 1)?; + let function_impl: Box = match inputs[0] + .datatype(table_list)? 
+ .physical_type() + { + PhysicalType::UntypedNull => Box::new(CountNonNullImpl::::new()), + PhysicalType::Boolean => Box::new(CountNonNullImpl::::new()), + PhysicalType::Int8 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Int16 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Int32 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Int64 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Int128 => Box::new(CountNonNullImpl::::new()), + PhysicalType::UInt8 => Box::new(CountNonNullImpl::::new()), + PhysicalType::UInt16 => Box::new(CountNonNullImpl::::new()), + PhysicalType::UInt32 => Box::new(CountNonNullImpl::::new()), + PhysicalType::UInt64 => Box::new(CountNonNullImpl::::new()), + PhysicalType::UInt128 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Float16 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Float32 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Float64 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Interval => Box::new(CountNonNullImpl::::new()), + PhysicalType::Utf8 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Binary => Box::new(CountNonNullImpl::::new()), + PhysicalType::Dictionary => Box::new(CountNonNullImpl::::new()), + PhysicalType::List => Box::new(CountNonNullImpl::::new()), + PhysicalType::Struct => not_implemented!("count struct"), + }; + Ok(PlannedAggregateFunction { function: Box::new(*self), return_type: DataType::Int64, inputs, - function_impl: Box::new(CountNonNullImpl), + function_impl, }) } } #[derive(Debug, Clone)] -pub struct CountNonNullImpl; +pub struct CountNonNullImpl { + _s: PhantomData, +} -impl AggregateFunctionImpl for CountNonNullImpl { +impl CountNonNullImpl { + const fn new() -> Self { + CountNonNullImpl { _s: PhantomData } + } +} + +impl AggregateFunctionImpl for CountNonNullImpl +where + S: PhysicalStorage, +{ fn new_states(&self) -> Box { - new_unary_aggregate_states::( - CountNonNullState::default, - move |states| primitive_finalize(DataType::Int64, states), - ) + Box::new(TypedAggregateGroupStates::new( + CountNonNullState::::default, + unary_update::, + drain::, + )) } } #[derive(Debug, Default)] -pub struct CountNonNullState { +pub struct CountNonNullState { count: i64, + _s: PhantomData, } -impl AggregateState<(), i64> for CountNonNullState { +impl AggregateState<&S::StorageType, i64> for CountNonNullState +where + S: PhysicalStorage, +{ fn merge(&mut self, other: &mut Self) -> Result<()> { self.count += other.count; Ok(()) } - fn update(&mut self, _input: ()) -> Result<()> { + fn update(&mut self, _input: &S::StorageType) -> Result<()> { self.count += 1; Ok(()) } - fn finalize(&mut self) -> Result<(i64, bool)> { - // Always valid, even when count is 0 - Ok((self.count, true)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + output.put(&self.count); + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs index 7de84c41f..c03c14162 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs @@ -3,14 +3,16 @@ use std::marker::PhantomData; use rayexec_error::Result; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use 
crate::arrays::executor_exp::aggregate::AggregateState;
+use crate::arrays::executor_exp::PutBuffer;
 use crate::expr::Expression;
 use crate::functions::aggregate::states::{
-    new_binary_aggregate_states,
-    primitive_finalize,
+    binary_update,
+    drain,
     AggregateGroupStates,
+    TypedAggregateGroupStates,
 };
 use crate::functions::aggregate::{
     AggregateFunction,
@@ -72,10 +74,11 @@ pub struct CovarPopImpl;

 impl AggregateFunctionImpl for CovarPopImpl {
     fn new_states(&self) -> Box {
-        new_binary_aggregate_states::(
+        Box::new(TypedAggregateGroupStates::new(
             CovarState::::default,
-            move |states| primitive_finalize(DataType::Float64, states),
-        )
+            binary_update::,
+            drain::,
+        ))
     }
 }

@@ -130,25 +133,26 @@ pub struct CovarSampImpl;

 impl AggregateFunctionImpl for CovarSampImpl {
     fn new_states(&self) -> Box {
-        new_binary_aggregate_states::(
+        Box::new(TypedAggregateGroupStates::new(
             CovarState::::default,
-            move |states| primitive_finalize(DataType::Float64, states),
-        )
+            binary_update::,
+            drain::,
+        ))
     }
 }

 pub trait CovarFinalize: Sync + Send + Debug + Default + 'static {
-    fn finalize(co_moment: f64, count: i64) -> (f64, bool);
+    fn finalize(co_moment: f64, count: i64) -> Option<f64>;
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
 pub struct CovarSampFinalize;

 impl CovarFinalize for CovarSampFinalize {
-    fn finalize(co_moment: f64, count: i64) -> (f64, bool) {
+    fn finalize(co_moment: f64, count: i64) -> Option<f64> {
         match count {
-            0 | 1 => (0.0, false),
-            _ => (co_moment / (count - 1) as f64, true),
+            0 | 1 => None,
+            _ => Some(co_moment / (count - 1) as f64),
         }
     }
 }
@@ -157,10 +161,10 @@ impl CovarFinalize for CovarSampFinalize {
 pub struct CovarPopFinalize;

 impl CovarFinalize for CovarPopFinalize {
-    fn finalize(co_moment: f64, count: i64) -> (f64, bool) {
+    fn finalize(co_moment: f64, count: i64) -> Option<f64> {
         match count {
-            0 => (0.0, false),
-            _ => (co_moment / count as f64, true),
+            0 => None,
+            _ => Some(co_moment / count as f64),
         }
     }
 }
@@ -174,7 +178,16 @@ pub struct CovarState {
     _finalize: PhantomData,
 }

-impl AggregateState<(f64, f64), f64> for CovarState
+impl<F> CovarState<F>
+where
+    F: CovarFinalize,
+{
+    pub fn finalize_value(&self) -> Option<f64> {
+        F::finalize(self.co_moment, self.count)
+    }
+}
+
+impl AggregateState<(&f64, &f64), f64> for CovarState<F>
 where
     F: CovarFinalize,
 {
@@ -206,11 +219,8 @@ where
         Ok(())
     }

-    fn update(&mut self, input: (f64, f64)) -> Result<()> {
-        // Note that 'y' comes first, covariance functions are call like `COVAR_SAMP(y, x)`.
-        let x = input.1;
-        let y = input.0;
-
+    // Note that 'y' comes first, covariance functions are called like `COVAR_SAMP(y, x)`.
+ fn update(&mut self, (&y, &x): (&f64, &f64)) -> Result<()> { self.count += 1; let n = self.count as f64; @@ -229,7 +239,14 @@ where Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { - Ok(F::finalize(self.co_moment, self.count)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match F::finalize(self.co_moment, self.count) { + Some(val) => output.put(&val), + None => output.put_null(), + } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs index d15fcc709..12192f7da 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs @@ -1,14 +1,11 @@ use std::fmt::Debug; use std::marker::PhantomData; -use half::f16; use rayexec_error::{not_implemented, Result}; -use crate::arrays::array::ArrayData; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::{AggregateState, StateFinalizer}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::buffer::physical_type::{ + AddressableMut, + MutablePhysicalStorage, PhysicalBinary, PhysicalBool, PhysicalF16, @@ -20,7 +17,6 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, - PhysicalStorage, PhysicalType, PhysicalU128, PhysicalU16, @@ -28,16 +24,17 @@ use crate::arrays::executor::physical_type::{ PhysicalU64, PhysicalU8, PhysicalUntypedNull, + PhysicalUtf8, }; -use crate::arrays::scalar::interval::Interval; -use crate::arrays::storage::{PrimitiveStorage, UntypedNull}; +use crate::arrays::datatype::DataTypeId; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - boolean_finalize, - new_unary_aggregate_states, - primitive_finalize, - untyped_null_finalize, + drain, + unary_update, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -81,61 +78,26 @@ impl AggregateFunction for First { let datatype = inputs[0].datatype(table_list)?; - let function_impl: Box = match datatype.physical_type()? 
{ - PhysicalType::UntypedNull => Box::new(FirstUntypedNullImpl), - PhysicalType::Boolean => Box::new(FirstBoolImpl), - PhysicalType::Float16 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::Float32 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::Float64 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::Int8 => { - Box::new(FirstPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::Int16 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::Int32 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::Int64 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::Int128 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::UInt8 => { - Box::new(FirstPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::UInt16 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::UInt32 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::UInt64 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::UInt128 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::Interval => Box::new( - FirstPrimitiveImpl::::new(datatype.clone()), - ), - PhysicalType::Binary => Box::new(FirstBinaryImpl { - datatype: datatype.clone(), - }), - PhysicalType::Utf8 => Box::new(FirstBinaryImpl { - datatype: datatype.clone(), - }), - PhysicalType::List => { - // TODO: Easy, clone underlying array and select. - not_implemented!("FIRST for list arrays") - } + let function_impl: Box = match datatype.physical_type() { + PhysicalType::UntypedNull => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Boolean => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Int8 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Int16 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Int32 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Int64 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Int128 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::UInt8 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::UInt16 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::UInt32 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::UInt64 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::UInt128 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Float16 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Float32 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Float64 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Interval => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Utf8 => Box::new(FirstStringImpl), + PhysicalType::Binary => Box::new(FirstBinaryImpl), + other => not_implemented!("FIRST for physical type: {other}"), }; Ok(PlannedAggregateFunction { @@ -147,140 +109,152 @@ impl AggregateFunction for First { } } -/// FIRST aggregate impl for utf8 and binary. 
-#[derive(Debug, Clone)] -pub struct FirstBinaryImpl { - datatype: DataType, +#[derive(Debug, Clone, Copy)] +pub struct FirstPrimitiveImpl { + _s: PhantomData, } -impl AggregateFunctionImpl for FirstBinaryImpl { - fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); +impl FirstPrimitiveImpl { + const fn new() -> Self { + FirstPrimitiveImpl { _s: PhantomData } + } +} - new_unary_aggregate_states::( - FirstStateBinary::default, - move |states| { - let builder = ArrayBuilder { - datatype: datatype.clone(), - buffer: GermanVarlenBuffer::<[u8]>::with_len(states.len()), - }; - StateFinalizer::finalize(states, builder) - }, - ) +impl AggregateFunctionImpl for FirstPrimitiveImpl +where + S: MutablePhysicalStorage, + S::StorageType: Debug + Default + Copy, +{ + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + FirstPrimitiveState::::default, + unary_update::, + drain::, + )) } } -#[derive(Debug, Clone)] -pub struct FirstUntypedNullImpl; +#[derive(Debug, Clone, Copy)] +pub struct FirstBinaryImpl; -impl AggregateFunctionImpl for FirstUntypedNullImpl { +impl AggregateFunctionImpl for FirstBinaryImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( - FirstState::::default, - untyped_null_finalize, - ) + Box::new(TypedAggregateGroupStates::new( + FirstBinaryState::default, + unary_update::, + drain::, + )) } } -#[derive(Debug, Clone)] -pub struct FirstBoolImpl; +#[derive(Debug, Clone, Copy)] +pub struct FirstStringImpl; -impl AggregateFunctionImpl for FirstBoolImpl { +impl AggregateFunctionImpl for FirstStringImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( - FirstState::::default, - move |states| boolean_finalize(DataType::Boolean, states), - ) + Box::new(TypedAggregateGroupStates::new( + FirstStringState::default, + unary_update::, + drain::, + )) } } -// TODO: Remove T -#[derive(Debug, Clone)] -pub struct FirstPrimitiveImpl { - datatype: DataType, - _s: PhantomData, - _t: PhantomData, +#[derive(Debug, Default)] +pub struct FirstPrimitiveState { + value: Option, } -impl FirstPrimitiveImpl { - fn new(datatype: DataType) -> Self { - FirstPrimitiveImpl { - datatype, - _s: PhantomData, - _t: PhantomData, +impl AggregateState<&T, T> for FirstPrimitiveState +where + T: Debug + Default + Copy, +{ + fn merge(&mut self, other: &mut Self) -> Result<()> { + if self.value.is_none() { + std::mem::swap(&mut self.value, &mut other.value); } + Ok(()) } -} -impl AggregateFunctionImpl for FirstPrimitiveImpl -where - for<'a> S: PhysicalStorage = T>, - T: Copy + Debug + Default + Sync + Send + 'static, - ArrayData: From>, -{ - fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); + fn update(&mut self, &input: &T) -> Result<()> { + if self.value.is_none() { + self.value = Some(input); + } + Ok(()) + } - new_unary_aggregate_states::(FirstState::::default, move |states| { - primitive_finalize(datatype.clone(), states) - }) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match &self.value { + Some(val) => output.put(val), + None => output.put_null(), + } + Ok(()) } } #[derive(Debug, Default)] -pub struct FirstState { - value: Option, +pub struct FirstBinaryState { + value: Option>, } -impl AggregateState for FirstState { +impl AggregateState<&[u8], [u8]> for FirstBinaryState { fn merge(&mut self, other: &mut Self) -> Result<()> { if self.value.is_none() { - self.value = other.value; - return Ok(()); + std::mem::swap(&mut self.value, &mut other.value); } Ok(()) } - fn update(&mut self, 
input: T) -> Result<()> { + fn update(&mut self, input: &[u8]) -> Result<()> { if self.value.is_none() { - self.value = Some(input); + self.value = Some(input.to_vec()); } Ok(()) } - fn finalize(&mut self) -> Result<(T, bool)> { - match self.value { - Some(v) => Ok((v, true)), - None => Ok((T::default(), false)), + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match &self.value { + Some(val) => output.put(val), + None => output.put_null(), } + Ok(()) } } #[derive(Debug, Default)] -pub struct FirstStateBinary { - value: Option>, +pub struct FirstStringState { + value: Option, } -impl AggregateState<&[u8], Vec> for FirstStateBinary { +impl AggregateState<&str, str> for FirstStringState { fn merge(&mut self, other: &mut Self) -> Result<()> { if self.value.is_none() { std::mem::swap(&mut self.value, &mut other.value); - return Ok(()); } Ok(()) } - fn update(&mut self, input: &[u8]) -> Result<()> { + fn update(&mut self, input: &str) -> Result<()> { if self.value.is_none() { - self.value = Some(input.to_owned()); + self.value = Some(input.to_string()); } Ok(()) } - fn finalize(&mut self) -> Result<(Vec, bool)> { - match self.value.as_mut() { - Some(v) => Ok((std::mem::take(v), true)), - None => Ok((Vec::new(), false)), + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match &self.value { + Some(val) => output.put(val), + None => output.put_null(), } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs index 50793c642..1b242f11a 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs @@ -1,14 +1,11 @@ use std::fmt::Debug; use std::marker::PhantomData; -use half::f16; use rayexec_error::{not_implemented, Result}; -use crate::arrays::array::ArrayData; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::{AggregateState, StateFinalizer}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::buffer::physical_type::{ + AddressableMut, + MutablePhysicalStorage, PhysicalBinary, PhysicalBool, PhysicalF16, @@ -20,7 +17,6 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, - PhysicalStorage, PhysicalType, PhysicalU128, PhysicalU16, @@ -28,16 +24,17 @@ use crate::arrays::executor::physical_type::{ PhysicalU64, PhysicalU8, PhysicalUntypedNull, + PhysicalUtf8, }; -use crate::arrays::scalar::interval::Interval; -use crate::arrays::storage::{PrimitiveStorage, UntypedNull}; +use crate::arrays::datatype::DataTypeId; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - boolean_finalize, - new_unary_aggregate_states, - primitive_finalize, - untyped_null_finalize, + drain, + unary_update, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -81,56 +78,26 @@ impl AggregateFunction for Min { let datatype = inputs[0].datatype(table_list)?; - let function_impl: Box = match datatype.physical_type()? 
{
-            PhysicalType::UntypedNull => Box::new(MinMaxUntypedNull),
-            PhysicalType::Boolean => Box::new(MinBoolImpl::new()),
-            PhysicalType::Float16 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::Float32 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::Float64 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::Int8 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::Int16 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::Int32 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::Int64 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::Int128 => Box::new(MinPrimitiveImpl::::new(
-                datatype.clone(),
-            )),
-            PhysicalType::UInt8 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::UInt16 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::UInt32 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::UInt64 => {
-                Box::new(MinPrimitiveImpl::::new(datatype.clone()))
-            }
-            PhysicalType::UInt128 => Box::new(MinPrimitiveImpl::::new(
-                datatype.clone(),
-            )),
-            PhysicalType::Interval => Box::new(
-                MinPrimitiveImpl::::new(datatype.clone()),
-            ),
-            PhysicalType::Binary => Box::new(MinBinaryImpl::new(datatype.clone())),
-            PhysicalType::Utf8 => Box::new(MinBinaryImpl::new(datatype.clone())),
-            PhysicalType::List => {
-                not_implemented!("MIN for list arrays")
-            }
+        let function_impl: Box<dyn AggregateFunctionImpl> = match datatype.physical_type() {
+            PhysicalType::UntypedNull => Box::new(MinPrimitiveImpl::<PhysicalUntypedNull>::new()),
+            PhysicalType::Boolean => Box::new(MinPrimitiveImpl::<PhysicalBool>::new()),
+            PhysicalType::Int8 => Box::new(MinPrimitiveImpl::<PhysicalI8>::new()),
+            PhysicalType::Int16 => Box::new(MinPrimitiveImpl::<PhysicalI16>::new()),
+            PhysicalType::Int32 => Box::new(MinPrimitiveImpl::<PhysicalI32>::new()),
+            PhysicalType::Int64 => Box::new(MinPrimitiveImpl::<PhysicalI64>::new()),
+            PhysicalType::Int128 => Box::new(MinPrimitiveImpl::<PhysicalI128>::new()),
+            PhysicalType::UInt8 => Box::new(MinPrimitiveImpl::<PhysicalU8>::new()),
+            PhysicalType::UInt16 => Box::new(MinPrimitiveImpl::<PhysicalU16>::new()),
+            PhysicalType::UInt32 => Box::new(MinPrimitiveImpl::<PhysicalU32>::new()),
+            PhysicalType::UInt64 => Box::new(MinPrimitiveImpl::<PhysicalU64>::new()),
+            PhysicalType::UInt128 => Box::new(MinPrimitiveImpl::<PhysicalU128>::new()),
+            PhysicalType::Float16 => Box::new(MinPrimitiveImpl::<PhysicalF16>::new()),
+            PhysicalType::Float32 => Box::new(MinPrimitiveImpl::<PhysicalF32>::new()),
+            PhysicalType::Float64 => Box::new(MinPrimitiveImpl::<PhysicalF64>::new()),
+            PhysicalType::Interval => Box::new(MinPrimitiveImpl::<PhysicalInterval>::new()),
+            PhysicalType::Utf8 => Box::new(MinStringImpl),
+            PhysicalType::Binary => Box::new(MinBinaryImpl),
+            other => not_implemented!("min for type {other:?}"),
         };
         Ok(PlannedAggregateFunction {
@@ -175,56 +142,26 @@ impl AggregateFunction for Max {
         let datatype = inputs[0].datatype(table_list)?;
-        let function_impl: Box = match datatype.physical_type()?
{ - PhysicalType::UntypedNull => Box::new(MinMaxUntypedNull), - PhysicalType::Boolean => Box::new(MaxBoolImpl::new()), - PhysicalType::Float16 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::Float32 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::Float64 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::Int8 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::Int16 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::Int32 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::Int64 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::Int128 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::UInt8 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::UInt16 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::UInt32 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::UInt64 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType::UInt128 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType::Interval => Box::new( - MaxPrimitiveImpl::::new(datatype.clone()), - ), - PhysicalType::Binary => Box::new(MaxBinaryImpl::new(datatype.clone())), - PhysicalType::Utf8 => Box::new(MaxBinaryImpl::new(datatype.clone())), - PhysicalType::List => { - not_implemented!("MAX for list arrays") - } + let function_impl: Box = match datatype.physical_type() { + PhysicalType::UntypedNull => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Boolean => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Int8 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Int16 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Int32 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Int64 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Int128 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::UInt8 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::UInt16 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::UInt32 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::UInt64 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::UInt128 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Float16 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Float32 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Float64 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Interval => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Utf8 => Box::new(MaxStringImpl), + PhysicalType::Binary => Box::new(MaxBinaryImpl), + other => not_implemented!("max for type {other:?}"), }; Ok(PlannedAggregateFunction { @@ -236,288 +173,398 @@ impl AggregateFunction for Max { } } -#[derive(Debug, Clone)] -pub struct MinMaxUntypedNull; - -impl AggregateFunctionImpl for MinMaxUntypedNull { - fn new_states(&self) -> Box { - // Note min vs max doesn't matter. Everything is null. 
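The turbofish arguments in the Min/Max dispatch above were likewise stripped by the rendering (`MinPrimitiveImpl::::new()`). Judging from the per-arm physical types, the new arms presumably read along the lines below; a reconstruction sketch with assumed type arguments, not verbatim code.

let function_impl: Box<dyn AggregateFunctionImpl> = match datatype.physical_type() {
    PhysicalType::Boolean => Box::new(MinPrimitiveImpl::<PhysicalBool>::new()),
    PhysicalType::Int64 => Box::new(MinPrimitiveImpl::<PhysicalI64>::new()),
    PhysicalType::Float64 => Box::new(MinPrimitiveImpl::<PhysicalF64>::new()),
    PhysicalType::Interval => Box::new(MinPrimitiveImpl::<PhysicalInterval>::new()),
    // Var-width types get their own impls since the values are not Copy.
    PhysicalType::Utf8 => Box::new(MinStringImpl),
    PhysicalType::Binary => Box::new(MinBinaryImpl),
    other => not_implemented!("min for type {other:?}"),
};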
- new_unary_aggregate_states::( - MinState::::default, - untyped_null_finalize, - ) - } -} - -pub type MinBinaryImpl = MinMaxBinaryImpl; -pub type MaxBinaryImpl = MinMaxBinaryImpl; - -#[derive(Debug)] -pub struct MinMaxBinaryImpl { - datatype: DataType, - _m: PhantomData, +#[derive(Debug, Clone, Copy)] +pub struct MaxPrimitiveImpl { + _s: PhantomData, } -impl MinMaxBinaryImpl { - fn new(datatype: DataType) -> Self { - MinMaxBinaryImpl { - datatype, - _m: PhantomData, - } +impl MaxPrimitiveImpl { + const fn new() -> Self { + MaxPrimitiveImpl { _s: PhantomData } } } -impl AggregateFunctionImpl for MinMaxBinaryImpl +impl AggregateFunctionImpl for MaxPrimitiveImpl where - M: for<'a> AggregateState<&'a [u8], Vec> + Default + Sync + Send + 'static, + S: MutablePhysicalStorage, + S::StorageType: Default + Debug + Sync + Send + PartialOrd + Copy, { fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); - - new_unary_aggregate_states::(M::default, move |states| { - let builder = ArrayBuilder { - datatype: datatype.clone(), - buffer: GermanVarlenBuffer::<[u8]>::with_len(states.len()), - }; - StateFinalizer::finalize(states, builder) - }) + Box::new(TypedAggregateGroupStates::new( + MaxStatePrimitive::::default, + unary_update::, + drain::, + )) } } -impl Clone for MinMaxBinaryImpl { - fn clone(&self) -> Self { - Self::new(self.datatype.clone()) +#[derive(Debug, Clone, Copy)] +pub struct MaxBinaryImpl; + +impl AggregateFunctionImpl for MaxBinaryImpl { + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + MaxStateBinary::default, + unary_update::, + drain::, + )) } } -pub type MinBoolImpl = MinMaxBoolImpl>; -pub type MaxBoolImpl = MinMaxBoolImpl>; +#[derive(Debug, Clone, Copy)] +pub struct MaxStringImpl; -#[derive(Debug)] -pub struct MinMaxBoolImpl { - _m: PhantomData, +impl AggregateFunctionImpl for MaxStringImpl { + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + MaxStateString::default, + unary_update::, + drain::, + )) + } } -impl MinMaxBoolImpl { - fn new() -> Self { - MinMaxBoolImpl { _m: PhantomData } - } +#[derive(Debug, Default)] +pub struct MaxStatePrimitive { + max: T, + valid: bool, } -impl AggregateFunctionImpl for MinMaxBoolImpl +impl AggregateState<&T, T> for MaxStatePrimitive where - M: AggregateState + Default + Sync + Send + 'static, + T: Debug + Sync + Send + PartialOrd + Copy, { - fn new_states(&self) -> Box { - new_unary_aggregate_states::(M::default, move |states| { - boolean_finalize(DataType::Boolean, states) - }) + fn merge(&mut self, other: &mut Self) -> Result<()> { + if !self.valid { + self.valid = other.valid; + std::mem::swap(&mut self.max, &mut other.max); + return Ok(()); + } + + if self.max.lt(&other.max) { + std::mem::swap(&mut self.max, &mut other.max); + } + + Ok(()) } -} -impl Clone for MinMaxBoolImpl { - fn clone(&self) -> Self { - Self::new() + fn update(&mut self, input: &T) -> Result<()> { + if !self.valid { + self.max = *input; + return Ok(()); + } + + if self.max.lt(input) { + self.max = *input; + } + + Ok(()) } -} -pub type MinPrimitiveImpl = MinMaxPrimitiveImpl, S, T>; -pub type MaxPrimitiveImpl = MinMaxPrimitiveImpl, S, T>; + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + if self.valid { + output.put(&self.max); + } else { + output.put_null(); + } -// TODO: Remove T -#[derive(Debug)] -pub struct MinMaxPrimitiveImpl { - datatype: DataType, - _m: PhantomData, - _s: PhantomData, - _t: PhantomData, + Ok(()) + } } -impl MinMaxPrimitiveImpl { - fn 
new(datatype: DataType) -> Self { - MinMaxPrimitiveImpl { - datatype, - _m: PhantomData, - _s: PhantomData, - _t: PhantomData, +#[derive(Debug, Default)] +pub struct MaxStateBinary { + max: Vec, + valid: bool, +} + +impl AggregateState<&[u8], [u8]> for MaxStateBinary { + fn merge(&mut self, other: &mut Self) -> Result<()> { + if !self.valid { + self.valid = other.valid; + std::mem::swap(&mut self.max, &mut other.max); + return Ok(()); } + + if self.max.lt(&other.max) { + std::mem::swap(&mut self.max, &mut other.max); + } + + Ok(()) } -} -impl AggregateFunctionImpl for MinMaxPrimitiveImpl -where - for<'a> S: PhysicalStorage = T>, - T: PartialOrd + Debug + Default + Sync + Send + Copy + 'static, - M: AggregateState + Default + Sync + Send + 'static, - ArrayData: From>, -{ - fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); + fn update(&mut self, input: &[u8]) -> Result<()> { + if !self.valid { + self.max = input.to_vec(); + return Ok(()); + } - new_unary_aggregate_states::(M::default, move |states| { - primitive_finalize(datatype.clone(), states) - }) + if self.max.as_slice().lt(input) { + self.max = input.to_vec(); + } + + Ok(()) } -} -impl Clone for MinMaxPrimitiveImpl { - fn clone(&self) -> Self { - Self::new(self.datatype.clone()) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + if self.valid { + output.put(&self.max); + } else { + output.put_null(); + } + + Ok(()) } } #[derive(Debug, Default)] -pub struct MinState { - min: T, +pub struct MaxStateString { + max: String, valid: bool, } -impl AggregateState for MinState -where - T: PartialOrd + Debug + Default + Copy, -{ +impl AggregateState<&str, str> for MaxStateString { fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; - self.min = other.min; - } else if other.valid && other.min < self.min { - self.min = other.min; + std::mem::swap(&mut self.max, &mut other.max); + return Ok(()); + } + + if self.max.lt(&other.max) { + std::mem::swap(&mut self.max, &mut other.max); } Ok(()) } - fn update(&mut self, input: T) -> Result<()> { + fn update(&mut self, input: &str) -> Result<()> { if !self.valid { - self.valid = true; - self.min = input; - } else if input < self.min { - self.min = input + self.max = input.to_string(); + return Ok(()); } + + if self.max.as_str().lt(input) { + self.max = input.to_string(); + } + Ok(()) } - fn finalize(&mut self) -> Result<(T, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.valid { - Ok((self.min, true)) + output.put(&self.max); } else { - Ok((T::default(), false)) + output.put_null(); } + + Ok(()) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct MinPrimitiveImpl { + _s: PhantomData, +} + +impl MinPrimitiveImpl { + const fn new() -> Self { + MinPrimitiveImpl { _s: PhantomData } + } +} + +impl AggregateFunctionImpl for MinPrimitiveImpl +where + S: MutablePhysicalStorage, + S::StorageType: Default + Debug + Sync + Send + PartialOrd + Copy, +{ + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + MinStatePrimitive::::default, + unary_update::, + drain::, + )) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct MinBinaryImpl; + +impl AggregateFunctionImpl for MinBinaryImpl { + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + MinStateBinary::default, + unary_update::, + drain::, + )) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct MinStringImpl; + +impl AggregateFunctionImpl for MinStringImpl { 
+ fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + MinStateString::default, + unary_update::, + drain::, + )) } } #[derive(Debug, Default)] -pub struct MinStateBinary { - min: Vec, +pub struct MinStatePrimitive { + min: T, valid: bool, } -impl AggregateState<&[u8], Vec> for MinStateBinary { +impl AggregateState<&T, T> for MinStatePrimitive +where + T: Debug + Sync + Send + PartialOrd + Copy, +{ fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; std::mem::swap(&mut self.min, &mut other.min); - } else if other.valid && other.min < self.min { + return Ok(()); + } + + if self.min.gt(&other.min) { std::mem::swap(&mut self.min, &mut other.min); } Ok(()) } - fn update(&mut self, input: &[u8]) -> Result<()> { + fn update(&mut self, input: &T) -> Result<()> { if !self.valid { - self.valid = true; - self.min = input.into(); - } else if input < self.min.as_slice() { - self.min = input.into(); + self.min = *input; + return Ok(()); + } + + if self.min.gt(input) { + self.min = *input; } Ok(()) } - fn finalize(&mut self) -> Result<(Vec, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.valid { - Ok((std::mem::take(&mut self.min), true)) + output.put(&self.min); } else { - Ok((Vec::new(), false)) + output.put_null(); } + + Ok(()) } } #[derive(Debug, Default)] -pub struct MaxState { - max: T, +pub struct MinStateBinary { + min: Vec, valid: bool, } -impl AggregateState for MaxState -where - T: PartialOrd + Debug + Default + Copy, -{ +impl AggregateState<&[u8], [u8]> for MinStateBinary { fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; - self.max = other.max; - } else if other.valid && other.max > self.max { - self.max = other.max; + std::mem::swap(&mut self.min, &mut other.min); + return Ok(()); } + + if self.min.gt(&other.min) { + std::mem::swap(&mut self.min, &mut other.min); + } + Ok(()) } - fn update(&mut self, input: T) -> Result<()> { + fn update(&mut self, input: &[u8]) -> Result<()> { if !self.valid { - self.valid = true; - self.max = input; - } else if input > self.max { - self.max = input + self.min = input.to_vec(); + return Ok(()); + } + + if self.min.as_slice().gt(input) { + self.min = input.to_vec(); } Ok(()) } - fn finalize(&mut self) -> Result<(T, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.valid { - Ok((self.max, true)) + output.put(&self.min); } else { - Ok((T::default(), false)) + output.put_null(); } + + Ok(()) } } #[derive(Debug, Default)] -pub struct MaxStateBinary { - max: Vec, +pub struct MinStateString { + min: String, valid: bool, } -impl AggregateState<&[u8], Vec> for MaxStateBinary { +impl AggregateState<&str, str> for MinStateString { fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; - std::mem::swap(&mut self.max, &mut other.max); - } else if other.valid && other.max > self.max { - std::mem::swap(&mut self.max, &mut other.max); + std::mem::swap(&mut self.min, &mut other.min); + return Ok(()); + } + + if self.min.gt(&other.min) { + std::mem::swap(&mut self.min, &mut other.min); } Ok(()) } - fn update(&mut self, input: &[u8]) -> Result<()> { + fn update(&mut self, input: &str) -> Result<()> { if !self.valid { - self.valid = true; - self.max = input.into(); - } else if input > self.max.as_slice() { - self.max = input.into(); + self.min = input.to_string(); + return Ok(()); + } + + if 
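A stand-alone version of the swap-based min state above, stripped of the PutBuffer plumbing. It keeps the `valid = true` assignment from the removed implementation; the new `update` bodies above do not appear to set it, which would leave `finalize` emitting nulls.

#[derive(Debug, Default)]
struct MinState<T> {
    min: T,
    valid: bool,
}

impl<T: PartialOrd + Copy + Default> MinState<T> {
    fn update(&mut self, input: T) {
        if !self.valid {
            self.min = input;
            self.valid = true;
        } else if input < self.min {
            self.min = input;
        }
    }

    // Merge steals the other partition's value when this state is still empty,
    // otherwise keeps the smaller of the two.
    fn merge(&mut self, other: &mut Self) {
        if !self.valid {
            self.valid = other.valid;
            std::mem::swap(&mut self.min, &mut other.min);
        } else if other.valid && other.min < self.min {
            std::mem::swap(&mut self.min, &mut other.min);
        }
    }

    fn finalize(&self) -> Option<T> {
        self.valid.then_some(self.min)
    }
}

fn main() {
    let mut p1 = MinState::<i64>::default();
    let mut p2 = MinState::<i64>::default();
    for v in [4, 2, 9] {
        p1.update(v);
    }
    for v in [7, 1] {
        p2.update(v);
    }
    p1.merge(&mut p2);
    assert_eq!(p1.finalize(), Some(1));
}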
self.min.as_str().gt(input) { + self.min = input.to_string(); } Ok(()) } - fn finalize(&mut self) -> Result<(Vec, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.valid { - Ok((std::mem::take(&mut self.max), true)) + output.put(&self.min); } else { - Ok((Vec::new(), false)) + output.put_null(); } + + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs index 040fd880b..2966e1bc4 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs @@ -3,14 +3,16 @@ use std::marker::PhantomData; use rayexec_error::Result; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states, - primitive_finalize, + binary_update, + drain, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -72,10 +74,11 @@ pub struct RegrAvgYImpl; impl AggregateFunctionImpl for RegrAvgYImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( RegrAvgState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } @@ -137,10 +140,11 @@ pub struct RegrAvgXImpl; impl AggregateFunctionImpl for RegrAvgXImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( RegrAvgState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } @@ -165,7 +169,7 @@ where _input: PhantomData, } -impl AggregateState<(f64, f64), f64> for RegrAvgState +impl AggregateState<(&f64, &f64), f64> for RegrAvgState where F: RegrAvgInput, { @@ -175,17 +179,22 @@ where Ok(()) } - fn update(&mut self, input: (f64, f64)) -> Result<()> { - self.sum += F::input(input); + fn update(&mut self, (&y, &x): (&f64, &f64)) -> Result<()> { + self.sum += F::input((y, x)); self.count += 1; Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.count == 0 { - Ok((0.0, false)) + output.put_null(); } else { - Ok((self.sum / self.count as f64, true)) + let v = self.sum / self.count as f64; + output.put(&v); } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs index 2a26b0169..ddad9d65f 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs @@ -1,16 +1,14 @@ use std::fmt::Debug; +use std::marker::PhantomData; use rayexec_error::Result; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64, PhysicalStorage}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalAny; +use 
crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; -use crate::functions::aggregate::states::{ - new_binary_aggregate_states, - primitive_finalize, - AggregateGroupStates, -}; +use crate::functions::aggregate::states::AggregateGroupStates; use crate::functions::aggregate::{ AggregateFunction, AggregateFunctionImpl, @@ -59,7 +57,7 @@ impl AggregateFunction for RegrCount { function: Box::new(*self), return_type: DataType::Float64, inputs, - function_impl: Box::new(RegrCountImpl), + function_impl: Box::new(RegrCountImpl::::new()), }), (a, b) => Err(invalid_input_types_error(self, &[a, b])), } @@ -67,14 +65,22 @@ impl AggregateFunction for RegrCount { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct RegrCountImpl; +pub struct RegrCountImpl { + _s: PhantomData, +} + +impl RegrCountImpl { + const fn new() -> Self { + RegrCountImpl { _s: PhantomData } + } +} -impl AggregateFunctionImpl for RegrCountImpl { +impl AggregateFunctionImpl for RegrCountImpl +where + S: PhysicalStorage, +{ fn new_states(&self) -> Box { - new_binary_aggregate_states::( - RegrCountState::default, - move |states| primitive_finalize(DataType::Int64, states), - ) + unimplemented!() } } @@ -83,22 +89,30 @@ impl AggregateFunctionImpl for RegrCountImpl { /// Note that this can be used for any input type, but the sql function we /// expose only accepts f64 (to match Postgres). #[derive(Debug, Clone, Copy, Default)] -pub struct RegrCountState { +pub struct RegrCountState { count: i64, + _s: PhantomData, } -impl AggregateState<((), ()), i64> for RegrCountState { +impl AggregateState<&S::StorageType, i64> for RegrCountState +where + S: PhysicalStorage, +{ fn merge(&mut self, other: &mut Self) -> Result<()> { self.count += other.count; Ok(()) } - fn update(&mut self, _input: ((), ())) -> Result<()> { + fn update(&mut self, _input: &S::StorageType) -> Result<()> { self.count += 1; Ok(()) } - fn finalize(&mut self) -> Result<(i64, bool)> { - Ok((self.count, true)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + output.put(&self.count); + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs index 11941ba7c..7ed1379d8 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs @@ -3,14 +3,16 @@ use std::fmt::Debug; use rayexec_error::Result; use super::corr::CorrelationState; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states, - primitive_finalize, + binary_update, + drain, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -72,10 +74,11 @@ pub struct RegrR2Impl; impl AggregateFunctionImpl for RegrR2Impl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( RegrR2State::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } @@ -84,23 +87,28 @@ 
pub struct RegrR2State { corr: CorrelationState, } -impl AggregateState<(f64, f64), f64> for RegrR2State { +impl AggregateState<(&f64, &f64), f64> for RegrR2State { fn merge(&mut self, other: &mut Self) -> Result<()> { self.corr.merge(&mut other.corr)?; Ok(()) } - fn update(&mut self, input: (f64, f64)) -> Result<()> { + fn update(&mut self, input: (&f64, &f64)) -> Result<()> { self.corr.update(input)?; Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { - let (v, valid) = self.corr.finalize()?; - if valid { - Ok((v.powi(2), true)) - } else { - Ok((0.0, false)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match self.corr.finalize_value() { + Some(val) => { + let val = val.powi(2); + output.put(&val); + } + None => output.put_null(), } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs index 4d4dd5ba7..b8b60f6f3 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs @@ -4,14 +4,16 @@ use rayexec_error::Result; use super::covar::{CovarPopFinalize, CovarState}; use super::stddev::{VariancePopFinalize, VarianceState}; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states, - primitive_finalize, + binary_update, + drain, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -73,10 +75,11 @@ pub struct RegrSlopeImpl; impl AggregateFunctionImpl for RegrSlopeImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( RegrSlopeState::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } @@ -86,31 +89,34 @@ pub struct RegrSlopeState { var: VarianceState, } -impl AggregateState<(f64, f64), f64> for RegrSlopeState { +impl AggregateState<(&f64, &f64), f64> for RegrSlopeState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.cov.merge(&mut other.cov)?; self.var.merge(&mut other.var)?; Ok(()) } - fn update(&mut self, input: (f64, f64)) -> Result<()> { + fn update(&mut self, input: (&f64, &f64)) -> Result<()> { self.cov.update(input)?; self.var.update(input.1)?; // Update with 'x' Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { - let (cov, cov_valid) = self.cov.finalize()?; - let (var, var_valid) = self.var.finalize()?; - - if cov_valid && var_valid { - if var == 0.0 { - return Ok((0.0, false)); + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match (self.cov.finalize_value(), self.var.finalize_value()) { + (Some(cov), Some(var)) => { + if var == 0.0 { + output.put_null(); + return Ok(()); + } + let v = cov / var; + output.put(&v); } - let v = cov / var; - Ok((v, true)) - } else { - Ok((0.0, false)) + _ => output.put_null(), } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs index 
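The composite regression states now finalize by combining the inner states' `finalize_value()` options. A tiny stand-alone sketch of the slope rule above: NULL when either side is empty or the x-variance is zero, otherwise covariance over variance.

// Mirrors the Option-based finalize flow in RegrSlopeState.
fn regr_slope(cov: Option<f64>, var: Option<f64>) -> Option<f64> {
    match (cov, var) {
        (Some(_), Some(v)) if v == 0.0 => None,
        (Some(c), Some(v)) => Some(c / v),
        _ => None,
    }
}

fn main() {
    assert_eq!(regr_slope(Some(4.0), Some(2.0)), Some(2.0));
    assert_eq!(regr_slope(Some(4.0), Some(0.0)), None);
    assert_eq!(regr_slope(None, None), None);
}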
0e66f7d67..63a04b510 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs @@ -3,14 +3,16 @@ use std::marker::PhantomData; use rayexec_error::Result; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_unary_aggregate_states, - primitive_finalize, + drain, + unary_update, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -69,10 +71,11 @@ pub struct StddevPopImpl; impl AggregateFunctionImpl for StddevPopImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( VarianceState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } @@ -128,10 +131,11 @@ pub struct StddevSampImpl; impl AggregateFunctionImpl for StddevSampImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( VarianceState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } @@ -183,10 +187,11 @@ pub struct VarPopImpl; impl AggregateFunctionImpl for VarPopImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( VarianceState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } @@ -238,28 +243,29 @@ pub struct VarSampImpl; impl AggregateFunctionImpl for VarSampImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( VarianceState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } pub trait VarianceFinalize: Sync + Send + Debug + Default + 'static { - fn finalize(count: i64, mean: f64, m2: f64) -> (f64, bool); + fn finalize(count: i64, mean: f64, m2: f64) -> Option; } #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub struct StddevPopFinalize; impl VarianceFinalize for StddevPopFinalize { - fn finalize(count: i64, _mean: f64, m2: f64) -> (f64, bool) { + fn finalize(count: i64, _mean: f64, m2: f64) -> Option { match count { - 0 => (0.0, false), - 1 => (0.0, true), + 0 => None, + 1 => Some(0.0), _ => { let v = f64::sqrt(m2 / count as f64); - (v, true) + Some(v) } } } @@ -269,12 +275,12 @@ impl VarianceFinalize for StddevPopFinalize { pub struct StddevSampFinalize; impl VarianceFinalize for StddevSampFinalize { - fn finalize(count: i64, _mean: f64, m2: f64) -> (f64, bool) { + fn finalize(count: i64, _mean: f64, m2: f64) -> Option { match count { - 0 | 1 => (0.0, false), + 0 | 1 => None, _ => { let v = f64::sqrt(m2 / (count - 1) as f64); - (v, true) + Some(v) } } } @@ -284,12 +290,12 @@ impl VarianceFinalize for StddevSampFinalize { pub struct VarianceSampFinalize; impl VarianceFinalize for VarianceSampFinalize { - fn finalize(count: i64, _mean: f64, m2: f64) -> (f64, bool) { + fn finalize(count: i64, _mean: f64, m2: f64) -> Option { match count { - 0 | 1 => (0.0, false), + 0 | 1 => None, _ => { let v = m2 / (count - 1) as f64; - (v, 
true) + Some(v) } } } @@ -299,13 +305,13 @@ impl VarianceFinalize for VarianceSampFinalize { pub struct VariancePopFinalize; impl VarianceFinalize for VariancePopFinalize { - fn finalize(count: i64, _mean: f64, m2: f64) -> (f64, bool) { + fn finalize(count: i64, _mean: f64, m2: f64) -> Option { match count { - 0 => (0.0, false), - 1 => (0.0, true), + 0 => None, + 1 => Some(0.0), _ => { let v = m2 / count as f64; - (v, true) + Some(v) } } } @@ -319,7 +325,16 @@ pub struct VarianceState { _finalize: PhantomData, } -impl AggregateState for VarianceState +impl VarianceState +where + F: VarianceFinalize, +{ + pub fn finalize_value(&self) -> Option { + F::finalize(self.count, self.mean, self.m2) + } +} + +impl AggregateState<&f64, f64> for VarianceState where F: VarianceFinalize, { @@ -343,7 +358,7 @@ where Ok(()) } - fn update(&mut self, input: f64) -> Result<()> { + fn update(&mut self, &input: &f64) -> Result<()> { self.count += 1; let delta = input - self.mean; self.mean += delta / self.count as f64; @@ -353,7 +368,14 @@ where Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { - Ok(F::finalize(self.count, self.mean, self.m2)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match F::finalize(self.count, self.mean, self.m2) { + Some(val) => output.put(&val), + None => output.put_null(), + } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs index 5484f9938..b1407c091 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs @@ -2,13 +2,18 @@ use std::fmt::Debug; use rayexec_error::{RayexecError, Result}; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::{AggregateState, StateFinalizer}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::ScalarValue; use crate::expr::Expression; -use crate::functions::aggregate::states::{new_unary_aggregate_states, AggregateGroupStates}; +use crate::functions::aggregate::states::{ + drain, + unary_update, + AggregateGroupStates, + TypedAggregateGroupStates, +}; use crate::functions::aggregate::{ AggregateFunction, AggregateFunctionImpl, @@ -99,13 +104,11 @@ impl AggregateFunctionImpl for StringAggImpl { string: None, }; - new_unary_aggregate_states::(state_init, move |states| { - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(states.len()), - }; - StateFinalizer::finalize(states, builder) - }) + Box::new(TypedAggregateGroupStates::new( + state_init, + unary_update::, + drain::, + )) } } @@ -119,7 +122,7 @@ pub struct StringAggState { string: Option, } -impl AggregateState<&str, String> for StringAggState { +impl AggregateState<&str, str> for StringAggState { fn merge(&mut self, other: &mut Self) -> Result<()> { if self.string.is_none() { std::mem::swap(self, other); @@ -148,10 +151,14 @@ impl AggregateState<&str, String> for StringAggState { Ok(()) } - fn finalize(&mut self) -> Result<(String, bool)> { - match self.string.take() { - Some(s) => Ok((s, true)), - None => Ok((String::new(), false)), + fn 
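A stand-alone sketch of the Welford recurrence that `VarianceState::update` uses, with the population/sample finalize rules expressed as `Option<f64>` like the reworked `VarianceFinalize` trait.

#[derive(Default)]
struct Welford {
    count: i64,
    mean: f64,
    m2: f64,
}

impl Welford {
    // Same single-pass update as VarianceState::update.
    fn update(&mut self, input: f64) {
        self.count += 1;
        let delta = input - self.mean;
        self.mean += delta / self.count as f64;
        let delta2 = input - self.mean;
        self.m2 += delta * delta2;
    }

    fn var_pop(&self) -> Option<f64> {
        match self.count {
            0 => None,
            1 => Some(0.0),
            n => Some(self.m2 / n as f64),
        }
    }

    fn var_samp(&self) -> Option<f64> {
        match self.count {
            0 | 1 => None,
            n => Some(self.m2 / (n - 1) as f64),
        }
    }
}

fn main() {
    let mut w = Welford::default();
    for v in [1.0, 2.0, 3.0, 4.0] {
        w.update(v);
    }
    assert!((w.var_pop().unwrap() - 1.25).abs() < 1e-12); // population variance of 1..4
    assert!((w.var_samp().unwrap() - 5.0 / 3.0).abs() < 1e-12);
}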
finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match &self.string { + Some(s) => output.put(s), + None => output.put_null(), } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs index 379d60935..60e761f50 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs @@ -5,17 +5,17 @@ use std::ops::AddAssign; use num_traits::CheckedAdd; use rayexec_error::Result; -use crate::arrays::array::ArrayData; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64, PhysicalI64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::{PhysicalF64, PhysicalI64}; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; -use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_unary_aggregate_states, - primitive_finalize, + drain, + unary_update, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -85,17 +85,11 @@ impl AggregateFunction for Sum { DataType::Float64 => (Box::new(SumFloat64Impl), DataType::Float64), DataType::Decimal64(m) => { let datatype = DataType::Decimal64(m); - ( - Box::new(SumDecimalImpl::::new(datatype.clone())), - datatype, - ) + (Box::new(SumDecimalImpl::::new()), datatype) } DataType::Decimal128(m) => { let datatype = DataType::Decimal128(m); - ( - Box::new(SumDecimalImpl::::new(datatype.clone())), - datatype, - ) + (Box::new(SumDecimalImpl::::new()), datatype) } other => return Err(invalid_input_types_error(self, &[other])), }; @@ -114,10 +108,11 @@ pub struct SumInt64Impl; impl AggregateFunctionImpl for SumInt64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( SumStateCheckedAdd::::default, - move |states| primitive_finalize(DataType::Int64, states), - ) + unary_update::, + drain::, + )) } } @@ -126,68 +121,70 @@ pub struct SumFloat64Impl; impl AggregateFunctionImpl for SumFloat64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( SumStateAdd::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } #[derive(Debug, Clone)] pub struct SumDecimalImpl { - datatype: DataType, _d: PhantomData, } impl SumDecimalImpl { - fn new(datatype: DataType) -> Self { - SumDecimalImpl { - datatype, - _d: PhantomData, - } + const fn new() -> Self { + SumDecimalImpl { _d: PhantomData } } } impl AggregateFunctionImpl for SumDecimalImpl where D: DecimalType, - ArrayData: From>, { fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); - - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( SumStateCheckedAdd::::default, - move |states| primitive_finalize(datatype.clone(), states), - ) + unary_update::, + drain::, + )) } } #[derive(Debug, Default)] pub struct SumStateCheckedAdd { sum: T, - set: bool, + valid: bool, } -impl AggregateState for SumStateCheckedAdd { +impl AggregateState<&T, T> for SumStateCheckedAdd +where + T: CheckedAdd + Default + Debug + Copy, +{ fn merge(&mut self, other: &mut 
Self) -> Result<()> { self.sum = self.sum.checked_add(&other.sum).unwrap_or_default(); // TODO - self.set = self.set || other.set; + self.valid = self.valid || other.valid; Ok(()) } - fn update(&mut self, input: T) -> Result<()> { - self.sum = self.sum.checked_add(&input).unwrap_or_default(); // TODO - self.set = true; + fn update(&mut self, input: &T) -> Result<()> { + self.sum = self.sum.checked_add(input).unwrap_or_default(); // TODO + self.valid = true; Ok(()) } - fn finalize(&mut self) -> Result<(T, bool)> { - if self.set { - Ok((self.sum, true)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + if self.valid { + output.put(&self.sum); } else { - Ok((T::default(), false)) + output.put_null(); } + Ok(()) } } @@ -197,43 +194,56 @@ pub struct SumStateAdd { valid: bool, } -impl AggregateState for SumStateAdd { +impl AggregateState<&T, T> for SumStateAdd +where + T: AddAssign + Default + Debug + Copy, +{ fn merge(&mut self, other: &mut Self) -> Result<()> { self.sum += other.sum; self.valid = self.valid || other.valid; Ok(()) } - fn update(&mut self, input: T) -> Result<()> { + fn update(&mut self, &input: &T) -> Result<()> { self.sum += input; self.valid = true; Ok(()) } - fn finalize(&mut self) -> Result<(T, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.valid { - Ok((self.sum, true)) + output.put(&self.sum); } else { - Ok((T::default(), false)) + output.put_null(); } + Ok(()) } } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; - use crate::arrays::array::Array; - use crate::arrays::scalar::ScalarValue; - use crate::execution::operators::hash_aggregate::hash_table::GroupAddress; + use crate::arrays::array::exp::Array; + use crate::arrays::array::selection::Selection; + + use crate::arrays::buffer::buffer_manager::NopBufferManager; + + use crate::arrays::testutil::{assert_arrays_eq, assert_arrays_eq_sel}; + use crate::expr; - use crate::functions::aggregate::ChunkGroupAddressIter; + #[test] fn sum_i64_single_group_two_partitions() { // Single group, two partitions, 'SELECT SUM(a) FROM table' - let partition_1_vals = &Array::from_iter::<[i64; 3]>([1, 2, 3]); - let partition_2_vals = &Array::from_iter::<[i64; 3]>([4, 5, 6]); + let partition_1_vals = Array::try_from_iter::<[i64; 3]>([1, 2, 3]).unwrap(); + let partition_2_vals = Array::try_from_iter::<[i64; 3]>([4, 5, 6]).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -247,50 +257,31 @@ mod tests { let mut states_1 = specialized.function_impl.new_states(); let mut states_2 = specialized.function_impl.new_states(); - states_1.new_states(1); - states_2.new_states(1); + states_1.new_groups(1); + states_2.new_groups(1); // All inputs map to the same group (no GROUP BY clause) - let addrs_1: Vec<_> = (0..partition_1_vals.logical_len()) - .map(|_| GroupAddress { - chunk_idx: 0, - row_idx: 0, - }) - .collect(); - let addrs_2: Vec<_> = (0..partition_2_vals.logical_len()) - .map(|_| GroupAddress { - chunk_idx: 0, - row_idx: 0, - }) - .collect(); - states_1 - .update_states(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) + .update_group_states(&[partition_1_vals], Selection::linear(3), &[0, 0, 0]) .unwrap(); states_2 - .update_states(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) + .update_group_states(&[partition_2_vals], Selection::linear(3), &[0, 0, 0]) .unwrap(); // Combine states. 
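The `// TODO` above means an overflowing `checked_add` silently resets the running sum to the type's default. A small stand-alone illustration of that behavior as currently written.

fn sum_checked(values: &[i64]) -> i64 {
    let mut sum = 0i64;
    for v in values {
        // Mirrors SumStateCheckedAdd::update: overflow falls back to Default (0).
        sum = sum.checked_add(*v).unwrap_or_default();
    }
    sum
}

fn main() {
    assert_eq!(sum_checked(&[1, 2, 3]), 6);
    // The second addition overflows, so the whole running sum collapses to 0.
    assert_eq!(sum_checked(&[i64::MAX, 1]), 0);
}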
// // Both partitions hold a single state (representing a single group), // and those states map to each other. - let combine_mapping = vec![GroupAddress { - chunk_idx: 0, - row_idx: 0, - }]; states_1 - .combine( - &mut states_2, - ChunkGroupAddressIter::new(0, &combine_mapping), - ) + .combine(&mut states_2, Selection::selection(&[0]), &[0]) .unwrap(); // Get final output. - let out = states_1.finalize().unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int64, 1).unwrap(); + states_1.drain(&mut out).unwrap(); - assert_eq!(1, out.logical_len()); - assert_eq!(ScalarValue::Int64(21), out.logical_value(0).unwrap()); + let expected = Array::try_from_iter([21_i64]).unwrap(); + assert_arrays_eq(&expected, &out); } #[test] @@ -309,8 +300,8 @@ mod tests { // Partition values and mappings represent the positions of the above // table. The actual grouping values are stored in the operator, and // operator is what computes the mappings. - let partition_1_vals = &Array::from_iter::<[i64; 3]>([1, 2, 3]); - let partition_2_vals = &Array::from_iter::<[i64; 3]>([4, 5, 6]); + let partition_1_vals = Array::try_from_iter::<[i64; 3]>([1, 2, 3]).unwrap(); + let partition_2_vals = Array::try_from_iter::<[i64; 3]>([4, 5, 6]).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -329,48 +320,19 @@ mod tests { let mut states_2 = specialized.function_impl.new_states(); // Both partitions are operating on two groups ('a' and 'b'). - states_1.new_states(1); - states_1.new_states(1); + states_1.new_groups(1); + states_1.new_groups(1); - states_2.new_states(1); - states_2.new_states(1); + states_2.new_groups(1); + states_2.new_groups(1); // Mapping corresponding to the above table. Group 'a' == 0 and group // 'b' == 1. - let addrs_1 = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - ]; - let addrs_2 = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - ]; - states_1 - .update_states(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) + .update_group_states(&[partition_1_vals], Selection::linear(3), &[0, 0, 1]) .unwrap(); states_2 - .update_states(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) + .update_group_states(&[partition_2_vals], Selection::linear(3), &[1, 1, 0]) .unwrap(); // Combine states. @@ -382,29 +344,20 @@ mod tests { // The mapping here indicates the the 0th state for both partitions // should be combined, and the 1st state for both partitions should be // combined. - let combine_mapping = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - ]; states_1 .combine( &mut states_2, - ChunkGroupAddressIter::new(0, &combine_mapping), + Selection::linear(2), // States 0 ('a') and 1 ('b') + &[0, 1], ) .unwrap(); // Get final output. 
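The `Selection::linear(n)` plus mapping slice pair replaces the old per-row `GroupAddress` list: the selection picks input rows, and `mapping[i]` is the group state the i-th selected row feeds. A stand-alone sketch of that addressing for a SUM.

fn sum_per_group(values: &[i64], selection: &[usize], mapping: &[usize], num_groups: usize) -> Vec<i64> {
    assert_eq!(selection.len(), mapping.len());
    let mut states = vec![0i64; num_groups];
    for (&row, &group) in selection.iter().zip(mapping) {
        states[group] += values[row];
    }
    states
}

fn main() {
    // Mirrors partition 1 in the two-group test: rows 0 and 1 feed group 'a' (0),
    // row 2 feeds group 'b' (1).
    assert_eq!(sum_per_group(&[1, 2, 3], &[0, 1, 2], &[0, 0, 1], 2), vec![3, 3]);
}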
- let out = states_1.finalize().unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int64, 2).unwrap(); + states_1.drain(&mut out).unwrap(); - assert_eq!(2, out.logical_len()); - assert_eq!(ScalarValue::Int64(9), out.logical_value(0).unwrap()); - assert_eq!(ScalarValue::Int64(12), out.logical_value(1).unwrap()); + let expected = Array::try_from_iter([9_i64, 12_i64]).unwrap(); + assert_arrays_eq(&expected, &out); } #[test] @@ -431,8 +384,8 @@ mod tests { // Partition values and mappings represent the positions of the above // table. The actual grouping values are stored in the operator, and // operator is what computes the mappings. - let partition_1_vals = &Array::from_iter::<[i64; 4]>([1, 2, 3, 4]); - let partition_2_vals = &Array::from_iter::<[i64; 4]>([5, 6, 7, 8]); + let partition_1_vals = Array::try_from_iter::<[i64; 4]>([1, 2, 3, 4]).unwrap(); + let partition_2_vals = Array::try_from_iter::<[i64; 4]>([5, 6, 7, 8]).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -451,58 +404,21 @@ mod tests { let mut states_2 = specialized.function_impl.new_states(); // Partition 1 sees groups 'x', 'y', and 'z'. - states_1.new_states(1); - states_1.new_states(1); - states_1.new_states(1); + states_1.new_groups(1); + states_1.new_groups(1); + states_1.new_groups(1); // Partition 2 see groups 'x' and 'z' (no 'y'). - states_2.new_states(1); - states_2.new_states(1); + states_2.new_groups(1); + states_2.new_groups(1); // For partition 1: 'x' == 0, 'y' == 1, 'z' == 2 - let addrs_1 = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 2, - }, - ]; - // For partition 2: 'x' == 0, 'z' == 1 - let addrs_2 = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - ]; - states_1 - .update_states(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) + .update_group_states(&[partition_1_vals], Selection::linear(4), &[0, 0, 1, 2]) .unwrap(); + // For partition 2: 'x' == 0, 'z' == 1 states_2 - .update_states(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) + .update_group_states(&[partition_2_vals], Selection::linear(4), &[0, 1, 1, 1]) .unwrap(); // Combine states. @@ -510,86 +426,61 @@ mod tests { // States for 'x' both at the same position. // // States for 'y' at different positions, partition_2_state[1] => partition_1_state[2] - let combine_mapping = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 2, - }, - ]; states_1 - .combine( - &mut states_2, - ChunkGroupAddressIter::new(0, &combine_mapping), - ) + .combine(&mut states_2, Selection::selection(&[0, 1]), &[0, 2]) .unwrap(); // Get final output. 
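`combine` uses the same selection/mapping convention across partitions: the selection picks states from the consumed side, and `mapping[i]` is the target state in `self`. A stand-alone sketch using the per-partition sums from the three-group test, matching the [8, 3, 25] expected just below.

fn combine_sums(target: &mut [i64], consume: &[i64], selection: &[usize], mapping: &[usize]) {
    for (&src, &dst) in selection.iter().zip(mapping) {
        target[dst] += consume[src];
    }
}

fn main() {
    // Partition 1 tracked 'x', 'y', 'z'; partition 2 tracked only 'x' and 'z',
    // which map to target states 0 and 2.
    let mut p1 = vec![3, 3, 4];
    let p2 = vec![5, 21];
    combine_sums(&mut p1, &p2, &[0, 1], &[0, 2]);
    assert_eq!(p1, vec![8, 3, 25]);
}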
- let out = states_1.finalize().unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int64, 3).unwrap(); + states_1.drain(&mut out).unwrap(); - assert_eq!(3, out.logical_len()); - assert_eq!(ScalarValue::Int64(8), out.logical_value(0).unwrap()); - assert_eq!(ScalarValue::Int64(3), out.logical_value(1).unwrap()); - assert_eq!(ScalarValue::Int64(25), out.logical_value(2).unwrap()); + let expected = Array::try_from_iter([8_i64, 3_i64, 25_i64]).unwrap(); + assert_arrays_eq(&expected, &out); } - // #[test] - // fn sum_i64_drain_multiple() { - // // Three groups, single partition, test that drain can be called - // // multiple times until states are exhausted. - // let vals = &Array::from_iter::<[i64; 6]>([1, 2, 3, 4, 5, 6]); - - // let specialized = Sum.plan_from_datatypes(&[DataType::Int64]).unwrap(); - // let mut states = specialized.new_grouped_state(); - - // states.new_group(); - // states.new_group(); - // states.new_group(); - - // let addrs = vec![ - // GroupAddress { - // chunk_idx: 0, - // row_idx: 0, - // }, - // GroupAddress { - // chunk_idx: 0, - // row_idx: 0, - // }, - // GroupAddress { - // chunk_idx: 0, - // row_idx: 1, - // }, - // GroupAddress { - // chunk_idx: 0, - // row_idx: 1, - // }, - // GroupAddress { - // chunk_idx: 0, - // row_idx: 2, - // }, - // GroupAddress { - // chunk_idx: 0, - // row_idx: 2, - // }, - // ]; - - // states - // .update_states(&[vals], ChunkGroupAddressIter::new(0, &addrs)) - // .unwrap(); - - // let out_1 = states.drain_next(2).unwrap().unwrap(); - // assert_eq!(2, out_1.logical_len()); - // assert_eq!(ScalarValue::Int64(3), out_1.logical_value(0).unwrap()); - // assert_eq!(ScalarValue::Int64(7), out_1.logical_value(1).unwrap()); - - // let out_2 = states.drain_next(2).unwrap().unwrap(); - // assert_eq!(1, out_2.logical_len()); - // assert_eq!(ScalarValue::Int64(11), out_2.logical_value(0).unwrap()); - - // let out_3 = states.drain_next(2).unwrap(); - // assert_eq!(None, out_3); - // } + #[test] + fn sum_i64_drain_multiple() { + // Three groups, single partition, test that drain can be called + // multiple times until states are exhausted. 
+ let vals = Array::try_from_iter::<[i64; 6]>([1, 2, 3, 4, 5, 6]).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table( + None, + vec![DataType::Utf8, DataType::Int64], + vec!["col1".to_string(), "col2".to_string()], + ) + .unwrap(); + + let specialized = Sum + .plan(&table_list, vec![expr::col_ref(table_ref, 1)]) + .unwrap(); + let mut states = specialized.function_impl.new_states(); + + states.new_groups(3); + + states + .update_group_states(&[vals], Selection::linear(6), &[0, 0, 1, 1, 2, 2]) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Int64, 2).unwrap(); + + let n = states.drain(&mut out).unwrap(); + assert_eq!(2, n); + + let expected = Array::try_from_iter([3_i64, 7]).unwrap(); + assert_arrays_eq(&expected, &out); + + out.reset_for_write(&NopBufferManager).unwrap(); + let n = states.drain(&mut out).unwrap(); + assert_eq!(1, n); + + let expected = Array::try_from_iter([11_i64]).unwrap(); + assert_arrays_eq_sel(&expected, 0..1, &out, 0..1); + + out.reset_for_write(&NopBufferManager).unwrap(); + let n = states.drain(&mut out).unwrap(); + assert_eq!(0, n); + } } diff --git a/crates/rayexec_execution/src/functions/aggregate/mod.rs b/crates/rayexec_execution/src/functions/aggregate/mod.rs index 0bfa1baba..a75647076 100644 --- a/crates/rayexec_execution/src/functions/aggregate/mod.rs +++ b/crates/rayexec_execution/src/functions/aggregate/mod.rs @@ -10,7 +10,6 @@ use states::AggregateGroupStates; use super::FunctionInfo; use crate::arrays::datatype::DataType; -use crate::arrays::executor::aggregate::RowToStateMapping; use crate::execution::operators::hash_aggregate::hash_table::GroupAddress; use crate::expr::Expression; use crate::logical::binder::table_list::TableList; @@ -103,7 +102,7 @@ impl<'a> ChunkGroupAddressIter<'a> { } impl Iterator for ChunkGroupAddressIter<'_> { - type Item = RowToStateMapping; + type Item = (usize, usize); #[inline] fn next(&mut self) -> Option { @@ -111,10 +110,7 @@ impl Iterator for ChunkGroupAddressIter<'_> { if addr.chunk_idx == self.chunk_idx { let row = self.row_idx; self.row_idx += 1; - return Some(RowToStateMapping { - from_row: row, - to_state: addr.row_idx as usize, - }); + return Some((row, addr.row_idx as usize)); } self.row_idx += 1; } diff --git a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index c3926aa98..e6ea716c5 100644 --- a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -1,36 +1,45 @@ use core::fmt; use std::any::Any; use std::fmt::Debug; -use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; - -use super::ChunkGroupAddressIter; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::aggregate::{ - AggregateState, - BinaryNonNullUpdater, - StateCombiner, - StateFinalizer, - UnaryNonNullUpdater, -}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::storage::{AddressableStorage, PrimitiveStorage}; - -pub struct TypedAggregateGroupStates { +use stdutil::marker::PhantomCovariant; + +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::buffer::physical_type::{MutablePhysicalStorage, PhysicalStorage}; +use crate::arrays::executor_exp::aggregate::binary::BinaryNonNullUpdater; +use 
crate::arrays::executor_exp::aggregate::unary::UnaryNonNullUpdater; +use crate::arrays::executor_exp::aggregate::{AggregateState, StateCombiner}; +use crate::arrays::executor_exp::PutBuffer; + +pub struct TypedAggregateGroupStates< + State, + Input, + Output: ?Sized, + StateInit, + StateUpdate, + StateFinalize, +> { + /// States being tracked. states: Vec, + /// Index we should start draining from. Updated after every call to + /// `drain`. + drain_idx: usize, + + /// How new states are initialized. state_init: StateInit, + /// How states get updated. state_update: StateUpdate, + /// How to finalize states. state_finalize: StateFinalize, - _input: PhantomData, - _output: PhantomData, + _input: PhantomCovariant, + _output: PhantomCovariant, } -impl +impl TypedAggregateGroupStates { pub fn new( @@ -40,109 +49,101 @@ impl ) -> Self { TypedAggregateGroupStates { states: Vec::new(), + drain_idx: 0, state_init, state_update, state_finalize, - _input: PhantomData, - _output: PhantomData, + _input: PhantomCovariant::new(), + _output: PhantomCovariant::new(), } } } -/// Helper for create an `AggregateGroupStates` that accepts one input. -pub fn new_unary_aggregate_states( - state_init: StateInit, - state_finalize: StateFinalize, -) -> Box -where - Storage: PhysicalStorage, - State: for<'a> AggregateState< - <::Storage<'a> as AddressableStorage>::T, - Output, - > + Sync - + Send - + 'static, - Output: Sync + Send + 'static, - StateInit: Fn() -> State + Sync + Send + 'static, - StateFinalize: Fn(&mut [State]) -> Result + Sync + Send + 'static, -{ - Box::new(TypedAggregateGroupStates { - states: Vec::new(), - state_init, - state_update: unary_update::, - state_finalize, - _input: PhantomData, - _output: PhantomData, - }) -} - -/// Helper for create an `AggregateGroupStates` that accepts two inputs. 
-pub fn new_binary_aggregate_states( - state_init: StateInit, - state_finalize: StateFinalize, -) -> Box -where - Storage1: PhysicalStorage, - Storage2: PhysicalStorage, - State: for<'a> AggregateState<(Storage1::Type<'a>, Storage2::Type<'a>), Output> - + Sync - + Send - + 'static, - Output: Sync + Send + 'static, - StateInit: Fn() -> State + Sync + Send + 'static, - StateFinalize: Fn(&mut [State]) -> Result + Sync + Send + 'static, -{ - Box::new(TypedAggregateGroupStates { - states: Vec::new(), - state_init, - state_update: binary_update::, - state_finalize, - _input: PhantomData, - _output: PhantomData, - }) -} - impl AggregateGroupStates for TypedAggregateGroupStates where State: AggregateState + Sync + Send + 'static, Input: Sync + Send, - Output: Sync + Send, + Output: Sync + Send + ?Sized, StateInit: Fn() -> State + Sync + Send, - StateUpdate: Fn(&[&Array], ChunkGroupAddressIter, &mut [State]) -> Result<()> + Sync + Send, - StateFinalize: Fn(&mut [State]) -> Result + Sync + Send, + StateUpdate: Fn(&[Array], Selection, &[usize], &mut [State]) -> Result<()> + Sync + Send, + StateFinalize: Fn(&mut [State], &mut Array) -> Result<()> + Sync + Send, { fn opaque_states_mut(&mut self) -> OpaqueStatesMut<'_> { + debug_assert_eq!(0, self.drain_idx); OpaqueStatesMut(&mut self.states) } - fn new_states(&mut self, count: usize) { + fn new_groups(&mut self, count: usize) { + debug_assert_eq!(0, self.drain_idx); self.states.extend((0..count).map(|_| (self.state_init)())) } - fn num_states(&self) -> usize { + fn num_groups(&self) -> usize { self.states.len() } - fn update_states(&mut self, inputs: &[&Array], mapping: ChunkGroupAddressIter) -> Result<()> { - (self.state_update)(inputs, mapping, &mut self.states) + fn update_group_states( + &mut self, + inputs: &[Array], + selection: Selection, + mapping: &[usize], + ) -> Result<()> { + debug_assert_eq!(0, self.drain_idx); + debug_assert_eq!(selection.len(), mapping.len()); + + (self.state_update)(inputs, selection, mapping, &mut self.states) } fn combine( &mut self, consume: &mut Box, - mapping: ChunkGroupAddressIter, + selection: Selection, + mapping: &[usize], ) -> Result<()> { + debug_assert_eq!(0, self.drain_idx); + debug_assert_eq!(selection.len(), mapping.len()); + let consume_states = consume.opaque_states_mut().downcast::>()?; - StateCombiner::combine(consume_states, mapping, &mut self.states) + + StateCombiner::combine( + consume_states, + selection.iter().zip(mapping.iter().copied()), + &mut self.states, + ) + } + + fn drain(&mut self, output: &mut Array) -> Result { + let num_drain = usize::min(self.states.len() - self.drain_idx, output.capacity()); + let drain_states = &mut self.states[self.drain_idx..self.drain_idx + num_drain]; + + (self.state_finalize)(drain_states, output)?; + self.drain_idx += num_drain; + + Ok(num_drain) } +} - fn finalize(&mut self) -> Result { - (self.state_finalize)(&mut self.states) +pub fn drain(states: &mut [State], output: &mut Array) -> Result<()> +where + S: MutablePhysicalStorage, + State: AggregateState, +{ + let buffer = &mut S::get_addressable_mut(output.data.try_as_mut()?)?; + let validity = &mut output.validity; + + for (idx, state) in states.iter_mut().enumerate() { + state.finalize(PutBuffer { + idx, + buffer, + validity, + })?; } + + Ok(()) } -impl fmt::Debug +impl fmt::Debug for TypedAggregateGroupStates { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -160,23 +161,39 @@ pub trait AggregateGroupStates: Debug + Sync + Send { fn opaque_states_mut(&mut self) -> 
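With the finalize closures gone, an aggregate impl now hands `TypedAggregateGroupStates::new` an init/update/drain triple. The generic arguments below are assumptions (they were dropped from the diff rendering), shown for an i64 MIN; this is a sketch against the crate API, not runnable stand-alone code.

fn new_states(&self) -> Box<dyn AggregateGroupStates> {
    Box::new(TypedAggregateGroupStates::new(
        // init: one fresh state per group
        MinStatePrimitive::<i64>::default,
        // update: unary, non-null rows only; input/output storage assumed PhysicalI64
        unary_update::<PhysicalI64, PhysicalI64, _>,
        // drain: finalize states into the output array via PutBuffer
        drain::<PhysicalI64, _>,
    ))
}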
OpaqueStatesMut<'_>; /// Create `count` number of new states. - fn new_states(&mut self, count: usize); + fn new_groups(&mut self, count: usize); /// Returns the number of states being tracked. - fn num_states(&self) -> usize; + fn num_groups(&self) -> usize; - /// Update states from inputs using some mapping. - fn update_states(&mut self, inputs: &[&Array], mapping: ChunkGroupAddressIter) -> Result<()>; + /// Updates group states from array inputs. + /// + /// Selection indicates which rows from the input array to use during state + /// updates, and `mapping` provides the state index to use for each row. + /// The selection and mapping array must be the same length. + fn update_group_states( + &mut self, + inputs: &[Array], + selection: Selection, + mapping: &[usize], + ) -> Result<()>; /// Combine states from another partition into self using some mapping. + /// + /// Selection indicates which states to use from `consume`, and mapping + /// indicates the target state to merge into for each selected state. fn combine( &mut self, consume: &mut Box, - mapping: ChunkGroupAddressIter, + selection: Selection, + mapping: &[usize], ) -> Result<()>; - /// Finalize the states and return an array. - fn finalize(&mut self) -> Result; + /// Finalize and drain states into `output`. + /// + /// Returns the number of states drained. If the number of states drained is + /// less than the capacity of the output array, then draining is finished. + fn drain(&mut self, output: &mut Array) -> Result; } #[derive(Debug)] @@ -192,61 +209,45 @@ impl<'a> OpaqueStatesMut<'a> { } } -/// Update function for a unary aggregate. -pub fn unary_update( - arrays: &[&Array], - mapping: ChunkGroupAddressIter, +pub fn unary_update( + arrays: &[Array], + selection: Selection, + mapping: &[usize], states: &mut [State], ) -> Result<()> where Storage: PhysicalStorage, - State: for<'a> AggregateState, Output>, + Output: MutablePhysicalStorage, + State: for<'a> AggregateState<&'a Storage::StorageType, Output::StorageType>, { - UnaryNonNullUpdater::update::(arrays[0], mapping, states) + UnaryNonNullUpdater::update::( + &arrays[0], + selection, + mapping.iter().copied(), + states, + ) } -pub fn binary_update( - arrays: &[&Array], - mapping: ChunkGroupAddressIter, +pub fn binary_update( + arrays: &[Array], + selection: Selection, + mapping: &[usize], states: &mut [State], ) -> Result<()> where Storage1: PhysicalStorage, Storage2: PhysicalStorage, - State: for<'a> AggregateState<(Storage1::Type<'a>, Storage2::Type<'a>), Output>, + Output: MutablePhysicalStorage, + State: for<'a> AggregateState< + (&'a Storage1::StorageType, &'a Storage2::StorageType), + Output::StorageType, + >, { - BinaryNonNullUpdater::update::( - arrays[0], arrays[1], mapping, states, + BinaryNonNullUpdater::update::( + &arrays[0], + &arrays[1], + selection, + mapping.iter().copied(), + states, ) } - -pub fn untyped_null_finalize(states: &mut [State]) -> Result { - Ok(Array::new_untyped_null_array(states.len())) -} - -pub fn boolean_finalize(datatype: DataType, states: &mut [State]) -> Result -where - State: AggregateState, -{ - let builder = ArrayBuilder { - datatype, - buffer: BooleanBuffer::with_len(states.len()), - }; - StateFinalizer::finalize(states, builder) -} - -pub fn primitive_finalize( - datatype: DataType, - states: &mut [State], -) -> Result -where - State: AggregateState, - Output: Copy + Default, - ArrayData: From>, -{ - let builder = ArrayBuilder { - datatype, - buffer: PrimitiveBuffer::with_len(states.len()), - }; - 
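Given the drain contract documented above (done once a call fills fewer rows than the output's capacity), a caller drains in a loop over a reused output array. This is a sketch against the Array API used in the sum tests, inside a function returning the crate's Result; it is not stand-alone.

let mut out = Array::new(&NopBufferManager, DataType::Int64, batch_size)?;
loop {
    let n = states.drain(&mut out)?;
    // ... hand the first `n` rows of `out` to the next operator ...
    if n < batch_size {
        break;
    }
    out.reset_for_write(&NopBufferManager)?;
}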
StateFinalizer::finalize(states, builder) -} diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs index a3bd2318e..36b584f55 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs @@ -3,10 +3,10 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, PhysicalF16, PhysicalF32, PhysicalF64, @@ -15,15 +15,15 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -99,80 +99,63 @@ impl ScalarFunction for Add { inputs[0].datatype(table_list)?, inputs[1].datatype(table_list)?, ) { - (DataType::Float16, DataType::Float16) => ( - Box::new(AddImpl::::new(DataType::Float16)), - DataType::Float16, - ), - (DataType::Float32, DataType::Float32) => ( - Box::new(AddImpl::::new(DataType::Float32)), - DataType::Float32, - ), - (DataType::Float64, DataType::Float64) => ( - Box::new(AddImpl::::new(DataType::Float64)), - DataType::Float64, - ), - (DataType::Int8, DataType::Int8) => ( - Box::new(AddImpl::::new(DataType::Int8)), - DataType::Int8, - ), - (DataType::Int16, DataType::Int16) => ( - Box::new(AddImpl::::new(DataType::Int16)), - DataType::Int16, - ), - (DataType::Int32, DataType::Int32) => ( - Box::new(AddImpl::::new(DataType::Int32)), - DataType::Int32, - ), - (DataType::Int64, DataType::Int64) => ( - Box::new(AddImpl::::new(DataType::Int64)), - DataType::Int64, - ), - (DataType::Int128, DataType::Int128) => ( - Box::new(AddImpl::::new(DataType::Int128)), - DataType::Int128, - ), - (DataType::UInt8, DataType::UInt8) => ( - Box::new(AddImpl::::new(DataType::UInt8)), - DataType::UInt8, - ), - (DataType::UInt16, DataType::UInt16) => ( - Box::new(AddImpl::::new(DataType::UInt16)), - DataType::UInt16, - ), - (DataType::UInt32, DataType::UInt32) => ( - Box::new(AddImpl::::new(DataType::UInt32)), - DataType::UInt32, - ), - (DataType::UInt64, DataType::UInt64) => ( - Box::new(AddImpl::::new(DataType::UInt64)), - DataType::UInt64, - ), - (DataType::UInt128, DataType::UInt128) => ( - Box::new(AddImpl::::new(DataType::UInt128)), - DataType::UInt128, - ), + (DataType::Float16, DataType::Float16) => { + (Box::new(AddImpl::::new()), DataType::Float16) + } + (DataType::Float32, DataType::Float32) => { + (Box::new(AddImpl::::new()), DataType::Float32) + } + (DataType::Float64, DataType::Float64) => { + (Box::new(AddImpl::::new()), DataType::Float64) + } + (DataType::Int8, DataType::Int8) => { + (Box::new(AddImpl::::new()), DataType::Int8) + } + (DataType::Int16, 
DataType::Int16) => { + (Box::new(AddImpl::::new()), DataType::Int16) + } + (DataType::Int32, DataType::Int32) => { + (Box::new(AddImpl::::new()), DataType::Int32) + } + (DataType::Int64, DataType::Int64) => { + (Box::new(AddImpl::::new()), DataType::Int64) + } + (DataType::Int128, DataType::Int128) => { + (Box::new(AddImpl::::new()), DataType::Int128) + } + (DataType::UInt8, DataType::UInt8) => { + (Box::new(AddImpl::::new()), DataType::UInt8) + } + (DataType::UInt16, DataType::UInt16) => { + (Box::new(AddImpl::::new()), DataType::UInt16) + } + (DataType::UInt32, DataType::UInt32) => { + (Box::new(AddImpl::::new()), DataType::UInt32) + } + (DataType::UInt64, DataType::UInt64) => { + (Box::new(AddImpl::::new()), DataType::UInt64) + } + (DataType::UInt128, DataType::UInt128) => { + (Box::new(AddImpl::::new()), DataType::UInt128) + } // TODO: Split out decimal (for scaling) - datatypes @ (DataType::Decimal64(_), DataType::Decimal64(_)) => ( - Box::new(AddImpl::::new(datatypes.0.clone())), - datatypes.0, - ), - datatypes @ (DataType::Decimal128(_), DataType::Decimal128(_)) => ( - Box::new(AddImpl::::new(datatypes.0.clone())), - datatypes.0, - ), + datatypes @ (DataType::Decimal64(_), DataType::Decimal64(_)) => { + (Box::new(AddImpl::::new()), datatypes.0) + } + datatypes @ (DataType::Decimal128(_), DataType::Decimal128(_)) => { + (Box::new(AddImpl::::new()), datatypes.0) + } // Date + days - (DataType::Date32, DataType::Int32) => ( - Box::new(AddImpl::::new(DataType::Date32)), - DataType::Date32, - ), + (DataType::Date32, DataType::Int32) => { + (Box::new(AddImpl::::new()), DataType::Date32) + } // Days + date // Note both are represented as i32 physical type, we don't need to worry about flipping the sides. - (DataType::Int32, DataType::Date32) => ( - Box::new(AddImpl::::new(DataType::Date32)), - DataType::Date32, - ), + (DataType::Int32, DataType::Date32) => { + (Box::new(AddImpl::::new()), DataType::Date32) + } // TODO: Interval (a, b) => return Err(invalid_input_types_error(self, &[a, b])), @@ -189,49 +172,52 @@ impl ScalarFunction for Add { #[derive(Debug, Clone)] pub struct AddImpl { - datatype: DataType, _s: PhantomData, } impl AddImpl { - fn new(datatype: DataType) -> Self { - AddImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + AddImpl { _s: PhantomData } } } impl ScalarFunctionImpl for AddImpl where - S: PhysicalStorage, - for<'a> S::Type<'a>: std::ops::Add> + Default + Copy, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Add + Sized + Copy, { - fn execute(&self, inputs: &[&Array]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - BinaryExecutor::execute::(a, b, builder, |a, b, buf| buf.put(&(a + b))) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a + b)), + ) } } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; use crate::functions::scalar::ScalarFunction; #[test] fn add_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([4, 5, 6]); + let a = 
Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([4, 5, 6]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -249,9 +235,11 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([5, 7, 9]); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([5, 7, 9]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs index f5c27bd17..91b82308a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs @@ -3,12 +3,10 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::compute::cast::array::cast_decimal_to_float; -use crate::arrays::compute::cast::behavior::CastFailBehavior; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, PhysicalF16, PhysicalF32, PhysicalF64, @@ -17,16 +15,16 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; -use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -105,58 +103,45 @@ impl ScalarFunction for Div { inputs[0].datatype(table_list)?, inputs[1].datatype(table_list)?, ) { - (DataType::Float16, DataType::Float16) => ( - Box::new(DivImpl::::new(DataType::Float16)), - DataType::Float16, - ), - (DataType::Float32, DataType::Float32) => ( - Box::new(DivImpl::::new(DataType::Float32)), - DataType::Float32, - ), - (DataType::Float64, DataType::Float64) => ( - Box::new(DivImpl::::new(DataType::Float64)), - DataType::Float64, - ), - (DataType::Int8, DataType::Int8) => ( - Box::new(DivImpl::::new(DataType::Int8)), - DataType::Int8, - ), - (DataType::Int16, DataType::Int16) => ( - Box::new(DivImpl::::new(DataType::Int16)), - DataType::Int16, - ), - (DataType::Int32, DataType::Int32) => ( - Box::new(DivImpl::::new(DataType::Int32)), - DataType::Int32, - ), - (DataType::Int64, DataType::Int64) => ( - Box::new(DivImpl::::new(DataType::Int64)), - DataType::Int64, - ), - (DataType::Int128, DataType::Int128) => ( - Box::new(DivImpl::::new(DataType::Int128)), - DataType::Int128, - ), - (DataType::UInt8, DataType::UInt8) => ( - Box::new(DivImpl::::new(DataType::UInt8)), - DataType::UInt8, - ), - (DataType::UInt16, DataType::UInt16) => ( - 
Box::new(DivImpl::::new(DataType::UInt16)), - DataType::UInt16, - ), - (DataType::UInt32, DataType::UInt32) => ( - Box::new(DivImpl::::new(DataType::UInt32)), - DataType::UInt32, - ), - (DataType::UInt64, DataType::UInt64) => ( - Box::new(DivImpl::::new(DataType::UInt64)), - DataType::UInt64, - ), - (DataType::UInt128, DataType::UInt128) => ( - Box::new(DivImpl::::new(DataType::UInt128)), - DataType::UInt128, - ), + (DataType::Float16, DataType::Float16) => { + (Box::new(DivImpl::::new()), DataType::Float16) + } + (DataType::Float32, DataType::Float32) => { + (Box::new(DivImpl::::new()), DataType::Float32) + } + (DataType::Float64, DataType::Float64) => { + (Box::new(DivImpl::::new()), DataType::Float64) + } + (DataType::Int8, DataType::Int8) => { + (Box::new(DivImpl::::new()), DataType::Int8) + } + (DataType::Int16, DataType::Int16) => { + (Box::new(DivImpl::::new()), DataType::Int16) + } + (DataType::Int32, DataType::Int32) => { + (Box::new(DivImpl::::new()), DataType::Int32) + } + (DataType::Int64, DataType::Int64) => { + (Box::new(DivImpl::::new()), DataType::Int64) + } + (DataType::Int128, DataType::Int128) => { + (Box::new(DivImpl::::new()), DataType::Int128) + } + (DataType::UInt8, DataType::UInt8) => { + (Box::new(DivImpl::::new()), DataType::UInt8) + } + (DataType::UInt16, DataType::UInt16) => { + (Box::new(DivImpl::::new()), DataType::UInt16) + } + (DataType::UInt32, DataType::UInt32) => { + (Box::new(DivImpl::::new()), DataType::UInt32) + } + (DataType::UInt64, DataType::UInt64) => { + (Box::new(DivImpl::::new()), DataType::UInt64) + } + (DataType::UInt128, DataType::UInt128) => { + (Box::new(DivImpl::::new()), DataType::UInt128) + } // Decimals (DataType::Decimal64(_), DataType::Decimal64(_)) => ( @@ -198,77 +183,88 @@ impl ScalarFunctionImpl for DecimalDivImpl where D: DecimalType, { - fn execute(&self, inputs: &[&Array]) -> Result { - let a = inputs[0]; - let b = inputs[1]; + // fn execute2(&self, inputs: &[&Array2]) -> Result { + // unimplemented!() + // let a = inputs[0]; + // let b = inputs[1]; - let a = cast_decimal_to_float::( - a, - DataType::Float64, - CastFailBehavior::Error, - )?; - let b = cast_decimal_to_float::( - b, - DataType::Float64, - CastFailBehavior::Error, - )?; + // let a = cast_decimal_to_float::( + // a, + // DataType::Float64, + // CastFailBehavior::Error, + // )?; + // let b = cast_decimal_to_float::( + // b, + // DataType::Float64, + // CastFailBehavior::Error, + // )?; - let builder = ArrayBuilder { - datatype: DataType::Float64, - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; + // let builder = ArrayBuilder { + // datatype: DataType::Float64, + // buffer: PrimitiveBuffer::with_len(a.logical_len()), + // }; - BinaryExecutor::execute::(&a, &b, builder, |a, b, buf| { - buf.put(&(a / b)) - }) + // BinaryExecutor2::execute::( + // &a, + // &b, + // builder, + // |a, b, buf| buf.put(&(a / b)), + // ) + // } + + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + unimplemented!() } } #[derive(Debug, Clone)] pub struct DivImpl { - datatype: DataType, _s: PhantomData, } impl DivImpl { - fn new(datatype: DataType) -> Self { - DivImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + DivImpl { _s: PhantomData } } } impl ScalarFunctionImpl for DivImpl where - S: PhysicalStorage, - for<'a> S::Type<'a>: std::ops::Div> + Default + Copy, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Div + Sized + Copy, { - fn execute(&self, inputs: &[&Array]) -> Result { - let a = inputs[0]; - let b = 
inputs[1]; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - BinaryExecutor::execute::(a, b, builder, |a, b, buf| buf.put(&(a / b))) + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a / b)), + ) } } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; use crate::functions::scalar::ScalarFunction; #[test] fn div_i32() { - let a = Array::from_iter([4, 5, 6]); - let b = Array::from_iter([1, 2, 3]); + let a = Array::try_from_iter([4, 5, 6]).unwrap(); + let b = Array::try_from_iter([1, 2, 3]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -286,9 +282,11 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([4, 2, 2]); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([4, 2, 2]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs index 1a2c3d9e9..2fc7ad1e7 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs @@ -4,10 +4,10 @@ use std::marker::PhantomData; use num_traits::{NumCast, PrimInt}; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, PhysicalF16, PhysicalF32, PhysicalF64, @@ -24,10 +24,11 @@ use crate::arrays::executor::physical_type::{ PhysicalU64, PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor; +use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::arrays::scalar::interval::Interval; -use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -116,58 +117,45 @@ impl ScalarFunction for Mul { inputs[0].datatype(table_list)?, inputs[1].datatype(table_list)?, ) { - (DataType::Float16, DataType::Float16) => ( - Box::new(MulImpl::::new(DataType::Float16)), - DataType::Float16, - ), - (DataType::Float32, DataType::Float32) => ( - Box::new(MulImpl::::new(DataType::Float32)), - DataType::Float32, - ), - (DataType::Float64, DataType::Float64) => ( 
- Box::new(MulImpl::::new(DataType::Float64)), - DataType::Float64, - ), - (DataType::Int8, DataType::Int8) => ( - Box::new(MulImpl::::new(DataType::Int8)), - DataType::Int8, - ), - (DataType::Int16, DataType::Int16) => ( - Box::new(MulImpl::::new(DataType::Int16)), - DataType::Int16, - ), - (DataType::Int32, DataType::Int32) => ( - Box::new(MulImpl::::new(DataType::Int32)), - DataType::Int32, - ), - (DataType::Int64, DataType::Int64) => ( - Box::new(MulImpl::::new(DataType::Int64)), - DataType::Int64, - ), - (DataType::Int128, DataType::Int128) => ( - Box::new(MulImpl::::new(DataType::Int128)), - DataType::Int128, - ), - (DataType::UInt8, DataType::UInt8) => ( - Box::new(MulImpl::::new(DataType::UInt8)), - DataType::UInt8, - ), - (DataType::UInt16, DataType::UInt16) => ( - Box::new(MulImpl::::new(DataType::UInt16)), - DataType::UInt16, - ), - (DataType::UInt32, DataType::UInt32) => ( - Box::new(MulImpl::::new(DataType::UInt32)), - DataType::UInt32, - ), - (DataType::UInt64, DataType::UInt64) => ( - Box::new(MulImpl::::new(DataType::UInt64)), - DataType::UInt64, - ), - (DataType::UInt128, DataType::UInt128) => ( - Box::new(MulImpl::::new(DataType::UInt128)), - DataType::UInt128, - ), + (DataType::Float16, DataType::Float16) => { + (Box::new(MulImpl::::new()), DataType::Float16) + } + (DataType::Float32, DataType::Float32) => { + (Box::new(MulImpl::::new()), DataType::Float32) + } + (DataType::Float64, DataType::Float64) => { + (Box::new(MulImpl::::new()), DataType::Float64) + } + (DataType::Int8, DataType::Int8) => { + (Box::new(MulImpl::::new()), DataType::Int8) + } + (DataType::Int16, DataType::Int16) => { + (Box::new(MulImpl::::new()), DataType::Int16) + } + (DataType::Int32, DataType::Int32) => { + (Box::new(MulImpl::::new()), DataType::Int32) + } + (DataType::Int64, DataType::Int64) => { + (Box::new(MulImpl::::new()), DataType::Int64) + } + (DataType::Int128, DataType::Int128) => { + (Box::new(MulImpl::::new()), DataType::Int128) + } + (DataType::UInt8, DataType::UInt8) => { + (Box::new(MulImpl::::new()), DataType::UInt8) + } + (DataType::UInt16, DataType::UInt16) => { + (Box::new(MulImpl::::new()), DataType::UInt16) + } + (DataType::UInt32, DataType::UInt32) => { + (Box::new(MulImpl::::new()), DataType::UInt32) + } + (DataType::UInt64, DataType::UInt64) => { + (Box::new(MulImpl::::new()), DataType::UInt64) + } + (DataType::UInt128, DataType::UInt128) => { + (Box::new(MulImpl::::new()), DataType::UInt128) + } // Decimal (DataType::Decimal64(a), DataType::Decimal64(b)) => { @@ -178,7 +166,7 @@ impl ScalarFunction for Mul { let scale = a.scale + b.scale; let return_type = DataType::Decimal64(DecimalTypeMeta { precision, scale }); ( - Box::new(DecimalMulImpl::::new(return_type.clone())), + Box::new(DecimalMulImpl::::new()), return_type, ) } @@ -187,7 +175,7 @@ impl ScalarFunction for Mul { let scale = a.scale + b.scale; let return_type = DataType::Decimal128(DecimalTypeMeta { precision, scale }); ( - Box::new(DecimalMulImpl::::new(return_type.clone())), + Box::new(DecimalMulImpl::::new()), return_type, ) } @@ -237,111 +225,112 @@ impl IntervalMulImpl { impl ScalarFunctionImpl for IntervalMulImpl where Rhs: PhysicalStorage, - for<'a> Rhs::Type<'a>: PrimInt, + Rhs::StorageType: PrimInt, { - fn execute(&self, inputs: &[&Array]) -> Result { - let (lhs, rhs) = if LHS_RHS_FLIPPED { - (inputs[1], inputs[0]) - } else { - (inputs[0], inputs[1]) - }; - - let builder = ArrayBuilder { - datatype: DataType::Interval, - buffer: PrimitiveBuffer::::with_len(lhs.logical_len()), - }; - - 
BinaryExecutor::execute::(lhs, rhs, builder, |a, b, buf| { - // TODO: Overflow check - buf.put(&Interval { - months: a.months * (::from(b).unwrap_or_default()), - days: a.days * (::from(b).unwrap_or_default()), - nanos: a.nanos * (::from(b).unwrap_or_default()), - }) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + let (lhs, rhs) = if LHS_RHS_FLIPPED { (b, a) } else { (a, b) }; + + BinaryExecutor::execute::( + lhs, + sel, + rhs, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| { + // TODO: Overflow check + buf.put(&Interval { + months: a.months * (::from(b).unwrap_or_default()), + days: a.days * (::from(b).unwrap_or_default()), + nanos: a.nanos * (::from(b).unwrap_or_default()), + }) + }, + ) } } #[derive(Debug, Clone)] pub struct DecimalMulImpl { - datatype: DataType, _d: PhantomData, } impl DecimalMulImpl { - fn new(datatype: DataType) -> Self { - DecimalMulImpl { - datatype, - _d: PhantomData, - } + const fn new() -> Self { + DecimalMulImpl { _d: PhantomData } } } impl ScalarFunctionImpl for DecimalMulImpl where D: DecimalType, - ArrayData: From>, { - fn execute(&self, inputs: &[&Array]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::::with_len(a.logical_len()), - }; - - BinaryExecutor::execute::(a, b, builder, |a, b, buf| { - buf.put(&(a * b)) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a * b)), + ) } } #[derive(Debug, Clone)] pub struct MulImpl { - datatype: DataType, _s: PhantomData, } impl MulImpl { - fn new(datatype: DataType) -> Self { - MulImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + MulImpl { _s: PhantomData } } } impl ScalarFunctionImpl for MulImpl where - S: PhysicalStorage, - for<'a> S::Type<'a>: std::ops::Mul> + Default + Copy, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Mul + Sized + Copy, { - fn execute(&self, inputs: &[&Array]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - BinaryExecutor::execute::(a, b, builder, |a, b, buf| buf.put(&(a * b))) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a * b)), + ) } } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; use crate::functions::scalar::ScalarFunction; #[test] fn mul_i32() { - let a = Array::from_iter([4, 5, 6]); - let b = Array::from_iter([1, 2, 3]); + let a = Array::try_from_iter([4, 5, 6]).unwrap(); + let b = Array::try_from_iter([1, 2, 3]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -359,9 +348,11 @@ mod tests { ) .unwrap(); - let out = 
planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([4, 10, 18]); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([4, 10, 18]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs index 52beb91ff..59b48e546 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs @@ -3,10 +3,10 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, PhysicalF16, PhysicalF32, PhysicalF64, @@ -15,15 +15,15 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -113,58 +113,45 @@ impl ScalarFunction for Rem { inputs[0].datatype(table_list)?, inputs[1].datatype(table_list)?, ) { - (DataType::Float16, DataType::Float16) => ( - Box::new(RemImpl::::new(DataType::Float16)), - DataType::Float16, - ), - (DataType::Float32, DataType::Float32) => ( - Box::new(RemImpl::::new(DataType::Float32)), - DataType::Float32, - ), - (DataType::Float64, DataType::Float64) => ( - Box::new(RemImpl::::new(DataType::Float64)), - DataType::Float64, - ), - (DataType::Int8, DataType::Int8) => ( - Box::new(RemImpl::::new(DataType::Int8)), - DataType::Int8, - ), - (DataType::Int16, DataType::Int16) => ( - Box::new(RemImpl::::new(DataType::Int16)), - DataType::Int16, - ), - (DataType::Int32, DataType::Int32) => ( - Box::new(RemImpl::::new(DataType::Int32)), - DataType::Int32, - ), - (DataType::Int64, DataType::Int64) => ( - Box::new(RemImpl::::new(DataType::Int64)), - DataType::Int64, - ), - (DataType::Int128, DataType::Int128) => ( - Box::new(RemImpl::::new(DataType::Int128)), - DataType::Int128, - ), - (DataType::UInt8, DataType::UInt8) => ( - Box::new(RemImpl::::new(DataType::UInt8)), - DataType::UInt8, - ), - (DataType::UInt16, DataType::UInt16) => ( - Box::new(RemImpl::::new(DataType::UInt16)), - DataType::UInt16, - ), - (DataType::UInt32, DataType::UInt32) => ( - Box::new(RemImpl::::new(DataType::UInt32)), - DataType::UInt32, - ), - (DataType::UInt64, DataType::UInt64) => ( - Box::new(RemImpl::::new(DataType::UInt64)), - DataType::UInt64, - ), - (DataType::UInt128, DataType::UInt128) => ( - Box::new(RemImpl::::new(DataType::UInt128)), - DataType::UInt128, - ), + (DataType::Float16, DataType::Float16) => { + (Box::new(RemImpl::::new()), 
DataType::Float16) + } + (DataType::Float32, DataType::Float32) => { + (Box::new(RemImpl::::new()), DataType::Float32) + } + (DataType::Float64, DataType::Float64) => { + (Box::new(RemImpl::::new()), DataType::Float64) + } + (DataType::Int8, DataType::Int8) => { + (Box::new(RemImpl::::new()), DataType::Int8) + } + (DataType::Int16, DataType::Int16) => { + (Box::new(RemImpl::::new()), DataType::Int16) + } + (DataType::Int32, DataType::Int32) => { + (Box::new(RemImpl::::new()), DataType::Int32) + } + (DataType::Int64, DataType::Int64) => { + (Box::new(RemImpl::::new()), DataType::Int64) + } + (DataType::Int128, DataType::Int128) => { + (Box::new(RemImpl::::new()), DataType::Int128) + } + (DataType::UInt8, DataType::UInt8) => { + (Box::new(RemImpl::::new()), DataType::UInt8) + } + (DataType::UInt16, DataType::UInt16) => { + (Box::new(RemImpl::::new()), DataType::UInt16) + } + (DataType::UInt32, DataType::UInt32) => { + (Box::new(RemImpl::::new()), DataType::UInt32) + } + (DataType::UInt64, DataType::UInt64) => { + (Box::new(RemImpl::::new()), DataType::UInt64) + } + (DataType::UInt128, DataType::UInt128) => { + (Box::new(RemImpl::::new()), DataType::UInt128) + } // TODO: Interval, date, decimal (a, b) => return Err(invalid_input_types_error(self, &[a, b])), @@ -181,49 +168,52 @@ impl ScalarFunction for Rem { #[derive(Debug, Clone)] pub struct RemImpl { - datatype: DataType, _s: PhantomData, } impl RemImpl { - fn new(datatype: DataType) -> Self { - RemImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + RemImpl { _s: PhantomData } } } impl ScalarFunctionImpl for RemImpl where - S: PhysicalStorage, - for<'a> S::Type<'a>: std::ops::Rem> + Default + Copy, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Rem + Sized + Copy, { - fn execute(&self, inputs: &[&Array]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - BinaryExecutor::execute::(a, b, builder, |a, b, buf| buf.put(&(a % b))) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a % b)), + ) } } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; use crate::functions::scalar::ScalarFunction; #[test] fn rem_i32() { - let a = Array::from_iter([4, 5, 6]); - let b = Array::from_iter([1, 2, 3]); + let a = Array::try_from_iter([4, 5, 6]).unwrap(); + let b = Array::try_from_iter([1, 2, 3]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -241,9 +231,10 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([0, 1, 0]); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([0, 1, 0]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs index 5936265ad..455411538 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs @@ -3,10 +3,10 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, PhysicalF16, PhysicalF32, PhysicalF64, @@ -15,15 +15,15 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -112,74 +112,58 @@ impl ScalarFunction for Sub { inputs[0].datatype(table_list)?, inputs[1].datatype(table_list)?, ) { - (DataType::Float16, DataType::Float16) => ( - Box::new(SubImpl::::new(DataType::Float16)), - DataType::Float16, - ), - (DataType::Float32, DataType::Float32) => ( - Box::new(SubImpl::::new(DataType::Float32)), - DataType::Float32, - ), - (DataType::Float64, DataType::Float64) => ( - Box::new(SubImpl::::new(DataType::Float64)), - DataType::Float64, - ), - (DataType::Int8, DataType::Int8) => ( - Box::new(SubImpl::::new(DataType::Int8)), - DataType::Int8, - ), - (DataType::Int16, DataType::Int16) => ( - Box::new(SubImpl::::new(DataType::Int16)), - DataType::Int16, - ), - (DataType::Int32, DataType::Int32) => ( - Box::new(SubImpl::::new(DataType::Int32)), - DataType::Int32, - ), - (DataType::Int64, DataType::Int64) => ( - Box::new(SubImpl::::new(DataType::Int64)), - DataType::Int64, - ), - (DataType::Int128, DataType::Int128) => ( - Box::new(SubImpl::::new(DataType::Int128)), - DataType::Int128, - ), - (DataType::UInt8, DataType::UInt8) => ( - Box::new(SubImpl::::new(DataType::UInt8)), - DataType::UInt8, - ), - (DataType::UInt16, DataType::UInt16) => ( - Box::new(SubImpl::::new(DataType::UInt16)), - DataType::UInt16, - ), - (DataType::UInt32, DataType::UInt32) => ( - Box::new(SubImpl::::new(DataType::UInt32)), - DataType::UInt32, - ), - (DataType::UInt64, DataType::UInt64) => ( - Box::new(SubImpl::::new(DataType::UInt64)), - DataType::UInt64, - ), - (DataType::UInt128, DataType::UInt128) => ( - Box::new(SubImpl::::new(DataType::UInt128)), - DataType::UInt128, - ), + (DataType::Float16, DataType::Float16) => { + (Box::new(SubImpl::::new()), DataType::Float16) + } + (DataType::Float32, DataType::Float32) => { + (Box::new(SubImpl::::new()), DataType::Float32) + } + (DataType::Float64, DataType::Float64) => { + (Box::new(SubImpl::::new()), DataType::Float64) + } + (DataType::Int8, DataType::Int8) => { + (Box::new(SubImpl::::new()), DataType::Int8) + } + (DataType::Int16, DataType::Int16) => { + (Box::new(SubImpl::::new()), DataType::Int16) + } + (DataType::Int32, DataType::Int32) => { + 
(Box::new(SubImpl::::new()), DataType::Int32) + } + (DataType::Int64, DataType::Int64) => { + (Box::new(SubImpl::::new()), DataType::Int64) + } + (DataType::Int128, DataType::Int128) => { + (Box::new(SubImpl::::new()), DataType::Int128) + } + (DataType::UInt8, DataType::UInt8) => { + (Box::new(SubImpl::::new()), DataType::UInt8) + } + (DataType::UInt16, DataType::UInt16) => { + (Box::new(SubImpl::::new()), DataType::UInt16) + } + (DataType::UInt32, DataType::UInt32) => { + (Box::new(SubImpl::::new()), DataType::UInt32) + } + (DataType::UInt64, DataType::UInt64) => { + (Box::new(SubImpl::::new()), DataType::UInt64) + } + (DataType::UInt128, DataType::UInt128) => { + (Box::new(SubImpl::::new()), DataType::UInt128) + } // TODO: Split out decimal (for scaling) - datatypes @ (DataType::Decimal64(_), DataType::Decimal64(_)) => ( - Box::new(SubImpl::::new(datatypes.0.clone())), - datatypes.0, - ), - datatypes @ (DataType::Decimal128(_), DataType::Decimal128(_)) => ( - Box::new(SubImpl::::new(datatypes.0.clone())), - datatypes.0, - ), - - // Date + days - (DataType::Date32, DataType::Int32) => ( - Box::new(SubImpl::::new(DataType::Date32)), - DataType::Date32, - ), + datatypes @ (DataType::Decimal64(_), DataType::Decimal64(_)) => { + (Box::new(SubImpl::::new()), datatypes.0) + } + datatypes @ (DataType::Decimal128(_), DataType::Decimal128(_)) => { + (Box::new(SubImpl::::new()), datatypes.0) + } + + // Date - days + (DataType::Date32, DataType::Int32) => { + (Box::new(SubImpl::::new()), DataType::Date32) + } // TODO: Interval (a, b) => return Err(invalid_input_types_error(self, &[a, b])), @@ -196,49 +180,52 @@ impl ScalarFunction for Sub { #[derive(Debug, Clone)] pub struct SubImpl { - datatype: DataType, _s: PhantomData, } impl SubImpl { - fn new(datatype: DataType) -> Self { - SubImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + SubImpl { _s: PhantomData } } } impl ScalarFunctionImpl for SubImpl where - S: PhysicalStorage, - for<'a> S::Type<'a>: std::ops::Sub> + Default + Copy, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Sub + Sized + Copy, { - fn execute(&self, inputs: &[&Array]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - BinaryExecutor::execute::(a, b, builder, |a, b, buf| buf.put(&(a - b))) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a - b)), + ) } } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; use crate::functions::scalar::ScalarFunction; #[test] fn sub_i32() { - let a = Array::from_iter([4, 5, 6]); - let b = Array::from_iter([1, 2, 3]); + let a = Array::try_from_iter([4, 5, 6]).unwrap(); + let b = Array::try_from_iter([1, 2, 3]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -256,9 +243,11 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([3, 3, 3]); + let mut out = Array::new(&NopBufferManager, 
DataType::Int32, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([3, 3, 3]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs index c72438ec5..d594b922e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs @@ -3,13 +3,14 @@ use std::fmt::Debug; use rayexec_error::Result; use serde::{Deserialize, Serialize}; -use crate::arrays::array::Array; -use crate::arrays::bitmap::Bitmap; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalBool; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalBool; -use crate::arrays::executor::scalar::{BinaryExecutor, TernaryExecutor, UniformExecutor}; -use crate::arrays::storage::BooleanStorage; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::scalar::uniform::UniformExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -70,57 +71,52 @@ impl ScalarFunction for And { pub struct AndImpl; impl ScalarFunctionImpl for AndImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - match inputs.len() { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + match input.arrays().len() { 0 => { - let mut array = Array::new_with_array_data( - DataType::Boolean, - BooleanStorage::from(Bitmap::new_with_val(false, 1)), - ); - array.set_physical_validity(0, false); - Ok(array) + // TODO: Default to false? + let vals = output + .data_mut() + .try_as_mut()? 
+ .try_as_slice_mut::()?; + + for v in vals { + *v = false; + } } - 1 => Ok(inputs[0].clone()), - 2 => { - let a = inputs[0]; - let b = inputs[1]; - BinaryExecutor::execute::( - a, - b, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(a.logical_len()), - }, - |a, b, buf| buf.put(&(a && b)), - ) + 1 => { + let input = &input.arrays()[0]; + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| buf.put(v), + )?; } - 3 => { - let a = inputs[0]; - let b = inputs[1]; - let c = inputs[2]; - TernaryExecutor::execute::( + 2 => { + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( a, + sel, b, - c, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(a.logical_len()), - }, - |a, b, c, buf| buf.put(&(a && b && c)), - ) - } - _ => { - let len = inputs[0].logical_len(); - UniformExecutor::execute::( - inputs, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(len), - }, - |bools, buf| buf.put(&(bools.iter().all(|b| *b))), - ) + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a && b)), + )?; } + _ => UniformExecutor::execute::( + input.arrays(), + sel, + OutBuffer::from_array(output)?, + |bools, buf| buf.put(&(bools.iter().all(|b| **b))), + )?, } + + Ok(()) } } @@ -178,55 +174,69 @@ impl ScalarFunction for Or { pub struct OrImpl; impl ScalarFunctionImpl for OrImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - match inputs.len() { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + match input.arrays().len() { 0 => { - let mut array = Array::new_with_array_data( - DataType::Boolean, - BooleanStorage::from(Bitmap::new_with_val(false, 1)), - ); - array.set_physical_validity(0, false); - Ok(array) + // TODO: Default to false? + let vals = output + .data_mut() + .try_as_mut()? 
+ .try_as_slice_mut::()?; + + for v in vals { + *v = false; + } + } + 1 => { + let input = &input.arrays()[0]; + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| buf.put(v), + )?; } - 1 => Ok(inputs[0].clone()), 2 => { - let a = inputs[0]; - let b = inputs[1]; - BinaryExecutor::execute::( + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( a, + sel, b, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(a.logical_len()), - }, - |a, b, buf| buf.put(&(a || b)), - ) - } - _ => { - let len = inputs[0].logical_len(); - UniformExecutor::execute::( - inputs, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(len), - }, - |bools, buf| buf.put(&(bools.iter().any(|b| *b))), - ) + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a || b)), + )?; } + _ => UniformExecutor::execute::( + input.arrays(), + sel, + OutBuffer::from_array(output)?, + |bools, buf| buf.put(&(bools.iter().any(|b| **b))), + )?, } + + Ok(()) } } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; - use crate::arrays::scalar::ScalarValue; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; #[test] fn and_bool_2() { - let a = Array::from_iter([true, false, false]); - let b = Array::from_iter([true, true, false]); + let a = Array::try_from_iter([true, false, false]).unwrap(); + let b = Array::try_from_iter([true, true, false]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -244,18 +254,20 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([true, false, false]).unwrap(); - assert_eq!(ScalarValue::from(true), out.logical_value(0).unwrap()); - assert_eq!(ScalarValue::from(false), out.logical_value(1).unwrap()); - assert_eq!(ScalarValue::from(false), out.logical_value(2).unwrap()); + assert_arrays_eq(&expected, &out); } #[test] fn and_bool_3() { - let a = Array::from_iter([true, true, true]); - let b = Array::from_iter([false, true, true]); - let c = Array::from_iter([true, true, false]); + let a = Array::try_from_iter([true, true, true]).unwrap(); + let b = Array::try_from_iter([false, true, true]).unwrap(); + let c = Array::try_from_iter([true, true, false]).unwrap(); + let batch = Batch::try_from_arrays([a, b, c], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -277,17 +289,19 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b, &c]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); - assert_eq!(ScalarValue::from(false), out.logical_value(0).unwrap()); - assert_eq!(ScalarValue::from(true), out.logical_value(1).unwrap()); - assert_eq!(ScalarValue::from(false), out.logical_value(2).unwrap()); + let expected = Array::try_from_iter([false, true, false]).unwrap(); + + assert_arrays_eq(&expected, &out); } #[test] fn or_bool_2() { - let a = Array::from_iter([true, false, false]); - let b = Array::from_iter([true, true, false]); + let a = Array::try_from_iter([true, false, false]).unwrap(); + let b = 
Array::try_from_iter([true, true, false]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -305,10 +319,11 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([true, true, false]).unwrap(); - assert_eq!(ScalarValue::from(true), out.logical_value(0).unwrap()); - assert_eq!(ScalarValue::from(true), out.logical_value(1).unwrap()); - assert_eq!(ScalarValue::from(false), out.logical_value(2).unwrap()); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index 23ba23a54..43b1575d7 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -4,12 +4,10 @@ use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::compute::cast::array::decimal_rescale; -use crate::arrays::compute::cast::behavior::CastFailBehavior; -use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::array::Array2; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ PhysicalBinary, PhysicalBool, PhysicalF16, @@ -22,18 +20,40 @@ use crate::arrays::executor::physical_type::{ PhysicalI8, PhysicalInterval, PhysicalStorage, - PhysicalType, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, - PhysicalUntypedNull, PhysicalUtf8, }; -use crate::arrays::executor::scalar::{BinaryExecutor, BinaryListReducer, FlexibleListExecutor}; -use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; +use crate::arrays::executor::physical_type::{ + PhysicalBinary_2, + PhysicalBool_2, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, + PhysicalType2, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUntypedNull_2, + PhysicalUtf8_2, +}; +use crate::arrays::executor::scalar::{BinaryListReducer2, FlexibleListExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; +use crate::expr::cast_expr::CastExpr; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -207,12 +227,7 @@ impl ScalarFunction for Eq { table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, EqOperation>(*self, inputs, table_list) } } @@ -252,12 +267,7 @@ impl ScalarFunction for Neq { 
table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, NotEqOperation>(*self, inputs, table_list) } } @@ -293,12 +303,7 @@ impl ScalarFunction for Lt { table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, LtOperation>(*self, inputs, table_list) } } @@ -334,12 +339,7 @@ impl ScalarFunction for LtEq { table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, LtEqOperation>(*self, inputs, table_list) } } @@ -375,12 +375,7 @@ impl ScalarFunction for Gt { table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, GtOperation>(*self, inputs, table_list) } } @@ -416,12 +411,7 @@ impl ScalarFunction for GtEq { table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, GtEqOperation>(*self, inputs, table_list) } } @@ -504,92 +494,136 @@ impl ComparisonOperation for GtEqOperation { } } -/// Creates a new scalar function implementation based on input types. -fn new_comparison_impl( - func: &impl FunctionInfo, - inputs: &[Expression], +/// Create new planned scalar function for some comparison operation. +/// +/// This will normalize input expressions as required. 
+fn new_planned_comparison_function( + func: F, + mut inputs: Vec, table_list: &TableList, -) -> Result> { - plan_check_num_args(func, inputs, 2)?; - Ok( - match ( - inputs[0].datatype(table_list)?, - inputs[1].datatype(table_list)?, - ) { - (DataType::Boolean, DataType::Boolean) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Int8, DataType::Int8) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Int16, DataType::Int16) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Int32, DataType::Int32) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Int64, DataType::Int64) => { - Box::new(BaseComparisonImpl::::new()) +) -> Result +where + F: ScalarFunction + 'static, + O: ComparisonOperation, +{ + plan_check_num_args(&func, &inputs, 2)?; + + let function_impl: Box = match ( + inputs[0].datatype(table_list)?, + inputs[1].datatype(table_list)?, + ) { + (DataType::Boolean, DataType::Boolean) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Int8, DataType::Int8) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Int16, DataType::Int16) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Int32, DataType::Int32) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Int64, DataType::Int64) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Int128, DataType::Int128) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::UInt8, DataType::UInt8) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::UInt16, DataType::UInt16) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::UInt32, DataType::UInt32) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::UInt64, DataType::UInt64) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::UInt128, DataType::UInt128) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Float16, DataType::Float16) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Float32, DataType::Float32) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Float64, DataType::Float64) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Decimal64(left), DataType::Decimal64(right)) => { + // Normalize decimals. + match left.scale.cmp(&right.scale) { + Ordering::Less => { + // Scale up left. + inputs[0] = Expression::Cast(CastExpr { + to: DataType::Decimal64(right), + expr: Box::new(inputs[0].clone()), + }) + } + Ordering::Greater => { + // Scale up right. + inputs[1] = Expression::Cast(CastExpr { + to: DataType::Decimal64(left), + expr: Box::new(inputs[1].clone()), + }) + } + Ordering::Equal => (), // Nothing to do } - (DataType::Int128, DataType::Int128) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Decimal128(left), DataType::Decimal128(right)) => { + // Normalize decimals. + match left.scale.cmp(&right.scale) { + Ordering::Less => { + // Scale up left. + inputs[0] = Expression::Cast(CastExpr { + to: DataType::Decimal128(right), + expr: Box::new(inputs[0].clone()), + }) + } + Ordering::Greater => { + // Scale up right. 
+ inputs[1] = Expression::Cast(CastExpr { + to: DataType::Decimal128(left), + expr: Box::new(inputs[1].clone()), + }) + } + Ordering::Equal => (), // Nothing to do } + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Timestamp(_), DataType::Timestamp(_)) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Interval, DataType::Interval) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Date32, DataType::Date32) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Date64, DataType::Date64) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Utf8, DataType::Utf8) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Binary, DataType::Binary) => { + Box::new(UnnestedComparisonImpl::::new()) + } - (DataType::UInt8, DataType::UInt8) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::UInt16, DataType::UInt16) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::UInt32, DataType::UInt32) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::UInt64, DataType::UInt64) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::UInt128, DataType::UInt128) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Float16, DataType::Float16) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Float32, DataType::Float32) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Float64, DataType::Float64) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Decimal64(left), DataType::Decimal64(right)) => Box::new( - RescalingComparisionImpl::::new(left, right), - ), - (DataType::Decimal128(left), DataType::Decimal128(right)) => Box::new( - RescalingComparisionImpl::::new(left, right), - ), - (DataType::Timestamp(_), DataType::Timestamp(_)) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Interval, DataType::Interval) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Date32, DataType::Date32) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Date64, DataType::Date64) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Utf8, DataType::Utf8) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Binary, DataType::Binary) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::List(m1), DataType::List(m2)) if m1 == m2 => { - // TODO: We'll want to figure out casting for lists. 
- Box::new(ListComparisonImpl::::new(m1.datatype.physical_type()?)) - } - (a, b) => return Err(invalid_input_types_error(func, &[a, b])), - }, - ) + (a, b) => return Err(invalid_input_types_error(&func, &[a, b])), + }; + + Ok(PlannedScalarFunction { + function: Box::new(func), + return_type: DataType::Boolean, + inputs, + function_impl, + }) } #[derive(Debug)] @@ -602,7 +636,7 @@ struct ListComparisonReducer { _op: PhantomData, } -impl BinaryListReducer for ListComparisonReducer +impl BinaryListReducer2 for ListComparisonReducer where T: PartialEq + PartialOrd, O: ComparisonOperation, @@ -643,12 +677,12 @@ where #[derive(Debug, Clone)] struct ListComparisonImpl { - inner_physical_type: PhysicalType, + inner_physical_type: PhysicalType2, _op: PhantomData, } impl ListComparisonImpl { - fn new(inner_physical_type: PhysicalType) -> Self { + fn new(inner_physical_type: PhysicalType2) -> Self { ListComparisonImpl { _op: PhantomData, inner_physical_type, @@ -660,7 +694,11 @@ impl ScalarFunctionImpl for ListComparisonImpl where O: ComparisonOperation, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + unimplemented!() + } + + fn execute2(&self, inputs: &[&Array2]) -> Result { let left = inputs[0]; let right = inputs[1]; @@ -670,97 +708,97 @@ where }; let array = match self.inner_physical_type { - PhysicalType::UntypedNull => FlexibleListExecutor::binary_reduce::< - PhysicalUntypedNull, + PhysicalType2::UntypedNull => FlexibleListExecutor::binary_reduce::< + PhysicalUntypedNull_2, _, ListComparisonReducer<_, O>, >(left, right, builder)?, - PhysicalType::Boolean => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::Boolean => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Int8 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::Int8 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Int16 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::Int16 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Int32 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::Int32 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Int64 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::Int64 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Int128 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::Int128 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::UInt8 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::UInt8 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::UInt16 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::UInt16 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::UInt32 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::UInt32 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::UInt64 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::UInt64 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::UInt128 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::UInt128 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? 
} - PhysicalType::Float16 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::Float16 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Float32 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::Float32 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Float64 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::Float64 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Interval => FlexibleListExecutor::binary_reduce::< - PhysicalInterval, + PhysicalType2::Interval => FlexibleListExecutor::binary_reduce::< + PhysicalInterval_2, _, ListComparisonReducer<_, O>, >(left, right, builder)?, - PhysicalType::Binary => { - FlexibleListExecutor::binary_reduce::>( - left, right, builder, - )? - } - PhysicalType::Utf8 => { - FlexibleListExecutor::binary_reduce::>( + PhysicalType2::Binary => FlexibleListExecutor::binary_reduce::< + PhysicalBinary_2, + _, + ListComparisonReducer<_, O>, + >(left, right, builder)?, + PhysicalType2::Utf8 => { + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::List => { + PhysicalType2::List => { return Err(RayexecError::new( "Comparison between nested lists not yet supported", )) @@ -772,137 +810,61 @@ where } #[derive(Debug, Clone)] -struct BaseComparisonImpl { +struct UnnestedComparisonImpl { _op: PhantomData, _s: PhantomData, } -impl BaseComparisonImpl +impl UnnestedComparisonImpl where O: ComparisonOperation, S: PhysicalStorage, - for<'a> S::Type<'a>: PartialEq + PartialOrd, { - fn new() -> Self { - BaseComparisonImpl { + const fn new() -> Self { + UnnestedComparisonImpl { _op: PhantomData, _s: PhantomData, } } } -impl ScalarFunctionImpl for BaseComparisonImpl +impl ScalarFunctionImpl for UnnestedComparisonImpl where O: ComparisonOperation, S: PhysicalStorage, - for<'a> S::Type<'a>: PartialEq + PartialOrd, + S::StorageType: PartialEq + PartialOrd, { - fn execute(&self, inputs: &[&Array]) -> Result { - let left = inputs[0]; - let right = inputs[1]; - - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(left.logical_len()), - }; - - BinaryExecutor::execute::(left, right, builder, |a, b, buf| { - buf.put(&O::compare(a, b)) - }) - } -} - -// TODO: Determine if this is still needed. Ideally scaling happens prior to -// calling the comparison function. 
-#[derive(Debug, Clone)] -struct RescalingComparisionImpl { - _op: PhantomData, - _t: PhantomData, - - left: DecimalTypeMeta, - right: DecimalTypeMeta, -} + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let left = &input.arrays()[0]; + let right = &input.arrays()[1]; -impl RescalingComparisionImpl -where - O: ComparisonOperation, - T: DecimalType, - ArrayData: From>, -{ - fn new(left: DecimalTypeMeta, right: DecimalTypeMeta) -> Self { - RescalingComparisionImpl { - _op: PhantomData, - _t: PhantomData, + BinaryExecutor::execute::( left, + sel, right, - } - } -} - -impl ScalarFunctionImpl for RescalingComparisionImpl -where - O: ComparisonOperation, - T: DecimalType, - ArrayData: From>, -{ - fn execute(&self, inputs: &[&Array]) -> Result { - let left = inputs[0]; - let right = inputs[1]; - - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(left.logical_len()), - }; - - match self.left.scale.cmp(&self.right.scale) { - Ordering::Greater => { - let scaled_right = decimal_rescale::( - right, - left.datatype().clone(), - CastFailBehavior::Error, - )?; - - BinaryExecutor::execute::( - left, - &scaled_right, - builder, - |a, b, buf| buf.put(&O::compare(a, b)), - ) - } - Ordering::Less => { - let scaled_left = decimal_rescale::( - left, - right.datatype().clone(), - CastFailBehavior::Error, - )?; - - BinaryExecutor::execute::( - &scaled_left, - right, - builder, - |a, b, buf| buf.put(&O::compare(a, b)), - ) - } - Ordering::Equal => BinaryExecutor::execute::( - left, - right, - builder, - |a, b, buf| buf.put(&O::compare(a, b)), - ), - } + sel, + OutBuffer::from_array(output)?, + |left, right, buf| buf.put(&O::compare(left, right)), + ) } } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; #[test] fn eq_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -920,16 +882,18 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([false, true, false]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([false, true, false]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } #[test] fn neq_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -947,16 +911,18 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([true, false, true]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([true, false, true]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } #[test] fn lt_i32() { - let 
a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -974,16 +940,18 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([true, false, true]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([true, false, true]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } #[test] fn lt_eq_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -1001,16 +969,18 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([true, true, true]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([true, true, true]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } #[test] fn gt_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -1028,16 +998,18 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([false, false, false]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([false, false, false]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } #[test] fn gt_eq_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -1055,9 +1027,10 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([false, true, false]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([false, true, false]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs index 2e4044200..a1f43a097 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs @@ -1,7 +1,8 @@ use rayexec_error::Result; use rayexec_parser::ast; -use crate::arrays::array::Array; +use 
crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; use crate::arrays::compute::date::{self, extract_date_part}; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; use crate::arrays::scalar::decimal::{Decimal64Type, DecimalType}; @@ -101,9 +102,11 @@ pub struct DatePartImpl { } impl ScalarFunctionImpl for DatePartImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); // First input ignored (the constant "part" to extract) - extract_date_part(self.part, inputs[1]) + let input = &input.arrays()[1]; + extract_date_part(self.part, input, sel, output) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs index b677bd584..4a74e0f24 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs @@ -2,11 +2,12 @@ use std::str::FromStr; use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalI64; use crate::arrays::datatype::{DataType, DataTypeId, TimeUnit, TimestampTypeMeta}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalI64; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -140,21 +141,21 @@ pub struct DateTruncImpl { } impl ScalarFunctionImpl for DateTruncImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = &inputs[1]; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + // First element is field name, skip. 
+        let input = &input.arrays()[1];
 
         let trunc = match self.input_unit {
             TimeUnit::Second => match self.field {
-                TruncField::Microseconds | TruncField::Milliseconds | TruncField::Second => {
-                    return Ok((*input).clone())
-                }
+                TruncField::Microseconds | TruncField::Milliseconds | TruncField::Second => 1,
                 TruncField::Minute => 60,
                 TruncField::Hour => 60 * 60,
                 TruncField::Day => 24 * 60 * 60,
                 other => not_implemented!("trunc field: {other:?}"),
             },
             TimeUnit::Millisecond => match self.field {
-                TruncField::Microseconds | TruncField::Milliseconds => return Ok((*input).clone()),
+                TruncField::Microseconds | TruncField::Milliseconds => 1,
                 TruncField::Second => 1000,
                 TruncField::Minute => 60 * 1000,
                 TruncField::Hour => 60 * 60 * 1000,
@@ -162,7 +163,7 @@ impl ScalarFunctionImpl for DateTruncImpl {
                 other => not_implemented!("trunc field: {other:?}"),
             },
             TimeUnit::Microsecond => match self.field {
-                TruncField::Microseconds => return Ok((*input).clone()),
+                TruncField::Microseconds => 1,
                 TruncField::Milliseconds => 1000,
                 TruncField::Second => 1000 * 1000,
                 TruncField::Minute => 60 * 1000 * 1000,
@@ -181,16 +182,14 @@ impl ScalarFunctionImpl for DateTruncImpl {
             },
         };
 
-        let builder = ArrayBuilder {
-            datatype: DataType::Timestamp(TimestampTypeMeta {
-                unit: self.input_unit,
-            }),
-            buffer: PrimitiveBuffer::with_len(input.logical_len()),
-        };
-
-        UnaryExecutor::execute::(input, builder, |v, buf| {
-            let v = (v / trunc) * trunc;
-            buf.put(&v)
-        })
+        UnaryExecutor::execute::(
+            input,
+            sel,
+            OutBuffer::from_array(output)?,
+            |&v, buf| {
+                let v = (v / trunc) * trunc;
+                buf.put(&v)
+            },
+        )
     }
 }
diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs
index 536dee3ac..3ce6a16ee 100644
--- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs
+++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs
@@ -1,10 +1,12 @@
+use stdutil::iter::IntoExactSizeIterator;
 use rayexec_error::Result;
 
-use crate::arrays::array::Array;
+use crate::arrays::array::exp::Array;
+use crate::arrays::batch_exp::Batch;
+use crate::arrays::buffer::physical_type::PhysicalI64;
 use crate::arrays::datatype::{DataType, DataTypeId, TimeUnit, TimestampTypeMeta};
-use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer};
-use crate::arrays::executor::physical_type::PhysicalI64;
-use crate::arrays::executor::scalar::UnaryExecutor;
+use crate::arrays::executor_exp::scalar::unary::UnaryExecutor;
+use crate::arrays::executor_exp::OutBuffer;
 use crate::expr::Expression;
 use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl};
 use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature};
@@ -102,21 +104,22 @@ impl ScalarFunction for EpochMs {
 pub struct EpochImpl;
 
 impl ScalarFunctionImpl for EpochImpl {
-    fn execute(&self, inputs: &[&Array]) -> Result {
-        let input = inputs[0];
-        to_timestamp::(input)
+    fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> {
+        let sel = input.selection();
+        let input = &input.arrays()[0];
+        to_timestamp::(input, sel, output)
     }
 }
 
-fn to_timestamp(input: &Array) -> Result {
-    let builder = ArrayBuilder {
-        datatype: DataType::Timestamp(TimestampTypeMeta {
-            unit: TimeUnit::Microsecond,
-        }),
-        buffer: PrimitiveBuffer::with_len(input.logical_len()),
-    };
-
-    UnaryExecutor::execute::(input, builder, |v, buf| {
-        buf.put(&(v * S));
-    })
+fn to_timestamp(
+    input: &Array,
+    sel: impl
IntoExactSizeIterator, + out: &mut Array, +) -> Result<()> { + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(out)?, + |&v, buf| buf.put(&(v * S)), + ) } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs index 69a221936..79fe65077 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs @@ -1,10 +1,9 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalType}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::{PhysicalAny, PhysicalBool}; -use crate::arrays::executor::scalar::UnaryExecutor; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -101,29 +100,34 @@ impl ScalarFunction for IsNotNull { pub struct CheckNullImpl; impl ScalarFunctionImpl for CheckNullImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - - let (initial, updated) = if IS_NULL { - // Executor will only execute on non-null inputs, so we can assume - // everything is null first then selectively set false for things - // that the executor executes. - (true, false) - } else { - (false, true) - }; - - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len_and_default_value(input.logical_len(), initial), - }; - let array = UnaryExecutor::execute::(input, builder, |_, buf| { - buf.put(&updated) - })?; - - // Drop validity. - let data = array.into_array_data(); - Ok(Array::new_with_array_data(DataType::Boolean, data)) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + let out = output + .data_mut() + .try_as_mut()? + .try_as_slice_mut::()?; + + if input.physical_type() == PhysicalType::UntypedNull { + // Everything null, just set to default value. + out.iter_mut().for_each(|v| *v = IS_NULL); + + return Ok(()); + } + + let flat = input.flat_view()?; + + for (output_idx, idx) in sel.into_iter().enumerate() { + let is_valid = flat.validity.is_valid(idx); + if is_valid { + out[output_idx] = !IS_NULL; + } else { + out[output_idx] = IS_NULL; + } + } + + Ok(()) } } @@ -303,22 +307,128 @@ impl ScalarFunction for IsNotFalse { pub struct CheckBoolImpl; impl ScalarFunctionImpl for CheckBoolImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - - let initial = NOT; - - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len_and_default_value(input.logical_len(), initial), - }; - let array = UnaryExecutor::execute::(input, builder, |val, buf| { - let b = if NOT { val != BOOL } else { val == BOOL }; - buf.put(&b) - })?; - - // Drop validity. - let data = array.into_array_data(); - Ok(Array::new_with_array_data(DataType::Boolean, data)) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + let out = output + .data_mut() + .try_as_mut()? 
+ .try_as_slice_mut::()?; + + let flat = input.flat_view()?; + let input = flat.array_buffer.try_as_slice::()?; + + for (output_idx, idx) in sel.into_iter().enumerate() { + let is_valid = flat.validity.is_valid(idx); + if is_valid { + let val = input[idx]; + out[output_idx] = if NOT { val != BOOL } else { val == BOOL } + } else { + // 'IS TRUE', 'IS FALSE' => false + // 'IS NOT TRUE', 'IS NOT FALSE' => true + out[output_idx] = NOT; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::testutil::assert_arrays_eq; + use crate::expr; + + #[test] + fn is_null_all_valid() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let batch = Batch::try_from_arrays([a], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table(None, vec![DataType::Boolean], vec!["a".to_string()]) + .unwrap(); + + let planned = IsNull + .plan(&table_list, vec![expr::col_ref(table_ref, 0)]) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([false, false, false]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn is_null_some_invalid() { + let a = Array::try_from_iter([Some(1), None, None]).unwrap(); + let batch = Batch::try_from_arrays([a], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table(None, vec![DataType::Boolean], vec!["a".to_string()]) + .unwrap(); + + let planned = IsNull + .plan(&table_list, vec![expr::col_ref(table_ref, 0)]) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([false, true, true]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn is_true() { + let a = Array::try_from_iter([Some(true), Some(false), None]).unwrap(); + let batch = Batch::try_from_arrays([a], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table(None, vec![DataType::Boolean], vec!["a".to_string()]) + .unwrap(); + + let planned = IsTrue + .plan(&table_list, vec![expr::col_ref(table_ref, 0)]) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([Some(true), Some(false), Some(false)]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn is_not_true() { + let a = Array::try_from_iter([Some(true), Some(false), None]).unwrap(); + let batch = Batch::try_from_arrays([a], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table(None, vec![DataType::Boolean], vec!["a".to_string()]) + .unwrap(); + + let planned = IsNotTrue + .plan(&table_list, vec![expr::col_ref(table_ref, 0)]) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([Some(false), Some(true), Some(true)]).unwrap(); + + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs index 
9c9287700..757568c68 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs @@ -1,20 +1,12 @@ -use std::borrow::Borrow; - -use half::f16; +use stdutil::iter::IntoExactSizeIterator; use rayexec_error::{not_implemented, RayexecError, Result}; -use serde::{Deserialize, Serialize}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::bitmap::Bitmap; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ - ArrayBuilder, - ArrayDataBuffer, - BooleanBuffer, - GermanVarlenBuffer, - PrimitiveBuffer, -}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + Addressable, + AddressableMut, + MutablePhysicalStorage, PhysicalBinary, PhysicalBool, PhysicalF16, @@ -25,6 +17,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, + PhysicalInterval, PhysicalList, PhysicalStorage, PhysicalType, @@ -33,9 +26,11 @@ use crate::arrays::executor::physical_type::{ PhysicalU32, PhysicalU64, PhysicalU8, + PhysicalUntypedNull, PhysicalUtf8, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::buffer::SecondaryBuffer; +use crate::arrays::datatype::{DataType, DataTypeId}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -105,194 +100,212 @@ impl ScalarFunction for ListExtract { function: Box::new(*self), return_type: inner_datatype.clone(), inputs, - function_impl: Box::new(ListExtractImpl { - index, - inner_datatype, - }), + function_impl: Box::new(ListExtractImpl { index }), }) } } -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct ListExtractImpl { - inner_datatype: DataType, index: usize, } impl ScalarFunctionImpl for ListExtractImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - extract(input, self.index) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + list_extract(input, sel, output, self.index) } } -fn extract(array: &Array, idx: usize) -> Result { - let data = match array.array_data() { - ArrayData::List(list) => list.as_ref(), - _other => return Err(RayexecError::new("Unexpected storage type")), - }; - - match data.inner_array().physical_type() { - PhysicalType::UntypedNull => not_implemented!("NULL list extract"), - PhysicalType::Boolean => { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::Int8 => { - let builder = ArrayBuilder { - datatype: DataType::Int8, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::Int16 => { - let builder = ArrayBuilder { - datatype: DataType::Int16, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::Int32 => { - let builder = ArrayBuilder { - datatype: DataType::Int32, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), 
idx) - } - PhysicalType::Int64 => { - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::Int128 => { - let builder = ArrayBuilder { - datatype: DataType::Int128, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::UInt8 => { - let builder = ArrayBuilder { - datatype: DataType::UInt8, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::UInt16 => { - let builder = ArrayBuilder { - datatype: DataType::UInt16, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) +/// Extract an element from each list within a list array. +/// +/// If the element index falls outside the bounds of a list, the result for that +/// row will be NULL. +pub fn list_extract( + array: &Array, + sel: impl IntoExactSizeIterator, + output: &mut Array, + element_idx: usize, +) -> Result<()> { + match output.datatype().physical_type() { + PhysicalType::UntypedNull => { + extract_inner::(array, sel, output, element_idx) } - PhysicalType::UInt32 => { - let builder = ArrayBuilder { - datatype: DataType::UInt32, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) + PhysicalType::Boolean => extract_inner::(array, sel, output, element_idx), + PhysicalType::Int8 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Int16 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Int32 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Int64 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Int128 => extract_inner::(array, sel, output, element_idx), + PhysicalType::UInt8 => extract_inner::(array, sel, output, element_idx), + PhysicalType::UInt16 => extract_inner::(array, sel, output, element_idx), + PhysicalType::UInt32 => extract_inner::(array, sel, output, element_idx), + PhysicalType::UInt64 => extract_inner::(array, sel, output, element_idx), + PhysicalType::UInt128 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Float16 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Float32 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Float64 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Interval => { + extract_inner::(array, sel, output, element_idx) } - PhysicalType::UInt64 => { - let builder = ArrayBuilder { - datatype: DataType::UInt64, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::UInt128 => { - let builder = ArrayBuilder { - datatype: DataType::UInt128, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::Float16 => { - let builder = ArrayBuilder { - datatype: DataType::Float16, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::Float32 => { - let builder = ArrayBuilder { - datatype: DataType::Float32, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - 
PhysicalType::Float64 => { - let builder = ArrayBuilder { - datatype: DataType::Float64, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::Utf8 => { - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType::Binary => { - let builder = ArrayBuilder { - datatype: DataType::Binary, - buffer: GermanVarlenBuffer::<[u8]>::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - other => not_implemented!("List extract for physical type {other:?}"), + PhysicalType::Utf8 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Binary => extract_inner::(array, sel, output, element_idx), + other => not_implemented!("List extract for datatype {other}"), } } -fn extract_inner<'a, S, B>( - mut builder: ArrayBuilder, - outer: &Array, - inner: &'a Array, - el_idx: usize, -) -> Result +fn extract_inner( + array: &Array, + sel: impl IntoExactSizeIterator, + output: &mut Array, + element_idx: usize, +) -> Result<()> where - S: PhysicalStorage, - B: ArrayDataBuffer, - S::Type<'a>: Borrow<::Type>, + S: MutablePhysicalStorage, { - let el_idx = el_idx as i32; + let flat = array.flat_view()?; - let mut validity = Bitmap::new_with_all_true(builder.buffer.len()); + let metas = PhysicalList::get_addressable(flat.array_buffer)?; + let child = match flat.array_buffer.get_secondary() { + SecondaryBuffer::List(l) => &l.child, + _ => return Err(RayexecError::new("Missing secondary buffer for list")), + }; + + let child_buf = S::get_addressable(child.data())?; + let child_validity = child.validity(); + + let mut out_buffer = S::get_addressable_mut(output.data.try_as_mut()?)?; + let out_validity = &mut output.validity; + + for (output_idx, input_idx) in sel.into_iter().enumerate() { + let sel_idx = flat.selection.get(input_idx).unwrap(); - UnaryExecutor::for_each::(outer, |idx, metadata| { - if let Some(metadata) = metadata { - if el_idx >= metadata.len { - // Indexing outside of the list. Mark null - validity.set_unchecked(idx, false); - return; + if flat.validity.is_valid(sel_idx) { + let meta = metas.get(sel_idx).unwrap(); + if element_idx >= meta.len as usize { + // Indexing outside of the list. User is allowed to do that, set + // the value to null. + out_validity.set_invalid(output_idx); + continue; } - // Otherwise put the element into the builder. - let inner_el_idx = metadata.offset + el_idx; - match UnaryExecutor::value_at::(inner, inner_el_idx as usize) { - Ok(Some(el)) => { - builder.buffer.put(idx, el.borrow()); - return; - } - _ => { - // TODO: Do something if Err, just fall through right now. - } + let offset = meta.offset as usize + element_idx; + if !child_validity.is_valid(offset) { + // Element inside list is null. + out_validity.set_invalid(output_idx); + continue; } + + let val = child_buf.get(offset).unwrap(); + out_buffer.put(output_idx, val); + } else { + out_validity.set_invalid(output_idx); } + } - // Metadata null, tried to extract from null array, mark null. 
- validity.set_unchecked(idx, false); - })?; + Ok(()) +} - Ok(Array::new_with_validity_and_array_data( - builder.datatype, - validity, - builder.buffer.into_data(), - )) +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::datatype::ListTypeMeta; + use crate::arrays::testutil::assert_arrays_eq; + use crate::functions::scalar::builtin::list::list_values; + + #[test] + fn list_extract_primitive() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut lists = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Int32)), + 3, + ) + .unwrap(); + + list_values(&[a, b], 0..3, &mut lists).unwrap(); + + let mut second_elements = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + list_extract(&lists, 0..3, &mut second_elements, 1).unwrap(); + + let expected = Array::try_from_iter([4, 5, 6]).unwrap(); + assert_arrays_eq(&expected, &second_elements); + } + + #[test] + fn list_extract_out_of_bounds() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut lists = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Int32)), + 3, + ) + .unwrap(); + + list_values(&[a, b], 0..3, &mut lists).unwrap(); + + let mut extracted_elements = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + list_extract(&lists, 0..3, &mut extracted_elements, 2).unwrap(); + + let expected = Array::try_from_iter([None as Option, None, None]).unwrap(); + assert_arrays_eq(&expected, &extracted_elements); + } + + #[test] + fn list_extract_child_invalid() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([Some(4), None, Some(6)]).unwrap(); + + let mut lists = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Int32)), + 3, + ) + .unwrap(); + + list_values(&[a, b], 0..3, &mut lists).unwrap(); + + let mut second_elements = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + list_extract(&lists, 0..3, &mut second_elements, 1).unwrap(); + + let expected = Array::try_from_iter([Some(4), None, Some(6)]).unwrap(); + assert_arrays_eq(&expected, &second_elements); + + // Elements as index 0 should still be all non-null. 
+ let mut first_elements = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + list_extract(&lists, 0..3, &mut first_elements, 0).unwrap(); + + let expected = Array::try_from_iter([1, 2, 3]).unwrap(); + assert_arrays_eq(&expected, &first_elements); + } + + #[test] + fn list_extract_parent_invalid() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut lists = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Int32)), + 3, + ) + .unwrap(); + + list_values(&[a, b], 0..3, &mut lists).unwrap(); + lists.validity.set_invalid(1); // [2, 5] => NULL + + let mut second_elements = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + list_extract(&lists, 0..3, &mut second_elements, 1).unwrap(); + + let expected = Array::try_from_iter([Some(4), None, Some(6)]).unwrap(); + assert_arrays_eq(&expected, &second_elements); + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs index b961085db..3c7f001a1 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs @@ -1,9 +1,36 @@ -use rayexec_error::{RayexecError, Result}; +use rayexec_error::{not_implemented, RayexecError, Result}; +use stdutil::iter::IntoExactSizeIterator; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::array::validity::Validity; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::buffer::physical_type::{ + Addressable, + AddressableMut, + MutablePhysicalStorage, + PhysicalBinary, + PhysicalBool, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalList, + PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUntypedNull, + PhysicalUtf8, +}; +use crate::arrays::buffer::{ListItemMetadata, SecondaryBuffer}; use crate::arrays::datatype::{DataType, DataTypeId, ListTypeMeta}; -use crate::arrays::executor::scalar::concat; -use crate::arrays::storage::ListStorage; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -52,9 +79,7 @@ impl ScalarFunction for ListValues { function: Box::new(*self), return_type: return_type.clone(), inputs, - function_impl: Box::new(ListValuesImpl { - list_datatype: return_type, - }), + function_impl: Box::new(ListValuesImpl), }); } }; @@ -77,33 +102,183 @@ impl ScalarFunction for ListValues { function: Box::new(*self), return_type: return_type.clone(), inputs, - function_impl: Box::new(ListValuesImpl { - list_datatype: return_type, - }), + function_impl: Box::new(ListValuesImpl), }) } } #[derive(Debug, Clone)] -pub struct ListValuesImpl { - list_datatype: DataType, -} +pub struct ListValuesImpl; impl ScalarFunctionImpl for ListValuesImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - if inputs.is_empty() { - let inner_type = match &self.list_datatype { - DataType::List(l) => l.datatype.as_ref(), - other => panic!("invalid data type: {other}"), - }; - - let data = ListStorage::empty_list(Array::new_typed_null_array(inner_type.clone(), 1)?); - return Ok(Array::new_with_array_data(self.list_datatype.clone(), data)); 
+ fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + list_values(input.arrays(), input.selection(), output) + } +} + +pub fn list_values( + inputs: &[Array], + sel: impl IntoExactSizeIterator, + output: &mut Array, +) -> Result<()> { + let inner_type = match output.datatype() { + DataType::List(m) => m.datatype.physical_type(), + other => { + return Err(RayexecError::new(format!( + "Expected output to be list datatype, got {other}", + ))) } + }; + + match inner_type { + PhysicalType::UntypedNull => list_values_inner::(inputs, sel, output), + PhysicalType::Boolean => list_values_inner::(inputs, sel, output), + PhysicalType::Int8 => list_values_inner::(inputs, sel, output), + PhysicalType::Int16 => list_values_inner::(inputs, sel, output), + PhysicalType::Int32 => list_values_inner::(inputs, sel, output), + PhysicalType::Int64 => list_values_inner::(inputs, sel, output), + PhysicalType::Int128 => list_values_inner::(inputs, sel, output), + PhysicalType::UInt8 => list_values_inner::(inputs, sel, output), + PhysicalType::UInt16 => list_values_inner::(inputs, sel, output), + PhysicalType::UInt32 => list_values_inner::(inputs, sel, output), + PhysicalType::UInt64 => list_values_inner::(inputs, sel, output), + PhysicalType::UInt128 => list_values_inner::(inputs, sel, output), + PhysicalType::Float16 => list_values_inner::(inputs, sel, output), + PhysicalType::Float32 => list_values_inner::(inputs, sel, output), + PhysicalType::Float64 => list_values_inner::(inputs, sel, output), + PhysicalType::Utf8 => list_values_inner::(inputs, sel, output), + PhysicalType::Binary => list_values_inner::(inputs, sel, output), + other => not_implemented!("list values for physical type {other}"), + } +} + +/// Helper for constructing the list values and writing them to `output`. +/// +/// `S` should be the inner type. +fn list_values_inner( + inputs: &[Array], + sel: impl IntoExactSizeIterator, + output: &mut Array, +) -> Result<()> { + // TODO: Dictionary + + let sel = sel.into_iter(); + let sel_len = sel.len(); + let capacity = sel.len() * inputs.len(); + + let list_buf = match output.data_mut().try_as_mut()?.get_secondary_mut() { + SecondaryBuffer::List(list) => list, + _ => return Err(RayexecError::new("Expected list buffer")), + }; + + // Resize secondary buffer (and validity) to hold everything. + // + // TODO: Need to store buffer manager somewhere else. + list_buf + .child + .data_mut() + .try_as_mut()? + .reserve_primary::(&NopBufferManager, capacity)?; + + // Replace validity with properly sized one. + list_buf + .child + .put_validity(Validity::new_all_valid(capacity))?; + + // Update metadata on the list buffer itself. Note that this can be less + // than the buffer's actual capacity. This only matters during writes to + // know if we still have room to push to the child array. + list_buf.entries = capacity; + + let mut child_outputs = S::get_addressable_mut(list_buf.child.data.try_as_mut()?)?; + let child_validity = &mut list_buf.child.validity; + + // TODO: Possibly avoid allocating here? + let col_bufs = inputs + .iter() + .map(|arr| S::get_addressable(arr.data())) + .collect::>>()?; + + // Write the list values from the input batch. 
+ let mut output_idx = 0; + for row_idx in sel { + for (col, validity) in col_bufs.iter().zip(inputs.iter().map(|arr| arr.validity())) { + if validity.is_valid(row_idx) { + child_outputs.put(output_idx, col.get(row_idx).unwrap()); + } else { + child_validity.set_invalid(output_idx); + } + + output_idx += 1; + } + } + std::mem::drop(child_outputs); + + // Now generate and set the metadatas. + let mut out = PhysicalList::get_addressable_mut(output.data_mut().try_as_mut()?)?; + + let len = inputs.len() as i32; + for output_idx in 0..sel_len { + // Note top-level not possible if we're provided a batch. + out.put( + output_idx, + &ListItemMetadata { + offset: (output_idx as i32) * len, + len, + }, + ); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::physical_type::PhysicalStorage; + use crate::expr; + + #[test] + fn list_values_primitive() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([4, 5, 6]).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table( + None, + vec![DataType::Int32, DataType::Int32], + vec!["a".to_string(), "b".to_string()], + ) + .unwrap(); + + let planned = ListValues + .plan( + &table_list, + vec![expr::col_ref(table_ref, 0), expr::col_ref(table_ref, 1)], + ) + .unwrap(); + + let mut out = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Int32)), + 3, + ) + .unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + // TODO: Assert list equality. - let out = concat(inputs)?; - let data = ListStorage::single_list(out); + let expected_metas = &[ + ListItemMetadata { offset: 0, len: 2 }, + ListItemMetadata { offset: 2, len: 2 }, + ListItemMetadata { offset: 4, len: 2 }, + ]; - Ok(Array::new_with_array_data(self.list_datatype.clone(), data)) + let s = PhysicalList::get_addressable(&out.data).unwrap(); + assert_eq!(expected_metas, s); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs index fc3099d39..02c851ab3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs @@ -2,10 +2,10 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, PhysicalBool, PhysicalF16, PhysicalF32, @@ -15,10 +15,10 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, }; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -61,14 +61,14 @@ impl ScalarFunction for Negate { // TODO: Interval let function_impl: Box = 
match dt.clone() { - dt @ DataType::Int8 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int16 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int32 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int64 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int128 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Float16 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Float32 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Float64 => Box::new(NegateImpl::::new(dt)), + DataType::Int8 => Box::new(NegateImpl::::new()), + DataType::Int16 => Box::new(NegateImpl::::new()), + DataType::Int32 => Box::new(NegateImpl::::new()), + DataType::Int64 => Box::new(NegateImpl::::new()), + DataType::Int128 => Box::new(NegateImpl::::new()), + DataType::Float16 => Box::new(NegateImpl::::new()), + DataType::Float32 => Box::new(NegateImpl::::new()), + DataType::Float64 => Box::new(NegateImpl::::new()), other => return Err(invalid_input_types_error(self, &[other])), }; @@ -83,36 +83,29 @@ impl ScalarFunction for Negate { #[derive(Debug, Clone)] pub struct NegateImpl { - datatype: DataType, // TODO: Would be nice not needing to store this. _s: PhantomData, } impl NegateImpl { - fn new(datatype: DataType) -> Self { - NegateImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + NegateImpl { _s: PhantomData } } } impl ScalarFunctionImpl for NegateImpl where - S: PhysicalStorage, - for<'a> S::Type<'a>: std::ops::Neg> + Default + Copy, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Neg + Copy, { - fn execute(&self, inputs: &[&Array]) -> Result { - use std::ops::Neg; - - let a = inputs[0]; - let datatype = self.datatype.clone(); - let builder = ArrayBuilder { - datatype, - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - UnaryExecutor::execute::(a, builder, |a, buf| buf.put(&(a.neg()))) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + UnaryExecutor::execute::( + &input.arrays()[0], + sel, + OutBuffer::from_array(output)?, + |&a, buf| buf.put(&(-a)), + ) } } @@ -165,14 +158,14 @@ impl ScalarFunction for Not { pub struct NotImpl; impl ScalarFunctionImpl for NotImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - UnaryExecutor::execute::( - inputs[0], - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }, - |b, buf| buf.put(&(!b)), + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + UnaryExecutor::execute::( + &input.arrays()[0], + sel, + OutBuffer::from_array(output)?, + |&b, buf| buf.put(&(!b)), ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs index c47048fef..7e3579827 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use 
crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Abs = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for AbsOp { const NAME: &'static str = "abs"; const DESCRIPTION: &'static str = "Compute the absolute value of a number"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.abs())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.abs()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs index ab989ab52..9fe35a5ec 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Acos = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for AcosOp { const NAME: &'static str = "acos"; const DESCRIPTION: &'static str = "Compute the arccosine of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.acos())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.acos()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs index d2721fa9d..bec4271a3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use 
crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Asin = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for AsinOp { const NAME: &'static str = "asin"; const DESCRIPTION: &'static str = "Compute the arcsine of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.asin())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.asin()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs index 8a3aad508..4657884b2 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Atan = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for AtanOp { const NAME: &'static str = "atan"; const DESCRIPTION: &'static str = "Compute the arctangent of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.atan())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.atan()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs index fe97f1980..51d97f78d 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs @@ -1,13 +1,12 @@ +use 
stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Cbrt = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for CbrtOp { const NAME: &'static str = "cbrt"; const DESCRIPTION: &'static str = "Compute the cube root of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.cbrt())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.cbrt()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs index 49b0a14d4..cd40c8e2c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Ceil = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for CeilOp { const NAME: &'static str = "ceil"; const DESCRIPTION: &'static str = "Round number up"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.ceil())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.ceil()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs index 35d3e82b7..38e377c8a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Cos = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for CosOp { const NAME: &'static str = "cos"; const DESCRIPTION: &'static str = "Compute the cosine of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.cos())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.cos()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs index 634d2497c..a48f4e052 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Degrees = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for DegreesOp { const NAME: &'static str = "degrees"; const DESCRIPTION: &'static str = "Converts radians to degrees"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, 
builder, |v, buf| buf.put(&v.to_degrees())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.to_degrees()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs index a71e4a70f..0042edae9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Exp = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for ExpOp { const NAME: &'static str = "exp"; const DESCRIPTION: &'static str = "Compute `e ^ val`"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.exp())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.exp()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs index 4e8ef22d3..67e17252e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Floor = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for FloorOp { const NAME: &'static str = "floor"; const DESCRIPTION: &'static str = "Round number down"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - 
ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.floor())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.floor()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs index 37d56a348..031462ce4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs @@ -4,16 +4,18 @@ use num_traits::Float; use rayexec_error::Result; use super::ScalarFunction; -use crate::arrays::array::Array; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + PhysicalBool, PhysicalF16, PhysicalF32, PhysicalF64, PhysicalStorage, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunctionImpl}; @@ -92,7 +94,7 @@ pub struct IsNanImpl { } impl IsNanImpl { - fn new() -> Self { + const fn new() -> Self { IsNanImpl { _s: PhantomData } } } @@ -100,15 +102,17 @@ impl IsNanImpl { impl ScalarFunctionImpl for IsNanImpl where S: PhysicalStorage, - for<'a> S::Type<'a>: Float, + S::StorageType: Float, { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(input.logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.is_nan())) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.is_nan()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs index 6bd68c098..51f013c8c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Ln = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ 
impl UnaryInputNumericOperation for LnOp { const NAME: &'static str = "ln"; const DESCRIPTION: &'static str = "Compute natural log of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.ln())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.ln()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs index c668a51e3..042faf9a5 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Log = UnaryInputNumericScalar; @@ -18,17 +17,21 @@ impl UnaryInputNumericOperation for LogOp { const NAME: &'static str = "log"; const DESCRIPTION: &'static str = "Compute base-10 log of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.log10())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.log10()), + ) } } @@ -41,16 +44,20 @@ impl UnaryInputNumericOperation for LogOp2 { const NAME: &'static str = "log2"; const DESCRIPTION: &'static str = "Compute base-2 log of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.log2())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.log2()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs index a2dcb12df..a28318491 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs @@ -29,6 +29,7 @@ pub use degrees::*; pub use exp::*; pub use floor::*; pub use isnan::*; +use stdutil::iter::IntoExactSizeIterator; pub use ln::*; pub use log::*; use num_traits::Float; @@ -38,16 +39,15 @@ pub use sin::*; pub use sqrt::*; pub use tan::*; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, PhysicalF16, PhysicalF32, PhysicalF64, - PhysicalStorage, - PhysicalType, }; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::datatype::{DataType, DataTypeId}; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -81,11 +81,14 @@ pub trait UnaryInputNumericOperation: Debug + Clone + Copy + Sync + Send + 'stat const NAME: &'static str; const DESCRIPTION: &'static str; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>; + S: MutablePhysicalStorage, + S::StorageType: Float; } /// Helper struct for creating functions that accept and produce a single @@ -145,12 +148,14 @@ pub(crate) struct UnaryInputNumericScalarImpl { } impl ScalarFunctionImpl for UnaryInputNumericScalarImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - match input.physical_type() { - PhysicalType::Float16 => O::execute_float::(input, self.ret.clone()), - PhysicalType::Float32 => O::execute_float::(input, self.ret.clone()), - PhysicalType::Float64 => O::execute_float::(input, self.ret.clone()), + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + match input.datatype() { + DataType::Float16 => O::execute_float::(input, sel, output), + DataType::Float32 => O::execute_float::(input, sel, output), + DataType::Float64 => O::execute_float::(input, sel, output), other => Err(RayexecError::new(format!( "Invalid physical type: {other:?}" ))), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs index d4c71dbfa..b58d4fee5 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use 
crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Radians = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for RadiansOp { const NAME: &'static str = "radians"; const DESCRIPTION: &'static str = "Converts degrees to radians"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.to_radians())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.to_radians()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs index 7561269c9..29d055644 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Sin = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for SinOp { const NAME: &'static str = "sin"; const DESCRIPTION: &'static str = "Compute the sin of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.sin())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.sin()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs index 8770db9df..7f0f6e64f 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use 
crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Sqrt = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for SqrtOp { const NAME: &'static str = "sqrt"; const DESCRIPTION: &'static str = "Compute the square root of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.sqrt())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.sqrt()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs index 67d0377af..3392fe201 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs @@ -1,13 +1,12 @@ +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; pub type Tan = UnaryInputNumericScalar; @@ -18,16 +17,20 @@ impl UnaryInputNumericOperation for TanOp { const NAME: &'static str = "tan"; const DESCRIPTION: &'static str = "Compute the tangent of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> where - S: PhysicalStorage, - S::Type<'a>: Float + Default, - ArrayData: From>>, + S: MutablePhysicalStorage, + S::StorageType: Float, { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.tan())) + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.tan()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/random.rs b/crates/rayexec_execution/src/functions/scalar/builtin/random.rs index 5909de01d..8c40f2692 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/random.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/random.rs @@ -1,9 +1,11 @@ use rayexec_error::Result; use serde::{Deserialize, 
Serialize}; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalF64; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation}; use crate::functions::scalar::{ @@ -62,12 +64,10 @@ impl ScalarFunction for Random { pub struct RandomImpl; impl ScalarFunctionImpl for RandomImpl { - fn execute(&self, _inputs: &[&Array]) -> Result { - // TODO: Need to pass in dummy input to produce all unique values. - let val = rand::random::(); - Ok(Array::new_with_array_data( - DataType::Float64, - PrimitiveStorage::from(vec![val]), - )) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + UnaryExecutor::execute_in_place::(output, sel, |v| { + *v = rand::random::() + }) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs index 8db1686f8..6aa951fa3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs @@ -4,16 +4,18 @@ use std::ops::AddAssign; use num_traits::{AsPrimitive, Float}; use rayexec_error::Result; -use crate::arrays::array::Array; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, PhysicalF16, PhysicalF32, PhysicalF64, PhysicalStorage, }; -use crate::arrays::executor::scalar::{BinaryListReducer, ListExecutor}; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::list_reduce::{BinaryListReducer, BinaryReducer}; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -107,19 +109,21 @@ where impl ScalarFunctionImpl for L2DistanceImpl where - S: PhysicalStorage, - for<'a> S::Type<'a>: Float + AddAssign + AsPrimitive + Default + Copy, + S: MutablePhysicalStorage, + S::StorageType: Float + AddAssign + AsPrimitive + Default + Copy, { - fn execute(&self, inputs: &[&Array]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: DataType::Float64, - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - ListExecutor::::binary_reduce::>(a, b, builder) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryListReducer::reduce::, PhysicalF64>( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + ) } } @@ -128,16 +132,11 @@ pub(crate) struct L2DistanceReducer { pub distance: F, } -impl BinaryListReducer for L2DistanceReducer +impl BinaryReducer<&F, &F, f64> for L2DistanceReducer where - F: Float + AddAssign + AsPrimitive + Default, + F: Float + AddAssign + AsPrimitive + Default + Copy, { - fn new(left_len: i32, right_len: i32) -> Self { - 
debug_assert_eq!(left_len, right_len); - Self::default() - } - - fn put_values(&mut self, v1: F, v2: F) { + fn put_values(&mut self, &v1: &F, &v2: &F) { let diff = v1 - v2; self.distance += diff * diff; } @@ -146,3 +145,79 @@ where self.distance.as_().sqrt() } } + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::datatype::ListTypeMeta; + use crate::arrays::testutil::assert_arrays_eq; + use crate::expr; + use crate::functions::scalar::builtin::list::list_values; + + #[test] + fn l2_distance_ok() { + let mut a = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Float64)), + 1, + ) + .unwrap(); + list_values( + &[ + Array::try_from_iter([1.0]).unwrap(), + Array::try_from_iter([2.0]).unwrap(), + Array::try_from_iter([3.0]).unwrap(), + ], + 0..1, + &mut a, + ) + .unwrap(); + + let mut b = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Float64)), + 1, + ) + .unwrap(); + list_values( + &[ + Array::try_from_iter([1.0]).unwrap(), + Array::try_from_iter([2.0]).unwrap(), + Array::try_from_iter([4.0]).unwrap(), + ], + 0..1, + &mut b, + ) + .unwrap(); + + let batch = Batch::try_from_arrays([a, b], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table( + None, + vec![ + DataType::List(ListTypeMeta::new(DataType::Float64)), + DataType::List(ListTypeMeta::new(DataType::Float64)), + ], + vec!["a".to_string(), "b".to_string()], + ) + .unwrap(); + + let planned = L2Distance + .plan( + &table_list, + vec![expr::col_ref(table_ref, 0), expr::col_ref(table_ref, 1)], + ) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Float64, 1).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([1.0]).unwrap(); + assert_arrays_eq(&expected, &out); + } +} diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs index fc7beda38..046cbca3c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs @@ -1,10 +1,11 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalI32, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -61,16 +62,18 @@ impl ScalarFunction for Ascii { pub struct AsciiImpl; impl ScalarFunctionImpl for AsciiImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - let builder = ArrayBuilder { - datatype: DataType::Int32, - buffer: PrimitiveBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; - 
UnaryExecutor::execute::(input, builder, |v, buf| { - let v = v.chars().next().map(|c| c as i32).unwrap_or(0); - buf.put(&v) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| { + let v = v.chars().next().map(|c| c as i32).unwrap_or(0); + buf.put(&v) + }, + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs index 2c8ed0a36..47f4f47fd 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs @@ -1,10 +1,12 @@ -use rayexec_error::{RayexecError, Result}; +use stdutil::iter::IntoExactSizeIterator; +use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalUtf8; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -60,9 +62,10 @@ impl ScalarFunction for Lower { pub struct LowerImpl; impl ScalarFunctionImpl for LowerImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - case_convert_execute(input, str::to_lowercase) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + case_convert_execute(input, sel, str::to_lowercase, output) } } @@ -115,28 +118,27 @@ impl ScalarFunction for Upper { pub struct UpperImpl; impl ScalarFunctionImpl for UpperImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - case_convert_execute(input, str::to_uppercase) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + case_convert_execute(input, sel, str::to_uppercase, output) } } -fn case_convert_execute(input: &Array, case_fn: F) -> Result +// TODO: Reusable string buffer. +fn case_convert_execute( + input: &Array, + sel: impl IntoExactSizeIterator, + case_fn: F, + output: &mut Array, +) -> Result<()> where F: Fn(&str) -> String, { - let cap = match input.array_data() { - ArrayData::Binary(bin) => bin.binary_data_size_bytes(), - _ => return Err(RayexecError::new("Unexpected array data type")), - }; - - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len_and_data_capacity(input.logical_len(), cap), - }; - - UnaryExecutor::execute::(input, builder, |v, buf| { - // TODO: Non-allocating variant. 
- buf.put(&case_fn(v)) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| buf.put(&case_fn(v)), + ) } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs index 1b7c5ae40..20dac7387 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs @@ -1,10 +1,13 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UniformExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::scalar::uniform::UniformExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -68,55 +71,70 @@ impl ScalarFunction for Concat { pub struct StringConcatImpl; impl ScalarFunctionImpl for StringConcatImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - match inputs.len() { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + match input.arrays().len() { 0 => { - let mut array = Array::from_iter([""]); - array.set_physical_validity(0, false); - Ok(array) + // TODO: Zero args should actually error during planning. + // Currently this just sets everything to an empty string. + let mut addressable = output + .data_mut() + .try_as_mut()? + .try_as_string_view_addressable_mut()?; + + for idx in 0..addressable.len() { + addressable.put(idx, ""); + } } - 1 => Ok(inputs[0].clone()), - 2 => { - let a = inputs[0]; - let b = inputs[1]; + 1 => { + let input = &input.arrays()[0]; - let mut string_buf = String::new(); + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| buf.put(s), + )?; + } + 2 => { + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; - // TODO: Compute data capacity.
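// Illustrative sketch only, not part of the recorded change: one way the
// migrated case-conversion functions above could be exercised through the new
// Batch/Array API, modeled on the l2_distance test added later in this diff.
// `Array::try_from_iter` over &str items and the Utf8 output allocation are
// assumptions; the in-diff test only demonstrates f64 arrays.
#[cfg(test)]
mod lower_sketch {
    use stdutil::iter::TryFromExactSizeIterator;

    use super::*;
    use crate::arrays::buffer::buffer_manager::NopBufferManager;
    use crate::arrays::testutil::assert_arrays_eq;

    #[test]
    fn lower_simple() {
        // Single Utf8 input column; the boolean argument mirrors the
        // l2_distance test further down in this diff.
        let input = Array::try_from_iter(["ABC", "Dog"]).unwrap();
        let batch = Batch::try_from_arrays([input], true).unwrap();

        // Output array sized to the input; LowerImpl writes through OutBuffer.
        let mut out = Array::new(&NopBufferManager, DataType::Utf8, 2).unwrap();
        LowerImpl.execute(&batch, &mut out).unwrap();

        let expected = Array::try_from_iter(["abc", "dog"]).unwrap();
        assert_arrays_eq(&expected, &out);
    }
}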
+ let mut str_buf = String::new(); - BinaryExecutor::execute::( + BinaryExecutor::execute::( a, + sel, b, - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(a.logical_len()), - }, - |a, b, buf| { - string_buf.clear(); - string_buf.push_str(a); - string_buf.push_str(b); - buf.put(string_buf.as_str()); + sel, + OutBuffer::from_array(output)?, + |s1, s2, buf| { + str_buf.clear(); + str_buf.push_str(s1); + str_buf.push_str(s2); + buf.put(&str_buf); }, - ) + )?; } _ => { - let mut string_buf = String::new(); + let mut str_buf = String::new(); - UniformExecutor::execute::( - inputs, - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(inputs[0].logical_len()), - }, - |strings, buf| { - string_buf.clear(); - for s in strings { - string_buf.push_str(s); + UniformExecutor::execute::( + input.arrays(), + sel, + OutBuffer::from_array(output)?, + |ss, buf| { + str_buf.clear(); + for s in ss { + str_buf.push_str(s); } - buf.put(string_buf.as_str()); + buf.put(&str_buf); }, - ) + )?; } } + + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs index 8cda9192a..2197a7ea0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UnaryExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -82,15 +84,19 @@ pub struct StringContainsConstantImpl { } impl ScalarFunctionImpl for StringContainsConstantImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let haystack = &input.arrays()[0]; - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { - buf.put(&s.contains(&self.constant)) - }) + UnaryExecutor::execute::( + haystack, + sel, + OutBuffer::from_array(output)?, + |haystack, buf| { + let v = haystack.contains(&self.constant); + buf.put(&v); + }, + ) } } @@ -98,17 +104,21 @@ impl ScalarFunctionImpl for StringContainsConstantImpl { pub struct StringContainsImpl; impl ScalarFunctionImpl for StringContainsImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let haystack = &input.arrays()[0]; + let needle = &input.arrays()[1]; - BinaryExecutor::execute::( - inputs[0], - inputs[1], - 
builder, - |s, c, buf| buf.put(&s.contains(c)), + BinaryExecutor::execute::( + haystack, + sel, + needle, + sel, + OutBuffer::from_array(output)?, + |haystack, needle, buf| { + let v = haystack.contains(needle); + buf.put(&v); + }, ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs index 86d166b50..102b594fe 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UnaryExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -86,15 +88,19 @@ pub struct EndsWithConstantImpl { } impl ScalarFunctionImpl for EndsWithConstantImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; - - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { - buf.put(&s.ends_with(&self.constant)) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let v = s.ends_with(&self.constant); + buf.put(&v); + }, + ) } } @@ -102,17 +108,21 @@ impl ScalarFunctionImpl for EndsWithConstantImpl { pub struct EndsWithImpl; impl ScalarFunctionImpl for EndsWithImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; - - BinaryExecutor::execute::( - inputs[0], - inputs[1], - builder, - |s, c, buf| buf.put(&s.ends_with(c)), + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let strings = &input.arrays()[0]; + let suffix = &input.arrays()[1]; + + BinaryExecutor::execute::( + strings, + sel, + suffix, + sel, + OutBuffer::from_array(output)?, + |s, suffix, buf| { + let v = s.ends_with(&suffix); + buf.put(&v); + }, ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs index 7943e24eb..d471afb60 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs @@ -1,10 +1,11 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBinary, PhysicalI64, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; 
-use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalBinary, PhysicalUtf8}; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -64,18 +65,19 @@ impl ScalarFunction for Length { pub struct StrLengthImpl; impl ScalarFunctionImpl for StrLengthImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - - UnaryExecutor::execute::(input, builder, |v, buf| { - let len = v.chars().count() as i64; - buf.put(&len) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let len = s.chars().count() as i64; + buf.put(&len) + }, + ) } } @@ -145,18 +147,17 @@ impl ScalarFunction for ByteLength { pub struct ByteLengthImpl; impl ScalarFunctionImpl for ByteLengthImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; // Binary applicable to both str and [u8]. - UnaryExecutor::execute::(input, builder, |v, buf| { - buf.put(&(v.len() as i64)) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| buf.put(&(v.len() as i64)), + ) } } @@ -222,18 +223,19 @@ impl ScalarFunction for BitLength { pub struct BitLengthImpl; impl ScalarFunctionImpl for BitLengthImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let input = inputs[0]; - - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; // Binary applicable to both str and [u8]. 
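// Aside (std Rust semantics, not introduced by this change): for a non-ASCII
// string such as "héllo", `chars().count()` is 5 (StrLength), `len()` is 6
// UTF-8 bytes since "é" encodes as two bytes (ByteLength), and `len() * 8`
// is 48 bits (BitLength).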
- UnaryExecutor::execute::(input, builder, |v, buf| { - let bit_len = v.len() * 8; - buf.put(&(bit_len as i64)) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| { + let bit_len = v.len() * 8; + buf.put(&(bit_len as i64)) + }, + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs index 539b35255..2452725e0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs @@ -1,11 +1,13 @@ use rayexec_error::{Result, ResultExt}; use regex::{escape, Regex}; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UnaryExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -84,16 +86,19 @@ pub struct LikeConstImpl { } impl ScalarFunctionImpl for LikeConstImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; - - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { - let b = self.constant.is_match(s); - buf.put(&b); - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let b = self.constant.is_match(s); + buf.put(&b); + }, + ) } } @@ -101,22 +106,23 @@ impl ScalarFunctionImpl for LikeConstImpl { pub struct LikeImpl; impl ScalarFunctionImpl for LikeImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let strings = &input.arrays()[0]; + let patterns = &input.arrays()[1]; let mut s_buf = String::new(); - BinaryExecutor::execute::( - inputs[0], - inputs[1], - builder, - |a, b, buf| { - match like_pattern_to_regex(&mut s_buf, b, Some('\\')) { + BinaryExecutor::execute::( + strings, + sel, + patterns, + sel, + OutBuffer::from_array(output)?, + |s, pattern, buf| { + match like_pattern_to_regex(&mut s_buf, pattern, Some('\\')) { Ok(pat) => { - let b = pat.is_match(a); + let b = pat.is_match(s); buf.put(&b); } Err(_) => { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs index ff554eea4..6a6cf572a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use
crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalI64, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; -use crate::arrays::executor::scalar::{BinaryExecutor, TernaryExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::ternary::TernaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -96,33 +98,38 @@ impl ScalarFunction for LeftPad { pub struct LeftPadImpl; impl ScalarFunctionImpl for LeftPadImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let mut string_buf = String::new(); - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), - }; - match inputs.len() { - 2 => BinaryExecutor::execute::( - inputs[0], - inputs[1], - builder, - |s, count, buf| { + match input.arrays().len() { + 2 => BinaryExecutor::execute::( + &input.arrays[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, + |s, &count, buf| { lpad(s, count, " ", &mut string_buf); buf.put(&string_buf); }, ), - 3 => TernaryExecutor::execute::( - inputs[0], - inputs[1], - inputs[2], - builder, - |s, count, pad, buf| { - lpad(s, count, pad, &mut string_buf); - buf.put(&string_buf); - }, - ), + 3 => { + TernaryExecutor::execute::( + &input.arrays[0], + sel, + &input.arrays()[1], + sel, + &input.arrays()[2], + sel, + OutBuffer::from_array(output)?, + |s, &count, pad, buf| { + lpad(s, count, pad, &mut string_buf); + buf.put(&string_buf); + }, + ) + } other => unreachable!("num inputs checked, got {other}"), } } @@ -209,33 +216,38 @@ impl ScalarFunction for RightPad { pub struct RightPadImpl; impl ScalarFunctionImpl for RightPadImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let mut string_buf = String::new(); - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), - }; - match inputs.len() { - 2 => BinaryExecutor::execute::( - inputs[0], - inputs[1], - builder, - |s, count, buf| { + match input.arrays().len() { + 2 => BinaryExecutor::execute::( + &input.arrays[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, + |s, &count, buf| { rpad(s, count, " ", &mut string_buf); buf.put(&string_buf); }, ), - 3 => TernaryExecutor::execute::( - inputs[0], - inputs[1], - inputs[2], - builder, - |s, count, pad, buf| { - rpad(s, count, pad, &mut string_buf); - buf.put(&string_buf); - }, - ), + 3 => { + TernaryExecutor::execute::( + &input.arrays[0], + sel, + &input.arrays()[1], + sel, + &input.arrays()[2], + sel, + OutBuffer::from_array(output)?, + |s, &count, pad, buf| { + rpad(s, count, pad, &mut string_buf); + buf.put(&string_buf); + }, + ) + } other => unreachable!("num inputs checked, got {other}"), } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs index 34b2ca556..cd1037423 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs @@ -1,11 +1,14 @@ use rayexec_error::{Result, ResultExt}; use regex::Regex; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalUtf8; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, TernaryExecutor, UnaryExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::ternary::TernaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -98,34 +101,42 @@ pub struct RegexpReplaceImpl { } impl ScalarFunctionImpl for RegexpReplaceImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); match (self.pattern.as_ref(), self.replacement.as_ref()) { (Some(pattern), Some(replacement)) => { - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { - // TODO: Flags to more many. - let out = pattern.replace(s, replacement); - buf.put(out.as_ref()); - }) + UnaryExecutor::execute::( + &input.arrays()[0], + sel, + OutBuffer::from_array(output)?, + |s, buf| { + // TODO: Flags to more many. 
+ let out = pattern.replace(s, replacement); + buf.put(out.as_ref()); + }, + ) + } + (Some(pattern), None) => { + BinaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[2], + sel, + OutBuffer::from_array(output)?, + |s, replacement, buf| { + let out = pattern.replace(s, replacement); + buf.put(out.as_ref()); + }, + ) } - (Some(pattern), None) => BinaryExecutor::execute::( - inputs[0], - inputs[2], - builder, - |s, replacement, buf| { - let out = pattern.replace(s, replacement); - buf.put(out.as_ref()); - }, - ), (None, Some(replacement)) => { - BinaryExecutor::execute::( - inputs[0], - inputs[1], - builder, + BinaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, |s, pattern, buf| { let pattern = match Regex::new(pattern) { Ok(pattern) => pattern, @@ -141,11 +152,14 @@ impl ScalarFunctionImpl for RegexpReplaceImpl { ) } (None, None) => { - TernaryExecutor::execute::( - inputs[0], - inputs[1], - inputs[2], - builder, + TernaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + &input.arrays()[2], + sel, + OutBuffer::from_array(output)?, |s, pattern, replacement, buf| { let pattern = match Regex::new(pattern) { Ok(pattern) => pattern, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs index b725787bc..a84e1fe54 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs @@ -2,11 +2,12 @@ use std::fmt::Debug; use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalI64, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; -use crate::arrays::executor::scalar::BinaryExecutor; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -65,27 +66,25 @@ impl ScalarFunction for Repeat { pub struct RepeatUtf8Impl; impl ScalarFunctionImpl for RepeatUtf8Impl { - fn execute(&self, inputs: &[&Array]) -> Result { - let strings = inputs[0]; - let nums = inputs[1]; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let strings = &input.arrays()[0]; + let counts = &input.arrays()[1]; - // TODO: Capacity + let mut str_buf = String::new(); - let mut string_buf = String::new(); - - BinaryExecutor::execute::( + BinaryExecutor::execute::( strings, - nums, - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(strings.logical_len()), - }, - |s, num, buf| { - string_buf.clear(); + sel, + counts, + sel, + OutBuffer::from_array(output)?, + |s, &num, buf| { + str_buf.clear(); for _ in 0..num { - string_buf.push_str(s); + str_buf.push_str(s); } - buf.put(string_buf.as_str()) + buf.put(str_buf.as_str()) }, ) } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs index 
9c7113461..af5684316 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UnaryExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -84,23 +86,23 @@ pub struct StartsWithImpl { } impl ScalarFunctionImpl for StartsWithImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); match self.constant.as_ref() { - Some(constant) => { - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { - buf.put(&s.starts_with(constant)) - }) - } - None => BinaryExecutor::execute::( - inputs[0], - inputs[1], - builder, - |s, c, buf| buf.put(&s.starts_with(c)), + Some(prefix) => UnaryExecutor::execute::( + &input.arrays()[0], + sel, + OutBuffer::from_array(output)?, + |s, buf| buf.put(&s.starts_with(prefix)), + ), + None => BinaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, + |s, prefix, buf| buf.put(&s.starts_with(prefix)), ), } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs index d338a27cc..82c5ed1db 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalI64, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; -use crate::arrays::executor::scalar::{BinaryExecutor, TernaryExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::ternary::TernaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -105,16 +107,16 @@ impl ScalarFunction for Substring { pub struct SubstringFromImpl; impl ScalarFunctionImpl for SubstringFromImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let len = inputs[0].logical_len(); - BinaryExecutor::execute::( - inputs[0], - inputs[1], - 
ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(len), - }, - |s, from, buf| buf.put(substring_from(s, from)), + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + BinaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, + |s, &from, buf| buf.put(substring_from(s, from)), ) } } @@ -123,17 +125,18 @@ impl ScalarFunctionImpl for SubstringFromImpl { pub struct SubstringFromToImpl; impl ScalarFunctionImpl for SubstringFromToImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let len = inputs[0].logical_len(); - TernaryExecutor::execute::( - inputs[0], - inputs[1], - inputs[2], - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(len), - }, - |s, from, count, buf| buf.put(substring_from_count(s, from, count)), + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + TernaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + &input.arrays()[2], + sel, + OutBuffer::from_array(output)?, + |s, &from, &count, buf| buf.put(substring_from_count(s, from, count)), ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs index d36f04e87..0cb13955e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs @@ -3,11 +3,13 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalUtf8; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UnaryExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -222,16 +224,18 @@ impl TrimWhitespaceImpl { } impl ScalarFunctionImpl for TrimWhitespaceImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), - }; - - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { - let trimmed = F::trim_func(s, " "); - buf.put(trimmed) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + UnaryExecutor::execute::( + &input.arrays()[0], + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let trimmed = F::trim_func(s, " "); + buf.put(trimmed); + }, + ) } } @@ -247,19 +251,18 @@ impl TrimPatternImpl { } impl ScalarFunctionImpl for TrimPatternImpl { - fn execute(&self, inputs: &[&Array]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), - }; - - BinaryExecutor::execute::( - inputs[0], - inputs[1], - builder, + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = 
input.selection(); + + BinaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, |s, pattern, buf| { let trimmed = F::trim_func(s, pattern); - buf.put(trimmed) + buf.put(trimmed); }, ) } diff --git a/crates/rayexec_execution/src/functions/scalar/mod.rs b/crates/rayexec_execution/src/functions/scalar/mod.rs index 22941656d..c68b9283a 100644 --- a/crates/rayexec_execution/src/functions/scalar/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/mod.rs @@ -7,7 +7,9 @@ use dyn_clone::DynClone; use rayexec_error::Result; use super::FunctionInfo; -use crate::arrays::array::Array; +use crate::arrays::array::exp::Array; +use crate::arrays::array::Array2; +use crate::arrays::batch_exp::Batch; use crate::arrays::datatype::DataType; use crate::expr::Expression; use crate::logical::binder::table_list::TableList; @@ -103,7 +105,21 @@ impl Hash for PlannedScalarFunction { } pub trait ScalarFunctionImpl: Debug + Sync + Send + DynClone { - fn execute(&self, inputs: &[&Array]) -> Result; + fn execute2(&self, inputs: &[&Array2]) -> Result { + unimplemented!() + } + + /// Execute the function the input batch, writing the output for each row + /// into `output` at the same index. + /// + /// `output` has the following guarantees: + /// - Has at least the primary buffer capacity needed to write the results. + /// - All validities are initalized to 'valid'. + /// - Array data can be made mutable via `try_as_mut()`. + /// + /// The batch's `selection` method should be called to determine which rows + /// should be looked at during function eval. + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()>; } impl Clone for Box { diff --git a/crates/rayexec_execution/src/functions/table/builtin/series.rs b/crates/rayexec_execution/src/functions/table/builtin/series.rs index aef14ab0a..f0cff25ea 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/series.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/series.rs @@ -3,15 +3,15 @@ use std::task::{Context, Waker}; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::physical_type::PhysicalI64; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalI64_2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::field::{Field, Schema}; use crate::arrays::scalar::OwnedScalarValue; use crate::arrays::storage::PrimitiveStorage; -use crate::execution::operators::{PollFinalize, PollPush}; +use crate::execution::operators::{PollFinalize2, PollPush2}; use crate::expr::{self, Expression}; use crate::functions::documentation::{Category, Documentation}; use crate::functions::table::inout::{InOutPollPull, TableInOutFunction, TableInOutPartitionState}; @@ -164,7 +164,7 @@ struct SeriesParams { impl SeriesParams { /// Generate the next set of rows using the current parameters. 
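To make the reworked `ScalarFunctionImpl::execute` contract shown a few hunks above easier to follow, here is a minimal sketch (not part of the patch) of a scalar function written against the new signature, mirroring the conversions in this diff. `CharLenImpl` is a hypothetical name, and the executor's turbofish type parameters are assumptions inferred from context, since the patch text elides them.

use rayexec_error::Result;

use crate::arrays::array::exp::Array;
use crate::arrays::batch_exp::Batch;
use crate::arrays::buffer::physical_type::{PhysicalI64, PhysicalUtf8};
use crate::arrays::executor_exp::scalar::unary::UnaryExecutor;
use crate::arrays::executor_exp::OutBuffer;
use crate::functions::scalar::ScalarFunctionImpl;

/// Hypothetical example impl, used only to illustrate the new trait shape.
#[derive(Debug, Clone, Copy)]
pub struct CharLenImpl;

impl ScalarFunctionImpl for CharLenImpl {
    fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> {
        // Only rows in the batch's selection are evaluated; each result is
        // written into `output` at the same row index.
        let sel = input.selection();
        let strings = &input.arrays()[0];

        // The input/output physical type parameters below are assumed; the
        // diff text elides the turbofish arguments.
        UnaryExecutor::execute::<PhysicalUtf8, PhysicalI64, _>(
            strings,
            sel,
            OutBuffer::from_array(output)?,
            |s, buf| buf.put(&(s.chars().count() as i64)),
        )
    }
}

The same shape repeats for the binary and ternary executors throughout this patch: one (array, selection) pair per argument, followed by the `OutBuffer` and the row closure.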
- fn generate_next(&mut self, batch_size: usize) -> Array { + fn generate_next(&mut self, batch_size: usize) -> Array2 { debug_assert!(!self.exhausted); let mut series: Vec = Vec::new(); @@ -195,7 +195,7 @@ impl SeriesParams { self.curr = *last + self.step; } - Array::new_with_array_data(DataType::Int64, PrimitiveStorage::from(series)) + Array2::new_with_array_data(DataType::Int64, PrimitiveStorage::from(series)) } } @@ -203,7 +203,7 @@ impl SeriesParams { pub struct GenerateSeriesInOutPartitionState { batch_size: usize, /// Batch we're working on. - batch: Option, + batch: Option, /// Current row number next_row_idx: usize, /// If we're finished. @@ -215,29 +215,29 @@ pub struct GenerateSeriesInOutPartitionState { } impl TableInOutPartitionState for GenerateSeriesInOutPartitionState { - fn poll_push(&mut self, cx: &mut Context, batch: Batch) -> Result { + fn poll_push(&mut self, cx: &mut Context, batch: Batch2) -> Result { if self.batch.is_some() { // Still processing current batch, come back later. self.push_waker = Some(cx.waker().clone()); if let Some(pull_waker) = self.pull_waker.take() { pull_waker.wake(); } - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } self.batch = Some(batch); self.next_row_idx = 0; - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push(&mut self, _cx: &mut Context) -> Result { + fn poll_finalize_push(&mut self, _cx: &mut Context) -> Result { self.finished = true; if let Some(waker) = self.pull_waker.take() { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } fn poll_pull(&mut self, cx: &mut Context) -> Result { @@ -259,15 +259,15 @@ impl TableInOutPartitionState for GenerateSeriesInOutPartitionState { }; // Generate new params from row. - let start = UnaryExecutor::value_at::( + let start = UnaryExecutor2::value_at::( batch.column(0).unwrap(), self.next_row_idx, )?; - let end = UnaryExecutor::value_at::( + let end = UnaryExecutor2::value_at::( batch.column(1).unwrap(), self.next_row_idx, )?; - let step = UnaryExecutor::value_at::( + let step = UnaryExecutor2::value_at::( batch.column(2).unwrap(), self.next_row_idx, )?; @@ -308,7 +308,7 @@ impl TableInOutPartitionState for GenerateSeriesInOutPartitionState { } let out = self.params.generate_next(self.batch_size); - let batch = Batch::try_new([out])?; + let batch = Batch2::try_new([out])?; let row_nums = vec![self.params.current_row_idx; batch.num_rows()]; diff --git a/crates/rayexec_execution/src/functions/table/builtin/system.rs b/crates/rayexec_execution/src/functions/table/builtin/system.rs index f0723ddeb..afa9afd66 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/system.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/system.rs @@ -7,8 +7,8 @@ use futures::future::BoxFuture; use parking_lot::Mutex; use rayexec_error::{OptionExt, RayexecError, Result}; -use crate::arrays::array::Array; -use crate::arrays::batch::Batch; +use crate::arrays::array::Array2; +use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId, ListTypeMeta}; use crate::arrays::executor::builder::{ArrayDataBuffer, GermanVarlenBuffer}; @@ -42,7 +42,7 @@ pub trait SystemFunctionImpl: Debug + Sync + Send + Copy + 'static { fn schema() -> Schema; fn new_batch( databases: &mut VecDeque<(String, Arc, Option)>, - ) -> Result; + ) -> Result; } pub type ListDatabases = SystemFunction; @@ -62,7 +62,7 @@ impl SystemFunctionImpl for ListDatabasesImpl { fn new_batch( databases: &mut 
VecDeque<(String, Arc, Option)>, - ) -> Result { + ) -> Result { let len = databases.len(); let mut database_names = GermanVarlenBuffer::::with_len(len); @@ -78,9 +78,9 @@ impl SystemFunctionImpl for ListDatabasesImpl { ); } - Batch::try_new([ - Array::new_with_array_data(DataType::Utf8, database_names.into_data()), - Array::new_with_array_data(DataType::Utf8, datasources.into_data()), + Batch2::try_new([ + Array2::new_with_array_data(DataType::Utf8, database_names.into_data()), + Array2::new_with_array_data(DataType::Utf8, datasources.into_data()), ]) } } @@ -118,7 +118,7 @@ impl SystemFunctionImpl for ListFunctionsImpl { fn new_batch( databases: &mut VecDeque<(String, Arc, Option)>, - ) -> Result { + ) -> Result { let database = databases.pop_front().required("database")?; let mut database_names = GermanVarlenStorage::with_metadata_capacity(0); @@ -231,33 +231,33 @@ impl SystemFunctionImpl for ListFunctionsImpl { Ok(()) })?; - Batch::try_new([ - Array::new_with_array_data(DataType::Utf8, database_names), - Array::new_with_array_data(DataType::Utf8, schema_names), - Array::new_with_array_data(DataType::Utf8, function_names), - Array::new_with_array_data(DataType::Utf8, function_types), - Array::new_with_array_data( + Batch2::try_new([ + Array2::new_with_array_data(DataType::Utf8, database_names), + Array2::new_with_array_data(DataType::Utf8, schema_names), + Array2::new_with_array_data(DataType::Utf8, function_names), + Array2::new_with_array_data(DataType::Utf8, function_types), + Array2::new_with_array_data( DataType::List(ListTypeMeta::new(DataType::Utf8)), ListStorage::try_new( argument_types_metadatas, - Array::new_with_array_data(DataType::Utf8, argument_types), + Array2::new_with_array_data(DataType::Utf8, argument_types), )?, ), - Array::new_with_array_data( + Array2::new_with_array_data( DataType::List(ListTypeMeta::new(DataType::Utf8)), ListStorage::try_new( argument_names_metadatas, - Array::new_with_array_data(DataType::Utf8, argument_names), + Array2::new_with_array_data(DataType::Utf8, argument_names), )?, ), - Array::new_with_array_data(DataType::Utf8, return_types), - Array::new_with_validity_and_array_data( + Array2::new_with_array_data(DataType::Utf8, return_types), + Array2::new_with_validity_and_array_data( DataType::Utf8, descriptions_validity, descriptions, ), - Array::new_with_validity_and_array_data(DataType::Utf8, examples_validity, examples), - Array::new_with_validity_and_array_data( + Array2::new_with_validity_and_array_data(DataType::Utf8, examples_validity, examples), + Array2::new_with_validity_and_array_data( DataType::Utf8, example_outputs_validity, example_outputs, @@ -284,7 +284,7 @@ impl SystemFunctionImpl for ListTablesImpl { fn new_batch( databases: &mut VecDeque<(String, Arc, Option)>, - ) -> Result { + ) -> Result { let database = databases.pop_front().required("database")?; let mut database_names = GermanVarlenStorage::with_metadata_capacity(0); @@ -308,10 +308,10 @@ impl SystemFunctionImpl for ListTablesImpl { Ok(()) })?; - Batch::try_new([ - Array::new_with_array_data(DataType::Utf8, database_names), - Array::new_with_array_data(DataType::Utf8, schema_names), - Array::new_with_array_data(DataType::Utf8, table_names), + Batch2::try_new([ + Array2::new_with_array_data(DataType::Utf8, database_names), + Array2::new_with_array_data(DataType::Utf8, schema_names), + Array2::new_with_array_data(DataType::Utf8, table_names), ]) } } @@ -333,7 +333,7 @@ impl SystemFunctionImpl for ListSchemasImpl { fn new_batch( databases: &mut VecDeque<(String, Arc, 
Option)>, - ) -> Result { + ) -> Result { let database = databases.pop_front().required("database")?; let mut database_names = GermanVarlenStorage::with_metadata_capacity(0); @@ -348,9 +348,9 @@ impl SystemFunctionImpl for ListSchemasImpl { Ok(()) })?; - Batch::try_new([ - Array::new_with_array_data(DataType::Utf8, database_names), - Array::new_with_array_data(DataType::Utf8, schema_names), + Batch2::try_new([ + Array2::new_with_array_data(DataType::Utf8, database_names), + Array2::new_with_array_data(DataType::Utf8, schema_names), ]) } } @@ -469,7 +469,7 @@ struct SystemDataTableScan { } impl DataTableScan for SystemDataTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { if self.databases.is_empty() { return Ok(None); diff --git a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs index 0535b3f5c..50baf2f58 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs @@ -3,15 +3,15 @@ use std::task::{Context, Waker}; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; -use crate::arrays::batch::Batch; +use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::batch::Batch2; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::physical_type::{PhysicalList, PhysicalType}; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::{PhysicalList_2, PhysicalType2}; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::field::{Field, Schema}; use crate::arrays::scalar::OwnedScalarValue; use crate::execution::operators::unnest::unnest; -use crate::execution::operators::{PollFinalize, PollPush}; +use crate::execution::operators::{PollFinalize2, PollPush2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation}; use crate::functions::table::inout::{InOutPollPull, TableInOutFunction, TableInOutPartitionState}; @@ -134,7 +134,7 @@ impl TableInOutFunction for UnnestInOutImpl { #[derive(Debug)] pub struct UnnestInOutPartitionState { /// The array we're unnesting. - input: Option, + input: Option, /// Number of rows in the input batch. input_num_rows: usize, /// Current row we're processing. @@ -152,7 +152,7 @@ pub struct UnnestInOutPartitionState { } impl TableInOutPartitionState for UnnestInOutPartitionState { - fn poll_push(&mut self, cx: &mut Context, inputs: Batch) -> Result { + fn poll_push(&mut self, cx: &mut Context, inputs: Batch2) -> Result { if self.current_row < self.input_num_rows { // Still processing inputs, come back later. 
self.push_waker = Some(cx.waker().clone()); @@ -160,7 +160,7 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { waker.wake(); } - return Ok(PollPush::Pending(inputs)); + return Ok(PollPush2::Pending(inputs)); } self.input_num_rows = inputs.num_rows(); @@ -177,17 +177,17 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push(&mut self, _cx: &mut Context) -> Result { + fn poll_finalize_push(&mut self, _cx: &mut Context) -> Result { self.finished = true; if let Some(waker) = self.pull_waker.take() { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } fn poll_pull(&mut self, cx: &mut Context) -> Result { @@ -207,26 +207,26 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { let input = self.input.as_ref().unwrap(); let output = match input.physical_type() { - PhysicalType::List => { + PhysicalType2::List => { let child = match input.array_data() { - ArrayData::List(list) => list.inner_array(), + ArrayData2::List(list) => list.inner_array(), _other => return Err(RayexecError::new("Unexpected storage type")), }; - match UnaryExecutor::value_at::(input, self.current_row)? { + match UnaryExecutor2::value_at::(input, self.current_row)? { Some(meta) => { // Row is a list, unnest. unnest(child, meta.len as usize, meta)? } None => { // Row is null, produce as single null - Array::new_typed_null_array(child.datatype().clone(), 1)? + Array2::new_typed_null_array(child.datatype().clone(), 1)? } } } - PhysicalType::UntypedNull => { + PhysicalType2::UntypedNull => { // Just produce null array of length 1. - Array::new_untyped_null_array(1) + Array2::new_untyped_null_array(1) } other => { return Err(RayexecError::new(format!( @@ -247,7 +247,7 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { } } - let batch = Batch::try_new([output])?; + let batch = Batch2::try_new([output])?; Ok(InOutPollPull::Batch { batch, row_nums }) } diff --git a/crates/rayexec_execution/src/functions/table/inout.rs b/crates/rayexec_execution/src/functions/table/inout.rs index f4b1229af..94d3901ef 100644 --- a/crates/rayexec_execution/src/functions/table/inout.rs +++ b/crates/rayexec_execution/src/functions/table/inout.rs @@ -4,8 +4,8 @@ use std::task::Context; use dyn_clone::DynClone; use rayexec_error::Result; -use crate::arrays::batch::Batch; -use crate::execution::operators::{PollFinalize, PollPush}; +use crate::arrays::batch::Batch2; +use crate::execution::operators::{PollFinalize2, PollPush2}; pub trait TableInOutFunction: Debug + Sync + Send + DynClone { fn create_states( @@ -16,14 +16,14 @@ pub trait TableInOutFunction: Debug + Sync + Send + DynClone { #[derive(Debug)] pub enum InOutPollPull { - Batch { batch: Batch, row_nums: Vec }, + Batch { batch: Batch2, row_nums: Vec }, Pending, Exhausted, } pub trait TableInOutPartitionState: Debug + Sync + Send { - fn poll_push(&mut self, cx: &mut Context, inputs: Batch) -> Result; - fn poll_finalize_push(&mut self, cx: &mut Context) -> Result; + fn poll_push(&mut self, cx: &mut Context, inputs: Batch2) -> Result; + fn poll_finalize_push(&mut self, cx: &mut Context) -> Result; fn poll_pull(&mut self, cx: &mut Context) -> Result; } diff --git a/crates/rayexec_execution/src/hybrid/buffer.rs b/crates/rayexec_execution/src/hybrid/buffer.rs index ffba013b8..900af7014 100644 --- a/crates/rayexec_execution/src/hybrid/buffer.rs +++ b/crates/rayexec_execution/src/hybrid/buffer.rs @@ -12,7 +12,7 @@ use tracing::debug; use 
uuid::Uuid; use super::client::{IpcBatch, PullStatus}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::intermediate::pipeline::StreamId; use crate::execution::operators::sink::{PartitionSink, SinkOperation}; @@ -95,7 +95,7 @@ impl ServerStreamBuffers { Ok(error_sink.value().clone()) } - pub fn push_batch_for_stream(&self, stream_id: &StreamId, batch: Batch) -> Result<()> { + pub fn push_batch_for_stream(&self, stream_id: &StreamId, batch: Batch2) -> Result<()> { let incoming = self.incoming.get(stream_id).ok_or_else(|| { RayexecError::new(format!("Missing incoming stream with id: {stream_id:?}")) })?; @@ -192,7 +192,7 @@ pub struct OutgoingPartitionStream { } impl PartitionSink for OutgoingPartitionStream { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(OutgoingPushFuture { batch: Some(batch), state: self.state.clone(), @@ -209,13 +209,13 @@ impl PartitionSink for OutgoingPartitionStream { #[derive(Debug)] struct OutgoingStreamState { finished: bool, - batch: Option, + batch: Option, push_waker: Option, error_sink: Arc, } struct OutgoingPushFuture { - batch: Option, + batch: Option, state: Arc>, } @@ -282,7 +282,7 @@ pub struct IncomingPartitionStream { } impl PartitionSource for IncomingPartitionStream { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(IncomingPullFuture { state: self.state.clone(), }) @@ -292,7 +292,7 @@ impl PartitionSource for IncomingPartitionStream { #[derive(Debug)] struct IncomingStreamState { finished: bool, - batches: VecDeque, + batches: VecDeque, pull_waker: Option, } @@ -301,7 +301,7 @@ struct IncomingPullFuture { } impl Future for IncomingPullFuture { - type Output = Result>; + type Output = Result>; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let mut state = self.state.lock(); diff --git a/crates/rayexec_execution/src/hybrid/client.rs b/crates/rayexec_execution/src/hybrid/client.rs index a9d430a4f..b3d436d63 100644 --- a/crates/rayexec_execution/src/hybrid/client.rs +++ b/crates/rayexec_execution/src/hybrid/client.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize}; use url::{Host, Url}; use uuid::Uuid; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::field::Schema; use crate::database::DatabaseContext; use crate::execution::intermediate::pipeline::{IntermediatePipelineGroup, StreamId}; @@ -310,7 +310,7 @@ impl ProtoConv for PullStatus { /// Wrapper around a batch that implements IPC encoding/decoding when converting /// to protobuf. #[derive(Debug)] -pub struct IpcBatch(pub Batch); +pub struct IpcBatch(pub Batch2); // TODO: Don't allocate vectors in this. 
impl ProtoConv for IpcBatch { @@ -478,7 +478,7 @@ impl HybridClient { Ok(()) } - pub async fn push(&self, stream_id: StreamId, partition: usize, batch: Batch) -> Result<()> { + pub async fn push(&self, stream_id: StreamId, partition: usize, batch: Batch2) -> Result<()> { let url = self .url .join(REMOTE_ENDPOINTS.rpc_hybrid_push) diff --git a/crates/rayexec_execution/src/hybrid/stream.rs b/crates/rayexec_execution/src/hybrid/stream.rs index c997eb591..ef8ec0ce4 100644 --- a/crates/rayexec_execution/src/hybrid/stream.rs +++ b/crates/rayexec_execution/src/hybrid/stream.rs @@ -5,7 +5,7 @@ use rayexec_error::Result; use rayexec_io::http::HttpClient; use super::client::{HybridClient, PullStatus}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::intermediate::pipeline::StreamId; use crate::execution::operators::sink::{PartitionSink, SinkOperation}; @@ -64,7 +64,7 @@ pub struct ClientToServerPartitionSink { } impl PartitionSink for ClientToServerPartitionSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { // TODO: Figure out backpressure Box::pin(async { self.client.push(self.stream_id, 0, batch).await }) } @@ -116,7 +116,7 @@ pub struct ServerToClientPartitionSource { } impl PartitionSource for ServerToClientPartitionSource { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { // TODO: Backoff + hint somehow loop { diff --git a/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs b/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs index ebf145831..29de5a46e 100644 --- a/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs +++ b/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs @@ -1,8 +1,8 @@ -use rayexec_error::{RayexecError, Result}; +use rayexec_error::Result; use super::ExpressionRewriteRule; -use crate::arrays::batch::Batch; use crate::expr::literal_expr::LiteralExpr; +use crate::expr::physical::evaluator::ExpressionEvaluator; use crate::expr::physical::planner::PhysicalExpressionPlanner; use crate::expr::Expression; use crate::logical::binder::table_list::TableList; @@ -26,28 +26,11 @@ fn maybe_fold(table_list: &TableList, expr: &mut Expression) -> Result<()> { if expr.is_const_foldable() { let planner = PhysicalExpressionPlanner::new(table_list); let phys_expr = planner.plan_scalar(&[], expr)?; - let dummy = Batch::empty_with_num_rows(1); - let val = phys_expr.eval(&dummy)?; - - if val.logical_len() != 1 { - return Err(RayexecError::new(format!( - "Expected 1 value from const eval, got {}", - val.logical_len() - ))); - } - - let val = val - .logical_value(0) // Len checked above. - .map_err(|_| { - RayexecError::new(format!( - "Failed to get folded scalar value from expression: {expr}" - )) - })?; + let mut evaluator = ExpressionEvaluator::try_new(vec![phys_expr], 1)?; + let val = evaluator.try_eval_constant()?; // Our brand new expression. 
- *expr = Expression::Literal(LiteralExpr { - literal: val.into_owned(), - }); + *expr = Expression::Literal(LiteralExpr { literal: val }); return Ok(()); } diff --git a/crates/rayexec_execution/src/storage/memory.rs b/crates/rayexec_execution/src/storage/memory.rs index d0d1efa7b..fc9e34509 100644 --- a/crates/rayexec_execution/src/storage/memory.rs +++ b/crates/rayexec_execution/src/storage/memory.rs @@ -5,7 +5,7 @@ use parking_lot::Mutex; use rayexec_error::{RayexecError, Result}; use super::table_storage::{DataTable, DataTableScan, ProjectedScan, Projections, TableStorage}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog_entry::CatalogEntry; use crate::execution::computed_batch::ComputedBatches; use crate::execution::operators::sink::PartitionSink; @@ -84,7 +84,7 @@ impl TableStorage for MemoryTableStorage { #[derive(Debug, Clone, Default)] pub struct MemoryDataTable { - data: Arc>>, + data: Arc>>, } impl DataTable for MemoryDataTable { @@ -129,11 +129,11 @@ impl DataTable for MemoryDataTable { #[derive(Debug)] pub struct MemoryDataTableScan { - data: Vec, + data: Vec, } impl DataTableScan for MemoryDataTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { Ok(self.data.pop()) }) } } @@ -142,11 +142,11 @@ impl DataTableScan for MemoryDataTableScan { pub struct MemoryDataTableInsert { resizer: BatchResizer, // TODO: Need to replace. collected: Vec, - data: Arc>>, + data: Arc>>, } impl PartitionSink for MemoryDataTableInsert { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(async { let batches = self.resizer.try_push(batch)?; if batches.is_empty() { diff --git a/crates/rayexec_execution/src/storage/table_storage.rs b/crates/rayexec_execution/src/storage/table_storage.rs index 258f69cff..6e884e576 100644 --- a/crates/rayexec_execution/src/storage/table_storage.rs +++ b/crates/rayexec_execution/src/storage/table_storage.rs @@ -4,7 +4,7 @@ use futures::future::BoxFuture; use rayexec_error::{RayexecError, Result}; use rayexec_proto::ProtoConv; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog_entry::CatalogEntry; use crate::execution::operators::sink::PartitionSink; @@ -99,7 +99,7 @@ pub trait DataTableScan: Debug + Send { /// Pull the next batch in the scan. /// /// Returns None if the scan is exhausted. - fn pull(&mut self) -> BoxFuture<'_, Result>>; + fn pull(&mut self) -> BoxFuture<'_, Result>>; } /// Helper for wrapping an unprojected scan with a projections list to produce @@ -118,7 +118,7 @@ impl ProjectedScan { ProjectedScan { projections, scan } } - async fn pull_inner(&mut self) -> Result> { + async fn pull_inner(&mut self) -> Result> { let batch = match self.scan.pull().await? 
{ Some(batch) => batch, None => return Ok(None), @@ -135,7 +135,7 @@ impl ProjectedScan { } impl DataTableScan for ProjectedScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.pull_inner().await }) } } @@ -145,7 +145,7 @@ impl DataTableScan for ProjectedScan { pub struct EmptyTableScan; impl DataTableScan for EmptyTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async move { Ok(None) }) } } diff --git a/crates/rayexec_iceberg/src/datatable.rs b/crates/rayexec_iceberg/src/datatable.rs index 1be505567..6931c5e38 100644 --- a/crates/rayexec_iceberg/src/datatable.rs +++ b/crates/rayexec_iceberg/src/datatable.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use futures::future::BoxFuture; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::storage::table_storage::{DataTable, DataTableScan, Projections}; use crate::table::{Table, TableScan}; @@ -34,7 +34,7 @@ struct IcebergTableScan { } impl DataTableScan for IcebergTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.scan.read_next().await }) } } diff --git a/crates/rayexec_iceberg/src/table.rs b/crates/rayexec_iceberg/src/table.rs index f5a08175a..d0fcac93e 100644 --- a/crates/rayexec_iceberg/src/table.rs +++ b/crates/rayexec_iceberg/src/table.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use futures::StreamExt; use rayexec_error::{RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::storage::table_storage::Projections; use rayexec_io::location::{AccessConfig, FileLocation}; @@ -276,7 +276,7 @@ pub struct TableScan { } impl TableScan { - pub async fn read_next(&mut self) -> Result> { + pub async fn read_next(&mut self) -> Result> { loop { if self.current.is_none() { let file = match self.files.pop_front() { diff --git a/crates/rayexec_parquet/src/copy_to.rs b/crates/rayexec_parquet/src/copy_to.rs index 1bf3fbb42..0c20dff30 100644 --- a/crates/rayexec_parquet/src/copy_to.rs +++ b/crates/rayexec_parquet/src/copy_to.rs @@ -3,7 +3,7 @@ use std::fmt; use futures::future::BoxFuture; use futures::FutureExt; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::execution::operators::sink::PartitionSink; use rayexec_execution::functions::copy::CopyToFunction; @@ -47,7 +47,7 @@ pub struct ParquetCopyToSink { } impl ParquetCopyToSink { - async fn push_inner(&mut self, batch: Batch) -> Result<()> { + async fn push_inner(&mut self, batch: Batch2) -> Result<()> { self.writer.write(&batch).await?; Ok(()) } @@ -59,7 +59,7 @@ impl ParquetCopyToSink { } impl PartitionSink for ParquetCopyToSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { self.push_inner(batch).boxed() } diff --git a/crates/rayexec_parquet/src/functions/datatable.rs b/crates/rayexec_parquet/src/functions/datatable.rs index bf00983bd..1859526a7 100644 --- a/crates/rayexec_parquet/src/functions/datatable.rs +++ b/crates/rayexec_parquet/src/functions/datatable.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use futures::future::BoxFuture; use 
rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::runtime::Runtime; use rayexec_execution::storage::table_storage::{DataTable, DataTableScan, Projections}; @@ -71,7 +71,7 @@ struct RowGroupsScan { } impl DataTableScan for RowGroupsScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.reader.read_next().await }) } } diff --git a/crates/rayexec_parquet/src/reader/mod.rs b/crates/rayexec_parquet/src/reader/mod.rs index 15d8c4106..1a592824a 100644 --- a/crates/rayexec_parquet/src/reader/mod.rs +++ b/crates/rayexec_parquet/src/reader/mod.rs @@ -19,8 +19,8 @@ use parquet::file::reader::{ChunkReader, Length, SerializedPageReader}; use parquet::schema::types::ColumnDescPtr; use primitive::PrimitiveArrayReader; use rayexec_error::{RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::array::{Array, ArrayData}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::array::{Array2, ArrayData2}; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::bitmap::Bitmap; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::field::Schema; @@ -32,7 +32,7 @@ use crate::metadata::Metadata; pub trait ArrayBuilder: Send { /// Consume the current buffer and build an array. - fn build(&mut self) -> Result; + fn build(&mut self) -> Result; /// Sets the page reader the builder should now be reading from. fn set_page_reader(&mut self, page_reader: P) -> Result<()>; @@ -115,7 +115,7 @@ where /// Trait for converting a buffer of values into array data. pub trait IntoArrayData { - fn into_array_data(self) -> ArrayData; + fn into_array_data(self) -> ArrayData2; } pub fn def_levels_into_bitmap(def_levels: Vec) -> Bitmap { @@ -232,7 +232,7 @@ impl AsyncBatchReader { }) } - pub async fn read_next(&mut self) -> Result> { + pub async fn read_next(&mut self) -> Result> { if self.current_row_group.is_none() { match self.row_groups.pop_front() { Some(group) => { @@ -266,7 +266,7 @@ impl AsyncBatchReader { /// Try to read the next batch from the array builders. /// /// Returns Ok(None) when there's nothing left to read. 
- fn maybe_read_batch(&mut self) -> Result> { + fn maybe_read_batch(&mut self) -> Result> { for state in self.column_states.iter_mut() { state.builder.read_rows(self.batch_size)?; } @@ -276,7 +276,7 @@ impl AsyncBatchReader { .map(|state| state.builder.build()) .collect::>>()?; - let batch = Batch::try_new(arrays)?; + let batch = Batch2::try_new(arrays)?; if batch.num_rows() == 0 { Ok(None) diff --git a/crates/rayexec_parquet/src/reader/primitive.rs b/crates/rayexec_parquet/src/reader/primitive.rs index 81e8c615f..f7ccb09e1 100644 --- a/crates/rayexec_parquet/src/reader/primitive.rs +++ b/crates/rayexec_parquet/src/reader/primitive.rs @@ -4,10 +4,8 @@ use parquet::column::reader::basic::BasicColumnValueDecoder; use parquet::data_type::{DataType as ParquetDataType, Int96}; use parquet::schema::types::ColumnDescPtr; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::array::{Array, ArrayData}; +use rayexec_execution::arrays::array::{Array2, ArrayData2}; use rayexec_execution::arrays::bitmap::Bitmap; -use rayexec_execution::arrays::compute::cast::array::cast_array; -use rayexec_execution::arrays::compute::cast::behavior::CastFailBehavior; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::storage::{BooleanStorage, PrimitiveStorage}; @@ -43,7 +41,7 @@ where } /// Take the currently read values and convert into an array. - pub fn take_array(&mut self) -> Result { + pub fn take_array(&mut self) -> Result { let def_levels = self.values_reader.take_def_levels(); let _rep_levels = self.values_reader.take_rep_levels(); @@ -86,14 +84,17 @@ where let needs_cast = build_type != self.datatype; - let mut array = match bitmap { - Some(bitmap) => Array::new_with_validity_and_array_data(build_type, bitmap, array_data), - None => Array::new_with_array_data(build_type, array_data), + let array = match bitmap { + Some(bitmap) => { + Array2::new_with_validity_and_array_data(build_type, bitmap, array_data) + } + None => Array2::new_with_array_data(build_type, array_data), }; - if needs_cast { - array = cast_array(&array, self.datatype.clone(), CastFailBehavior::Null)?; - } + // TODO + // if needs_cast { + // array = cast_array(&array, self.datatype.clone(), CastFailBehavior::Null)?; + // } Ok(array) } @@ -106,7 +107,7 @@ where T::T: Copy + Default, Vec: IntoArrayData, { - fn build(&mut self) -> Result { + fn build(&mut self) -> Result { self.take_array() } @@ -121,7 +122,7 @@ where } impl IntoArrayData for Vec { - fn into_array_data(self) -> ArrayData { + fn into_array_data(self) -> ArrayData2 { let values = Bitmap::from_iter(self); BooleanStorage::from(values).into() } @@ -130,7 +131,7 @@ impl IntoArrayData for Vec { macro_rules! 
impl_into_array_primitive { ($prim:ty) => { impl IntoArrayData for Vec<$prim> { - fn into_array_data(self) -> ArrayData { + fn into_array_data(self) -> ArrayData2 { PrimitiveStorage::from(self).into() } } @@ -151,7 +152,7 @@ impl_into_array_primitive!(f32); impl_into_array_primitive!(f64); impl IntoArrayData for Vec { - fn into_array_data(self) -> ArrayData { + fn into_array_data(self) -> ArrayData2 { let values: Vec<_> = self.into_iter().map(|v| v.to_nanos()).collect(); PrimitiveStorage::from(values).into() } diff --git a/crates/rayexec_parquet/src/reader/varlen.rs b/crates/rayexec_parquet/src/reader/varlen.rs index 5091e6f19..32ecdf837 100644 --- a/crates/rayexec_parquet/src/reader/varlen.rs +++ b/crates/rayexec_parquet/src/reader/varlen.rs @@ -5,7 +5,7 @@ use parquet::data_type::{ByteArray, DataType as ParquetDataType}; use parquet::decoding::view::ViewBuffer; use parquet::schema::types::ColumnDescPtr; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::array::Array; +use rayexec_execution::arrays::array::Array2; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::executor::builder::ArrayDataBuffer; @@ -32,7 +32,7 @@ where } } - pub fn take_array(&mut self) -> Result { + pub fn take_array(&mut self) -> Result { let def_levels = self.values_reader.take_def_levels(); let _rep_levels = self.values_reader.take_rep_levels(); @@ -55,10 +55,10 @@ where // The "null" values will just be zeroed metadata fields. insert_null_values(buffer.metadata_mut(), &bitmap); - Array::new_with_validity_and_array_data(self.datatype.clone(), bitmap, buffer.into_data()) + Array2::new_with_validity_and_array_data(self.datatype.clone(), bitmap, buffer.into_data()) } None => { - Array::new_with_array_data(self.datatype.clone(), view_buffer.into_buffer().into_data()) + Array2::new_with_array_data(self.datatype.clone(), view_buffer.into_buffer().into_data()) } } } @@ -73,7 +73,7 @@ impl
ArrayBuilder for VarlenArrayReader
where P: PageReader, { - fn build(&mut self) -> Result { + fn build(&mut self) -> Result { self.take_array() } diff --git a/crates/rayexec_parquet/src/writer/mod.rs b/crates/rayexec_parquet/src/writer/mod.rs index 1e380b9df..149f17914 100644 --- a/crates/rayexec_parquet/src/writer/mod.rs +++ b/crates/rayexec_parquet/src/writer/mod.rs @@ -12,10 +12,10 @@ use parquet::file::writer::{write_page, SerializedFileWriter}; use parquet::format::FileMetaData; use parquet::schema::types::SchemaDescriptor; use rayexec_error::{not_implemented, OptionExt, RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::array::{Array, ArrayData}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::array::{Array2, ArrayData2}; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::DataType; -use rayexec_execution::arrays::executor::physical_type::{PhysicalBinary, PhysicalStorage}; +use rayexec_execution::arrays::executor::physical_type::{PhysicalBinary_2, PhysicalStorage2}; use rayexec_execution::arrays::field::Schema; use rayexec_execution::arrays::storage::AddressableStorage; use rayexec_io::FileSink; @@ -59,7 +59,7 @@ impl AsyncBatchWriter { } /// Encode and write a batch to the underlying file sink. - pub async fn write(&mut self, batch: &Batch) -> Result<()> { + pub async fn write(&mut self, batch: &Batch2) -> Result<()> { if batch.num_rows() == 0 { return Ok(()); } @@ -178,7 +178,7 @@ impl RowGroupWriter { }) } - fn write(&mut self, batch: &Batch) -> Result<()> { + fn write(&mut self, batch: &Batch2) -> Result<()> { for (writer, col) in self.column_writers.iter_mut().zip(batch.columns()) { if col.has_selection() { let unselected_array = col.unselect()?; @@ -234,7 +234,7 @@ impl PageWriter for BufferedPageWriter { /// Write an array into the column writer. // TODO: Validity. -fn write_array(writer: &mut ColumnWriter
, array: &Array) -> Result<()> { +fn write_array(writer: &mut ColumnWriter
, array: &Array2) -> Result<()> { if array.has_selection() { return Err(RayexecError::new( "Array needs to be unselected before it can be written", @@ -244,7 +244,7 @@ fn write_array(writer: &mut ColumnWriter
, array: &Array) -> Re match writer { ColumnWriter::BoolColumnWriter(writer) => { match array.array_data() { - ArrayData::Boolean(d) => { + ArrayData2::Boolean(d) => { let bools: Vec<_> = d.as_ref().as_ref().iter().collect(); writer .write_batch(&bools, None, None) @@ -255,13 +255,13 @@ fn write_array(writer: &mut ColumnWriter
, array: &Array) -> Re } } ColumnWriter::Int32ColumnWriter(writer) => match array.array_data() { - ArrayData::Int32(d) => { + ArrayData2::Int32(d) => { writer .write_batch(d.as_slice(), None, None) .context("failed to write i32 data")?; Ok(()) } - ArrayData::UInt32(d) => { + ArrayData2::UInt32(d) => { // SAFETY: u32 and i32 safe to cast to/from. This follows // upstream behavior. let data = unsafe { d.try_reintepret_cast::()? }; @@ -273,13 +273,13 @@ fn write_array(writer: &mut ColumnWriter
, array: &Array) -> Re _ => Err(RayexecError::new("expected i32/u32 data")), }, ColumnWriter::Int64ColumnWriter(writer) => match array.array_data() { - ArrayData::Int64(d) => { + ArrayData2::Int64(d) => { writer .write_batch(d.as_slice(), None, None) .context("failed to write i64 data")?; Ok(()) } - ArrayData::UInt64(d) => { + ArrayData2::UInt64(d) => { // SAFETY: u64 and i64 safe to cast to/from. This follows // upstream behavior. let data = unsafe { d.try_reintepret_cast::()? }; @@ -291,7 +291,7 @@ fn write_array(writer: &mut ColumnWriter
, array: &Array) -> Re _ => Err(RayexecError::new("expected i64/u64 data")), }, ColumnWriter::FloatColumnWriter(writer) => match array.array_data() { - ArrayData::Float32(d) => { + ArrayData2::Float32(d) => { writer .write_batch(d.as_slice(), None, None) .context("failed to write f32 data")?; @@ -300,7 +300,7 @@ fn write_array(writer: &mut ColumnWriter
, array: &Array) -> Re _ => Err(RayexecError::new("expected f32 data")), }, ColumnWriter::DoubleColumnWriter(writer) => match array.array_data() { - ArrayData::Float64(d) => { + ArrayData2::Float64(d) => { writer .write_batch(d.as_slice(), None, None) .context("failed to write f64 data")?; @@ -309,11 +309,11 @@ fn write_array(writer: &mut ColumnWriter
, array: &Array) -> Re _ => Err(RayexecError::new("expected f64 data")), }, ColumnWriter::ByteArrayColumnWriter(writer) => match array.array_data() { - ArrayData::Binary(_) => { + ArrayData2::Binary(_) => { // TODO: Try not to copy here. There's a hard requirement on the // physical type being `Bytes`, and so a conversion needs to // happen somewhere. - let storage = PhysicalBinary::get_storage(array.array_data())?; + let storage = PhysicalBinary_2::get_storage(array.array_data())?; let mut data = Vec::with_capacity(storage.len()); for idx in 0..storage.len() { let val = storage.get(idx).required("binary data")?; diff --git a/crates/rayexec_postgres/src/lib.rs b/crates/rayexec_postgres/src/lib.rs index 0f5e1e344..a91fe8698 100644 --- a/crates/rayexec_postgres/src/lib.rs +++ b/crates/rayexec_postgres/src/lib.rs @@ -11,8 +11,8 @@ use futures::future::BoxFuture; use futures::stream::BoxStream; use futures::{StreamExt, TryFutureExt}; use rayexec_error::{RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::array::Array; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::array::Array2; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::{DataType, DecimalTypeMeta}; use rayexec_execution::arrays::field::Field; use rayexec_execution::arrays::scalar::OwnedScalarValue; @@ -236,11 +236,11 @@ impl DataTable for PostgresDataTable { } pub struct PostgresDataTableScan { - stream: BoxStream<'static, Result>, + stream: BoxStream<'static, Result>, } impl DataTableScan for PostgresDataTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.stream.next().await.transpose() }) } } @@ -390,7 +390,7 @@ impl PostgresClient { Ok(fields) } - fn binary_rows_to_batch(typs: &[DataType], rows: Vec) -> Result { + fn binary_rows_to_batch(typs: &[DataType], rows: Vec) -> Result { fn row_iter<'a, T: FromSql<'a>>( rows: &'a [BinaryCopyOutRow], idx: usize, @@ -401,32 +401,32 @@ impl PostgresClient { let mut arrays = Vec::with_capacity(typs.len()); for (idx, typ) in typs.iter().enumerate() { let arr = match typ { - DataType::Boolean => Array::from_iter(row_iter::(&rows, idx)), - DataType::Int8 => Array::from_iter(row_iter::(&rows, idx)), - DataType::Int16 => Array::from_iter(row_iter::(&rows, idx)), - DataType::Int32 => Array::from_iter(row_iter::(&rows, idx)), - DataType::Int64 => Array::from_iter(row_iter::(&rows, idx)), + DataType::Boolean => Array2::from_iter(row_iter::(&rows, idx)), + DataType::Int8 => Array2::from_iter(row_iter::(&rows, idx)), + DataType::Int16 => Array2::from_iter(row_iter::(&rows, idx)), + DataType::Int32 => Array2::from_iter(row_iter::(&rows, idx)), + DataType::Int64 => Array2::from_iter(row_iter::(&rows, idx)), DataType::Decimal128(m) => { - let primitives = Array::from_iter(rows.iter().map(|row| { + let primitives = Array2::from_iter(rows.iter().map(|row| { let decimal = row.try_get::(idx).ok(); // TODO: Rescale decimal.map(|d| d.0.value) })); match primitives.validity() { - Some(validity) => Array::new_with_validity_and_array_data( + Some(validity) => Array2::new_with_validity_and_array_data( DataType::Decimal128(DecimalTypeMeta::new(m.precision, m.scale)), validity.clone(), primitives.array_data().clone(), ), - None => Array::new_with_array_data( + None => Array2::new_with_array_data( DataType::Decimal128(DecimalTypeMeta::new(m.precision, m.scale)), primitives.array_data().clone(), ), } } - DataType::Utf8 => Array::from_iter( + DataType::Utf8 => 
Array2::from_iter( rows.iter() .map(|row| -> Option<&str> { row.try_get(idx).ok() }), ), @@ -439,6 +439,6 @@ impl PostgresClient { arrays.push(arr); } - Batch::try_new(arrays) + Batch2::try_new(arrays) } } diff --git a/crates/rayexec_shell/src/result_table.rs b/crates/rayexec_shell/src/result_table.rs index 208c0f64e..519a7a6ec 100644 --- a/crates/rayexec_shell/src/result_table.rs +++ b/crates/rayexec_shell/src/result_table.rs @@ -5,8 +5,8 @@ use std::task::{Context, Poll}; use futures::stream::Stream; use futures::{StreamExt, TryStreamExt}; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::array::Array; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::array::Array2; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::arrays::format::pretty::table::PrettyTable; use rayexec_execution::arrays::row::ScalarRow; @@ -61,7 +61,7 @@ impl StreamingTable { } impl Stream for StreamingTable { - type Item = Result; + type Item = Result; fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { self.result.stream.poll_next_unpin(cx) @@ -71,7 +71,7 @@ impl Stream for StreamingTable { #[derive(Debug, Clone, PartialEq)] pub struct MaterializedResultTable { pub(crate) schema: Schema, - pub(crate) batches: Vec, + pub(crate) batches: Vec, pub(crate) planning_profile: Option, pub(crate) execution_profile: Option, } @@ -80,7 +80,7 @@ impl MaterializedResultTable { /// Create a new materialized result table. /// /// Mostly for testing. - pub fn try_new(schema: Schema, batches: impl IntoIterator) -> Result { + pub fn try_new(schema: Schema, batches: impl IntoIterator) -> Result { let batches: Vec<_> = batches.into_iter().collect(); for batch in &batches { if batch.columns().len() != schema.fields.len() { @@ -126,7 +126,7 @@ impl MaterializedResultTable { PrettyTable::try_new(&self.schema, &self.batches, width, max_rows) } - pub fn iter_batches(&self) -> impl Iterator { + pub fn iter_batches(&self) -> impl Iterator { self.batches.iter() } @@ -148,7 +148,7 @@ impl MaterializedResultTable { /// within that array. 
diff --git a/crates/rayexec_unity_catalog/src/functions.rs b/crates/rayexec_unity_catalog/src/functions.rs
index 5fe88720a..b8daad5b2 100644
--- a/crates/rayexec_unity_catalog/src/functions.rs
+++ b/crates/rayexec_unity_catalog/src/functions.rs
@@ -7,8 +7,8 @@ use futures::future::BoxFuture;
 use futures::stream::BoxStream;
 use futures::{FutureExt, TryStreamExt};
 use rayexec_error::Result;
-use rayexec_execution::arrays::array::Array;
-use rayexec_execution::arrays::batch::Batch;
+use rayexec_execution::arrays::array::Array2;
+use rayexec_execution::arrays::batch::Batch2;
 use rayexec_execution::arrays::datatype::{DataType, DataTypeId};
 use rayexec_execution::arrays::field::{Field, Schema};
 use rayexec_execution::arrays::scalar::OwnedScalarValue;
@@ -66,7 +66,7 @@ pub trait UnityObjectsOperation:
     /// Read the next batch from the stream.
     ///
     /// Returns Ok(None) when stream is finished.
-    fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result<Option<Batch>>>;
+    fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result<Option<Batch2>>>;
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -128,18 +128,18 @@ impl UnityObjectsOperation for ListSchemasOperation {
         Ok(ListSchemasStreamState { stream })
     }

-    fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result<Option<Batch>>> {
+    fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result<Option<Batch2>>> {
         Box::pin(async {
             let resp = state.stream.try_next().await?;
             match resp {
                 Some(resp) => {
-                    let names = Array::from_iter(resp.schemas.iter().map(|s| s.name.as_str()));
+                    let names = Array2::from_iter(resp.schemas.iter().map(|s| s.name.as_str()));
                     let catalog_names =
-                        Array::from_iter(resp.schemas.iter().map(|s| s.catalog_name.as_str()));
+                        Array2::from_iter(resp.schemas.iter().map(|s| s.catalog_name.as_str()));
                     let comments =
-                        Array::from_iter(resp.schemas.iter().map(|s| s.comment.as_deref()));
+                        Array2::from_iter(resp.schemas.iter().map(|s| s.comment.as_deref()));

-                    let batch = Batch::try_new([names, catalog_names, comments])?;
+                    let batch = Batch2::try_new([names, catalog_names, comments])?;

                     Ok(Some(batch))
                 }
                 None => Ok(None),
@@ -216,26 +216,27 @@ impl UnityObjectsOperation for ListTablesOperation {
         Ok(ListTablesStreamState { stream })
     }

-    fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result<Option<Batch>>> {
+    fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result<Option<Batch2>>> {
         Box::pin(async {
             let resp = state.stream.try_next().await?;
             match resp {
                 Some(resp) => {
-                    let names = Array::from_iter(resp.tables.iter().map(|s| s.name.as_str()));
+                    let names = Array2::from_iter(resp.tables.iter().map(|s| s.name.as_str()));
                     let catalog_names =
-                        Array::from_iter(resp.tables.iter().map(|s| s.catalog_name.as_str()));
+                        Array2::from_iter(resp.tables.iter().map(|s| s.catalog_name.as_str()));
                     let schema_names =
-                        Array::from_iter(resp.tables.iter().map(|s| s.schema_name.as_str()));
+                        Array2::from_iter(resp.tables.iter().map(|s| s.schema_name.as_str()));
                     let table_types =
-                        Array::from_iter(resp.tables.iter().map(|s| s.table_type.as_str()));
-                    let data_source_formats =
-                        Array::from_iter(resp.tables.iter().map(|s| s.data_source_format.as_str()));
+                        Array2::from_iter(resp.tables.iter().map(|s| s.table_type.as_str()));
+                    let data_source_formats = Array2::from_iter(
+                        resp.tables.iter().map(|s| s.data_source_format.as_str()),
+                    );
                     let storage_locations =
-                        Array::from_iter(resp.tables.iter().map(|s| s.storage_location.as_str()));
+                        Array2::from_iter(resp.tables.iter().map(|s| s.storage_location.as_str()));
                     let comments =
-                        Array::from_iter(resp.tables.iter().map(|s| s.comment.as_deref()));
+                        Array2::from_iter(resp.tables.iter().map(|s| s.comment.as_deref()));

-                    let batch = Batch::try_new([
+                    let batch = Batch2::try_new([
                         names,
                         catalog_names,
                         schema_names,
@@ -352,7 +353,7 @@ pub struct UnityObjectsDataTableScan> {
 }

 impl> DataTableScan for UnityObjectsDataTableScan {
-    fn pull(&mut self) -> BoxFuture<'_, Result<Option<Batch>>> {
+    fn pull(&mut self) -> BoxFuture<'_, Result<Option<Batch2>>> {
         O::next_batch(&mut self.stream)
     }
 }
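All of the scans touched in this diff (`CsvFileScan`, `PostgresDataTableScan`, `UnityObjectsDataTableScan`) share the same `pull` contract after the rename: `Batch2`es until `Ok(None)`. A hedged sketch of a generic caller; the helper name is invented and the import path assumes the `table_storage` module used by the other scans:

```rust
use rayexec_error::Result;
use rayexec_execution::arrays::batch::Batch2;
use rayexec_execution::storage::table_storage::DataTableScan;

// Hypothetical driver loop: pull until the scan signals exhaustion with None.
async fn drain_scan<S: DataTableScan>(scan: &mut S) -> Result<Vec<Batch2>> {
    let mut batches = Vec::new();
    while let Some(batch) = scan.pull().await? {
        batches.push(batch);
    }
    Ok(batches)
}
```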
diff --git a/crates/rayexec_wasm/src/session.rs b/crates/rayexec_wasm/src/session.rs
index 0c3e693d3..eb39d2873 100644
--- a/crates/rayexec_wasm/src/session.rs
+++ b/crates/rayexec_wasm/src/session.rs
@@ -182,8 +182,8 @@ impl WasmMaterializedColumn {

 #[cfg(test)]
 mod tests {
-    use rayexec_execution::arrays::array::Array;
-    use rayexec_execution::arrays::batch::Batch;
+    use rayexec_execution::arrays::array::Array2;
+    use rayexec_execution::arrays::batch::Batch2;
     use rayexec_execution::arrays::datatype::DataType;
     use rayexec_execution::arrays::field::{Field, Schema};
@@ -194,9 +194,9 @@ mod tests {
         let table = MaterializedResultTable::try_new(
             Schema::new([Field::new("c1", DataType::Int32, true)]),
             [
-                Batch::try_new([Array::from_iter([0, 1, 2, 3])]).unwrap(),
-                Batch::try_new([Array::from_iter([4, 5])]).unwrap(),
-                Batch::try_new([Array::from_iter([6, 7, 8, 9, 10])]).unwrap(),
+                Batch2::try_new([Array2::from_iter([0, 1, 2, 3])]).unwrap(),
+                Batch2::try_new([Array2::from_iter([4, 5])]).unwrap(),
+                Batch2::try_new([Array2::from_iter([6, 7, 8, 9, 10])]).unwrap(),
             ],
         )
         .unwrap();
diff --git a/crates/stdutil/Cargo.toml b/crates/stdutil/Cargo.toml
new file mode 100644
index 000000000..7cb3e1417
--- /dev/null
+++ b/crates/stdutil/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "stdutil"
+version.workspace = true
+edition.workspace = true
+
+[dependencies]
diff --git a/crates/stdutil/src/iter.rs b/crates/stdutil/src/iter.rs
new file mode 100644
index 000000000..f77a6fda8
--- /dev/null
+++ b/crates/stdutil/src/iter.rs
@@ -0,0 +1,35 @@
+/// Similar to `IntoIterator`, but for an iterator with an exact size.
+pub trait IntoExactSizeIterator {
+    type Item;
+    type IntoIter: ExactSizeIterator<Item = Self::Item>;
+
+    /// Converts self into the `ExactSizeIterator`.
+    fn into_iter(self) -> Self::IntoIter;
+}
+
+/// Auto-implement for any exact size iterator.
+impl<I> IntoExactSizeIterator for I
+where
+    I: IntoIterator,
+    I::IntoIter: ExactSizeIterator,
+{
+    type Item = I::Item;
+    type IntoIter = I::IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.into_iter()
+    }
+}
+
+pub trait FromExactSizeIterator<A>: Sized {
+    /// Create Self from an exact size iterator.
+    fn from_iter<T: IntoExactSizeIterator<Item = A>>(iter: T) -> Self;
+}
+
+pub trait TryFromExactSizeIterator<A>: Sized {
+    /// Error type that will be returned.
+    type Error;
+
+    /// Try to create Self from an exact size iterator.
+    fn try_from_iter<T: IntoExactSizeIterator<Item = A>>(iter: T) -> Result<Self, Self::Error>;
+}
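The new `stdutil::iter` traits mirror `IntoIterator`/`FromIterator` but promise an exact length up front, so a builder can allocate once instead of growing. A small illustration; the `ExactVec` type is invented for this sketch and assumes the `Item = Self::Item` tie on `IntoIter` shown above:

```rust
use stdutil::iter::{FromExactSizeIterator, IntoExactSizeIterator};

/// Invented example type: a Vec wrapper that always preallocates exactly.
struct ExactVec<T> {
    items: Vec<T>,
}

impl<A> FromExactSizeIterator<A> for ExactVec<A> {
    fn from_iter<T: IntoExactSizeIterator<Item = A>>(iter: T) -> Self {
        let iter = iter.into_iter();
        // The iterator's length is exact, so a single allocation suffices.
        let mut items = Vec::with_capacity(iter.len());
        items.extend(iter);
        ExactVec { items }
    }
}

// Arrays and slices already yield ExactSizeIterators, so the blanket
// IntoExactSizeIterator impl applies:
// let v: ExactVec<i32> = ExactVec::from_iter([1, 2, 3]);
```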
diff --git a/crates/stdutil/src/lib.rs b/crates/stdutil/src/lib.rs
new file mode 100644
index 000000000..854431330
--- /dev/null
+++ b/crates/stdutil/src/lib.rs
@@ -0,0 +1,4 @@
+//! Utilities that are closely related to items found in std.
+
+pub mod iter;
+pub mod marker;
diff --git a/crates/stdutil/src/marker.rs b/crates/stdutil/src/marker.rs
new file mode 100644
index 000000000..b835146fb
--- /dev/null
+++ b/crates/stdutil/src/marker.rs
@@ -0,0 +1,42 @@
+use std::marker::PhantomData;
+
+/// Marker type that indicates covariance of `T` but does not inherit the bounds
+/// of `T`.
+///
+/// Has all the same properties of `PhantomData` minus the inherited trait
+/// bounds. This lets us make structs and other types covariant to `T` but
+/// without the potential inheritance of `?Sized` (or other undesired traits) in
+/// the outer type.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct PhantomCovariant<T>(PhantomData<fn() -> T>)
+where
+    T: ?Sized;
+
+impl<T> PhantomCovariant<T>
+where
+    T: ?Sized,
+{
+    pub const fn new() -> Self {
+        PhantomCovariant(PhantomData)
+    }
+}
+
+impl<T> Clone for PhantomCovariant<T>
+where
+    T: ?Sized,
+{
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<T> Copy for PhantomCovariant<T> where T: ?Sized {}
+
+impl<T> Default for PhantomCovariant<T>
+where
+    T: ?Sized,
+{
+    fn default() -> Self {
+        Self::new()
+    }
+}
diff --git a/test_bin/integration_slt_hybrid.rs b/test_bin/integration_slt_hybrid.rs
index 69c13283d..b5a89aed6 100644
--- a/test_bin/integration_slt_hybrid.rs
+++ b/test_bin/integration_slt_hybrid.rs
@@ -5,8 +5,8 @@ use std::time::Duration;
 use rayexec_debug::table_storage::TablePreload;
 use rayexec_debug::{DebugDataSource, DebugDataSourceOptions};
 use rayexec_error::Result;
-use rayexec_execution::arrays::array::Array;
-use rayexec_execution::arrays::batch::Batch;
+use rayexec_execution::arrays::array::Array2;
+use rayexec_execution::arrays::batch::Batch2;
 use rayexec_execution::arrays::datatype::DataType;
 use rayexec_execution::arrays::field::Field;
 use rayexec_execution::datasource::DataSourceRegistry;
@@ -36,9 +36,9 @@ pub fn main() -> Result<()> {
                 Field::new("c1", DataType::Int64, false),
                 Field::new("c2", DataType::Utf8, false),
             ],
-            data: Batch::try_new([
-                Array::from_iter([1_i64, 2_i64]),
-                Array::from_iter(["a", "b"]),
+            data: Batch2::try_new([
+                Array2::from_iter([1_i64, 2_i64]),
+                Array2::from_iter(["a", "b"]),
             ])?,
         },
         // Table specific to insert into. Don't rely on this outside of
@@ -50,9 +50,9 @@ pub fn main() -> Result<()> {
                 Field::new("c1", DataType::Int64, false),
                 Field::new("c2", DataType::Utf8, false),
             ],
-            data: Batch::try_new([
-                Array::from_iter([1_i64, 2_i64]),
-                Array::from_iter(["a", "b"]),
+            data: Batch2::try_new([
+                Array2::from_iter([1_i64, 2_i64]),
+                Array2::from_iter(["a", "b"]),
             ])?,
         },
     ],
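Looking back at `crates/stdutil/src/marker.rs` above: `PhantomCovariant<T>` lets a type mention `T` for variance and type-safety purposes without storing one and without picking up `T`'s auto traits or a `Sized` requirement. A hypothetical use; `TypedBytes` is invented purely for illustration:

```rust
use stdutil::marker::PhantomCovariant;

/// Invented example: a typed view over raw bytes. No `T` value is stored, so
/// the view works for unsized `T` and stays Send/Sync regardless of `T`.
struct TypedBytes<'a, T: ?Sized> {
    bytes: &'a [u8],
    _type: PhantomCovariant<T>,
}

impl<'a, T: ?Sized> TypedBytes<'a, T> {
    const fn new(bytes: &'a [u8]) -> Self {
        TypedBytes {
            bytes,
            _type: PhantomCovariant::new(),
        }
    }

    fn len_bytes(&self) -> usize {
        self.bytes.len()
    }
}
```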