Skip to content

Commit

Permalink
Merge pull request #48 from pydiverse/fuzz
Browse files Browse the repository at this point in the history
Add operator documentation and basic fuzz testing
  • Loading branch information
finn-rudolph authored Jan 11, 2025
2 parents 3f600e9 + 66db4a0 commit dfb863e
Show file tree
Hide file tree
Showing 45 changed files with 2,299 additions and 266 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ coverage.xml
*.cover
.hypothesis/
/.pytest_cache/
fuzz_failures/*

# Translations
*.mo
Expand Down
File renamed without changes.
5 changes: 0 additions & 5 deletions docs/source/_templates/autosummary/short_title.rst

This file was deleted.

25 changes: 25 additions & 0 deletions docs/source/_templates/short_title.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{% if (name == "__add__") %}\+
{% elif name == "__sub__" %}\-
{% elif name == "__mul__" %}\*
{% elif name == "__truediv__" %}\/
{% elif name == "__floordiv__" %}\/\/
{% elif name == "__pow__" %}\*\* (pow)
{% elif name == "__mod__" %}\%
{% elif name == "__pos__" %}\+ (unary)
{% elif name == "__neg__" %}\- (unary)
{% elif name == "__lt__" %}\<
{% elif name == "__le__" %}\<\=
{% elif name == "__gt__" %}\>
{% elif name == "__ge__" %}\>\=
{% elif name == "__eq__" %}\=\=
{% elif name == "__ne__" %}\!\=
{% elif name == "__or__" %}\|
{% elif name == "__and__" %}\&
{% elif name == "__xor__" %}\^
{% elif name == "__invert__" %}\~
{% else %}{{ name }}
{% endif %}{{ underline }}

.. currentmodule:: {{ module }}

.. auto{{ objtype }}:: {{ objname }}
3 changes: 1 addition & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,8 @@
"sphinx.ext.autosummary",
"sphinx.ext.autosectionlabel",
"sphinx.ext.intersphinx",
# "sphinx.ext.viewcode",
"sphinx.ext.napoleon",
"sphinx_autosummary_accessors"
"sphinx_autosummary_accessors",
]

maximum_signature_line_length = 100
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/operators/aggregation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Aggregation functions take a ``partition_by`` and ``filter`` keyword argument. T
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

count
all
Expand Down
10 changes: 5 additions & 5 deletions docs/source/reference/operators/arithmetic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ Arithmetic
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

__add__
__floordiv__
__mod__
__sub__
__mul__
__truediv__
__neg__
__pos__
__sub__
__truediv__
__floordiv__
__mod__
2 changes: 1 addition & 1 deletion docs/source/reference/operators/comparison.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Comparison
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

__eq__
__ge__
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/operators/conditional_logic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Conditional Logic

.. autosummary::
:toctree: _generated/
:template: autosummary/short_title.rst
:template: short_title.rst
:nosignatures:

when
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/operators/datetime.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Datetime / Duration
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/accessor_method.rst
:template: accessor_method.rst

dt.day
dt.day_of_week
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/operators/horizontal_aggregation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Horizontal Aggregation
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

coalesce
count
Expand Down
3 changes: 3 additions & 0 deletions docs/source/reference/operators/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ Global functions
.. autosummary::
:nosignatures:

all
any
coalesce
count
dense_rank
Expand All @@ -121,4 +123,5 @@ Global functions
min
rank
row_number
sum
when
2 changes: 1 addition & 1 deletion docs/source/reference/operators/logical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Logical
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

__and__
__invert__
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/operators/numerical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Numerical
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

__pow__
abs
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/operators/sorting_markers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Sorting Markers
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

ascending
descending
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/operators/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ String
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/accessor_method.rst
:template: accessor_method.rst

str.contains
str.ends_with
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/operators/type_conversion.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Type Conversion

.. autosummary::
:toctree: _generated/
:template: autosummary/short_title.rst
:template: short_title.rst
:nosignatures:

lit
Expand Down
4 changes: 2 additions & 2 deletions docs/source/reference/operators/window.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ Window
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

shift

.. currentmodule:: pydiverse.transform
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

dense_rank
rank
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/targets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Backends / Export Targets
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

DuckDb
Pandas
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Types
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

Dtype
Bool
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/verbs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Verbs
.. autosummary::
:toctree: _generated/
:nosignatures:
:template: autosummary/short_title.rst
:template: short_title.rst

alias
arrange
Expand Down
144 changes: 144 additions & 0 deletions fuzz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# ruff: noqa: F405

from __future__ import annotations

import random
import string
from functools import partial

import numpy as np
import polars as pl
from polars.testing import assert_frame_equal

import pydiverse.transform as pdt
from pydiverse.transform._internal.ops import ops
from pydiverse.transform._internal.ops.op import Ftype, Operator
from pydiverse.transform._internal.ops.ops.markers import Marker
from pydiverse.transform._internal.ops.signature import Signature
from pydiverse.transform._internal.tree.col_expr import ColFn
from pydiverse.transform._internal.tree.types import Tvar
from pydiverse.transform.common import * # noqa: F403
from tests.util.backend import BACKEND_TABLES

# Shared numpy random generator used for both table and expression generation.
rng = np.random.default_rng()
# Alphabet the random string columns draw their characters from.
letters = list(string.printable)

# Column dtypes the fuzzer generates data and expressions for.
ALL_TYPES = [pdt.Int(), pdt.Float(), pdt.Bool(), pdt.String()]
# Controls expression depth: every nesting level in `gen_expr` advances its
# cutoff variable by an exponential draw with mean 1 / MEAN_HEIGHT.
MEAN_HEIGHT = 3

# Maps each dtype to a sampler; a sampler is called with a single positional
# argument (the number of values to draw) and returns a numpy array.
RNG_FNS = {
    pdt.Float(): rng.standard_normal,
    pdt.Int(): partial(rng.integers, -(1 << 13), 1 << 13),
    # `Generator.integers` excludes `high`, so the upper bound has to be 2 for
    # both False and True to be drawn (with 1 every value would be False).
    pdt.Bool(): partial(rng.integers, 0, 2, dtype=bool),
    # Random strings with a Poisson(10)-distributed length.
    pdt.String(): (
        lambda rows: np.array(
            ["".join(random.choices(letters, k=rng.poisson(10))) for _ in range(rows)]
        )
    ),
}


def gen_table(rows: int, types: dict[pdt.Dtype, int]) -> pl.DataFrame:
    """Build a polars DataFrame of randomly generated columns.

    For every dtype in ``RNG_FNS`` that is also a key of ``types``,
    ``types[dtype]`` columns are added. Each column is produced by calling the
    dtype's sampler with ``rows`` and is named
    ``"<lowercased dtype class name> #<1-based index>"``.

    :param rows: Argument passed to each sampler (number of values to draw).
    :param types: Number of columns to generate per dtype.
    :return: The data frame containing all generated columns.
    """
    table = pl.DataFrame()

    for dtype, sample in RNG_FNS.items():
        if dtype not in types:
            continue

        prefix = dtype.__class__.__name__.lower()
        new_columns = {}
        for idx in range(types[dtype]):
            # every column gets its own independent draw
            new_columns[f"{prefix} #{idx+1}"] = pl.lit(sample(rows))
        table = table.with_columns(**new_columns)

    return table


# For every supported dtype: the (operator, concrete signature) pairs that
# produce a value of that dtype. `gen_expr` samples from these lists.
ops_with_return_type: dict[pdt.Dtype, list[tuple[Operator, Signature]]] = {
    ty: [] for ty in ALL_TYPES
}

# Types a signature may mention in order to be considered by the fuzzer.
_FUZZABLE_TYPES = (*ALL_TYPES, Tvar("T"))

for op in vars(ops).values():
    # Only element-wise operators that are not markers are fuzzed.
    is_candidate = (
        isinstance(op, Operator)
        and op.ftype == Ftype.ELEMENT_WISE
        and not isinstance(op, Marker)
    )
    if not is_candidate:
        continue

    for sig in op.signatures:
        mentioned = (*sig.types, sig.return_type)
        if any(t not in _FUZZABLE_TYPES for t in mentioned):
            continue

        generic_return = isinstance(sig.return_type, Tvar)
        if not generic_return and not any(isinstance(p, Tvar) for p in sig.types):
            # Fully concrete signature: register it unchanged.
            ops_with_return_type[sig.return_type].append((op, sig))
            continue

        # Generic signature: instantiate the type variable with every
        # supported dtype and register each concrete variant.
        # NOTE(review): the variant is rebuilt from `sig.types` alone; confirm
        # that `Signature` keeps vararg information for such signatures.
        for concrete in ALL_TYPES:
            result_type = concrete if generic_return else sig.return_type
            param_types = [
                concrete if isinstance(p, Tvar) else p for p in sig.types
            ]
            ops_with_return_type[result_type].append(
                (op, Signature(*param_types, return_type=result_type))
            )


def gen_expr(
    dtype: pdt.Dtype, cols: dict[pdt.Dtype, list[str]], q: float = 0.0
) -> pdt.ColExpr:
    """Recursively generate a random expression of type ``dtype``.

    :param dtype: Type the generated expression must have.
    :param cols: Column names usable as leaves, keyed by dtype.
    :param q: Depth accumulator; once it exceeds 1 a column reference is
        returned instead of a further operator application. Every nested
        argument is generated with ``q`` increased by an exponential draw with
        mean ``1 / MEAN_HEIGHT``.
    :return: A Python scalar drawn from ``RNG_FNS`` if ``dtype`` is const, a
        column reference if ``q > 1``, otherwise a ``ColFn`` whose operator and
        signature are sampled from ``ops_with_return_type[dtype]``.
    """
    if dtype.const:
        sampler = RNG_FNS[dtype.without_const()]
        return sampler(1).item()

    if q > 1:
        # we always use C here so the expression does not have to be generated
        # for each backend
        col_name = rng.choice(cols[dtype])
        return C[col_name]

    op, sig = rng.choice(ops_with_return_type[dtype])
    assert isinstance(op, Operator)
    assert isinstance(sig, Signature)

    def child(param_type):
        # one level deeper: advance the depth accumulator, then recurse
        return gen_expr(param_type, cols, q + rng.exponential(1 / MEAN_HEIGHT))

    # For a vararg signature the last entry of `sig.types` is the repeated
    # parameter; it is handled separately below.
    n_fixed = len(sig.types) - sig.is_vararg
    args = [child(param) for param in sig.types[:n_fixed]]

    if sig.is_vararg:
        nargs = int(rng.normal(2.5, 1 / 1.5))
        args.extend(child(sig.types[-1]) for _ in range(nargs))

    return ColFn(op, *args)


# --- Interactive driver ------------------------------------------------------
# Reads the run parameters from stdin, builds the same random table for every
# backend and checks that all backends agree with polars on randomly generated
# expressions.
it = int(input("number of iterations: "))
rows = int(input("number of rows: "))
seed = int(input("seed: "))

# Seed every source of randomness *before* the table is generated, so a run can
# be reproduced from its seed. RNG_FNS holds bound methods of the module-level
# `rng` object, hence that generator is re-seeded in place instead of rebinding
# the name to a fresh generator (which the samplers would never see). The
# string sampler additionally uses the stdlib `random` module.
rng.bit_generator.state = np.random.default_rng(seed).bit_generator.state
random.seed(seed)
NUM_COLS_PER_TYPE = 5

df = gen_table(rows, {dtype: NUM_COLS_PER_TYPE for dtype in ALL_TYPES})


tables = {backend: fn(df, "t") for backend, fn in BACKEND_TABLES.items()}
# Column names usable for each requested dtype, taken from the polars table.
cols = {
    dtype: [col.name for col in tables["polars"] if col.dtype() <= dtype]
    for dtype in ALL_TYPES
}

for _ in range(it):
    expr = gen_expr(rng.choice(ALL_TYPES), cols)
    results = {
        backend: table >> mutate(y=expr) >> select(C.y) >> export(Polars())
        for backend, table in tables.items()
    }
    expected = results["polars"]
    # Iterate over items(): iterating the dict itself would yield only the
    # backend names and break the tuple unpacking.
    for backend, res in results.items():
        try:
            assert_frame_equal(expected, res)
        except AssertionError as err:
            # Attach the failing backend and expression to the mismatch.
            raise AssertionError(
                f"backend {backend!r} disagrees with polars for expression: {expr!r}"
            ) from err
14 changes: 12 additions & 2 deletions src/pydiverse/transform/_internal/backend/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
LiteralCol,
Order,
)
from pydiverse.transform._internal.tree.types import Bool, Datetime, Dtype, String
from pydiverse.transform._internal.tree.types import Bool, Datetime, Dtype, Int, String


class MsSqlImpl(SqlImpl):
Expand Down Expand Up @@ -132,7 +132,6 @@ def convert_bool_bit(expr: ColExpr | Order, wants_bool_as_bit: bool) -> ColExpr

elif isinstance(expr, ColFn):
wants_args_bool_as_bit = expr.op not in (
ops.bool_xor,
ops.bool_and,
ops.bool_or,
ops.bool_invert,
Expand Down Expand Up @@ -302,3 +301,14 @@ def _is_nan(x):
# Implementation registered for `ops.is_not_nan`: ignores its argument and
# unconditionally returns True.
# NOTE(review): presumably this backend cannot store NaN values — confirm.
@impl(ops.is_not_nan)
def _is_not_nan(x):
return True

@impl(ops.pow)
def _pow(x, y):
    """Translate ``ops.pow`` to a SQL ``POWER(x, y)`` call.

    The result is typed as ``sqa.Numeric`` when both operand types are
    instances of ``sqa.Numeric``; otherwise it is typed as ``sqa.Double``.
    """
    both_numeric = isinstance(x.type, sqa.Numeric) and isinstance(
        y.type, sqa.Numeric
    )
    result_type = sqa.Numeric() if both_numeric else sqa.Double()
    return sqa.func.POWER(x, y, type_=result_type)

@impl(ops.pow, Int(), Int())
def _pow_int(x, y):
    """Translate ``ops.pow`` for two ``Int`` operands.

    The base is cast to ``sqa.Double`` before it is passed to ``POWER``.
    """
    base_as_double = sqa.cast(x, type_=sqa.Double())
    return sqa.func.POWER(base_as_double, y)
Loading

0 comments on commit dfb863e

Please sign in to comment.