Skip to content

Commit

Permalink
Revert Cython upgrade (pandas-dev#54497)
Browse files Browse the repository at this point in the history
* Revert "CLN: Cython 3 cleanups (pandas-dev#54482)"

This reverts commit a936863.

* Revert "DEPS: Bump cython 3.0 (pandas-dev#54335)"

This reverts commit 4cf63ea.
  • Loading branch information
phofl authored Aug 11, 2023
1 parent 0582e35 commit 3edb82b
Show file tree
Hide file tree
Showing 49 changed files with 412 additions and 171 deletions.
2 changes: 1 addition & 1 deletion asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
"matrix": {
"Cython": ["3.0.0"],
"Cython": ["0.29.33"],
"matplotlib": [],
"sqlalchemy": [],
"scipy": [],
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-310.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-311-downstream_compat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-311-pyarrownightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:
# build dependencies
- versioneer[toml]
- meson[ninja]=1.0.1
- cython>=3.0.0
- cython>=0.29.33
- meson-python=0.13.1

# test dependencies
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-311.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-39-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-pypy-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/circle-310-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
1 change: 0 additions & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,6 @@ Other
- Bug in :meth:`Series.memory_usage` where ``deep=True`` raised an error with a Series of objects and the returned value was incorrect, as it did not take GC corrections into account (:issue:`51858`)
- Bug in :meth:`period_range` where the default behavior when ``freq`` was not passed as an argument was incorrect (:issue:`53687`)
- Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`)
- The minimum version of Cython needed to compile pandas is now ``3.0.0`` (:issue:`54335`)

.. ---------------------------------------------------------------------------
.. _whatsnew_210.contributors:
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython=3.0.0
- cython=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
117 changes: 109 additions & 8 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,8 @@ def rank_1d(

N = len(values)
if labels is not None:
assert len(labels) == N
# TODO(cython3): cast won't be necessary (#2992)
assert <Py_ssize_t>len(labels) == N
out = np.empty(N)
grp_sizes = np.ones(N, dtype=np.int64)

Expand Down Expand Up @@ -1087,7 +1088,8 @@ cdef void rank_sorted_1d(
float64_t[::1] out,
int64_t[::1] grp_sizes,
const intp_t[:] sort_indexer,
const numeric_object_t[:] masked_vals,
# TODO(cython3): make const (https://github.com/cython/cython/issues/3222)
numeric_object_t[:] masked_vals,
const uint8_t[:] mask,
bint check_mask,
Py_ssize_t N,
Expand Down Expand Up @@ -1142,7 +1144,108 @@ cdef void rank_sorted_1d(
# array that we sorted previously, which gives us the location of
# that sorted value for retrieval back from the original
# values / masked_vals arrays
with gil(numeric_object_t is object):
# TODO(cython3): de-duplicate once cython supports conditional nogil
if numeric_object_t is object:
with gil:
for i in range(N):
at_end = i == N - 1

# dups and sum_ranks will be incremented each loop where
# the value / group remains the same, and should be reset
# when either of those change. Used to calculate tiebreakers
dups += 1
sum_ranks += i - grp_start + 1

next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
masked_vals[sort_indexer[i+1]])

# We'll need this check later anyway to determine group size, so just
# compute it here since shortcircuiting won't help
group_changed = at_end or (check_labels and
(labels[sort_indexer[i]]
!= labels[sort_indexer[i+1]]))

# Update out only when there is a transition of values or labels.
# When a new value or group is encountered, go back #dups steps
# (the number of occurrences of the current value) and assign the ranks
# based on the starting index of the current group (grp_start)
# and the current index
if (next_val_diff or group_changed or (check_mask and
(mask[sort_indexer[i]]
^ mask[sort_indexer[i+1]]))):

# If keep_na, check for missing values and assign back
# to the result where appropriate
if keep_na and check_mask and mask[sort_indexer[i]]:
grp_na_count = dups
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = NaN
elif tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = sum_ranks / <float64_t>dups
elif tiebreak == TIEBREAK_MIN:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = i - grp_start - dups + 2
elif tiebreak == TIEBREAK_MAX:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = i - grp_start + 1

# With n as the previous rank in the group and m as the number
# of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
# then rankings should be n + 1, n + 2 ... n + m
elif tiebreak == TIEBREAK_FIRST:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = j + 1 - grp_start

# If TIEBREAK_FIRST and descending, the ranking should be
# n + m, n + (m - 1) ... n + 1. This is equivalent to
# (i - dups + 1) + (i - j + 1) - grp_start
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
elif tiebreak == TIEBREAK_DENSE:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = grp_vals_seen

# Look forward to the next value (using the sorting in
# sort_indexer). If the value does not equal the current
# value then we need to reset the dups and sum_ranks, knowing
# that a new value is coming up. The conditional also needs
# to handle nan equality and the end of iteration. If group
# changes we do not record seeing a new value in the group
if not group_changed and (next_val_diff or (check_mask and
(mask[sort_indexer[i]]
^ mask[sort_indexer[i+1]]))):
dups = sum_ranks = 0
grp_vals_seen += 1

# Similar to the previous conditional, check now if we are
# moving to a new group. If so, keep track of the index where
# the new group occurs, so the tiebreaker calculations can
# decrement that from their position. Fill in the size of each
# group encountered (used by pct calculations later). Also be
# sure to reset any of the items helping to calculate dups
if group_changed:

# If not dense tiebreak, group size used to compute
# percentile will be # of non-null elements in group
if tiebreak != TIEBREAK_DENSE:
grp_size = i - grp_start + 1 - grp_na_count

# Otherwise, it will be the number of distinct values
# in the group, subtracting 1 if NaNs are present
# since that is a distinct value we shouldn't count
else:
grp_size = grp_vals_seen - (grp_na_count > 0)

for j in range(grp_start, i + 1):
grp_sizes[sort_indexer[j]] = grp_size

dups = sum_ranks = 0
grp_na_count = 0
grp_start = i + 1
grp_vals_seen = 1
else:
for i in range(N):
at_end = i == N - 1

Expand Down Expand Up @@ -1371,18 +1474,16 @@ ctypedef fused out_t:
@cython.boundscheck(False)
@cython.wraparound(False)
def diff_2d(
# TODO: cython bug (post Cython 3) prevents update to "const diff_t[:, :] arr"
ndarray[diff_t, ndim=2] arr,
out_t[:, :] out,
ndarray[diff_t, ndim=2] arr, # TODO(cython3) update to "const diff_t[:, :] arr"
ndarray[out_t, ndim=2] out,
Py_ssize_t periods,
int axis,
bint datetimelike=False,
):
cdef:
Py_ssize_t i, j, sx, sy, start, stop
bint f_contig = arr.flags.f_contiguous
# TODO: change to this when arr becomes a memoryview
# bint f_contig = arr.is_f_contig()
# bint f_contig = arr.is_f_contig() # TODO(cython3)
diff_t left, right

# Disable for unsupported dtype combinations,
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/arrays.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class NDArrayBacked:
def size(self) -> int: ...
@property
def nbytes(self) -> int: ...
def copy(self, order=...): ...
def copy(self): ...
def delete(self, loc, axis=...): ...
def swapaxes(self, axis1, axis2): ...
def repeat(self, repeats: int | Sequence[int], axis: int | None = ...): ...
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/arrays.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ cdef class NDArrayBacked:

@property
def size(self) -> int:
return self._ndarray.size
# TODO(cython3): use self._ndarray.size
return cnp.PyArray_SIZE(self._ndarray)

@property
def nbytes(self) -> int:
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/groupby.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def group_fillna_indexer(
labels: np.ndarray, # ndarray[int64_t]
sorted_labels: npt.NDArray[np.intp],
mask: npt.NDArray[np.uint8],
direction: Literal["ffill", "bfill"],
limit: int, # int64_t
dropna: bool,
) -> None: ...
Expand All @@ -54,7 +55,7 @@ def group_any_all(
mask: np.ndarray, # const uint8_t[::1]
val_test: Literal["any", "all"],
skipna: bool,
result_mask: np.ndarray | None,
nullable: bool,
) -> None: ...
def group_sum(
out: np.ndarray, # complexfloatingintuint_t[:, ::1]
Expand Down
Loading

0 comments on commit 3edb82b

Please sign in to comment.