Skip to content

Commit

Permalink
ENH: Reflect changes from numpy namespace refactor Part 3 (pandas-d…
Browse files Browse the repository at this point in the history
…ev#54579)

* ENH: Reflect changes from numpy namespace refactor part 3

* ENH: Move to fixtures for dtype access
  • Loading branch information
mtsokol authored Aug 16, 2023
1 parent b6333e6 commit ff86177
Show file tree
Hide file tree
Showing 102 changed files with 280 additions and 283 deletions.
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/algos/isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def setup(self, series_type, vals_type):
elif series_type == "long":
ser_vals = np.arange(N_many)
elif series_type == "long_floats":
ser_vals = np.arange(N_many, dtype=np.float_)
ser_vals = np.arange(N_many, dtype=np.float64)

self.series = Series(ser_vals).astype(object)

Expand All @@ -258,7 +258,7 @@ def setup(self, series_type, vals_type):
elif vals_type == "long":
values = np.arange(N_many)
elif vals_type == "long_floats":
values = np.arange(N_many, dtype=np.float_)
values = np.arange(N_many, dtype=np.float64)

self.values = values.astype(object)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ methods.
.. ipython:: python
frame = pd.DataFrame(
{"col1": ["A", "B", np.NaN, "C", "D"], "col2": ["F", np.NaN, "G", "H", "I"]}
{"col1": ["A", "B", np.nan, "C", "D"], "col2": ["F", np.nan, "G", "H", "I"]}
)
frame
Expand Down
4 changes: 2 additions & 2 deletions doc/source/user_guide/enhancingperf.rst
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ can be improved by passing an ``np.ndarray``.
...: return s * dx
...: cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b,
...: np.ndarray col_N):
...: assert (col_a.dtype == np.float_
...: and col_b.dtype == np.float_ and col_N.dtype == np.int_)
...: assert (col_a.dtype == np.float64
...: and col_b.dtype == np.float64 and col_N.dtype == np.int_)
...: cdef Py_ssize_t i, n = len(col_N)
...: assert (len(col_a) == len(col_b) == n)
...: cdef np.ndarray[double] res = np.empty(n)
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/gotchas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ present in the more domain-specific statistical programming language `R
``numpy.unsignedinteger`` | ``uint8, uint16, uint32, uint64``
``numpy.object_`` | ``object_``
``numpy.bool_`` | ``bool_``
``numpy.character`` | ``string_, unicode_``
``numpy.character`` | ``bytes_, str_``

The R language, by contrast, only has a handful of built-in data types:
``integer``, ``numeric`` (floating-point), ``character``, and
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4881,7 +4881,7 @@ unspecified columns of the given DataFrame. The argument ``selector``
defines which table is the selector table (which you can make queries from).
The argument ``dropna`` will drop rows from the input ``DataFrame`` to ensure
tables are synchronized. This means that if a row for one of the tables
being written to is entirely ``np.NaN``, that row will be dropped from all tables.
being written to is entirely ``np.nan``, that row will be dropped from all tables.

If ``dropna`` is False, **THE USER IS RESPONSIBLE FOR SYNCHRONIZING THE TABLES**.
Remember that entirely ``np.nan`` rows are not written to the HDFStore, so if
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ You must pass in the ``line_terminator`` explicitly, even in this case.
.. _whatsnew_0240.bug_fixes.nan_with_str_dtype:

Proper handling of ``np.NaN`` in a string data-typed column with the Python engine
Proper handling of ``np.nan`` in a string data-typed column with the Python engine
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

There was a bug in :func:`read_excel` and :func:`read_csv` with the Python
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ from pandas._libs.util cimport get_nat

cdef:
float64_t FP_ERR = 1e-13
float64_t NaN = <float64_t>np.NaN
float64_t NaN = <float64_t>np.nan
int64_t NPY_NAT = get_nat()


Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ from pandas._libs.missing cimport checknull

cdef int64_t NPY_NAT = util.get_nat()

cdef float64_t NaN = <float64_t>np.NaN
cdef float64_t NaN = <float64_t>np.nan

cdef enum InterpolationEnumType:
INTERPOLATION_LINEAR,
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ cdef:
object oINT64_MIN = <int64_t>INT64_MIN
object oUINT64_MAX = <uint64_t>UINT64_MAX

float64_t NaN = <float64_t>np.NaN
float64_t NaN = <float64_t>np.nan

# python-visible
i8max = <int64_t>INT64_MAX
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ cdef inline bint is_integer_object(object obj) noexcept nogil:

cdef inline bint is_float_object(object obj) noexcept nogil:
"""
Cython equivalent of `isinstance(val, (float, np.float_))`
Cython equivalent of `isinstance(val, (float, np.float64))`
Parameters
----------
Expand All @@ -91,7 +91,7 @@ cdef inline bint is_float_object(object obj) noexcept nogil:

cdef inline bint is_complex_object(object obj) noexcept nogil:
"""
Cython equivalent of `isinstance(val, (complex, np.complex_))`
Cython equivalent of `isinstance(val, (complex, np.complex128))`
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ cdef:
float32_t MAXfloat32 = np.inf
float64_t MAXfloat64 = np.inf

float64_t NaN = <float64_t>np.NaN
float64_t NaN = <float64_t>np.nan

cdef bint is_monotonic_increasing_start_end_bounds(
ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
Expand Down
2 changes: 1 addition & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ def series_with_multilevel_index() -> Series:
index = MultiIndex.from_tuples(tuples)
data = np.random.default_rng(2).standard_normal(8)
ser = Series(data, index=index)
ser.iloc[3] = np.NaN
ser.iloc[3] = np.nan
return ser


Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2109,7 +2109,7 @@ def _codes(self) -> np.ndarray:

def _box_func(self, i: int):
if i == -1:
return np.NaN
return np.nan
return self.categories[i]

def _unbox_scalar(self, key) -> int:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/computation/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,8 +537,8 @@ def __init__(self, lhs, rhs) -> None:
)

# do not upcast float32s to float64 unnecessarily
acceptable_dtypes = [np.float32, np.float_]
_cast_inplace(com.flatten(self), acceptable_dtypes, np.float_)
acceptable_dtypes = [np.float32, np.float64]
_cast_inplace(com.flatten(self), acceptable_dtypes, np.float64)


UNARY_OPS_SYMS = ("+", "-", "~", "not")
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -850,7 +850,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
dtype = np.dtype(np.float64)

elif is_complex(val):
dtype = np.dtype(np.complex_)
dtype = np.dtype(np.complex128)

if lib.is_period(val):
dtype = PeriodDtype(freq=val.freq)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1351,7 +1351,7 @@ def is_complex_dtype(arr_or_dtype) -> bool:
False
>>> is_complex_dtype(int)
False
>>> is_complex_dtype(np.complex_)
>>> is_complex_dtype(np.complex128)
True
>>> is_complex_dtype(np.array(['a', 'b']))
False
Expand Down
16 changes: 8 additions & 8 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5307,7 +5307,7 @@ def reindex(
level : int or name
Broadcast across a level, matching Index values on the
passed MultiIndex level.
fill_value : scalar, default np.NaN
fill_value : scalar, default np.nan
Value to use for missing values. Defaults to NaN, but can be any
"compatible" value.
limit : int, default None
Expand Down Expand Up @@ -7376,7 +7376,7 @@ def ffill(
2 3.0 4.0 NaN 1.0
3 3.0 3.0 NaN 4.0

>>> ser = pd.Series([1, np.NaN, 2, 3])
>>> ser = pd.Series([1, np.nan, 2, 3])
>>> ser.ffill()
0 1.0
1 1.0
Expand Down Expand Up @@ -8375,7 +8375,7 @@ def isna(self) -> Self:
--------
Show which entries in a DataFrame are NA.

>>> df = pd.DataFrame(dict(age=[5, 6, np.NaN],
>>> df = pd.DataFrame(dict(age=[5, 6, np.nan],
... born=[pd.NaT, pd.Timestamp('1939-05-27'),
... pd.Timestamp('1940-04-25')],
... name=['Alfred', 'Batman', ''],
Expand All @@ -8394,7 +8394,7 @@ def isna(self) -> Self:

Show which entries in a Series are NA.

>>> ser = pd.Series([5, 6, np.NaN])
>>> ser = pd.Series([5, 6, np.nan])
>>> ser
0 5.0
1 6.0
Expand Down Expand Up @@ -8442,7 +8442,7 @@ def notna(self) -> Self:
--------
Show which entries in a DataFrame are not NA.

>>> df = pd.DataFrame(dict(age=[5, 6, np.NaN],
>>> df = pd.DataFrame(dict(age=[5, 6, np.nan],
... born=[pd.NaT, pd.Timestamp('1939-05-27'),
... pd.Timestamp('1940-04-25')],
... name=['Alfred', 'Batman', ''],
Expand All @@ -8461,7 +8461,7 @@ def notna(self) -> Self:

Show which entries in a Series are not NA.

>>> ser = pd.Series([5, 6, np.NaN])
>>> ser = pd.Series([5, 6, np.nan])
>>> ser
0 5.0
1 6.0
Expand Down Expand Up @@ -8628,7 +8628,7 @@ def clip(

Clips using specific lower threshold per column element, with missing values:

>>> t = pd.Series([2, -4, np.NaN, 6, 3])
>>> t = pd.Series([2, -4, np.nan, 6, 3])
>>> t
0 2.0
1 -4.0
Expand Down Expand Up @@ -9828,7 +9828,7 @@ def align(
copy : bool, default True
Always returns new objects. If copy=False and no reindexing is
required then original objects are returned.
fill_value : scalar, default np.NaN
fill_value : scalar, default np.nan
Value to use for missing values. Defaults to NaN, but can be any
"compatible" value.
method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -5418,7 +5418,7 @@ def _mask_selected_obj(self, mask: npt.NDArray[np.bool_]) -> NDFrameT:
def _reindex_output(
self,
output: OutputFrameOrSeries,
fill_value: Scalar = np.NaN,
fill_value: Scalar = np.nan,
qs: npt.NDArray[np.float64] | None = None,
) -> OutputFrameOrSeries:
"""
Expand All @@ -5436,7 +5436,7 @@ def _reindex_output(
----------
output : Series or DataFrame
Object resulting from grouping and applying an operation.
fill_value : scalar, default np.NaN
fill_value : scalar, default np.nan
Value to use for unobserved categories if self.observed is False.
qs : np.ndarray[float64] or None, default None
quantile values, only relevant for quantile.
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2837,7 +2837,7 @@ def isna(self) -> npt.NDArray[np.bool_]:
Show which entries in a pandas.Index are NA. The result is an
array.

>>> idx = pd.Index([5.2, 6.0, np.NaN])
>>> idx = pd.Index([5.2, 6.0, np.nan])
>>> idx
Index([5.2, 6.0, nan], dtype='float64')
>>> idx.isna()
Expand Down Expand Up @@ -2893,7 +2893,7 @@ def notna(self) -> npt.NDArray[np.bool_]:
Show which entries in an Index are not NA. The result is an
array.

>>> idx = pd.Index([5.2, 6.0, np.NaN])
>>> idx = pd.Index([5.2, 6.0, np.nan])
>>> idx
Index([5.2, 6.0, nan], dtype='float64')
>>> idx.notna()
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def _get_next_label(label):
elif is_integer_dtype(dtype):
return label + 1
elif is_float_dtype(dtype):
return np.nextafter(label, np.infty)
return np.nextafter(label, np.inf)
else:
raise TypeError(f"cannot determine next label for type {repr(type(label))}")

Expand All @@ -142,7 +142,7 @@ def _get_prev_label(label):
elif is_integer_dtype(dtype):
return label - 1
elif is_float_dtype(dtype):
return np.nextafter(label, -np.infty)
return np.nextafter(label, -np.inf)
else:
raise TypeError(f"cannot determine next label for type {repr(type(label))}")

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5586,7 +5586,7 @@ def dropna(
Empty strings are not considered NA values. ``None`` is considered an
NA value.

>>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
>>> ser = pd.Series([np.nan, 2, pd.NaT, '', None, 'I stay'])
>>> ser
0 NaN
1 2
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1215,7 +1215,7 @@ def contains(
--------
Returning a Series of booleans using only a literal pattern.

>>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN])
>>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.nan])
>>> s1.str.contains('og', regex=False)
0 False
1 True
Expand All @@ -1226,7 +1226,7 @@ def contains(

Returning an Index of booleans using only a literal pattern.

>>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN])
>>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.nan])
>>> ind.str.contains('23', regex=False)
Index([False, False, False, True, nan], dtype='object')

Expand Down Expand Up @@ -3500,7 +3500,7 @@ def str_extractall(arr, pat, flags: int = 0) -> DataFrame:
for match_i, match_tuple in enumerate(regex.findall(subject)):
if isinstance(match_tuple, str):
match_tuple = (match_tuple,)
na_tuple = [np.NaN if group == "" else group for group in match_tuple]
na_tuple = [np.nan if group == "" else group for group in match_tuple]
match_list.append(na_tuple)
result_key = tuple(subject_key + (match_i,))
index_list.append(result_key)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1715,7 +1715,7 @@ def format_percentiles(
"""
percentiles = np.asarray(percentiles)

# It checks for np.NaN as well
# It checks for np.nan as well
if (
not is_numeric_dtype(percentiles)
or not np.all(percentiles >= 0)
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,15 +637,15 @@ def test_apply_with_byte_string():
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("val", ["asd", 12, None, np.NaN])
@pytest.mark.parametrize("val", ["asd", 12, None, np.nan])
def test_apply_category_equalness(val):
# Check if categorical comparisons on apply, GH 21239
df_values = ["asd", None, 12, "asd", "cde", np.NaN]
df_values = ["asd", None, 12, "asd", "cde", np.nan]
df = DataFrame({"a": df_values}, dtype="category")

result = df.a.apply(lambda x: x == val)
expected = Series(
[np.NaN if pd.isnull(x) else x == val for x in df_values], name="a"
[np.nan if pd.isnull(x) else x == val for x in df_values], name="a"
)
tm.assert_series_equal(result, expected)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def test_apply_categorical(by_row):
assert result.dtype == object


@pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]])
@pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
def test_apply_categorical_with_nan_values(series, by_row):
# GH 20714 bug fixed in: GH 24275
s = Series(series, dtype="category")
Expand All @@ -254,7 +254,7 @@ def test_apply_categorical_with_nan_values(series, by_row):

result = s.apply(lambda x: x.split("-")[0], by_row=by_row)
result = result.astype(object)
expected = Series(["1", "1", np.NaN], dtype="category")
expected = Series(["1", "1", np.nan], dtype="category")
expected = expected.astype(object)
tm.assert_series_equal(result, expected)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def test_min_max_reduce(self):
@pytest.mark.parametrize(
"categories,expected",
[
(list("ABC"), np.NaN),
([1, 2, 3], np.NaN),
(list("ABC"), np.nan),
([1, 2, 3], np.nan),
pytest.param(
Series(date_range("2020-01-01", periods=3), dtype="category"),
NaT,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def test_set_na(self, left_right_dtypes):
# GH#45484 TypeError, not ValueError, matches what we get with
# non-NA un-holdable value.
with pytest.raises(TypeError, match=msg):
result[0] = np.NaN
result[0] = np.nan
return

result[0] = np.nan
Expand Down
Loading

0 comments on commit ff86177

Please sign in to comment.