diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 90627216a1354..1bda47e0631a0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: hooks: - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.282 + rev: v0.0.284 hooks: - id: ruff args: [--exit-non-zero-on-fix] diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 7babce46a3977..3a6db34b0e8b5 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -479,7 +479,7 @@ def __contains__(self, item: object) -> bool | np.bool_: return (item == self).any() # type: ignore[union-attr] # error: Signature of "__eq__" incompatible with supertype "object" - def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override] + def __eq__(self, other: object) -> ArrayLike: # type: ignore[override] """ Return for `self == other` (element-wise equality). """ @@ -492,11 +492,12 @@ def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override] raise AbstractMethodError(self) # error: Signature of "__ne__" incompatible with supertype "object" - def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override] + def __ne__(self, other: object) -> ArrayLike: # type: ignore[override] """ Return for `self != other` (element-wise in-equality). """ - return ~(self == other) + # error: Unsupported operand type for ~ ("ExtensionArray") + return ~(self == other) # type: ignore[operator] def to_numpy( self, diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 25f1c2ec6ce4f..d74fb3fc99e22 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -574,7 +574,7 @@ def _str_map( arr = np.asarray(self) if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: type[IntegerArray] | type[BooleanArray] + constructor: type[IntegerArray | BooleanArray] if is_integer_dtype(dtype): constructor = IntegerArray else: diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 4a70fcf6b5a93..76f0d0699b272 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -262,7 +262,7 @@ def _str_map( arr = np.asarray(self) if is_integer_dtype(dtype) or is_bool_dtype(dtype): - constructor: type[IntegerArray] | type[BooleanArray] + constructor: type[IntegerArray | BooleanArray] if is_integer_dtype(dtype): constructor = IntegerArray else: diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index bc776434b2e6e..fdd720a526de2 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -110,7 +110,7 @@ class property**. def __str__(self) -> str: return self.name - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: """ Check whether 'other' is equal to self. @@ -144,7 +144,7 @@ def __hash__(self) -> int: # we need to avoid that and thus use hash function with old behavior return object_hash(tuple(getattr(self, attr) for attr in self._metadata)) - def __ne__(self, other: Any) -> bool: + def __ne__(self, other: object) -> bool: return not self.__eq__(other) @property @@ -422,7 +422,7 @@ def __repr__(self) -> str: def __str__(self) -> str: return self.name - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if isinstance(other, str) and other == self.name: return True return super().__eq__(other) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 53f0fb2843653..d839d1b55831d 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -388,7 +388,7 @@ def __hash__(self) -> int: # We *do* want to include the real self.ordered here return int(self._hash_categories) - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: """ Rules for CDT equality: 1) Any CDT is equal to the string 'category' @@ -860,7 +860,7 @@ def __hash__(self) -> int: # TODO: update this. return hash(str(self)) - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if isinstance(other, str): if other.startswith("M8["): other = f"datetime64[{other[3:]}" @@ -1052,13 +1052,13 @@ def name(self) -> str_type: def na_value(self) -> NaTType: return NaT - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if isinstance(other, str): return other in [self.name, self.name.title()] return super().__eq__(other) - def __ne__(self, other: Any) -> bool: + def __ne__(self, other: object) -> bool: return not self.__eq__(other) @classmethod @@ -1301,7 +1301,7 @@ def __hash__(self) -> int: # make myself hashable return hash(str(self)) - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if isinstance(other, str): return other.lower() in (self.name.lower(), str(self).lower()) elif not isinstance(other, IntervalDtype): @@ -1647,7 +1647,7 @@ def __hash__(self) -> int: # __eq__, so we explicitly do it here. return super().__hash__() - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: # We have to override __eq__ to handle NA values in _metadata. # The base class does simple == checks, which fail for NA. if isinstance(other, str): @@ -2062,7 +2062,7 @@ def __hash__(self) -> int: # make myself hashable return hash(str(self)) - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if not isinstance(other, type(self)): return super().__eq__(other) return self.pyarrow_dtype == other.pyarrow_dtype diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9939daadd9237..797b2f4ddb45e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3416,7 +3416,7 @@ def to_xml( lxml = import_optional_dependency("lxml.etree", errors="ignore") - TreeBuilder: type[EtreeXMLFormatter] | type[LxmlXMLFormatter] + TreeBuilder: type[EtreeXMLFormatter | LxmlXMLFormatter] if parser == "lxml": if lxml is not None: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 288fd35892fd0..dbb7cb97d1d6f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -458,7 +458,7 @@ def _can_hold_strings(self) -> bool: @property def _engine_type( self, - ) -> type[libindex.IndexEngine] | type[libindex.ExtensionEngine]: + ) -> type[libindex.IndexEngine | libindex.ExtensionEngine]: return self._engine_types.get(self.dtype, libindex.ObjectEngine) # whether we support partial string indexing. Overridden @@ -481,7 +481,7 @@ def __new__( copy: bool = False, name=None, tupleize_cols: bool = True, - ) -> Index: + ) -> Self: from pandas.core.indexes.range import RangeIndex name = maybe_extract_name(name, data, cls) @@ -500,7 +500,9 @@ def __new__( result = RangeIndex(start=data, copy=copy, name=name) if dtype is not None: return result.astype(dtype, copy=False) - return result + # error: Incompatible return value type (got "MultiIndex", + # expected "Self") + return result # type: ignore[return-value] elif is_ea_or_datetimelike_dtype(dtype): # non-EA dtype indexes have special casting logic, so we punt here @@ -523,7 +525,7 @@ def __new__( elif is_scalar(data): raise cls._raise_scalar_data_error(data) elif hasattr(data, "__array__"): - return Index(np.asarray(data), dtype=dtype, copy=copy, name=name) + return cls(np.asarray(data), dtype=dtype, copy=copy, name=name) elif not is_list_like(data) and not isinstance(data, memoryview): # 2022-11-16 the memoryview check is only necessary on some CI # builds, not clear why @@ -540,7 +542,11 @@ def __new__( # 10697 from pandas.core.indexes.multi import MultiIndex - return MultiIndex.from_tuples(data, names=name) + # error: Incompatible return value type (got "MultiIndex", + # expected "Self") + return MultiIndex.from_tuples( # type: ignore[return-value] + data, names=name + ) # other iterable of some kind if not isinstance(data, (list, tuple)): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index e189d9216d5e3..bc6fb61700aec 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -46,6 +46,7 @@ from pandas._typing import ( Dtype, DtypeObj, + Self, npt, ) @@ -210,7 +211,7 @@ def __new__( dtype: Dtype | None = None, copy: bool = False, name: Hashable | None = None, - ) -> CategoricalIndex: + ) -> Self: name = maybe_extract_name(name, data, cls) if is_scalar(data): diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 3b8aefdbeb879..2d1d2a81a8a71 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -8,10 +8,7 @@ """ from __future__ import annotations -from typing import ( - Any, - NoReturn, -) +from typing import NoReturn from pandas.core.base import PandasObject @@ -80,7 +77,7 @@ def __radd__(self, other): other = list(other) return type(self)(other + list(self)) - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if isinstance(other, (tuple, FrozenList)): other = list(other) return super().__eq__(other) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index f915c08bb8294..302d8fdb353fd 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -93,6 +93,7 @@ Dtype, DtypeObj, IntervalClosedType, + Self, npt, ) _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -225,7 +226,7 @@ def __new__( copy: bool = False, name: Hashable | None = None, verify_integrity: bool = True, - ) -> IntervalIndex: + ) -> Self: name = maybe_extract_name(name, data, cls) with rewrite_exception("IntervalArray", cls.__name__): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 33eb411374e67..ae0ab66dd5f3e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -38,6 +38,7 @@ IgnoreRaise, IndexLabel, Scalar, + Self, Shape, npt, ) @@ -330,7 +331,7 @@ def __new__( copy: bool = False, name=None, verify_integrity: bool = True, - ) -> MultiIndex: + ) -> Self: # compat with Index if name is not None: names = name diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 1e8a3851b406e..9576997af6641 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -139,12 +139,12 @@ def __new__( dtype: Dtype | None = None, copy: bool = False, name: Hashable | None = None, - ) -> RangeIndex: + ) -> Self: cls._validate_dtype(dtype) name = maybe_extract_name(name, start, cls) # RangeIndex - if isinstance(start, RangeIndex): + if isinstance(start, cls): return start.copy(name=name) elif isinstance(start, range): return cls._simple_new(start, name=name) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index ac306229f3111..e23bf6269893d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -133,7 +133,7 @@ class FulldatetimeDict(YearMonthDayDict, total=False): def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str | None: # Try to guess the format based on the first non-NaN element, return None if can't if (first_non_null := tslib.first_non_null(arr)) != -1: - if type(first_non_nan_element := arr[first_non_null]) is str: + if type(first_non_nan_element := arr[first_non_null]) is str: # noqa: E721 # GH#32264 np.str_ object guessed_format = guess_datetime_format( first_non_nan_element, dayfirst=dayfirst diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index a50dbeb110bff..7a445ad7ac2b2 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -291,7 +291,7 @@ def to_numeric( IntegerArray, ) - klass: type[IntegerArray] | type[BooleanArray] | type[FloatingArray] + klass: type[IntegerArray | BooleanArray | FloatingArray] if is_integer_dtype(data.dtype): klass = IntegerArray elif is_bool_dtype(data.dtype): diff --git a/pandas/io/common.py b/pandas/io/common.py index 6be6f3f4300e4..f255ea8197304 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -30,6 +30,7 @@ import tarfile from typing import ( IO, + TYPE_CHECKING, Any, AnyStr, DefaultDict, @@ -51,13 +52,7 @@ from pandas._typing import ( BaseBuffer, - CompressionDict, - CompressionOptions, - FilePath, - ReadBuffer, ReadCsvBuffer, - StorageOptions, - WriteBuffer, ) from pandas.compat import ( get_bz2_file, @@ -84,6 +79,19 @@ BaseBufferT = TypeVar("BaseBufferT", bound=BaseBuffer) +if TYPE_CHECKING: + from types import TracebackType + + from pandas._typing import ( + CompressionDict, + CompressionOptions, + FilePath, + ReadBuffer, + StorageOptions, + WriteBuffer, + ) + + @dataclasses.dataclass class IOArgs: """ @@ -138,7 +146,12 @@ def close(self) -> None: def __enter__(self) -> IOHandles[AnyStr]: return self - def __exit__(self, *args: Any) -> None: + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: TracebackType | None, + ) -> None: self.close() diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 7fad2b779ab28..50dee463a06eb 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -79,6 +79,7 @@ HashableT, IndexLabel, ReadCsvBuffer, + Self, StorageOptions, ) _doc_read_csv_and_table = ( @@ -1776,7 +1777,7 @@ def get_chunk(self, size: int | None = None) -> DataFrame: size = min(size, self.nrows - self._currow) return self.read(nrows=size) - def __enter__(self) -> TextFileReader: + def __enter__(self) -> Self: return self def __exit__( diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f26411f65d91f..89c3f7bbc4f84 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1731,7 +1731,7 @@ def _create_storer( errors: str = "strict", ) -> GenericFixed | Table: """return a suitable class to operate""" - cls: type[GenericFixed] | type[Table] + cls: type[GenericFixed | Table] if value is not None and not isinstance(value, (Series, DataFrame)): raise TypeError("value must be None, Series, or DataFrame") @@ -2119,7 +2119,7 @@ def __repr__(self) -> str: ] ) - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: """compare 2 col items""" return all( getattr(self, a, None) == getattr(other, a, None) @@ -2160,7 +2160,7 @@ def convert( if self.freq is not None: kwargs["freq"] = _ensure_decoded(self.freq) - factory: type[Index] | type[DatetimeIndex] = Index + factory: type[Index | DatetimeIndex] = Index if lib.is_np_dtype(values.dtype, "M") or isinstance( values.dtype, DatetimeTZDtype ): @@ -2426,7 +2426,7 @@ def __repr__(self) -> str: ] ) - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: """compare 2 col items""" return all( getattr(self, a, None) == getattr(other, a, None) diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 7fdfd214c452c..60b48bed8e124 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -3,9 +3,12 @@ """ from __future__ import annotations +from abc import ( + ABC, + abstractmethod, +) from typing import ( TYPE_CHECKING, - Protocol, overload, ) @@ -23,23 +26,26 @@ CompressionOptions, FilePath, ReadBuffer, + Self, ) from pandas import DataFrame -class ReaderBase(Protocol): +class ReaderBase(ABC): """ Protocol for XportReader and SAS7BDATReader classes. """ + @abstractmethod def read(self, nrows: int | None = None) -> DataFrame: ... + @abstractmethod def close(self) -> None: ... - def __enter__(self) -> ReaderBase: + def __enter__(self) -> Self: return self def __exit__( diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 698a2882ada39..c5648a022d4a9 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -23,7 +23,6 @@ from typing import ( IO, TYPE_CHECKING, - Any, AnyStr, Callable, Final, @@ -84,6 +83,7 @@ CompressionOptions, FilePath, ReadBuffer, + Self, StorageOptions, WriteBuffer, ) @@ -935,7 +935,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"{type(self)}({self})" - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: return ( isinstance(other, type(self)) and self.string == other.string @@ -1212,7 +1212,7 @@ def _open_file(self) -> None: self._read_header() self._setup_dtype() - def __enter__(self) -> StataReader: + def __enter__(self) -> Self: """enter context manager""" self._entered = True return self @@ -1293,7 +1293,9 @@ def _read_header(self) -> None: else: self._read_old_header(first_char) - self._has_string_data = len([x for x in self._typlist if type(x) is int]) > 0 + self._has_string_data = ( + len([x for x in self._typlist if isinstance(x, int)]) > 0 + ) # calculate size of a data record self._col_sizes = [self._calcsize(typ) for typ in self._typlist] @@ -1793,7 +1795,7 @@ def read( # Decode strings for col, typ in zip(data, self._typlist): - if type(typ) is int: + if isinstance(typ, int): data[col] = data[col].apply(self._decode) data = self._insert_strls(data) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index 1446a74b29c32..1118ad88d5092 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -489,8 +489,8 @@ def test_to_dict_masked_native_python(self): # GH#34665 df = DataFrame({"a": Series([1, 2], dtype="Int64"), "B": 1}) result = df.to_dict(orient="records") - assert type(result[0]["a"]) is int + assert isinstance(result[0]["a"], int) df = DataFrame({"a": Series([1, NA], dtype="Int64"), "B": 1}) result = df.to_dict(orient="records") - assert type(result[0]["a"]) is int + assert isinstance(result[0]["a"], int) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 3b0dac21ef10c..8341dda1597bb 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -324,7 +324,7 @@ def test_repr_should_return_str(self): index1 = ["\u03c3", "\u03c4", "\u03c5", "\u03c6"] cols = ["\u03c8"] df = DataFrame(data, columns=cols, index=index1) - assert type(df.__repr__()) == str # both py2 / 3 + assert isinstance(df.__repr__(), str) def test_repr_no_backslash(self): with option_context("mode.sim_interactive", True): diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 9d7cb52e3817d..f5b095675dfb8 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -547,8 +547,8 @@ def test_store_index_name_numpy_str(tmp_path, table_format, setup_path): tm.assert_frame_equal(df, df2, check_names=True) - assert type(df2.index.name) == str - assert type(df2.columns.name) == str + assert isinstance(df2.index.name, str) + assert isinstance(df2.columns.name, str) def test_store_series_name(setup_path): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index bc596fb0a3abe..1e091db21ff83 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1436,7 +1436,7 @@ def obj(self): def test_slice_key(self, obj, key, expected, warn, val, indexer_sli, is_inplace): super().test_slice_key(obj, key, expected, warn, val, indexer_sli, is_inplace) - if type(val) is float: + if isinstance(val, float): # the xfail would xpass bc test_slice_key short-circuits raise AssertionError("xfail not relevant for this test.") diff --git a/pyproject.toml b/pyproject.toml index 1034196baa15e..c28f9259c749c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -211,6 +211,7 @@ line-length = 88 target-version = "py310" fix = true unfixable = [] +typing-modules = ["pandas._typing"] select = [ # pyflakes @@ -303,16 +304,14 @@ ignore = [ "PLW0603", # Docstrings should not be included in stubs "PYI021", + # Use `typing.NamedTuple` instead of `collections.namedtuple` + "PYI024", # No builtin `eval()` allowed "PGH001", # compare-to-empty-string "PLC1901", - # Use typing_extensions.TypeAlias for type aliases - # "PYI026", # not yet implemented - # Use "collections.abc.*" instead of "typing.*" (PEP 585 syntax) - # "PYI027", # not yet implemented # while int | float can be shortened to float, the former is more explicit - # "PYI041", # not yet implemented + "PYI041", # incorrect-dict-iterator, flags valid Series.items usage "PERF102", # try-except-in-loop, becomes useless in Python 3.11