From 746c7605f23cff56a4fb0aa41f2f6f9801d19738 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Thu, 11 Dec 2025 09:11:03 +0100 Subject: [PATCH 1/3] setitem with none --- pandas-stubs/_typing.pyi | 2 + pandas-stubs/core/frame.pyi | 192 +++++++--------------------------- tests/frame/test_indexing.py | 22 ++-- tests/series/test_indexing.py | 4 + 4 files changed, 56 insertions(+), 164 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 343e8831e..58e346299 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -52,6 +52,7 @@ from pandas._libs.tslibs import ( Timedelta, Timestamp, ) +from pandas._libs.tslibs.nattype import NaTType from pandas.core.dtypes.dtypes import ( CategoricalDtype, @@ -134,6 +135,7 @@ _IndexIterScalar: TypeAlias = ( Scalar: TypeAlias = ( _IndexIterScalar | complex | np.integer | np.floating | np.complexfloating ) +ScalarOrNA: TypeAlias = Scalar | NAType | NaTType | None IntStrT = TypeVar("IntStrT", int, str) # timestamp and timedelta convertible types diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index c45da47c1..344c57580 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -81,7 +81,6 @@ import xarray as xr from pandas._libs.lib import _NoDefaultDoNotUse from pandas._libs.missing import NAType from pandas._libs.tslibs import BaseOffset -from pandas._libs.tslibs.nattype import NaTType from pandas._typing import ( S2, AggFuncTypeBase, @@ -146,6 +145,7 @@ from pandas._typing import ( Renamer, ReplaceValue, Scalar, + ScalarOrNA, ScalarT, SequenceNotStr, SeriesByT, @@ -181,6 +181,26 @@ _T_MUTABLE_MAPPING_co = TypeVar( "_T_MUTABLE_MAPPING_co", bound=MutableMapping, covariant=True ) +_iLocSetItemKey: TypeAlias = ( + int + | IndexType + | tuple[int, int] + | tuple[IndexType, int] + | tuple[IndexType, IndexType] + | tuple[int, IndexType] +) +_LocSetItemKey: TypeAlias = ( + MaskType | Hashable | _IndexSliceTuple | Iterable[Scalar] | IndexingInt | slice +) 
+_SetItemValueNotDataFrame: TypeAlias = ( + ScalarOrNA + | Sequence[ScalarOrNA] + | Sequence[Sequence[ScalarOrNA]] + | Mapping[Any, ScalarOrNA] + | ArrayLike + | IndexOpsMixin +) + class _iLocIndexerFrame(_iLocIndexer, Generic[_T]): @overload def __getitem__(self, key: tuple[int, int]) -> Scalar: ... @@ -203,26 +223,7 @@ class _iLocIndexerFrame(_iLocIndexer, Generic[_T]): # Keep in sync with `DataFrame.__setitem__` def __setitem__( - self, - key: ( - int - | IndexType - | tuple[int, int] - | tuple[IndexType, int] - | tuple[IndexType, IndexType] - | tuple[int, IndexType] - ), - value: ( - Scalar - | IndexOpsMixin - | Sequence[Scalar] - | DataFrame - | np_ndarray - | NAType - | NaTType - | Mapping[Hashable, Scalar | NAType | NaTType] - | None - ), + self, key: _iLocSetItemKey, value: _SetItemValueNotDataFrame | DataFrame ) -> None: ... class _LocIndexerFrame(_LocIndexer, Generic[_T]): @@ -283,52 +284,16 @@ class _LocIndexerFrame(_LocIndexer, Generic[_T]): # Keep in sync with `DataFrame.__setitem__` @overload def __setitem__( - self, - key: tuple[_IndexSliceTuple, Hashable], - value: ( - Scalar - | NAType - | NaTType - | ArrayLike - | IndexOpsMixin - | Sequence[Scalar] - | Sequence[Sequence[Scalar]] - | Mapping[Hashable, Scalar | NAType | NaTType] - | None - ), + self, key: tuple[_IndexSliceTuple, Hashable], value: _SetItemValueNotDataFrame ) -> None: ... @overload def __setitem__( - self, - key: ( - MaskType - | Hashable - | _IndexSliceTuple - | Iterable[Scalar] - | IndexingInt - | slice - ), - value: ( - Scalar - | NAType - | NaTType - | ArrayLike - | IndexOpsMixin - | Sequence[Scalar] - | Sequence[Sequence[Scalar]] - | DataFrame - | Mapping[Hashable, Scalar | NAType | NaTType] - | None - ), + self, key: _LocSetItemKey, value: _SetItemValueNotDataFrame | DataFrame ) -> None: ... class _iAtIndexerFrame(_iAtIndexer): def __getitem__(self, key: tuple[int, int]) -> Scalar: ... 
- def __setitem__( - self, - key: tuple[int, int], - value: Scalar | NAType | NaTType | None, - ) -> None: ... + def __setitem__(self, key: tuple[int, int], value: ScalarOrNA) -> None: ... class _AtIndexerFrame(_AtIndexer): def __getitem__( @@ -347,42 +312,26 @@ class _AtIndexerFrame(_AtIndexer): key: ( MaskType | StrLike | _IndexSliceTuple | list[ScalarT] | IndexingInt | slice ), - value: ( - Scalar - | NAType - | NaTType - | ArrayLike - | IndexOpsMixin - | DataFrame - | Sequence[Scalar] - | Sequence[Sequence[Scalar]] - | Mapping[Hashable, Scalar | NAType | NaTType] - | None - ), + value: _SetItemValueNotDataFrame | DataFrame, ) -> None: ... -# With mypy 1.14.1 and python 3.12, the second overload needs a type-ignore statement -if sys.version_info >= (3, 12): - class _GetItemHack: - @overload - def __getitem__(self, key: Scalar | tuple[Hashable, ...]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] +# With python 3.12+, the second overload needs a type-ignore statement +class _GetItemHack: + @overload + def __getitem__(self, key: Scalar | tuple[Hashable, ...]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + if sys.version_info >= (3, 12): @overload def __getitem__( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] self, key: Iterable[Hashable] | slice ) -> Self: ... - @overload - def __getitem__(self, key: Hashable) -> Series: ... - -else: - class _GetItemHack: - @overload - def __getitem__(self, key: Scalar | tuple[Hashable, ...]) -> Series: ... # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + else: @overload def __getitem__( # pyright: ignore[reportOverlappingOverload] self, key: Iterable[Hashable] | slice ) -> Self: ... - @overload - def __getitem__(self, key: Hashable) -> Series: ... + + @overload + def __getitem__(self, key: Hashable) -> Series: ... 
_AstypeArgExt: TypeAlias = ( AstypeArg @@ -829,85 +778,22 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): # Keep in sync with `_iLocIndexerFrame.__setitem__` @overload def __setitem__( - self, - idx: ( - int - | IndexType - | tuple[int, int] - | tuple[IndexType, int] - | tuple[IndexType, IndexType] - | tuple[int, IndexType] - ), - value: ( - Scalar - | IndexOpsMixin - | Sequence[Scalar] - | DataFrame - | np_ndarray - | NAType - | NaTType - | Mapping[Hashable, Scalar | NAType | NaTType] - | None - ), + self, idx: _iLocSetItemKey, value: _SetItemValueNotDataFrame | DataFrame ) -> None: ... # Keep in sync with `_LocIndexerFrame.__setitem__` @overload def __setitem__( - self, - idx: tuple[_IndexSliceTuple, Hashable], - value: ( - Scalar - | NAType - | NaTType - | ArrayLike - | IndexOpsMixin - | Sequence[Scalar] - | Sequence[Sequence[Scalar]] - | Mapping[Hashable, Scalar | NAType | NaTType] - | None - ), + self, idx: tuple[_IndexSliceTuple, Hashable], value: _SetItemValueNotDataFrame ) -> None: ... @overload def __setitem__( - self, - idx: ( - MaskType - | Hashable - | _IndexSliceTuple - | Iterable[Scalar] - | IndexingInt - | slice - ), - value: ( - Scalar - | NAType - | NaTType - | ArrayLike - | IndexOpsMixin - | Sequence[Scalar] - | Sequence[Sequence[Scalar]] - | DataFrame - | Mapping[Hashable, Scalar | NAType | NaTType] - | None - ), + self, idx: _LocSetItemKey, value: _SetItemValueNotDataFrame | DataFrame ) -> None: ... # Extra cases not supported by `_LocIndexerFrame.__setitem__` / # `_iLocIndexerFrame.__setitem__`. @overload def __setitem__( - self, - idx: IndexOpsMixin | DataFrame, - value: ( - Scalar - | NAType - | NaTType - | ArrayLike - | IndexOpsMixin - | Sequence[Scalar] - | Sequence[Sequence[Scalar]] - | Mapping[Hashable, Scalar | NAType | NaTType] - | None - ), + self, idx: IndexOpsMixin | DataFrame, value: _SetItemValueNotDataFrame ) -> None: ... 
@overload def query( diff --git a/tests/frame/test_indexing.py b/tests/frame/test_indexing.py index 249ddd127..16b8d9b5a 100644 --- a/tests/frame/test_indexing.py +++ b/tests/frame/test_indexing.py @@ -365,17 +365,6 @@ def test_isetframe() -> None: check(assert_type(frame.isetitem([0], [10, 12]), None), type(None)) -def test_setitem_none() -> None: - df = pd.DataFrame( - {"A": [1, 2, 3], "B": ["abc", "def", "ghi"]}, index=["x", "y", "z"] - ) - df.loc["x", "B"] = None - df.iloc[2, 0] = None - sb = pd.Series([1, 2, 3], dtype=int) - sb.loc["y"] = None - sb.iloc[0] = None - - def test_getsetitem_multiindex() -> None: # GH 466 rows = pd.Index(["project A", "project B", "project C"]) @@ -422,6 +411,14 @@ def test_frame_setitem_na() -> None: df.loc[ind, :] = pd.NaT df.iloc[[0, 2], :] = pd.NaT + df.loc["a", "x"] = None + df.iloc[2, 0] = None + + df.loc[:, "x"] = [None, pd.NA, pd.NaT] + df.iloc[:, 0] = [None, pd.NA, pd.NaT] + df.loc[:, ["x"]] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment,index] + df.iloc[:, [0]] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment] + def test_loc_set() -> None: df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) @@ -571,6 +568,9 @@ def test_df_loc_dict() -> None: df.iloc[0] = {"X": 0} check(assert_type(df, pd.DataFrame), pd.DataFrame) + df.loc[0] = {None: None, pd.NA: pd.NA, pd.NaT: pd.NaT} + df.iloc[0] = {None: None, pd.NA: pd.NA, pd.NaT: pd.NaT} + def test_iloc_npint() -> None: # GH 69 diff --git a/tests/series/test_indexing.py b/tests/series/test_indexing.py index ddef397ed..48cd9f0c4 100644 --- a/tests/series/test_indexing.py +++ b/tests/series/test_indexing.py @@ -220,6 +220,10 @@ def test_series_setitem_na() -> None: s2.loc[ind] = pd.NaT s2.iloc[[0, 2]] = pd.NaT + sb = pd.Series([1, 2, 3], dtype=int) + sb.loc["y"] = None + sb.iloc[0] = None + def test_slice_timestamp() -> None: dti = pd.date_range("1/1/2025", "2/28/2025") From 7308eb9978128e04b07e01b4306a99d91bca10ac Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Mon, 15 Dec 2025 
13:22:54 +0100 Subject: [PATCH 2/3] more tests and typings --- pandas-stubs/core/frame.pyi | 9 +++++++++ tests/frame/test_indexing.py | 16 +++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 344c57580..4256a1fc2 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -222,6 +222,11 @@ class _iLocIndexerFrame(_iLocIndexer, Generic[_T]): ) -> _T: ... # Keep in sync with `DataFrame.__setitem__` + @overload + def __setitem__( + self, key: tuple[slice, Hashable], value: _SetItemValueNotDataFrame + ) -> None: ... + @overload def __setitem__( self, key: _iLocSetItemKey, value: _SetItemValueNotDataFrame | DataFrame ) -> None: ... @@ -777,6 +782,10 @@ class DataFrame(NDFrame, OpsMixin, _GetItemHack): # Keep in sync with `_iLocIndexerFrame.__setitem__` @overload + def __setitem__( + self, idx: tuple[slice, Hashable], value: _SetItemValueNotDataFrame + ) -> None: ... + @overload def __setitem__( self, idx: _iLocSetItemKey, value: _SetItemValueNotDataFrame | DataFrame ) -> None: ... 
diff --git a/tests/frame/test_indexing.py b/tests/frame/test_indexing.py index 16b8d9b5a..c32280832 100644 --- a/tests/frame/test_indexing.py +++ b/tests/frame/test_indexing.py @@ -90,6 +90,10 @@ def test_types_setitem() -> None: df[a] = [[1, 2], [3, 4]] df[i] = [8, 9] + df["col1"] = [None, pd.NaT] + df[["col1"]] = [[None], [pd.NA]] # type: ignore[assignment,list-item] + df[iter(["col1"])] = [[None], [pd.NA]] # type: ignore[assignment] + def test_types_setitem_mask() -> None: df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]}) @@ -404,20 +408,30 @@ def test_frame_setitem_na() -> None: df.loc[ind, :] = pd.NA df.iloc[[0, 2], :] = pd.NA + df.at["a", "x"] = pd.NA + df.iat[0, 0] = pd.NA # reveal_type(df["y"]) gives Series[Any], so we have to cast to tell the # type checker what kind of type it is when adding to a Timedelta df["x"] = cast("pd.Series[pd.Timestamp]", df["y"]) + pd.Timedelta(days=3) df.loc[ind, :] = pd.NaT df.iloc[[0, 2], :] = pd.NaT + df.at["a", "y"] = pd.NaT + df.iat[0, 0] = pd.NaT df.loc["a", "x"] = None df.iloc[2, 0] = None + df.at["a", "y"] = None + df.iat[0, 0] = None df.loc[:, "x"] = [None, pd.NA, pd.NaT] df.iloc[:, 0] = [None, pd.NA, pd.NaT] + df.loc[:, ["x"]] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment,index] - df.iloc[:, [0]] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment] + df.iloc[:, [0]] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment,index] + + df.loc[:, iter(["x"])] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment,index] + df.iloc[:, iter([0])] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment,index] def test_loc_set() -> None: From 4682c1a7513e66e8e70533b0fa74575dc0c61fca Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Tue, 16 Dec 2025 09:25:50 +0100 Subject: [PATCH 3/3] python/mypy#20420 https://github.com/pandas-dev/pandas-stubs/pull/1550#discussion_r2620686338 --- tests/frame/test_indexing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/frame/test_indexing.py 
b/tests/frame/test_indexing.py index c32280832..01503a6b3 100644 --- a/tests/frame/test_indexing.py +++ b/tests/frame/test_indexing.py @@ -91,6 +91,7 @@ def test_types_setitem() -> None: df[i] = [8, 9] df["col1"] = [None, pd.NaT] + # TODO: mypy bug, remove after python/mypy#20420 has been resolved df[["col1"]] = [[None], [pd.NA]] # type: ignore[assignment,list-item] df[iter(["col1"])] = [[None], [pd.NA]] # type: ignore[assignment] @@ -427,9 +428,11 @@ def test_frame_setitem_na() -> None: df.loc[:, "x"] = [None, pd.NA, pd.NaT] df.iloc[:, 0] = [None, pd.NA, pd.NaT] + # TODO: mypy bug, remove after python/mypy#20420 has been resolved df.loc[:, ["x"]] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment,index] df.iloc[:, [0]] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment,index] + # TODO: mypy bug, remove after python/mypy#20420 has been resolved df.loc[:, iter(["x"])] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment,index] df.iloc[:, iter([0])] = [[None], [pd.NA], [pd.NaT]] # type: ignore[assignment,index]