From 98fafb21b0a59fd614fff8432f57149eeaa3e6a9 Mon Sep 17 00:00:00 2001
From: samay2504
Date: Tue, 16 Dec 2025 16:53:49 +0530
Subject: [PATCH 1/2] fix: Include index coordinates in to_dataframe when name differs from dimension (Fixes #10851)

---
 xarray/core/dataset.py           | 22 ++++++++++----
 xarray/tests/test_issue_10851.py | 51 ++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 5 deletions(-)
 create mode 100644 xarray/tests/test_issue_10851.py

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index bce048048da..a0adfe3769c 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -7202,11 +7202,23 @@ def to_pandas(self) -> pd.Series | pd.DataFrame:
 
     def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
         from xarray.core.extension_array import PandasExtensionArray
-
-        # All and only non-index arrays (whether data or coordinates) should
-        # become columns in the output DataFrame. Excluding indexes rather
-        # than dims handles the case of a MultiIndex along a single dimension.
-        columns_in_order = [k for k in self.variables if k not in self.xindexes]
+        from xarray.core.indexes import PandasIndex, PandasMultiIndex
+
+        # All non-index variables become columns. The following indexes are excluded:
+        # 1. PandasMultiIndex components (A, B from a MultiIndex)
+        # 2. PandasIndex where name matches dim (e.g., 'x' indexing dim 'x')
+        # 3. Any index whose name matches a dimension
+        # This allows PandasIndex coords created via set_xindex with a different
+        # name (e.g., 'pf' indexing dim 'pos') to be included as columns.
+        indexes_to_exclude = set()
+        for name, idx in self.xindexes.items():
+            if (
+                isinstance(idx, PandasMultiIndex)
+                or (isinstance(idx, PandasIndex) and name == idx.dim)
+                or name in self.dims
+            ):
+                indexes_to_exclude.add(name)
+        columns_in_order = [k for k in self.variables if k not in indexes_to_exclude]
         non_extension_array_columns = [
             k
             for k in columns_in_order
diff --git a/xarray/tests/test_issue_10851.py b/xarray/tests/test_issue_10851.py
new file mode 100644
index 00000000000..27888138477
--- /dev/null
+++ b/xarray/tests/test_issue_10851.py
@@ -0,0 +1,51 @@
+"""Test for issue #10851: Dataset Index not included in to_dataframe when name differs from dimension."""
+import numpy as np
+import pandas as pd
+
+import xarray as xr
+
+
+class TestToDataFrameIndexColumn:
+    """Tests for to_dataframe including index coordinates with different names."""
+
+    def test_to_dataframe_includes_index_with_different_name(self):
+        """Index coordinates with name different from dimension should be in columns."""
+        ds_temp = xr.Dataset(
+            data_vars=dict(temp=(["time", "pos"], np.array([[5, 10, 15, 20, 25]]))),
+            coords=dict(
+                pf=("pos", [1.0, 2.0, 4.2, 8.0, 10.0]),
+                time=("time", [pd.to_datetime("2025-01-01")]),
+            ),
+        ).set_xindex("pf")
+
+        df = ds_temp.to_dataframe()
+
+        assert "pf" in df.columns
+        assert "temp" in df.columns
+        np.testing.assert_array_equal(df["pf"].values, [1.0, 2.0, 4.2, 8.0, 10.0])
+
+    def test_to_dataframe_still_excludes_matching_dim_index(self):
+        """Index coordinates where name matches dimension should not be in columns."""
+        ds = xr.Dataset(
+            data_vars=dict(temp=(["x"], [1, 2, 3])),
+            coords=dict(x=("x", [10, 20, 30])),
+        )
+
+        df = ds.to_dataframe()
+
+        assert "temp" in df.columns
+        assert "x" not in df.columns
+
+    def test_to_dataframe_roundtrip_with_set_xindex(self):
+        """Dataset with set_xindex should roundtrip to DataFrame correctly."""
+        ds = xr.Dataset(
+            data_vars=dict(val=(["dim"], [100, 200, 300])),
+            coords=dict(coord_idx=("dim", ["a", "b", "c"])),
+        ).set_xindex("coord_idx")
+
+        df = ds.to_dataframe()
+
+        assert "coord_idx" in df.columns
+        assert "val" in df.columns
+        assert list(df["coord_idx"]) == ["a", "b", "c"]
+        assert list(df["val"]) == [100, 200, 300]

From 02168348f02fa590c4d629100168c4ee3bcb3b9f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 16 Dec 2025 12:30:24 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 xarray/tests/test_issue_10851.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/xarray/tests/test_issue_10851.py b/xarray/tests/test_issue_10851.py
index 27888138477..dae48cde1d1 100644
--- a/xarray/tests/test_issue_10851.py
+++ b/xarray/tests/test_issue_10851.py
@@ -1,4 +1,5 @@
 """Test for issue #10851: Dataset Index not included in to_dataframe when name differs from dimension."""
+
 import numpy as np
 import pandas as pd
 