From 12be649ded2be2819070b2588a0d5a425a5af63e Mon Sep 17 00:00:00 2001
From: Omkar Kabde
Date: Thu, 19 Feb 2026 00:18:58 +0530
Subject: [PATCH] [MNT] Complete test for sparse dataset

---
Review notes (this area follows the three-dash line and is not part of the
commit message):
 * Renames test_get_sparse_dataset_rowid_and_ignore_and_target to
   test_get_sparse_dataset_excludes_rowid and drops its TODO comment.
 * Adds test_get_sparse_dataset_includes_rowid, which calls get_data with
   target="class", include_row_id=True and include_ignore_attribute=True and
   asserts X.shape == (600, 20000), 20000 categorical flags that are all
   False, and y.shape == (600,).

 tests/test_datasets/test_dataset.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
index b13bac30b..f534d9c9d 100644
--- a/tests/test_datasets/test_dataset.py
+++ b/tests/test_datasets/test_dataset.py
@@ -379,8 +379,7 @@ def test_get_sparse_dataset_dataframe(self):
         )
         assert rval.shape == (600, 20001)
 
-    def test_get_sparse_dataset_rowid_and_ignore_and_target(self):
-        # TODO: re-add row_id and ignore attributes
+    def test_get_sparse_dataset_excludes_rowid(self):
         self.sparse_dataset.ignore_attribute = ["V256"]
         self.sparse_dataset.row_id_attribute = ["V512"]
         X, y, categorical, _ = self.sparse_dataset.get_data(
@@ -397,6 +396,21 @@ def test_get_sparse_dataset_rowid_and_ignore_and_target(self):
         self.assertListEqual(categorical, [False] * 19998)
         assert y.shape == (600,)
 
+    def test_get_sparse_dataset_includes_rowid(self):
+        self.sparse_dataset.ignore_attribute = ["V256"]
+        self.sparse_dataset.row_id_attribute = ["V512"]
+        X, y, categorical, _ = self.sparse_dataset.get_data(
+            target="class",
+            include_row_id=True,
+            include_ignore_attribute=True,
+        )
+        assert all(dtype == pd.SparseDtype(np.float32, fill_value=0.0) for dtype in X.dtypes)
+        assert isinstance(y.dtypes, pd.SparseDtype)
+        assert X.shape == (600, 20000)
+        assert len(categorical) == 20000
+        self.assertListEqual(categorical, [False] * 20000)
+        assert y.shape == (600,)
+
     def test_get_sparse_categorical_data_id_395(self):
         dataset = openml.datasets.get_dataset(395, download_data=True)
         feature = dataset.features[3758]