Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed regression in :meth:`Series.equals` when comparing floats with dtype object to None (:issue:`44190`)
- Fixed performance regression in :func:`read_csv` (:issue:`44106`)
- Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`)
-

.. ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
# i.e. all-bool Categorical, BooleanArray
try:
return np.asarray(values).astype("uint8", copy=False)
except TypeError:
except (TypeError, ValueError):
# GH#42107 we have pd.NAs present
return np.asarray(values)

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/series/methods/test_drop_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest

from pandas import (
NA,
Categorical,
Series,
)
Expand Down Expand Up @@ -224,6 +225,20 @@ def test_drop_duplicates_categorical_bool(self, ordered):
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])

def test_drop_duplicates_categorical_bool_na(self):
# GH#44351
ser = Series(
Categorical(
[True, False, True, False, NA], categories=[True, False], ordered=True
)
)
result = ser.drop_duplicates()
expected = Series(
Categorical([True, False, np.nan], categories=[True, False], ordered=True),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be a bug, since the result does not preserve NA (but it is unrelated to this change and not a regression, so please open a new issue).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, #44405

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test case is actually a bit misleading. This regression was about a boolean categorical with missing values in general, not specifically with NA (if you create the data with np.nan, you get the same issue).

index=[0, 1, 4],
)
tm.assert_series_equal(result, expected)


def test_drop_duplicates_pos_args_deprecation():
# GH#41485
Expand Down
18 changes: 17 additions & 1 deletion pandas/tests/series/methods/test_duplicated.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import numpy as np
import pytest

from pandas import Series
from pandas import (
NA,
Categorical,
Series,
)
import pandas._testing as tm


Expand Down Expand Up @@ -33,3 +37,15 @@ def test_duplicated_nan_none(keep, expected):

result = ser.duplicated(keep=keep)
tm.assert_series_equal(result, expected)


def test_duplicated_categorical_bool_na():
    """Check ``Series.duplicated`` on a boolean Categorical with a missing value."""
    # GH#44351
    # Exercise both pd.NA and np.nan as the missing marker so the test does
    # not suggest the behavior is specific to pd.NA.
    for na_value in (NA, np.nan):
        ser = Series(
            Categorical(
                [True, False, True, False, na_value],
                categories=[True, False],
                ordered=True,
            )
        )
        result = ser.duplicated()
        # Positions 2 and 3 repeat earlier values; the single missing entry
        # at position 4 has no earlier occurrence, so it is not a duplicate.
        expected = Series([False, False, True, True, False])
        tm.assert_series_equal(result, expected)