Skip to content

Commit 21eb213

Browse files
authored
feat: support pd.cut() for array-like type (#2064)
Fixes internal issue 329866195
1 parent b3cf824 commit 21eb213

File tree

4 files changed: 24 additions (+24) and 5 deletions (-5).

bigframes/core/reshape/tile.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile
2121
import pandas as pd
2222

23+
import bigframes
2324
import bigframes.constants
2425
import bigframes.core.expression as ex
2526
import bigframes.core.ordering as order
@@ -32,7 +33,7 @@
3233

3334

3435
def cut(
35-
x: bigframes.series.Series,
36+
x,
3637
bins: typing.Union[
3738
int,
3839
pd.IntervalIndex,
@@ -60,9 +61,12 @@ def cut(
6061
f"but found {type(list(labels)[0])}. {constants.FEEDBACK_LINK}"
6162
)
6263

63-
if x.size == 0:
64+
if len(x) == 0:
6465
raise ValueError("Cannot cut empty array.")
6566

67+
if not isinstance(x, bigframes.series.Series):
68+
x = bigframes.series.Series(x)
69+
6670
if isinstance(bins, int):
6771
if bins <= 0:
6872
raise ValueError("`bins` should be a positive integer.")

tests/system/small/test_pandas.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,18 @@ def _convert_pandas_category(pd_s: pd.Series):
520520
)
521521

522522

523+
def test_cut_for_array():
524+
"""Avoid regressions for internal issue 329866195"""
525+
sc = [30, 80, 40, 90, 60, 45, 95, 75, 55, 100, 65, 85]
526+
x = [20, 40, 60, 80, 100]
527+
528+
pd_result: pd.Series = pd.Series(pd.cut(sc, x))
529+
bf_result = bpd.cut(sc, x)
530+
531+
pd_result = _convert_pandas_category(pd_result)
532+
pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result)
533+
534+
523535
@pytest.mark.parametrize(
524536
("right", "labels"),
525537
[

tests/unit/test_pandas.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ def test_method_matches_session(method_name: str):
122122
)
123123
def test_cut_raises_with_invalid_labels(bins: int, labels, error_message: str):
124124
mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True)
125+
mock_series.__len__.return_value = 5
125126
with pytest.raises(ValueError, match=error_message):
126127
bigframes.pandas.cut(mock_series, bins, labels=labels)
127128

@@ -160,6 +161,8 @@ def test_cut_raises_with_unsupported_labels():
160161
)
161162
def test_cut_raises_with_invalid_bins(bins: int, error_message: str):
162163
mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True)
164+
mock_series.__len__.return_value = 5
165+
163166
with pytest.raises(ValueError, match=error_message):
164167
bigframes.pandas.cut(mock_series, bins, labels=False)
165168

third_party/bigframes_vendored/pandas/core/reshape/tile.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88

99
import pandas as pd
1010

11-
from bigframes import constants, series
11+
from bigframes import constants
1212

1313

1414
def cut(
15-
x: series.Series,
15+
x,
1616
bins: typing.Union[
1717
int,
1818
pd.IntervalIndex,
@@ -113,7 +113,7 @@ def cut(
113113
dtype: struct<left_inclusive: int64, right_exclusive: int64>[pyarrow]
114114
115115
Args:
116-
x (bigframes.pandas.Series):
116+
x (array-like):
117117
The input array-like object (for example a list or a Series) to be binned. Must be 1-dimensional.
118118
bins (int, pd.IntervalIndex, Iterable):
119119
The criteria to bin by.

0 commit comments

Comments
 (0)