Skip to content
2 changes: 1 addition & 1 deletion bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def bigframes_dtype_to_ibis_dtype(
f"""
Unexpected data type {bigframes_dtype}. The following
str dtypes are supppted: 'boolean','Float64','Int64', 'string',
'tring[pyarrow]','timestamp[us, tz=UTC][pyarrow]',
'string[pyarrow]','timestamp[us, tz=UTC][pyarrow]',
'timestamp[us][pyarrow]','date32[day][pyarrow]',
'time64[us][pyarrow]'. The following pandas.ExtensionDtype are
supported: pandas.BooleanDtype(), pandas.Float64Dtype(),
Expand Down
38 changes: 37 additions & 1 deletion third_party/bigframes_vendored/pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,46 @@ def astype(self, dtype):
"""
Cast a pandas object to a specified dtype ``dtype``.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

Create a DataFrame:

>>> d = {'col1': [1, 2], 'col2': [3, 4]}
>>> df = bpd.DataFrame(data=d)
>>> df.dtypes
col1 Int64
col2 Int64
dtype: object

Cast all columns to ``Float64``:

>>> df.astype('Float64').dtypes
col1 Float64
col2 Float64
dtype: object

Create a series of type ``Int64``:

>>> ser = bpd.Series([1, 2], dtype='Int64')
>>> ser
0 1
1 2
dtype: Int64

Convert to ``Float64`` type:

>>> ser.astype('Float64')
0 1.0
1 2.0
dtype: Float64

Args:
dtype (str or pandas.ExtensionDtype):
A dtype supported by BigQuery DataFrames. Supported str dtypes include 'boolean','Float64','Int64',
'string', 'tring[pyarrow]','timestamp[us, tz=UTC][pyarrow]',
'string', 'string[pyarrow]','timestamp[us, tz=UTC][pyarrow]',
'timestamp[us][pyarrow]','date32[day][pyarrow]','time64[us][pyarrow]'.
Supported pandas.ExtensionDtype values include pandas.BooleanDtype(), pandas.Float64Dtype(),
pandas.Int64Dtype(), pandas.StringDtype(storage="pyarrow"),
Expand Down
192 changes: 192 additions & 0 deletions third_party/bigframes_vendored/pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,95 @@ def groupby(
used to group large amounts of data and compute operations on these
groups.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

You can group by a named index level.

>>> s = bpd.Series([380, 370., 24., 26.],
... index=["Falcon", "Falcon", "Parrot", "Parrot"],
... name="Max Speed")
>>> s.index.name="Animal"
>>> s
Animal
Falcon 380.0
Falcon 370.0
Parrot 24.0
Parrot 26.0
Name: Max Speed, dtype: Float64
>>> s.groupby("Animal").mean()
Animal
Falcon 375.0
Parrot 25.0
Name: Max Speed, dtype: Float64

You can also group by more than one index level.

>>> import pandas as pd
>>> s = bpd.Series([380, 370., 24., 26.],
... index=pd.MultiIndex.from_tuples(
... [("Falcon", "Clear"),
... ("Falcon", "Cloudy"),
... ("Parrot", "Clear"),
... ("Parrot", "Clear")],
... names=["Animal", "Sky"]),
... name="Max Speed")
>>> s
Animal Sky
Falcon Clear 380.0
Cloudy 370.0
Parrot Clear 24.0
Clear 26.0
Name: Max Speed, dtype: Float64

>>> s.groupby("Animal").mean()
Animal
Falcon 375.0
Parrot 25.0
Name: Max Speed, dtype: Float64

>>> s.groupby("Sky").mean()
Sky
Clear 143.333333
Cloudy 370.0
Name: Max Speed, dtype: Float64

>>> s.groupby(["Animal", "Sky"]).mean()
Animal Sky
Falcon Clear 380.0
Cloudy 370.0
Parrot Clear 25.0
Name: Max Speed, dtype: Float64

You can also group by the values of another Series, provided its index
matches the index of the original Series.

>>> df = bpd.DataFrame({'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
... 'Max Speed': [380., 370., 24., 26.],
... 'Age': [10., 20., 4., 6.]})
>>> df
Animal Max Speed Age
0 Falcon 380.0 10.0
1 Falcon 370.0 20.0
2 Parrot 24.0 4.0
3 Parrot 26.0 6.0
<BLANKLINE>
[4 rows x 3 columns]

>>> df['Max Speed'].groupby(df['Animal']).mean()
Animal
Falcon 375.0
Parrot 25.0
Name: Max Speed, dtype: Float64

>>> df['Age'].groupby(df['Animal']).max()
Animal
Falcon 20.0
Parrot 6.0
Name: Age, dtype: Float64

Args:
by (mapping, function, label, pd.Grouper or list of such, default None):
Used to determine the groups for the groupby.
Expand Down Expand Up @@ -1661,6 +1750,31 @@ def max(
If you want the index of the maximum, use ``idxmax``. This is the equivalent
of the ``numpy.ndarray`` method ``argmax``.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

Calculating the max of a Series:

>>> s = bpd.Series([1, 3])
>>> s
0 1
1 3
dtype: Int64
>>> s.max()
3

Calculating the max of a Series containing ``NA`` values:

>>> s = bpd.Series([1, 3, bpd.NA])
>>> s
0 1.0
1 3.0
2 <NA>
dtype: Float64
>>> s.max()
3.0

Returns:
scalar: Scalar.
Expand All @@ -1676,6 +1790,32 @@ def min(
If you want the index of the minimum, use ``idxmin``. This is the equivalent
of the ``numpy.ndarray`` method ``argmin``.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

Calculating the min of a Series:

>>> s = bpd.Series([1, 3])
>>> s
0 1
1 3
dtype: Int64
>>> s.min()
1

Calculating the min of a Series containing ``NA`` values:

>>> s = bpd.Series([1, 3, bpd.NA])
>>> s
0 1.0
1 3.0
2 <NA>
dtype: Float64
>>> s.min()
1.0

Returns:
scalar: Scalar.
"""
Expand Down Expand Up @@ -1714,6 +1854,32 @@ def sum(self):

This is equivalent to the method ``numpy.sum``.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

Calculating the sum of a Series:

>>> s = bpd.Series([1, 3])
>>> s
0 1
1 3
dtype: Int64
>>> s.sum()
4

Calculating the sum of a Series containing ``NA`` values:

>>> s = bpd.Series([1, 3, bpd.NA])
>>> s
0 1.0
1 3.0
2 <NA>
dtype: Float64
>>> s.sum()
4.0

Returns:
scalar: Scalar.
"""
Expand All @@ -1722,6 +1888,32 @@ def sum(self):
def mean(self):
"""Return the mean of the values over the requested axis.

**Examples:**

>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None

Calculating the mean of a Series:

>>> s = bpd.Series([1, 3])
>>> s
0 1
1 3
dtype: Int64
>>> s.mean()
2.0

Calculating the mean of a Series containing ``NA`` values:

>>> s = bpd.Series([1, 3, bpd.NA])
>>> s
0 1.0
1 3.0
2 <NA>
dtype: Float64
>>> s.mean()
2.0

Returns:
scalar: Scalar.
"""
Expand Down