@@ -582,6 +582,101 @@ def test_dataframe_groupby_nonnumeric_with_mean():
582582 )
583583
584584
@pytest.mark.parametrize(
    ("subset", "normalize", "ascending", "dropna", "as_index"),
    [
        (None, True, True, True, True),
        (["int64_too", "int64_col"], False, False, False, False),
    ],
)
def test_dataframe_groupby_value_counts(
    scalars_df_index,
    scalars_pandas_df_index,
    subset,
    normalize,
    ascending,
    dropna,
    as_index,
):
    """Check grouped ``value_counts`` on a DataFrame against pandas.

    Both inputs are narrowed to the same four columns, grouped by
    ``bool_col`` with the parametrized ``as_index`` and counted with the
    same ``subset``/``normalize``/``ascending``/``dropna`` options. The
    result under test is materialized with ``to_pandas()`` and compared
    with the pandas result, ignoring dtypes.
    """
    if pd.__version__.startswith("1."):
        pytest.skip("pandas 1.x produces different column labels.")

    selected_columns = ["float64_col", "int64_col", "bool_col", "int64_too"]
    # One options mapping so both libraries receive identical arguments.
    count_options = {
        "subset": subset,
        "normalize": normalize,
        "ascending": ascending,
        "dropna": dropna,
    }

    bf_groups = scalars_df_index[selected_columns].groupby(
        "bool_col", as_index=as_index
    )
    actual = bf_groups.value_counts(**count_options).to_pandas()

    pd_groups = scalars_pandas_df_index[selected_columns].groupby(
        "bool_col", as_index=as_index
    )
    expected = pd_groups.value_counts(**count_options)

    if as_index:
        # With as_index=True the results are compared as Series.
        pd.testing.assert_series_equal(expected, actual, check_dtype=False)
        return

    # With as_index=False the results are compared as frames; the pandas
    # index is cast to Int64 first (presumably to match the index dtype
    # produced by to_pandas() -- confirm).
    expected.index = expected.index.astype("Int64")
    pd.testing.assert_frame_equal(expected, actual, check_dtype=False)
625+
626+
@pytest.mark.parametrize(
    ("numeric_only", "min_count"),
    [
        (False, 4),
        (True, 0),
    ],
)
def test_dataframe_groupby_first(
    scalars_df_index, scalars_pandas_df_index, numeric_only, min_count
):
    """Check grouped ``first`` on a DataFrame against pandas.

    Rows are grouped by ``int64_col % 2`` and ``first`` is called with the
    parametrized ``numeric_only`` and ``min_count``; the ``to_pandas()``
    output must equal the pandas result.
    """
    # min_count seems to not work properly on older pandas
    pytest.importorskip("pandas", minversion="2.0.0")

    # bytes, dates not handling min_count properly in pandas
    skipped_columns = ("bytes_col", "date_col")

    bf_frame = scalars_df_index.drop(columns=list(skipped_columns))
    bf_groups = bf_frame.groupby(scalars_df_index.int64_col % 2)
    actual = bf_groups.first(
        numeric_only=numeric_only, min_count=min_count
    ).to_pandas()

    pd_frame = scalars_pandas_df_index.drop(columns=list(skipped_columns))
    pd_groups = pd_frame.groupby(scalars_pandas_df_index.int64_col % 2)
    expected = pd_groups.first(numeric_only=numeric_only, min_count=min_count)

    pd.testing.assert_frame_equal(expected, actual)
654+
655+
@pytest.mark.parametrize(
    ("numeric_only", "min_count"),
    [
        (True, 2),
        (False, -1),
    ],
)
def test_dataframe_groupby_last(
    scalars_df_index, scalars_pandas_df_index, numeric_only, min_count
):
    """Check grouped ``last`` on a DataFrame against pandas.

    Rows are grouped by ``int64_col % 2`` and ``last`` is called with the
    parametrized ``numeric_only`` and ``min_count``; the ``to_pandas()``
    output must equal the pandas result.
    """
    # Same keyword arguments for both libraries.
    last_options = {"numeric_only": numeric_only, "min_count": min_count}

    bf_key = scalars_df_index.int64_col % 2
    actual = scalars_df_index.groupby(bf_key).last(**last_options).to_pandas()

    pd_key = scalars_pandas_df_index.int64_col % 2
    expected = scalars_pandas_df_index.groupby(pd_key).last(**last_options)

    pd.testing.assert_frame_equal(expected, actual)
678+
679+
585680# ==============
586681# Series.groupby
587682# ==============
@@ -770,6 +865,41 @@ def test_series_groupby_quantile(scalars_df_index, scalars_pandas_df_index, q):
770865 )
771866
772867
@pytest.mark.parametrize(
    ("normalize", "ascending", "dropna"),
    [
        (True, True, True),
        (False, False, False),
    ],
)
def test_series_groupby_value_counts(
    scalars_df_index,
    scalars_pandas_df_index,
    normalize,
    ascending,
    dropna,
):
    """Check grouped ``value_counts`` on a single column against pandas.

    ``string_col`` is selected after grouping by ``bool_col`` and counted
    with the parametrized ``normalize``/``ascending``/``dropna`` options.
    The ``to_pandas()`` output is compared with the pandas Series,
    ignoring dtypes.
    """
    if pd.__version__.startswith("1."):
        pytest.skip("pandas 1.x produces different column labels.")

    # One options mapping so both libraries receive identical arguments.
    count_options = {
        "normalize": normalize,
        "ascending": ascending,
        "dropna": dropna,
    }

    bf_column_groups = scalars_df_index.groupby("bool_col")["string_col"]
    actual = bf_column_groups.value_counts(**count_options).to_pandas()

    pd_column_groups = scalars_pandas_df_index.groupby("bool_col")["string_col"]
    expected = pd_column_groups.value_counts(**count_options)

    pd.testing.assert_series_equal(expected, actual, check_dtype=False)
901+
902+
773903@pytest .mark .parametrize (
774904 ("numeric_only" , "min_count" ),
775905 [
@@ -813,56 +943,3 @@ def test_series_groupby_last(
813943 numeric_only = numeric_only , min_count = min_count
814944 )
815945 pd .testing .assert_series_equal (pd_result , bf_result )
816-
817-
@pytest.mark.parametrize(
    ("numeric_only", "min_count"),
    [
        (False, 4),
        (True, 0),
    ],
)
def test_dataframe_groupby_first(
    scalars_df_index, scalars_pandas_df_index, numeric_only, min_count
):
    """Check grouped ``first`` on a DataFrame against pandas.

    Rows are grouped by ``int64_col % 2`` and ``first`` is called with the
    parametrized ``numeric_only`` and ``min_count``; the ``to_pandas()``
    output must equal the pandas result.
    """
    # min_count seems to not work properly on older pandas
    pytest.importorskip("pandas", minversion="2.0.0")

    # bytes, dates not handling min_count properly in pandas
    skipped_columns = ("bytes_col", "date_col")

    bf_frame = scalars_df_index.drop(columns=list(skipped_columns))
    bf_groups = bf_frame.groupby(scalars_df_index.int64_col % 2)
    actual = bf_groups.first(
        numeric_only=numeric_only, min_count=min_count
    ).to_pandas()

    pd_frame = scalars_pandas_df_index.drop(columns=list(skipped_columns))
    pd_groups = pd_frame.groupby(scalars_pandas_df_index.int64_col % 2)
    expected = pd_groups.first(numeric_only=numeric_only, min_count=min_count)

    pd.testing.assert_frame_equal(expected, actual)
845-
846-
@pytest.mark.parametrize(
    ("numeric_only", "min_count"),
    [
        (True, 2),
        (False, -1),
    ],
)
def test_dataframe_groupby_last(
    scalars_df_index, scalars_pandas_df_index, numeric_only, min_count
):
    """Check grouped ``last`` on a DataFrame against pandas.

    Rows are grouped by ``int64_col % 2`` and ``last`` is called with the
    parametrized ``numeric_only`` and ``min_count``; the ``to_pandas()``
    output must equal the pandas result.
    """
    # Same keyword arguments for both libraries.
    last_options = {"numeric_only": numeric_only, "min_count": min_count}

    bf_key = scalars_df_index.int64_col % 2
    actual = scalars_df_index.groupby(bf_key).last(**last_options).to_pandas()

    pd_key = scalars_pandas_df_index.int64_col % 2
    expected = scalars_pandas_df_index.groupby(pd_key).last(**last_options)

    pd.testing.assert_frame_equal(expected, actual)
0 commit comments