@@ -305,15 +305,17 @@ def aggregate(
305305 aggregations : typing .Sequence [typing .Tuple [ex .Aggregation , str ]],
306306 by_column_ids : typing .Sequence [str ] = (),
307307 dropna : bool = True ,
308- ) -> UnorderedIR :
308+ ) -> OrderedIR :
309309 """
310310 Apply aggregations to the expression.
311311 Arguments:
312312 aggregations: input_column_id, operation, output_column_id tuples
313- by_column_id: column id of the aggregation key, this is preserved through the transform
313+ by_column_id: column id of the aggregation key, this is preserved through
314+ the transform
314315 dropna: whether null keys should be dropped
315316 Returns:
316- UnorderedIR
317+ OrderedIR: the grouping key is a unique-valued column and has ordering
318+ information.
317319 """
318320 table = self ._to_ibis_expr ()
319321 bindings = {col : table [col ] for col in self .column_ids }
@@ -323,18 +325,32 @@ def aggregate(
323325 }
324326 if by_column_ids :
325327 result = table .group_by (by_column_ids ).aggregate (** stats )
328+ # Must have deterministic ordering, so order by the unique "by" column
329+ ordering = ExpressionOrdering (
330+ tuple ([ascending_over (column_id ) for column_id in by_column_ids ]),
331+ total_ordering_columns = frozenset (by_column_ids ),
332+ )
326333 columns = tuple (result [key ] for key in result .columns )
327- expr = UnorderedIR (result , columns = columns )
334+ expr = OrderedIR (result , columns = columns , ordering = ordering )
328335 if dropna :
329336 for column_id in by_column_ids :
330337 expr = expr ._filter (expr ._get_ibis_column (column_id ).notnull ())
331338 return expr
332339 else :
333340 aggregates = {** stats , ORDER_ID_COLUMN : ibis_types .literal (0 )}
334341 result = table .aggregate (** aggregates )
342+ # Ordering is irrelevant for single-row output, but set ordering id regardless
343+ # as other ops(join etc.) expect it.
344+ # TODO: Maybe can make completely empty
345+ ordering = ExpressionOrdering (
346+ ordering_value_columns = tuple ([]),
347+ total_ordering_columns = frozenset ([]),
348+ )
335349 return UnorderedIR (
336350 result ,
337351 columns = [result [col_id ] for col_id in [* stats .keys ()]],
352+ hidden_ordering_columns = [result [ORDER_ID_COLUMN ]],
353+ ordering = ordering ,
338354 )
339355
340356 def _uniform_sampling (self , fraction : float ) -> UnorderedIR :
@@ -523,7 +539,8 @@ def from_pandas(
523539 """
524540 Builds an in-memory only (SQL only) expr from a pandas dataframe.
525541
526- Assumed that the dataframe has unique string column names and bigframes-suppported dtypes.
542+ Assumed that the dataframe has unique string column names and bigframes-suppported
543+ dtypes.
527544 """
528545
529546 # ibis memtable cannot handle NA, must convert to None
@@ -560,7 +577,8 @@ def _hidden_column_ids(self) -> typing.Sequence[str]:
560577
561578 @property
562579 def _ibis_order (self ) -> Sequence [ibis_types .Value ]:
563- """Returns a sequence of ibis values which can be directly used to order a table expression. Has direction modifiers applied."""
580+ """Returns a sequence of ibis values which can be directly used to order a
581+ table expression. Has direction modifiers applied."""
564582 return _convert_ordering_to_table_values (
565583 {** self ._column_names , ** self ._hidden_ordering_column_names },
566584 self ._ordering .all_ordering_columns ,
@@ -602,7 +620,8 @@ def aggregate(
602620 Apply aggregations to the expression.
603621 Arguments:
604622 aggregations: input_column_id, operation, output_column_id tuples
605- by_column_id: column id of the aggregation key, this is preserved through the transform
623+ by_column_id: column id of the aggregation key, this is preserved through
624+ the transform
606625 dropna: whether null keys should be dropped
607626 Returns:
608627 OrderedIR
0 commit comments