Skip to content

Commit

Permalink
FIX-modin-project#5091: Handle pd.Grouper objects correctly
Browse files: browse the repository at this point in the history
Signed-off-by: Karthik Velayutham <[email protected]>
  • Loading branch information
Karthik Velayutham committed May 19, 2023
1 parent 89e67ab commit f39e496
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 13 deletions.
7 changes: 6 additions & 1 deletion modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3114,7 +3114,12 @@ def _groupby_internal_columns(self, by, drop):
else:
if not isinstance(by, list):
by = [by] if by is not None else []
internal_by = [o for o in by if hashable(o) and o in self.columns]
internal_by = []
for o in by:
if isinstance(o, pandas.Grouper):
internal_by.append(o.key)
elif hashable(o) and o in self.columns:
internal_by.append(o)
internal_qc = (
[self.getitem_column_array(internal_by)] if len(internal_by) else []
)
Expand Down
27 changes: 16 additions & 11 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,29 +482,34 @@ def groupby(
(
(hashable(o) and (o in self))
or isinstance(o, Series)
or (isinstance(o, pandas.Grouper) and o.key in self)
or (is_list_like(o) and len(o) == len(self.axes[axis]))
)
for o in by
):
# We want to split the 'by's into those that belong to self (internal_by)
# and those that don't (external_by)
internal_by, external_by = [], []
has_external = False
processed_by = []

for current_by in by:
if hashable(current_by):
internal_by.append(current_by)
if isinstance(current_by, pandas.Grouper):
processed_by.append(current_by)
has_external = True
elif hashable(current_by):
processed_by.append(current_by)
elif isinstance(current_by, Series):
if current_by._parent is self:
internal_by.append(current_by.name)
processed_by.append(current_by.name)
else:
external_by.append(current_by._query_compiler)
processed_by.append(current_by._query_compiler)
has_external = True
else:
external_by.append(current_by)
has_external = True
processed_by.append(current_by)

by = internal_by + external_by
by = processed_by

if len(external_by) == 0:
by = self[internal_by]._query_compiler
if not has_external:
by = self[processed_by]._query_compiler

drop = True
else:
Expand Down
8 changes: 7 additions & 1 deletion modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,13 @@ def _internal_by(self):
internal_by = tuple()
if self._drop:
if is_list_like(self._by):
internal_by = tuple(by for by in self._by if isinstance(by, str))
internal_by_list = []
for by in self._by:
if isinstance(by, str):
internal_by_list.append(by)
elif isinstance(by, pandas.Grouper):
internal_by_list.append(by.key)
internal_by = tuple(internal_by_list)
else:
ErrorMessage.catch_bugs_and_request_email(
failure_condition=not isinstance(self._by, BaseQueryCompiler),
Expand Down

0 comments on commit f39e496

Please sign in to comment.