Skip to content

Commit

Permalink
Revert "PERF: Remove columnarize [upstream] (#23)" (#44)
Browse files Browse the repository at this point in the history
This reverts commit abf176c.

The reverted change (removing columnarize) is no longer useful for performance.
  • Loading branch information
mvashishtha authored May 17, 2023
1 parent dfa2746 commit 941a49b
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 57 deletions.
21 changes: 21 additions & 0 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,27 @@ def transpose(self, *args, **kwargs): # noqa: PR02
self, *args, **kwargs
)

def columnarize(self):
    """
    Return this QueryCompiler, transposed when it is laid out as a row.

    Intended for QueryCompilers that back a Series object, i.e. those for
    which ``self.is_series_like()`` is True.

    Returns
    -------
    BaseQueryCompiler
        ``self`` when no transposition is needed, otherwise the result of
        ``self.transpose()``.
    """
    # A "column" shape hint short-circuits everything: the axes are not
    # inspected at all in that case.
    if self._shape_hint != "column":
        # Anything other than exactly one column label gets transposed.
        if len(self.columns) != 1:
            return self.transpose()
        # Exactly one column and exactly one row: transpose only when that
        # single row carries the unnamed-series label.
        if len(self.index) == 1 and self.index[0] == MODIN_UNNAMED_SERIES_LABEL:
            return self.transpose()
    return self

def is_series_like(self):
"""
Check whether this QueryCompiler can represent ``modin.pandas.Series`` object.
Expand Down
85 changes: 43 additions & 42 deletions modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,11 +769,10 @@ def all(
"level > 0 or level < -1 only valid with MultiIndex"
)
return self.groupby(level=level, axis=axis, sort=False).all(**kwargs)
compiler = self._query_compiler.all(
axis=axis, bool_only=bool_only, skipna=skipna, level=level, **kwargs
)
return self._reduce_dimension(
compiler.transpose() if axis == 0 else compiler
self._query_compiler.all(
axis=axis, bool_only=bool_only, skipna=skipna, level=level, **kwargs
)
)
else:
if bool_only:
Expand All @@ -791,7 +790,7 @@ def all(
skipna=skipna,
level=level,
**kwargs,
).transpose()
)
)
if isinstance(result, BasePandasDataset):
return result.all(
Expand Down Expand Up @@ -833,11 +832,10 @@ def any(
"level > 0 or level < -1 only valid with MultiIndex"
)
return self.groupby(level=level, axis=axis, sort=False).any(**kwargs)
compiler = self._query_compiler.any(
axis=axis, bool_only=bool_only, skipna=skipna, level=level, **kwargs
)
return self._reduce_dimension(
compiler.transpose() if axis == 0 else compiler
self._query_compiler.any(
axis=axis, bool_only=bool_only, skipna=skipna, level=level, **kwargs
)
)
else:
if bool_only:
Expand All @@ -853,7 +851,7 @@ def any(
skipna=skipna,
level=level,
**kwargs,
).transpose()
)
)
if isinstance(result, BasePandasDataset):
return result.any(
Expand Down Expand Up @@ -1178,10 +1176,11 @@ def count(self, axis=0, level=None, numeric_only=False): # noqa: PR01, RT01, D2
if not frame._query_compiler.has_multiindex(axis=axis):
raise TypeError("Can only count levels on hierarchical columns.")
return frame.groupby(level=level, axis=axis, sort=True).count()
compiler = frame._query_compiler.count(
axis=axis, level=level, numeric_only=numeric_only
return frame._reduce_dimension(
frame._query_compiler.count(
axis=axis, level=level, numeric_only=numeric_only
)
)
return frame._reduce_dimension(compiler.transpose() if axis == 0 else compiler)

def cummax(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200
"""
Expand Down Expand Up @@ -1831,14 +1830,16 @@ def kurt(
if numeric_only is None or numeric_only
else self
)
compiler = data._query_compiler.kurt(
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs,

return self._reduce_dimension(
data._query_compiler.kurt(
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs,
)
)
return self._reduce_dimension(compiler.transpose() if axis == 0 else compiler)

kurtosis = kurt

Expand Down Expand Up @@ -1936,14 +1937,15 @@ def max(
validate_bool_kwarg(skipna, "skipna", none_allowed=False)
axis = self._get_axis_number(axis)
data = self._validate_dtypes_min_max(axis, numeric_only)
compiler = data._query_compiler.max(
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs,
return data._reduce_dimension(
data._query_compiler.max(
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs,
)
)
return data._reduce_dimension(compiler.transpose() if axis == 0 else compiler)

def _stat_operation(
self,
Expand Down Expand Up @@ -2009,14 +2011,14 @@ def _stat_operation(
numeric_only=numeric_only,
**kwargs,
)
return self._reduce_dimension(result_qc.transpose() if axis == 0 else result_qc)
return self._reduce_dimension(result_qc)

def memory_usage(self, index=True, deep=False): # noqa: PR01, RT01, D200
"""
Return the memory usage of the `BasePandasDataset`.
"""
return self._reduce_dimension(
self._query_compiler.memory_usage(index=index, deep=deep).transpose()
self._query_compiler.memory_usage(index=index, deep=deep)
)

def min(
Expand All @@ -2033,14 +2035,15 @@ def min(
validate_bool_kwarg(skipna, "skipna", none_allowed=False)
axis = self._get_axis_number(axis)
data = self._validate_dtypes_min_max(axis, numeric_only)
compiler = data._query_compiler.min(
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs,
return data._reduce_dimension(
data._query_compiler.min(
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
**kwargs,
)
)
return data._reduce_dimension(compiler.transpose() if axis == 0 else compiler)

def mod(
self, other, axis="columns", level=None, fill_value=None
Expand All @@ -2057,11 +2060,10 @@ def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D2
Get the mode(s) of each element along the selected axis.
"""
axis = self._get_axis_number(axis)
compiler = self._query_compiler.mode(
axis=axis, numeric_only=numeric_only, dropna=dropna
)
return self.__constructor__(
query_compiler=compiler.transpose() if axis == 0 else compiler
query_compiler=self._query_compiler.mode(
axis=axis, numeric_only=numeric_only, dropna=dropna
)
)

def mul(
Expand Down Expand Up @@ -2095,9 +2097,8 @@ def nunique(self, axis=0, dropna=True): # noqa: PR01, RT01, D200
Return number of unique elements in the `BasePandasDataset`.
"""
axis = self._get_axis_number(axis)
compiler = self._query_compiler.nunique(axis=axis, dropna=dropna)
return self._reduce_dimension(
compiler.transpose() if self._get_axis_number(axis) == 0 else compiler
self._query_compiler.nunique(axis=axis, dropna=dropna)
)

def pct_change(
Expand Down
27 changes: 13 additions & 14 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2055,17 +2055,19 @@ def sum(
return self.groupby(level=level, axis=axis, sort=False).sum(
numeric_only=numeric_only, min_count=min_count
)
sum_result = (
data._query_compiler.sum_min_count(
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
min_count=min_count,
**kwargs,
if min_count > 1:
return data._reduce_dimension(
data._query_compiler.sum_min_count(
axis=axis,
skipna=skipna,
level=level,
numeric_only=numeric_only,
min_count=min_count,
**kwargs,
)
)
if min_count > 1
else data._query_compiler.sum(
return data._reduce_dimension(
data._query_compiler.sum(
axis=axis,
skipna=skipna,
level=level,
Expand All @@ -2074,9 +2076,6 @@ def sum(
**kwargs,
)
)
if axis == 0:
sum_result = sum_result.transpose()
return data._reduce_dimension(sum_result)

def to_feather(self, path, **kwargs): # pragma: no cover # noqa: PR01, RT01, D200
"""
Expand Down Expand Up @@ -3014,7 +3013,7 @@ def _set_axis_name(self, name, axis=0, inplace=False):
if not is_list_like(name):
name = [name]
renamed = renamed.reset_index()
mapper = {n1:n2 for n1, n2 in zip(renamed.columns, list(name))}
mapper = {n1: n2 for n1, n2 in zip(renamed.columns, list(name))}
renamed = renamed.rename(columns=mapper).set_index(list(name))
else:
raise NotImplementedError("'axis=1' is not supported yet")
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def __init__(
)
)
)._query_compiler
self._query_compiler = query_compiler
self._query_compiler = query_compiler.columnarize()
if name is not None:
self.name = name

Expand Down

0 comments on commit 941a49b

Please sign in to comment.