You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Tried to group by a column of datetime64[ns] type using modin.pandas.Grouper, but got a KeyError instead (see screenshot). The equivalent code works just fine in vanilla pandas.
# Minimal reproduction: read the same CSV with modin and with plain pandas,
# then group the parsed "start_time" column through a Grouper in each library.
import modin.pandas as mpd
import pandas as pd
import ray
# Initialise Ray with three CPUs before the modin calls below.
ray.init(num_cpus=3)
""" With modin --- this is where the KeyError comes from """
# parse_dates turns "start_time" into a datetime column at load time.
mdf = mpd.read_csv("data/metro-trips-2020-q1.csv", parse_dates=["start_time"])
# "W-MON" is a weekly frequency anchored on Monday. The traceback below points
# at this line as the origin of the KeyError.
mdf.groupby(mpd.Grouper(key="start_time", freq="W-MON"))["trip_id"].sum()
""" With pandas """
df = pd.read_csv("data/metro-trips-2020-q1.csv", parse_dates=["start_time"])
# NOTE(review): this line aggregates with count() while the modin line above
# uses sum(), so the two snippets are not strictly equivalent -- confirm whether
# the difference is intentional.
df.groupby(pd.Grouper(key="start_time", freq="W-MON"))["trip_id"].count()
Traceback
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-13-5655986b8ba0> in <module>
1 """ With modin """
2 mdf = mpd.read_csv("data/metro-trips-2020-q1.csv", parse_dates=["start_time"])
----> 3 mdf.groupby(mpd.Grouper(key="start_time", freq="W-MON"))["trip_id"].sum()
~/miniconda3/envs/DS/lib/python3.8/site-packages/modin/pandas/groupby.py in sum(self, **kwargs)
547
548 def sum(self, **kwargs):
--> 549 return self._wrap_aggregation(
550 type(self._query_compiler).groupby_sum,
551 lambda df, **kwargs: df.sum(**kwargs),
~/miniconda3/envs/DS/lib/python3.8/site-packages/modin/pandas/groupby.py in _wrap_aggregation(self, qc_method, default_func, drop, numeric_only, **kwargs)
889
890 result = type(self._df)(
--> 891 query_compiler=qc_method(
892 groupby_qc,
893 by=self._by,
~/miniconda3/envs/DS/lib/python3.8/site-packages/modin/data_management/functions/groupby_function.py in <lambda>(*args, **kwargs)
56 ), "Map and reduce functions must be either both dict or both callable."
57
---> 58 return lambda *args, **kwargs: cls.caller(
59 *args, map_func=map_func, reduce_func=reduce_func, **kwargs, **call_kwds
60 )
~/miniconda3/envs/DS/lib/python3.8/site-packages/modin/data_management/functions/groupby_function.py in caller(cls, query_compiler, by, axis, groupby_args, map_args, map_func, numeric_only, **kwargs)
159 else map_func
160 )
--> 161 return query_compiler.default_to_pandas(
162 lambda df: default_func(
163 df.groupby(by=by, axis=axis, **groupby_args), **map_args
~/miniconda3/envs/DS/lib/python3.8/site-packages/modin/backends/pandas/query_compiler.py in default_to_pandas(self, pandas_op, *args, **kwargs)
192 }
193
--> 194 result = pandas_op(self.to_pandas(), *args, **kwargs)
195 if isinstance(result, pandas.Series):
196 if result.name is None:
~/miniconda3/envs/DS/lib/python3.8/site-packages/modin/data_management/functions/groupby_function.py in <lambda>(df)
161 return query_compiler.default_to_pandas(
162 lambda df: default_func(
--> 163 df.groupby(by=by, axis=axis, **groupby_args), **map_args
164 )
165 )
~/miniconda3/envs/DS/lib/python3.8/site-packages/pandas/core/frame.py in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)
6715 axis = self._get_axis_number(axis)
6716
-> 6717 return DataFrameGroupBy(
6718 obj=self,
6719 keys=by,
~/miniconda3/envs/DS/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)
558 from pandas.core.groupby.grouper import get_grouper
559
--> 560 grouper, exclusions, obj = get_grouper(
561 obj,
562 keys,
~/miniconda3/envs/DS/lib/python3.8/site-packages/pandas/core/groupby/grouper.py in get_grouper(obj, key, axis, level, sort, observed, mutated, validate, dropna)
717 # a passed-in Grouper, directly convert
718 if isinstance(key, Grouper):
--> 719 binner, grouper, obj = key._get_grouper(obj, validate=False)
720 if key.key is None:
721 return grouper, set(), obj
~/miniconda3/envs/DS/lib/python3.8/site-packages/pandas/core/resample.py in _get_grouper(self, obj, validate)
1482 def _get_grouper(self, obj, validate: bool = True):
1483 # create the resampler and return our binner
-> 1484 r = self._get_resampler(obj)
1485 r._set_binner()
1486 return r.binner, r.grouper, r.obj
~/miniconda3/envs/DS/lib/python3.8/site-packages/pandas/core/resample.py in _get_resampler(self, obj, kind)
1464
1465 """
-> 1466 self._set_grouper(obj)
1467
1468 ax = self.ax
~/miniconda3/envs/DS/lib/python3.8/site-packages/pandas/core/groupby/grouper.py in _set_grouper(self, obj, sort)
363 else:
364 if key not in obj._info_axis:
--> 365 raise KeyError(f"The grouper name {key} is not found")
366 ax = Index(obj[key], name=key)
367
KeyError: 'The grouper name start_time is not found'
The text was updated successfully, but these errors were encountered:
System information
Describe the problem
Tried to group by a column of `datetime64[ns]` type using `modin.pandas.Grouper`, but got a KeyError instead (see screenshot). The equivalent code works just fine in vanilla `pandas`.
The data, a small CSV file, comes from here.
Source code / logs
Traceback
The text was updated successfully, but these errors were encountered: