Skip to content

Commit

Permalink
fix(lcbench-tabular): Correctly preprocess data
Browse files Browse the repository at this point in the history
  • Loading branch information
eddiebergman committed Oct 31, 2023
1 parent b59cd95 commit 5784214
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 15 deletions.
42 changes: 29 additions & 13 deletions src/mfpbench/lcbench_tabular/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@
from mfpbench.tabular import TabularBenchmark


def _get_raw_lcbench_space(name: str, seed: int | None = None) -> ConfigurationSpace:
def _get_raw_lcbench_space(
name: str,
seed: int | None = None,
*,
with_constants: bool = False,
) -> ConfigurationSpace:
"""Get the raw configuration space for lcbench tabular.
!!! note
Expand All @@ -29,6 +34,7 @@ def _get_raw_lcbench_space(name: str, seed: int | None = None) -> ConfigurationS
Args:
name: The name for the space.
seed: The seed to use.
with_constants: Whether to also add the constant hyperparameters to the space.
Returns:
The configuration space.
Expand Down Expand Up @@ -86,17 +92,23 @@ def _get_raw_lcbench_space(name: str, seed: int | None = None) -> ConfigurationS
log=False,
default_value=0.2, # reasonable default
),
Constant("cosine_annealing_T_max", 50),
Constant("cosine_annealing_eta_min", 0.0),
Constant("normalization_strategy", "standardize"),
Constant("optimizer", "sgd"),
Constant("learning_rate_scheduler", "cosine_annealing"),
Constant("network", "shapedmlpnet"),
Constant("activation", "relu"),
Constant("mlp_shape", "funnel"),
Constant("imputation_strategy", "mean"),
],
)

if with_constants:
cs.add_hyperparameters(
[
Constant("cosine_annealing_T_max", 50),
Constant("cosine_annealing_eta_min", 0.0),
Constant("normalization_strategy", "standardize"),
Constant("optimizer", "sgd"),
Constant("learning_rate_scheduler", "cosine_annealing"),
Constant("network", "shapedmlpnet"),
Constant("activation", "relu"),
Constant("mlp_shape", "funnel"),
Constant("imputation_strategy", "mean"),
],
)
return cs


Expand Down Expand Up @@ -221,7 +233,7 @@ def __init__(
task_id: str,
datadir: str | Path | None = None,
*,
remove_constants: bool = False,
remove_constants: bool = True,
seed: int | None = None,
prior: str | Path | LCBenchTabularConfig | Mapping[str, Any] | None = None,
perturb_prior: float | None = None,
Expand Down Expand Up @@ -274,12 +286,16 @@ def __init__(
table = table.drop(index=drop_epoch, level="epoch")

benchmark_task_name = f"lcbench_tabular-{task_id}"
space = _get_raw_lcbench_space(name=f"lcbench_tabular-{task_id}", seed=seed)
space = _get_raw_lcbench_space(
name=f"lcbench_tabular-{task_id}",
seed=seed,
with_constants=not remove_constants,
)

super().__init__(
table=table, # type: ignore
name=benchmark_task_name,
config_name="id",
config_name="config_id",
fidelity_name=cls.fidelity_name,
result_keys=LCBenchTabularResult.names(),
config_keys=LCBenchTabularConfig.names(),
Expand Down
4 changes: 2 additions & 2 deletions src/mfpbench/setup_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def _process(cls, path: Path) -> None:
)
# These are single-valued, but assigning them expands each one into a
# full column of the dataframe
df = df.assign(**{"id": config_id, **config})
df = df.assign(**{"config_id": config_id, **config})

config_frames_for_dataset.append(df)

Expand All @@ -249,7 +249,7 @@ def _process(cls, path: Path) -> None:
df_for_dataset = (
pd.concat(config_frames_for_dataset, ignore_index=True)
.convert_dtypes()
.set_index(["id", "epoch"])
.set_index(["config_id", "epoch"])
.sort_index()
)
table_path = path / f"{dataset_name}.parquet"
Expand Down

0 comments on commit 5784214

Please sign in to comment.