Skip to content

Commit

Permalink
Merge pull request #188 from automl/feature/mo_hpi
Browse files Browse the repository at this point in the history
[WIP] Add LPI, fANOVA and ablation paths for two objectives
  • Loading branch information
sarah-segel authored Aug 30, 2024
2 parents e1f8f04 + 5923991 commit 75e5f26
Show file tree
Hide file tree
Showing 21 changed files with 1,325 additions and 61 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Version 1.3.2

## Features
- Allow importance and ablation path analysis for multi-objective runs.

# Version 1.3.1

## Quality of Life
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# are usually completed in github actions.

SHELL := /bin/bash
VERSION := 1.3.1
VERSION := 1.3.2

NAME := DeepCAVE
PACKAGE_NAME := deepcave
Expand Down
2 changes: 1 addition & 1 deletion deepcave/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"Source Code": "https://github.com/automl/deepcave",
}
copyright = f"Copyright {datetime.date.today().strftime('%Y')}, {author}"
version = "1.3.1"
version = "1.3.2"

_exec_file = sys.argv[0]
_exec_files = ["server.py", "worker.py", "sphinx-build"]
Expand Down
2 changes: 1 addition & 1 deletion deepcave/evaluators/ablation.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def _ablation(
max_hp_difference = -np.inf

for hp in hp_it:
if hp in hp in incumbent_config.keys() and hp in self.default_config.keys():
if hp in incumbent_config.keys() and hp in self.default_config.keys():
config_copy = copy.copy(self.default_config)
config_copy[hp] = incumbent_config[hp]

Expand Down
350 changes: 350 additions & 0 deletions deepcave/evaluators/mo_ablation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,350 @@
# Copyright 2021-2024 The DeepCAVE Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# noqa: D400
"""
# Ablation Paths
This module evaluates the ablation paths.
Ablation Paths is a method to analyze the importance of hyperparameters in a configuration space.
Starting from a default configuration, the default configuration is iteratively changed to the
incumbent configuration by changing one hyperparameter at a time, choosing the
hyperparameter that leads to the largest improvement in the objective function at each step.
## Classes:
- MOAblation: Provide an evaluator of the ablation paths for the multi-objective case.
"""

from typing import Any, List, Optional, Tuple, Union

import copy

import numpy as np
import pandas as pd

from deepcave.evaluators.ablation import Ablation
from deepcave.evaluators.epm.random_forest_surrogate import RandomForestSurrogate
from deepcave.runs import AbstractRun
from deepcave.runs.objective import Objective
from deepcave.utils.multi_objective_importance import get_weightings


class MOAblation(Ablation):
    """
    Provide an evaluator of the ablation paths.

    Override: Multi-Objective case

    Properties
    ----------
    run : AbstractRun
        The run to analyze.
    cs : ConfigurationSpace
        The configuration space of the run.
    hp_names : List[str]
        A list of the hyperparameter names.
    performances : Optional[Dict[Any, Any]]
        A dictionary containing the performances for each HP.
    improvements : Optional[Dict[Any, Any]]
        A dictionary containing the improvements over the respective previous step for each HP.
    objectives : Optional[Union[Objective, List[Objective]]]
        The objective(s) of the run.
    default_config : Configurations
        The default configuration of this configuration space.
        Gets changed step by step towards the incumbent configuration.
    models : List
        One surrogate model per objective, fitted by `calculate`.
    df_importances : pd.DataFrame
        The ablation results of all weightings, filled by `calculate`.
    """

    def __init__(self, run: AbstractRun):
        super().__init__(run)
        self.models: List = []
        self.df_importances = pd.DataFrame([])

    def get_importances(self) -> str:
        """
        Return the importance scores.

        Returns
        -------
        str
            The importance data frame serialized as JSON (``DataFrame.to_json``). After a
            call to `calculate` it holds the columns hp_name, importance, variance,
            new_performance and weight.

        Raises
        ------
        RuntimeError
            If the importance scores are not calculated.
        """
        # The frame is initialised empty (never None), so "not calculated yet" has to be
        # detected through emptiness as well.
        if self.df_importances is None or self.df_importances.empty:
            raise RuntimeError("Importance scores must be calculated first.")

        return self.df_importances.to_json()

    def predict(self, cfg: list[Any], weighting: np.ndarray) -> Tuple[float, float]:
        """
        Predict the performance of the input configuration.

        The model results are weighted by the input weightings and summed.

        Parameters
        ----------
        cfg : list[Any]
            The encoded configuration.
        weighting : np.ndarray
            Weightings, one per model in ``self.models``.

        Returns
        -------
        mean : float
            The weighted sum of the first return values of the models' predictions.
        var : float
            The weighted sum of the second return values of the models' predictions.
        """
        mean, var = 0.0, 0.0
        # A single-row batch is passed to every model; index 0 picks that row's prediction.
        batch = np.array([cfg])
        for model, w in zip(self.models, weighting):
            pred, var_ = model.predict(batch)
            mean += w * pred[0]
            # NOTE(review): the uncertainty is weighted linearly with w (not w ** 2) —
            # confirm this is the intended aggregation.
            var += w * var_[0]
        return mean, var

    def calculate(
        self,
        objectives: Optional[Union[Objective, List[Objective]]],  # noqa
        budget: Optional[Union[int, float]] = None,  # noqa
        n_trees: int = 50,  # noqa
        seed: int = 0,  # noqa
    ) -> None:
        """
        Calculate the MO ablation path performances and improvements.

        The results are stored in ``self.df_importances``. Every call starts from a clean
        state, i.e. models and results of a previous call are discarded.

        Parameters
        ----------
        objectives : Optional[Union[Objective, List[Objective]]]
            The objective(s) to be considered. Must be a list of objectives.
        budget : Optional[Union[int, float]]
            The budget to be considered. If None, all budgets of the run are considered.
            Default is None.
        n_trees : int
            The number of trees for the surrogate model.
            Default is 50.
        seed : int
            The seed for the surrogate model.
            Default is 0.
        """
        assert isinstance(objectives, list)
        for objective in objectives:
            assert isinstance(objective, Objective)

        # Reset the state of previous calls. Otherwise `predict` would pair the weightings
        # with stale models of an earlier call (e.g. for another budget) and the new
        # results would be appended to the old ones.
        self.models = []
        self.df_importances = pd.DataFrame([])

        df = self.run.get_encoded_data(objectives, budget, specific=True, include_config_ids=True)

        # Obtain all configurations with their costs
        df = df.dropna(subset=[obj.name for obj in objectives])
        X = df[list(self.run.configspace.keys())].to_numpy()

        # Normalize the objectives
        objectives_normed = []
        for obj in objectives:
            normed = obj.name + "_normed"
            obj_min = df[obj.name].min()
            obj_range = df[obj.name].max() - obj_min
            if obj_range == 0:
                # A constant objective would lead to a division by zero (all NaN)
                df[normed] = 0.0
            else:
                df[normed] = (df[obj.name] - obj_min) / obj_range

            # Flip objectives that are maximized, so that smaller is always better
            if obj.optimize == "upper":
                df[normed] = 1 - df[normed]
            objectives_normed.append(normed)

            # Train one model per objective
            Y = df[normed].to_numpy()
            model = RandomForestSurrogate(self.cs, seed=seed, n_trees=n_trees)
            model._fit(X, Y)
            self.models.append(model)

        weightings = get_weightings(objectives_normed, df)

        # Calculate importance for each weighting generated from the pareto efficient points
        results = []
        for w in weightings:
            df_res = self.calculate_ablation_path(df, objectives_normed, w, budget)
            if df_res is None:
                # The path was aborted: report zeros for the default and every hyperparameter
                columns = ["hp_name", "importance", "variance", "new_performance", "weight"]
                self.df_importances = pd.DataFrame(
                    0, index=np.arange(len(self.hp_names) + 1), columns=columns
                )
                self.df_importances["hp_name"] = ["Default"] + self.hp_names
                return
            df_res["weight"] = w[0]
            results.append(df_res)

        if results:
            self.df_importances = pd.concat(results, ignore_index=True)

    def calculate_ablation_path(
        self,
        df: pd.DataFrame,
        objectives_normed: List[str],
        weighting: np.ndarray,
        budget: Optional[Union[int, float]],
    ) -> Optional[pd.DataFrame]:
        """
        Calculate the ablation path performances.

        Parameters
        ----------
        df : pd.DataFrame
            Dataframe with encoded data.
        objectives_normed : List[str]
            The normed objective names to be considered.
        weighting : np.ndarray
            The weighting of the objective values.
        budget : Optional[Union[int, float]]
            The budget to be considered. Only used for log messages here.

        Returns
        -------
        df : Optional[pd.DataFrame]
            Dataframe with results of the ablation calculation, one row for the default
            configuration followed by one row per ablation step. None, if the predicted
            incumbent cost is worse than the predicted default cost.
        """
        # Get the incumbent configuration, i.e. the one with the lowest weighted cost
        weighted_cost = sum(df[obj] * w for obj, w in zip(objectives_normed, weighting))
        incumbent_cfg_id = np.argmin(weighted_cost)
        incumbent_config = self.run.get_config(df.iloc[incumbent_cfg_id]["config_id"])

        # Get the default configuration
        self.default_config = self.cs.get_default_configuration()
        default_encode = self.run.encode_config(self.default_config, specific=True)

        # Obtain the predicted cost of the default and incumbent configuration
        def_cost, def_std = self.predict(default_encode, weighting)
        inc_cost, _ = self.predict(
            self.run.encode_config(incumbent_config, specific=True), weighting
        )

        if inc_cost > def_cost:
            self.logger.warning(
                "The predicted incumbent objective is worse than the predicted default "
                f"objective for budget: {budget}. Aborting ablation path calculation."
            )
            return None

        # Copy the hps names as to not remove objects from the original list
        hp_it = self.hp_names.copy()

        # Collect one single-row frame per step and concatenate them once at the end
        frames = [
            pd.DataFrame(
                {
                    "hp_name": "Default",
                    "importance": 0,
                    "variance": def_std,
                    "new_performance": def_cost,
                },
                index=[0],
            )
        ]

        for i in range(len(hp_it)):
            # Get the results of the current ablation iteration
            continue_ablation, max_hp, max_hp_cost, max_hp_std = self.ablation(
                budget, incumbent_config, def_cost, hp_it, weighting
            )

            if not continue_ablation:
                break

            # The improvement is measured against the cost of the previous step
            diff = def_cost - max_hp_cost
            def_cost = max_hp_cost

            frames.append(
                pd.DataFrame(
                    {
                        "hp_name": max_hp,
                        "importance": diff,
                        "variance": max_hp_std,
                        "new_performance": max_hp_cost,
                    },
                    index=[i + 1],
                )
            )

            # Remove the current best hp for keeping the order right
            hp_it.remove(max_hp)

        return pd.concat(frames).reset_index(drop=True)

    def ablation(
        self,
        budget: Optional[Union[int, float]],
        incumbent_config: Any,
        def_cost: Any,
        hp_it: List[str],
        weighting: np.ndarray[Any, Any],
    ) -> Tuple[Any, Any, Any, Any]:
        """
        Calculate the ablation importance for each hyperparameter.

        Every remaining hyperparameter is set to its incumbent value in a copy of the
        current default configuration. The hyperparameter with the largest predicted cost
        reduction is then switched in ``self.default_config`` itself.

        Parameters
        ----------
        budget: Optional[Union[int, float]]
            The budget of the run. Only used for log messages here.
        incumbent_config: Any
            The incumbent configuration.
        def_cost: Any
            The default cost.
        hp_it: List[str]
            A list of the HPs that still have to be looked at.
        weighting : np.ndarray[Any, Any]
            The weighting of the objective values.

        Returns
        -------
        Tuple[Any, Any, Any, Any]
            continue_ablation, max_hp, max_hp_performance, max_hp_std
            If no remaining hyperparameter is present in both configurations, this is
            (False, None, None, None).
        """
        max_hp = ""
        max_hp_difference = -np.inf

        for hp in hp_it:
            # Only hyperparameters present in both configurations can be switched
            if hp not in incumbent_config.keys() or hp not in self.default_config.keys():
                continue

            config_copy = copy.copy(self.default_config)
            config_copy[hp] = incumbent_config[hp]

            new_cost, _ = self.predict(
                self.run.encode_config(config_copy, specific=True), weighting
            )
            difference = def_cost - new_cost

            # Check for the maximum difference hyperparameter in this round
            if difference > max_hp_difference:
                max_hp = hp
                max_hp_difference = difference

        if max_hp != "":
            # For the maximum impact hyperparameter, switch the default with the incumbent value
            self.default_config[max_hp] = incumbent_config[max_hp]
            max_hp_cost, max_hp_std = self.predict(
                self.run.encode_config(self.default_config, specific=True), weighting
            )
            return True, max_hp, max_hp_cost, max_hp_std

        hp_count = len(list(self.cs.keys()))
        self.logger.info(
            f"End ablation at step {hp_count - len(hp_it) + 1}/{hp_count} "
            f"for budget {budget} (remaining hyperparameters not activate in incumbent or "
            "default configuration)."
        )
        return False, None, None, None
Loading

0 comments on commit 75e5f26

Please sign in to comment.