Batch sampling improvement #1154

Open · wants to merge 17 commits into base: development
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,8 @@
# 2.3.0

## Features
- Improved batch sampling: Fantasize points in batch/parallel mode (#1154).

## Documentation
- Update windows install guide (#952)
- Correct intensifier for Algorithm Configuration Facade (#1162, #1165)
112 changes: 112 additions & 0 deletions examples/1_basics/7_0_parallelization_fantasize.py
@@ -0,0 +1,112 @@
"""Example of using SMAC with parallelization and fantasization vs. no estimation for pending evaluations.

This example will take some time because the target function is artificially slowed down to demonstrate the effect of
fantasization. The example will plot the incumbent found by SMAC with and without fantasization.
"""
from __future__ import annotations

import numpy as np
from ConfigSpace import Configuration, ConfigurationSpace, Float

from matplotlib import pyplot as plt

from smac import BlackBoxFacade, Scenario
from smac.facade import AbstractFacade

import time

def plot_trajectory(facades: list[AbstractFacade], names: list[str]) -> None:
# Plot incumbent
cmap = plt.get_cmap("tab10")

fig = plt.figure()
axes = fig.subplots(1, 2)

for ax_i, x_axis in zip(axes, ["walltime", "trial"]):
for i, facade in enumerate(facades):
X, Y = [], []
for item in facade.intensifier.trajectory:
# Single-objective optimization
assert len(item.config_ids) == 1
assert len(item.costs) == 1

y = item.costs[0]
x = getattr(item, x_axis)

X.append(x)
Y.append(y)

ax_i.plot(X, Y, label=names[i], color=cmap(i))
ax_i.scatter(X, Y, marker="x", color=cmap(i))
ax_i.set_xlabel(x_axis)
ax_i.set_ylabel(facades[0].scenario.objectives)
ax_i.set_yscale("log")
ax_i.legend()

plt.show()

class Branin:
@property
def configspace(self) -> ConfigurationSpace:
# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace(seed=0)

# First we create our hyperparameters
x1 = Float("x1", (-5, 10), default=0)
x2 = Float("x2", (0, 15), default=0)

# Add hyperparameters and conditions to our configspace
cs.add([x1, x2])

return cs

    def train(self, config: Configuration, seed: int) -> float:
        # Artificially slow down the target function so that evaluations overlap in parallel mode,
        # which is what makes estimating pending evaluations matter.
        time.sleep(10)

        x1 = config["x1"]
x2 = config["x2"]
a = 1.0
b = 5.1 / (4.0 * np.pi**2)
c = 5.0 / np.pi
r = 6.0
s = 10.0
t = 1.0 / (8.0 * np.pi)

cost = a * (x2 - b * x1**2 + c * x1 - r) ** 2 + s * (1 - t) * np.cos(x1) + s
regret = cost - 0.397887

return regret

if __name__ == "__main__":
seed = 345455
scenario = Scenario(n_trials=100, configspace=Branin().configspace, n_workers=4, seed=seed)
facade = BlackBoxFacade

smac_noestimation = facade(
scenario=scenario,
target_function=Branin().train,
overwrite=True,
)
smac_fantasize = facade(
scenario=scenario,
target_function=Branin().train,
config_selector=facade.get_config_selector(
scenario=scenario,
batch_sampling_estimation_strategy="kriging_believer"
),
overwrite=True,
logging_level=0
)

incumbent_noestimation = smac_noestimation.optimize()
incumbent_fantasize = smac_fantasize.optimize()

plot_trajectory(facades=[
smac_noestimation,
smac_fantasize,
], names=["No Estimation", "Fantasize"])

del smac_noestimation
del smac_fantasize
42 changes: 40 additions & 2 deletions smac/facade/abstract_facade.py
@@ -420,9 +420,47 @@ def get_config_selector(
*,
retrain_after: int = 8,
retries: int = 16,
min_trials: int = 1,
batch_sampling_estimation_strategy: str = "no_estimate",
) -> ConfigSelector:
"""Returns the default configuration selector."""
return ConfigSelector(scenario, retrain_after=retrain_after, retries=retries)
"""Returns the default configuration selector.

Parameters
----------
retrain_after : int, defaults to 8
How many configurations should be returned before the surrogate model is retrained.
retries : int, defaults to 16
How often to retry receiving a new configuration before giving up.
min_trials: int, defaults to 1
How many samples are required to train the surrogate model. If budgets are involved,
the highest budgets are checked first. For example, if min_trials is three, but we find only
two trials in the runhistory for the highest budget, we will use trials of a lower budget
instead.
        batch_sampling_estimation_strategy : str, defaults to "no_estimate"

            Warning: This is intended for the black-box optimization setting with a Gaussian process
            surrogate and only works sensibly without multi-fidelity.

            Strategy for estimating the costs of still-running configurations in the parallel (batch
            sampling) setting, where the ConfigSelector may need to suggest new configurations while
            others are still being evaluated. With "no_estimate", the running configurations are
            ignored. With "kriging_believer", the predicted mean of the surrogate model is used as the
            estimated cost. With "CL_min"/"CL_mean"/"CL_max" (constant liar), the minimum/mean/maximum
            of the already evaluated costs is used. With "sample", estimates are sampled from the
            surrogate model, which in this case must be a Gaussian process.

Returns
-------
ConfigSelector
            The instantiated configuration selector, which proposes new configurations by optimizing the
            acquisition function.
"""
return ConfigSelector(
scenario,
retrain_after=retrain_after,
retries=retries,
min_trials=min_trials,
batch_sampling_estimation_strategy=batch_sampling_estimation_strategy,
)

def _get_optimizer(self) -> SMBO:
"""Fills the SMBO with all the pre-initialized components."""
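For readers of the diff: the new parameter only takes effect once the selector it configures is handed back to the facade. A minimal sketch of opting into fantasization, assuming a `train` function and a `configspace` defined elsewhere (placeholder names, mirroring the example script above):

```python
from smac import BlackBoxFacade, Scenario

scenario = Scenario(n_trials=100, configspace=configspace, n_workers=4)

# Build a config selector that fantasizes pending evaluations, then pass it to the facade.
smac = BlackBoxFacade(
    scenario=scenario,
    target_function=train,
    config_selector=BlackBoxFacade.get_config_selector(
        scenario=scenario,
        batch_sampling_estimation_strategy="kriging_believer",
    ),
)
incumbent = smac.optimize()
```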
36 changes: 34 additions & 2 deletions smac/facade/blackbox_facade.py
@@ -318,9 +318,41 @@ def get_config_selector(
scenario: Scenario,
*,
retrain_after: int = 1,
min_trials: int = 1,
retries: int = 16,
batch_sampling_estimation_strategy: str = "no_estimate",
) -> ConfigSelector:
"""Returns the default configuration selector."""
"""Returns the default configuration selector.

Parameters
----------
retrain_after : int, defaults to 1
How many configurations should be returned before the surrogate model is retrained.
retries : int, defaults to 16
How often to retry receiving a new configuration before giving up.
min_trials: int, defaults to 1
How many samples are required to train the surrogate model. If budgets are involved,
the highest budgets are checked first. For example, if min_trials is three, but we find only
two trials in the runhistory for the highest budget, we will use trials of a lower budget
instead.
        batch_sampling_estimation_strategy : str, defaults to "no_estimate"
            Strategy for estimating the costs of still-running configurations in the parallel (batch
            sampling) setting, where the ConfigSelector may need to suggest new configurations while
            others are still being evaluated. With "no_estimate", the running configurations are
            ignored. With "kriging_believer", the predicted mean of the surrogate model is used as the
            estimated cost. With "CL_min"/"CL_mean"/"CL_max" (constant liar), the minimum/mean/maximum
            of the already evaluated costs is used. With "sample", estimates are sampled from the
            surrogate model, which in this case must be a Gaussian process.

Returns
-------
ConfigSelector
            The instantiated configuration selector, which proposes new configurations by optimizing the
            acquisition function.
"""
return super(BlackBoxFacade, BlackBoxFacade).get_config_selector(
scenario, retrain_after=retrain_after, retries=retries
scenario,
retrain_after=retrain_after,
min_trials=min_trials,
retries=retries,
batch_sampling_estimation_strategy=batch_sampling_estimation_strategy,
)
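The "sample" strategy differs from "kriging_believer" in that pending costs are drawn jointly from the posterior instead of plugging in the posterior mean, so the fantasized values stay correlated across pending points. A small illustration of that distinction, using scikit-learn's `GaussianProcessRegressor` as a stand-in for SMAC's internal GP (toy data, hypothetical values):

```python
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

# Toy 1-D stand-ins for evaluated configurations and their costs.
X_train = np.array([[0.0], [1.0], [2.0]])
y_train = np.array([1.0, 0.2, 0.8])
gp = GaussianProcessRegressor().fit(X_train, y_train)

X_running = np.array([[0.5], [1.5]])  # still-running configurations

mu = gp.predict(X_running)                   # kriging_believer: posterior mean per point
draw = gp.sample_y(X_running, n_samples=1)   # sample: one joint draw, correlated across points
```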
102 changes: 102 additions & 0 deletions smac/main/config_selector.py
@@ -16,6 +16,8 @@
from smac.callback.callback import Callback
from smac.initial_design import AbstractInitialDesign
from smac.model.abstract_model import AbstractModel
from smac.model.gaussian_process import GaussianProcess
from smac.model.random_forest import RandomForest
from smac.random_design.abstract_random_design import AbstractRandomDesign
from smac.runhistory.encoder.abstract_encoder import AbstractRunHistoryEncoder
from smac.runhistory.runhistory import RunHistory
@@ -44,6 +46,14 @@ class ConfigSelector:
the highest budgets are checked first. For example, if min_trials is three, but we find only
two trials in the runhistory for the highest budget, we will use trials of a lower budget
instead.
    batch_sampling_estimation_strategy : str, defaults to "no_estimate"
        Strategy for estimating the costs of still-running configurations in the parallel (batch sampling)
        setting, where the ConfigSelector may need to suggest new configurations while others are still being
        evaluated. With "no_estimate", the running configurations are ignored. With "kriging_believer", the
        predicted mean of the surrogate model is used as the estimated cost. With "CL_min"/"CL_mean"/"CL_max"
        (constant liar), the minimum/mean/maximum of the already evaluated costs is used. With "sample",
        estimates are sampled from the surrogate model, which in this case must be a Gaussian process.
"""

def __init__(
@@ -53,6 +63,7 @@ def __init__(
retrain_after: int = 8,
retries: int = 16,
min_trials: int = 1,
batch_sampling_estimation_strategy: str = "no_estimate",
) -> None:
# Those are the configs sampled from the passed initial design
# Selecting configurations from initial design
@@ -82,6 +93,9 @@ def __init__(
# Processed configurations should be stored here; this is important to not return the same configuration twice
self._processed_configs: list[Configuration] = []

        # Estimation strategy for pending configurations in the batch sampling setting
self._batch_sampling_estimation_strategy = batch_sampling_estimation_strategy

def _set_components(
self,
initial_design: AbstractInitialDesign,
@@ -284,6 +298,24 @@ def _collect_data(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        # Possibly add running configs?
configs_array = self._runhistory_encoder.get_configurations(budget_subset=self._considered_budgets)

        # Add running configurations.
        # If the batch size is 1, no running configurations can exist and this step is a no-op,
        # so there is no need to check the number of workers here.

X_running = self._runhistory_encoder.transform_running_configs(budget_subset=[b])
if self._batch_sampling_estimation_strategy != "no_estimate":
Y_estimated = self.estimate_running_config_costs(
X_running, Y, self._batch_sampling_estimation_strategy
)
            # If there are no running configurations, we directly return X, Y, and configs_array.
if Y_estimated is not None:
configs_array_running = self._runhistory_encoder.get_running_configurations(
budget_subset=self._considered_budgets
)
X = np.concatenate([X, X_running], axis=0)
Y = np.concatenate([Y, Y_estimated], axis=0)
configs_array = np.concatenate([configs_array, configs_array_running], axis=0)
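                # After this branch, X has shape (n_evaluated + n_running, D) and Y has shape
                # (n_evaluated + n_running, n_obj): the model is trained as if the pending
                # configurations had already finished with the estimated costs.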

return X, Y, configs_array

return (
@@ -300,6 +332,76 @@ def _get_evaluated_configs(self) -> list[Configuration]:
assert self._runhistory is not None
return self._runhistory.get_configs_per_budget(budget_subset=self._considered_budgets)

    def estimate_running_config_costs(
        self, X_running: np.ndarray, Y_evaluated: np.ndarray, estimation_strategy: str = "CL_max"
    ) -> np.ndarray | None:
"""This function is implemented to estimate the still pending/ running configurations

Parameters
----------
X_running : np.ndarray
a np array with size (n_running_configs, D) that represents the array values of the running configurations
Y_evaluated : np.ndarray
a np array with size (n_evaluated_configs, n_obj) that records the costs of all the previous evaluated
configurations

estimation_strategy: str
how do we estimate the target y_running values, we have the following strategy:
CL_max: constant liar max, we take the maximal of all the evaluated Y and apply them to the running X
CL_min: constant liar min, we take the minimal of all the evaluated Y and apply them to the running X
CL_mean: constant liar mean, we take the mean of all the evaluated Y and apply them to the running X
kriging_believer: kriging believer, we apply the predicted means from the surrogate model to running X
values
sample: estimations for X are sampled from the surrogate models. Since the samples need to be sampled from a
joint distribution for all X, we only allow sample strategy with GP as surrogate models.

Returns
-------
Y_running_estimated : np.ndarray
the estimated running y values
"""
n_running_points = len(X_running)
if n_running_points == 0:
return None
if estimation_strategy == "CL_max":
# constant liar max, we take the maximal values of all the evaluated Y and apply them to the running X
Y_estimated = np.nanmax(Y_evaluated, axis=0, keepdims=True)
return np.repeat(Y_estimated, n_running_points, 0)
elif estimation_strategy == "CL_min":
# constant liar min, we take the minimal values of all the evaluated Y and apply them to the running X
Y_estimated = np.nanmin(Y_evaluated, axis=0, keepdims=True)
return np.repeat(Y_estimated, n_running_points, 0)
elif estimation_strategy == "CL_mean":
# constant liar mean, we take the mean values of all the evaluated Y and apply them to the running X
Y_estimated = np.nanmean(Y_evaluated, axis=0, keepdims=True)
return np.repeat(Y_estimated, n_running_points, 0)
elif estimation_strategy == "kriging_believer":
# kriging believer, we apply the predicted means of the surrogate model to estimate the running X
# Check whether model has been trained already
            if (
                (isinstance(self._model, GaussianProcess) and not self._model._is_trained)
                or (isinstance(self._model, RandomForest) and self._model._rf is None)
            ):
logger.debug(
"Model has not been trained yet. Skip estimation and use constant liar mean "
"(mean of all samples)."
)
Y_estimated = np.nanmean(Y_evaluated, axis=0, keepdims=True)
return np.repeat(Y_estimated, n_running_points, 0)
return self._model.predict_marginalized(X_running)[0] # type: ignore[union-attr]
elif estimation_strategy == "sample":
# https://papers.nips.cc/paper_files/paper/2012/file/05311655a15b75fab86956663e1819cd-Paper.pdf
            # Since this requires a multivariate Gaussian distribution over the candidates, we need to
            # restrict the model to be a Gaussian process.
            assert isinstance(
                self._model, GaussianProcess
            ), "Sample-based estimation strategy requires a GP as the surrogate model!"
return self._model.sample_functions(X_test=X_running, n_funcs=1)
[Review comment, Collaborator] Why can we not sample from the random forest?

else:
            raise ValueError(f"Unknown estimation strategy: {estimation_strategy}")

def _get_x_best(self, X: np.ndarray) -> tuple[np.ndarray, float]:
"""Get value, configuration, and array representation of the *best* configuration.

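To make the constant-liar branches in `estimate_running_config_costs` concrete, a tiny self-contained check with made-up costs (one objective, three finished trials, two pending configurations):

```python
import numpy as np

Y_evaluated = np.array([[0.9], [0.3], [0.6]])  # costs of finished trials (hypothetical values)
n_running = 2                                  # number of pending configurations

# Each constant-liar variant assigns one constant to every pending point.
cl_max = np.repeat(np.nanmax(Y_evaluated, axis=0, keepdims=True), n_running, 0)    # [[0.9], [0.9]]
cl_min = np.repeat(np.nanmin(Y_evaluated, axis=0, keepdims=True), n_running, 0)    # [[0.3], [0.3]]
cl_mean = np.repeat(np.nanmean(Y_evaluated, axis=0, keepdims=True), n_running, 0)  # [[0.6], [0.6]]
```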