From 83a9bbe57e64968ee0e79d5bbedf21a1f835719e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Sass?= Date: Thu, 14 Jul 2022 10:00:56 +0200 Subject: [PATCH] Version 1.4.0 (#730, #855, #869, #872) ## Features * [BOinG](https://arxiv.org/abs/2111.05834): A two-stage Bayesian optimization approach that allows the optimizer to focus on the most promising regions. * [TurBO](https://arxiv.org/abs/1910.01739): Reimplementation of the TurBO-1 algorithm. * Updated pSMAC: Arbitrary SMAC facades can now be passed. Added an example and fixed tests. ## Improvements * Enabled caching for multi-objectives (#872). Costs are now normalized in `get_cost` or optionally in `average_cost`/`sum_cost`/`min_cost` to obtain a single float value. Therefore, the cached cost values do not need to be updated every time a new entry is added to the runhistory. ## Interface changes * We changed the location of Gaussian processes and random forests. They now live in the folders `epm/gaussian_process` and `epm/random_forest`. * We also restructured the optimizer folder, which changes the location of the acquisition functions and the configuration chooser. * Multi-objective functions are located in the folder `multi_objective`. * The pSMAC facade was moved to the facade directory. Co-authored-by: Difan Deng Co-authored-by: Eddie Bergman Co-authored-by: Carolin Benjamins Co-authored-by: timruhkopf --- .github/workflows/docs.yml | 2 +- .github/workflows/pytest.yml | 2 +- .gitignore | 3 + README.md | 4 +- changelog.md | 22 + docs/details/parallelism.rst | 8 +- .../python/plot_scalarized_multi_objective.py | 2 +- .../python/plot_simple_multi_objective.py | 5 +- examples/python/plot_svm_eips.py | 6 +- .../python/plot_synthetic_function_boing.py | 74 ++ .../plot_synthetic_function_parallel.py | 109 +++ .../python/plot_synthetic_function_turbo.py | 80 ++ examples/python/plot_user_prior_mlp.py | 2 - setup.py | 9 +- smac/__init__.py | 2 +- smac/epm/base_epm.py | 8 +- .../__init__.py} | 18 +- smac/epm/gaussian_process/augmented.py | 496 +++++++++++ .../gp.py} | 4 +- smac/epm/gaussian_process/gpytorch.py | 365 ++++++++ .../kernels/__init__.py} | 2 +- smac/epm/gaussian_process/kernels/boing.py | 560 ++++++++++++ .../mcmc.py} | 13 +- .../gaussian_process/utils}/__init__.py | 0 .../utils/prior.py} | 0 ...ted_mo_model.py => multi_objective_epm.py} | 14 +- smac/epm/random_epm.py | 4 +- .../{base_rf.py => random_forest/__init__.py} | 12 +- .../rf_mo.py} | 12 +- .../{ => random_forest}/rf_with_instances.py | 2 +- smac/epm/{ => random_forest}/rfr_imputator.py | 8 +- smac/epm/{util_funcs.py => utils.py} | 73 ++ smac/facade/experimental/hydra_facade.py | 4 +- .../facade/{experimental => }/psmac_facade.py | 210 +++-- smac/facade/roar_facade.py | 7 +- smac/facade/smac_ac_facade.py | 51 +- smac/facade/smac_bb_facade.py | 17 +- smac/facade/smac_boing_facade.py | 111 +++ smac/facade/smac_hpo_facade.py | 2 +- smac/initial_design/initial_design.py | 2 + smac/intensification/abstract_racer.py | 21 +- smac/intensification/hyperband.py | 4 +- smac/intensification/intensification.py | 12 +- smac/intensification/parallel_scheduling.py | 4 +- smac/intensification/simple_intensifier.py | 6 +- smac/intensification/successive_halving.py | 10 +- smac/multi_objective/__init__.py | 0 .../abstract_multi_objective_algorithm.py | 6 +- .../multi_objective/aggregation_strategy.py | 26 +- .../{optimizer => }/multi_objective/parego.py | 20 +- smac/multi_objective/utils.py | 40 + .../__init__.py} | 40 +- .../maximizer.py} | 125 ++- .../configuration_chooser/__init__.py | 0 
.../configuration_chooser/boing_chooser.py | 658 ++++++++++++++ .../epm_chooser.py} | 22 +- .../random_chooser.py} | 12 +- .../configuration_chooser/turbo_chooser.py | 131 +++ smac/optimizer/multi_objective/README.MD | 1 - smac/optimizer/smbo.py | 26 +- smac/optimizer/subspaces/__init__.py | 656 ++++++++++++++ smac/optimizer/subspaces/boing_subspace.py | 175 ++++ smac/optimizer/subspaces/turbo_subspace.py | 319 +++++++ smac/runhistory/runhistory.py | 842 ++++++++++-------- smac/runhistory/runhistory2epm.py | 9 +- smac/runhistory/runhistory2epm_boing.py | 92 ++ smac/smac_cli.py | 4 +- smac/utils/io/result_merging.py | 207 +++++ smac/utils/multi_objective.py | 66 -- smac/utils/validate.py | 6 +- .../random_configuration_chooser_impl.py | 2 +- tests/test_cli/test_deterministic_smac.py | 2 +- tests/test_epm/test_base_epm.py | 10 +- tests/test_epm/test_boing_kernel.py | 217 +++++ tests/test_epm/test_gp.py | 11 +- tests/test_epm/test_gp_gpytorch.py | 341 +++++++ tests/test_epm/test_gp_mcmc.py | 10 +- tests/test_epm/test_gp_priors.py | 2 +- tests/test_epm/test_lgpga.py | 240 +++++ tests/test_epm/test_rf_with_instances.py | 4 +- .../test_uncorrelated_mo_rf_with_instances.py | 10 +- tests/test_epm/test_util_funcs.py | 54 +- tests/test_facade/test_boing_facade.py | 61 ++ tests/test_facade/test_hydra_facade.py | 2 +- tests/test_facade/test_psmac_facade.py | 39 +- tests/test_facade/test_smac_facade.py | 21 +- tests/test_files/example_run/configspace.json | 24 + tests/test_files/example_run/configspace.pcs | 2 + tests/test_files/example_run/runhistory.json | 467 ++++++++++ tests/test_files/example_run/scenario.txt | 14 + tests/test_files/example_run/stats.json | 1 + tests/test_files/example_run/traj.json | 8 + tests/test_files/example_run/traj_aclib2.json | 8 + tests/test_files/example_run/traj_old.csv | 9 + tests/test_local_bo/__init__.py | 0 tests/test_local_bo/test_abstract_subspace.py | 470 ++++++++++ tests/test_local_bo/test_epm_chooser_boing.py | 283 ++++++ tests/test_local_bo/test_epm_chooser_turbo.py | 41 + tests/test_local_bo/test_rh2epm_boing.py | 62 ++ tests/test_local_bo/test_subspace_boing.py | 68 ++ tests/test_local_bo/test_turbo_subspace.py | 185 ++++ tests/test_multi_objective/test_schaffer.py | 13 +- tests/test_runhistory/test_rfr_imputor.py | 6 +- tests/test_runhistory/test_runhistory2epm.py | 6 +- .../test_runhistory_multi_objective.py | 18 +- tests/test_smbo/test_ei_optimization.py | 2 +- .../test_epm_configuration_chooser.py | 2 +- .../test_random_configuration_chooser.py | 2 +- tests/test_smbo/test_smbo.py | 2 +- tests/test_utils/io/test_result_merging.py | 48 + tests/test_utils/test_multi_objective.py | 77 +- 111 files changed, 7815 insertions(+), 894 deletions(-) create mode 100644 examples/python/plot_synthetic_function_boing.py create mode 100644 examples/python/plot_synthetic_function_parallel.py create mode 100644 examples/python/plot_synthetic_function_turbo.py rename smac/epm/{base_gp.py => gaussian_process/__init__.py} (91%) create mode 100644 smac/epm/gaussian_process/augmented.py rename smac/epm/{gaussian_process.py => gaussian_process/gp.py} (99%) create mode 100644 smac/epm/gaussian_process/gpytorch.py rename smac/epm/{gp_kernels.py => gaussian_process/kernels/__init__.py} (99%) create mode 100644 smac/epm/gaussian_process/kernels/boing.py rename smac/epm/{gaussian_process_mcmc.py => gaussian_process/mcmc.py} (97%) rename smac/{optimizer/multi_objective => epm/gaussian_process/utils}/__init__.py (100%) rename smac/epm/{gp_base_prior.py => 
gaussian_process/utils/prior.py} (100%) rename smac/epm/{base_uncorrelated_mo_model.py => multi_objective_epm.py} (94%) rename smac/epm/{base_rf.py => random_forest/__init__.py} (90%) rename smac/epm/{uncorrelated_mo_rf_with_instances.py => random_forest/rf_mo.py} (84%) rename smac/epm/{ => random_forest}/rf_with_instances.py (99%) rename smac/epm/{ => random_forest}/rfr_imputator.py (98%) rename smac/epm/{util_funcs.py => utils.py} (69%) rename smac/facade/{experimental => }/psmac_facade.py (50%) create mode 100644 smac/facade/smac_boing_facade.py create mode 100644 smac/multi_objective/__init__.py rename smac/{optimizer => }/multi_objective/abstract_multi_objective_algorithm.py (75%) rename smac/{optimizer => }/multi_objective/aggregation_strategy.py (59%) rename smac/{optimizer => }/multi_objective/parego.py (57%) create mode 100644 smac/multi_objective/utils.py rename smac/optimizer/{acquisition.py => acquisition/__init__.py} (96%) rename smac/optimizer/{ei_optimization.py => acquisition/maximizer.py} (89%) create mode 100644 smac/optimizer/configuration_chooser/__init__.py create mode 100644 smac/optimizer/configuration_chooser/boing_chooser.py rename smac/optimizer/{epm_configuration_chooser.py => configuration_chooser/epm_chooser.py} (92%) rename smac/optimizer/{random_configuration_chooser.py => configuration_chooser/random_chooser.py} (95%) create mode 100644 smac/optimizer/configuration_chooser/turbo_chooser.py delete mode 100644 smac/optimizer/multi_objective/README.MD create mode 100644 smac/optimizer/subspaces/__init__.py create mode 100644 smac/optimizer/subspaces/boing_subspace.py create mode 100644 smac/optimizer/subspaces/turbo_subspace.py create mode 100644 smac/runhistory/runhistory2epm_boing.py create mode 100644 smac/utils/io/result_merging.py delete mode 100644 smac/utils/multi_objective.py create mode 100644 tests/test_epm/test_boing_kernel.py create mode 100644 tests/test_epm/test_gp_gpytorch.py create mode 100644 tests/test_epm/test_lgpga.py create mode 100644 tests/test_facade/test_boing_facade.py create mode 100644 tests/test_files/example_run/configspace.json create mode 100644 tests/test_files/example_run/configspace.pcs create mode 100644 tests/test_files/example_run/runhistory.json create mode 100644 tests/test_files/example_run/scenario.txt create mode 100644 tests/test_files/example_run/stats.json create mode 100644 tests/test_files/example_run/traj.json create mode 100644 tests/test_files/example_run/traj_aclib2.json create mode 100644 tests/test_files/example_run/traj_old.csv create mode 100644 tests/test_local_bo/__init__.py create mode 100644 tests/test_local_bo/test_abstract_subspace.py create mode 100644 tests/test_local_bo/test_epm_chooser_boing.py create mode 100644 tests/test_local_bo/test_epm_chooser_turbo.py create mode 100644 tests/test_local_bo/test_rh2epm_boing.py create mode 100644 tests/test_local_bo/test_subspace_boing.py create mode 100644 tests/test_local_bo/test_turbo_subspace.py create mode 100644 tests/test_utils/io/test_result_merging.py diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index bbe410603..4f8f0f174 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -35,7 +35,7 @@ jobs: - name: Install dependencies run: | - pip install ".[dev]" + pip install ".[gpytorch,dev]" - name: Make docs run: | diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f40927270..620da251b 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -23,7 +23,7 @@ 
on: env: package-name: smac test-dir: tests - extra-requires: "[dev]" + extra-requires: "[gpytorch,dev]" # Arguments used for pytest pytest-args: >- diff --git a/.gitignore b/.gitignore index 10f0fcdd6..8b391fae8 100644 --- a/.gitignore +++ b/.gitignore @@ -137,3 +137,6 @@ dmypy.json # macOS files .DS_Store + +# Remove docker files +docker \ No newline at end of file diff --git a/README.md b/README.md index af1a5644b..9db198007 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,9 @@ efficiently decide which of two configurations performs better. SMAC3 is written in Python3 and continuously tested with Python 3.7, 3.8, 3.9, and 3.10. Its Random Forest is written in C++. In further texts, SMAC is representatively mentioned for SMAC3. -[Documention](https://automl.github.io/SMAC3) +> [Documention](https://automl.github.io/SMAC3) + +> [Roadmap](https://github.com/orgs/automl/projects/5/views/2) ## Installation diff --git a/changelog.md b/changelog.md index 07965ca22..109d06023 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,25 @@ +# 1.4.0 + +## Features +* [BOinG](https://arxiv.org/abs/2111.05834): A two-stage Bayesian optimization approach to allow the +optimizer to focus on the most promising regions. +* [TurBO](https://arxiv.org/abs/1910.01739): Reimplementaion of TurBO-1 algorithm. +* Updated pSMAC: Can pass arbitrary SMAC facades now. Added example and fixed tests. + +## Improvements +* Enabled caching for multi-objectives (#872). Costs are now normalized in `get_cost` +or optionally in `average_cost`/`sum_cost`/`min_cost` to receive a single float value. Therefore, +the cached cost values do not need to be updated everytime a new entry to the runhistory was added. + +## Interface changes +* We changed the location of Gaussian processes and random forests. They are in the folders +`epm/gaussian_process` and `epm/random_forest` now. +* Also, we restructured the optimizer folder and therefore the location of the acquisition functions +and configuration chooser. +* Multi-objective functions are located in the folder `multi_objective`. +* pSMAC facade was moved to the facade directory. + + # 1.3.4 * Added reference to JMLR paper. * Typos in documentations. diff --git a/docs/details/parallelism.rst b/docs/details/parallelism.rst index 8b145a828..28e100733 100644 --- a/docs/details/parallelism.rst +++ b/docs/details/parallelism.rst @@ -2,7 +2,7 @@ Parallelism =========== SMAC also provides a parallel mode to use several parallel computational resources (such as CPU cores). -This variant of SMAC is called pSMAC (parallel SMAC). +This variant of SMAC is called pSMAC (parallel SMAC) [1]_. The general idea is that all target algorithm run evaluations are shared between the individual SMAC runs such that all SMAC runs are better informed and can work together. @@ -19,6 +19,12 @@ such that all SMAC runs are better informed and can work together. SMAC also supports DASH. The documentation is in progress. +.. [1] Ramage, S. E. A. (2015). Advances in meta-algorithmic software libraries for + distributed automated algorithm configuration (T). University of British + Columbia. Retrieved from + https://open.library.ubc.ca/collections/ubctheses/24/items/1.0167184. 
+ + Commandline ~~~~~~~~~~~ To use pSMAC via the commandline interface, please specify the following two arguments: diff --git a/examples/python/plot_scalarized_multi_objective.py b/examples/python/plot_scalarized_multi_objective.py index 0854a4343..c971e957c 100644 --- a/examples/python/plot_scalarized_multi_objective.py +++ b/examples/python/plot_scalarized_multi_objective.py @@ -9,7 +9,7 @@ import logging -from smac.optimizer.multi_objective.parego import ParEGO +from smac.multi_objective.parego import ParEGO logging.basicConfig(level=logging.INFO) diff --git a/examples/python/plot_simple_multi_objective.py b/examples/python/plot_simple_multi_objective.py index df87556bd..af5476610 100644 --- a/examples/python/plot_simple_multi_objective.py +++ b/examples/python/plot_simple_multi_objective.py @@ -27,7 +27,10 @@ def plot(all_x): plt.figure() for x in all_x: f1, f2 = schaffer(x) - plt.scatter(f1, f2, c="blue", alpha=0.1) + plt.scatter(f1, f2, c="blue", alpha=0.1, zorder=3000) + + plt.vlines([1], 0, 4, linestyles="dashed", colors=["red"]) + plt.hlines([1], 0, 4, linestyles="dashed", colors=["red"]) plt.show() diff --git a/examples/python/plot_svm_eips.py b/examples/python/plot_svm_eips.py index b4fc5bf70..400c68973 100644 --- a/examples/python/plot_svm_eips.py +++ b/examples/python/plot_svm_eips.py @@ -21,9 +21,7 @@ from sklearn.model_selection import cross_val_score from smac.configspace import ConfigurationSpace -from smac.epm.uncorrelated_mo_rf_with_instances import ( - UncorrelatedMultiObjectiveRandomForestWithInstances, -) +from smac.epm.random_forest.rf_mo import MultiObjectiveRandomForest from smac.facade.smac_ac_facade import SMAC4AC # EIPS related @@ -104,7 +102,7 @@ def svm_from_cfg(cfg): model_kwargs = {"target_names": ["loss", "time"], "model_kwargs": {"seed": 1}} smac = SMAC4AC( scenario=scenario, - model=UncorrelatedMultiObjectiveRandomForestWithInstances, + model=MultiObjectiveRandomForest, rng=np.random.RandomState(42), model_kwargs=model_kwargs, tae_runner=svm_from_cfg, diff --git a/examples/python/plot_synthetic_function_boing.py b/examples/python/plot_synthetic_function_boing.py new file mode 100644 index 000000000..e093229d8 --- /dev/null +++ b/examples/python/plot_synthetic_function_boing.py @@ -0,0 +1,74 @@ +""" +Synthetic Function with BOinG as optimizer +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An example of applying SMAC with BO inside Grove (BOinG) to optimize a +synthetic function (2d rosenbrock function). + +BOinG optimizer requires a SMAC4BOING wrapper to optimize the target algorithm. It is a two stage BO algorithm. +In the first stage, BOinG constructs an RF to capture the global loss landscape. Then in the second stage, it only +optimizes inside a subregion near the candidate suggested by the RF model with a GP model to focus only on the most +promising region. +""" + +import logging + +import numpy as np +from ConfigSpace import ConfigurationSpace +from ConfigSpace.hyperparameters import UniformFloatHyperparameter + +from smac.facade.smac_boing_facade import SMAC4BOING + +# Import SMAC-utilities +from smac.scenario.scenario import Scenario + + +def rosenbrock_2d(x): + """The 2 dimensional Rosenbrock function as a toy model + The Rosenbrock function is well know in the optimization community and + often serves as a toy problem. It can be defined for arbitrary + dimensions. The minimium is always at x_i = 1 with a function value of + zero. All input parameters are continuous. The search domain for + all x's is the interval [-5, 10]. 
+ """ + x1 = x["x0"] + x2 = x["x1"] + + val = 100.0 * (x2 - x1**2.0) ** 2.0 + (1 - x1) ** 2.0 + return val + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) # logging.DEBUG for debug output + + # Build Configuration Space which defines all parameters and their ranges + cs = ConfigurationSpace() + x0 = UniformFloatHyperparameter("x0", -5, 10, default_value=-3) + x1 = UniformFloatHyperparameter("x1", -5, 10, default_value=-4) + cs.add_hyperparameters([x0, x1]) + # Scenario object + scenario = Scenario( + { + "run_obj": "quality", # we optimize quality (alternatively runtime) + "runcount-limit": 20, + # max. number of function evaluations; for this example set to a low number + "cs": cs, # configuration space + "deterministic": "true", + } + ) + + # Example call of the function + # It returns: Status, Cost, Runtime, Additional Infos + def_value = rosenbrock_2d(cs.get_default_configuration()) + print("Default Value: %.2f" % def_value) + + # Optimize, using a SMAC-object + print("Optimizing! Depending on your machine, this might take a few minutes.") + + smac = SMAC4BOING( + scenario=scenario, + rng=np.random.RandomState(42), + tae_runner=rosenbrock_2d, + ) + + smac.optimize() diff --git a/examples/python/plot_synthetic_function_parallel.py b/examples/python/plot_synthetic_function_parallel.py new file mode 100644 index 000000000..93ea864e3 --- /dev/null +++ b/examples/python/plot_synthetic_function_parallel.py @@ -0,0 +1,109 @@ +""" +Synthetic Function with few Hyperparameters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An example of applying SMAC to optimize a synthetic function (2d rosenbrock function). + +We use the pSMAC [1]_ facade to demonstrate the parallelization of SMAC. +Other than that, we use a :term:`Gaussian Process` to optimize our black-box +function. + + +.. [1] Ramage, S. E. A. (2015). Advances in meta-algorithmic software libraries for + distributed automated algorithm configuration (T). University of British + Columbia. Retrieved from + https://open.library.ubc.ca/collections/ubctheses/24/items/1.0167184. +""" +import importlib + +import logging + +logging.basicConfig(level=logging.INFO) + +import numpy as np +from ConfigSpace.hyperparameters import UniformFloatHyperparameter + +# Import ConfigSpace and different types of parameters +from smac.configspace import ConfigurationSpace +from smac.facade.psmac_facade import PSMAC +from smac.facade.smac_bb_facade import SMAC4BB +import smac + +importlib.reload(smac.facade.psmac_facade) +from smac.facade.psmac_facade import PSMAC + +from smac.optimizer.acquisition import EI + +# Import SMAC-utilities +from smac.scenario.scenario import Scenario + +__copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" +__license__ = "3-clause BSD" + + +def rosenbrock_2d(x): + """The 2 dimensional Rosenbrock function as a toy model + The Rosenbrock function is well know in the optimization community and + often serves as a toy problem. It can be defined for arbitrary + dimensions. The minimium is always at x_i = 1 with a function value of + zero. All input parameters are continuous. The search domain for + all x's is the interval [-5, 10]. 
+ """ + + x1 = x["x0"] + x2 = x["x1"] + + val = 100.0 * (x2 - x1**2.0) ** 2.0 + (1 - x1) ** 2.0 + return val + + +if __name__ == "__main__": + # Build Configuration Space which defines all parameters and their ranges + cs = ConfigurationSpace() + x0 = UniformFloatHyperparameter("x0", -5, 10, default_value=-3) + x1 = UniformFloatHyperparameter("x1", -5, 10, default_value=-4) + cs.add_hyperparameters([x0, x1]) + + # Scenario object + scenario = Scenario( + { + "run_obj": "quality", # we optimize quality (alternatively runtime) + "runcount-limit": 20, # max. number of function evaluations PER WORKER + "cs": cs, # configuration space + "deterministic": True, + } + ) + + # Use 'gp' or 'gp_mcmc' here + model_type = "gp" + + # Example call of the function + # It returns: Status, Cost, Runtime, Additional Infos + def_value = rosenbrock_2d(cs.get_default_configuration()) + print("Default Value: %.2f" % def_value) + + # Optimize, using a SMAC-object + print("Optimizing! Depending on your machine, this might take a few minutes.") + smac = PSMAC( + scenario=scenario, + facade_class=SMAC4BB, + model_type=model_type, + rng=np.random.RandomState(42), + acquisition_function=EI, # or others like PI, LCB as acquisition functions + tae_runner=rosenbrock_2d, + n_workers=2, # 2 parallel workers + ) + + incumbent = smac.optimize() + # Get trajectory of optimization (incumbent over time) + trajectory_json = smac.get_trajectory() # trajectory in json format + + # Plot trajectory: cost of incumbent against number of evaluations + # import matplotlib.pyplot as plt + # X = [t["evaluations"] for t in trajectory_json] + # Y = [t["cost"] for t in trajectory_json] + # plt.plot(X, Y) + # plt.yscale("log") + # plt.xlabel("Number of Evaluations") + # plt.ylabel("Cost of Incumbent") + # plt.show() diff --git a/examples/python/plot_synthetic_function_turbo.py b/examples/python/plot_synthetic_function_turbo.py new file mode 100644 index 000000000..753cae7ef --- /dev/null +++ b/examples/python/plot_synthetic_function_turbo.py @@ -0,0 +1,80 @@ +""" +Synthetic Function with TuRBO as optimizer +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An example of applying SMAC with trust region BO (TuRBO) to optimize a +synthetic function (2d rosenbrock function). + +Eriksson et al. Scalable Global Optimization via Local {Bayesian} Optimization, +http://papers.nips.cc/paper/8788-scalable-global-optimization-via-local-bayesian-optimization.pdf + +TurBO gradually shrinks its search space to the vicinity of the optimum configuration that is ever optimized. +TuRBO optimizer requires EPMChooserTurBO to suggest the next configuration. Currently, it only supports pure numerical +hyperparameters. +""" + +import logging + +import numpy as np +from ConfigSpace.hyperparameters import UniformFloatHyperparameter + +# Import ConfigSpace and different types of parameters +from smac.configspace import ConfigurationSpace +from smac.facade.smac_bb_facade import SMAC4BB +from smac.optimizer.configuration_chooser.turbo_chooser import TurBOChooser + +# Import SMAC-utilities +from smac.scenario.scenario import Scenario + + +def rosenbrock_2d(x): + """The 2 dimensional Rosenbrock function as a toy model + The Rosenbrock function is well know in the optimization community and + often serves as a toy problem. It can be defined for arbitrary + dimensions. The minimium is always at x_i = 1 with a function value of + zero. All input parameters are continuous. The search domain for + all x's is the interval [-5, 10]. 
+ """ + x1 = x["x0"] + x2 = x["x1"] + + val = 100.0 * (x2 - x1**2.0) ** 2.0 + (1 - x1) ** 2.0 + return val + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) # logging.DEBUG for debug output + + # Build Configuration Space which defines all parameters and their ranges + cs = ConfigurationSpace() + x0 = UniformFloatHyperparameter("x0", -5, 10, default_value=-3) + x1 = UniformFloatHyperparameter("x1", -5, 10, default_value=-4) + cs.add_hyperparameters([x0, x1]) + + # Scenario object + scenario = Scenario( + { + "run_obj": "quality", # we optimize quality (alternatively runtime) + "runcount-limit": 100, + "cs": cs, # configuration space + "deterministic": "true", + } + ) + + # Example call of the function + # It returns: Status, Cost, Runtime, Additional Infos + def_value = rosenbrock_2d(cs.get_default_configuration()) + print("Default Value: %.2f" % def_value) + + # Optimize, using a SMAC-object + print("Optimizing! Depending on your machine, this might take a few minutes.") + smac = SMAC4BB( + scenario=scenario, + rng=np.random.RandomState(42), + model_type="gp", + smbo_kwargs={"epm_chooser": TurBOChooser}, + initial_design_kwargs={"init_budget": 0}, + tae_runner=rosenbrock_2d, + ) + + smac.optimize() diff --git a/examples/python/plot_user_prior_mlp.py b/examples/python/plot_user_prior_mlp.py index 390fb64f5..42bcae3e5 100644 --- a/examples/python/plot_user_prior_mlp.py +++ b/examples/python/plot_user_prior_mlp.py @@ -20,7 +20,6 @@ import warnings -import ConfigSpace as CS import numpy as np from ConfigSpace.hyperparameters import ( BetaIntegerHyperparameter, @@ -34,7 +33,6 @@ from sklearn.neural_network import MLPClassifier from smac.configspace import ConfigurationSpace -from smac.facade.smac_bb_facade import SMAC4BB from smac.facade.smac_hpo_facade import SMAC4HPO from smac.initial_design.random_configuration_design import RandomConfigurations from smac.scenario.scenario import Scenario diff --git a/setup.py b/setup.py index 2493b327a..b92f04ded 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,12 @@ def read_file(filepath: str) -> str: extras_require = { + "gpytorch": [ + "torch>=1.9.0", + "gpytorch>=1.5.0", + "pyro-ppl>=1.7.0", + "botorch>=0.5.0" + ], "dev": [ "setuptools", "types-setuptools", @@ -63,10 +69,11 @@ def read_file(filepath: str) -> str: "ConfigSpace>=0.5.0", "joblib", "scikit-learn>=0.22.0", - "pyrfr>=0.8.0", + "pyrfr>=0.8.3", "dask", "distributed", "emcee>=3.0.0", + "regex", ], extras_require=extras_require, test_suite="pytest", diff --git a/smac/__init__.py b/smac/__init__.py index a004c2746..fa90f8557 100644 --- a/smac/__init__.py +++ b/smac/__init__.py @@ -22,7 +22,7 @@ Matthias Feurer, André Biedenkapp, Difan Deng, Carolin Benjamins, Tim Ruhkopf, René Sass and Frank Hutter """ -version = "1.3.4" +version = "1.4.0" if os.name != "posix": diff --git a/smac/epm/base_epm.py b/smac/epm/base_epm.py index 8120ea3a5..aa7dafc16 100644 --- a/smac/epm/base_epm.py +++ b/smac/epm/base_epm.py @@ -20,7 +20,7 @@ __version__ = "0.0.1" -class AbstractEPM(object): +class BaseEPM: """Abstract implementation of the EPM API. **Note:** The input dimensionality of Y for training and the output dimensions @@ -107,7 +107,7 @@ def __init__( self.logger = PickableLoggerAdapter(self.__module__ + "." + self.__class__.__name__) - def train(self, X: np.ndarray, Y: np.ndarray) -> "AbstractEPM": + def train(self, X: np.ndarray, Y: np.ndarray) -> "BaseEPM": """Trains the EPM on X and Y. 
Parameters @@ -120,7 +120,7 @@ def train(self, X: np.ndarray, Y: np.ndarray) -> "AbstractEPM": Returns ------- - self : AbstractEPM + self : BaseEPM """ if len(X.shape) != 2: raise ValueError("Expected 2d array, got %dd array!" % len(X.shape)) @@ -154,7 +154,7 @@ def train(self, X: np.ndarray, Y: np.ndarray) -> "AbstractEPM": return self._train(X, Y) - def _train(self, X: np.ndarray, Y: np.ndarray) -> "AbstractEPM": + def _train(self, X: np.ndarray, Y: np.ndarray) -> "BaseEPM": """Trains the random forest on X and y. Parameters diff --git a/smac/epm/base_gp.py b/smac/epm/gaussian_process/__init__.py similarity index 91% rename from smac/epm/base_gp.py rename to smac/epm/gaussian_process/__init__.py index 9e18b61f6..8da500d5f 100644 --- a/smac/epm/base_gp.py +++ b/smac/epm/gaussian_process/__init__.py @@ -5,15 +5,16 @@ from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import Kernel, KernelOperator -import smac.epm.gp_base_prior +import smac.epm.gaussian_process.utils.prior from smac.configspace import ConfigurationSpace -from smac.epm.base_epm import AbstractEPM +from smac.epm.base_epm import BaseEPM +from smac.epm.gaussian_process.utils.prior import Prior __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" -class BaseModel(AbstractEPM): +class BaseModel(BaseEPM): def __init__( self, configspace: ConfigurationSpace, @@ -91,7 +92,7 @@ def _get_all_priors( self, add_bound_priors: bool = True, add_soft_bounds: bool = False, - ) -> List[List[smac.epm.gp_base_prior.Prior]]: + ) -> List[List[Prior]]: """Returns all priors.""" # Obtain a list of all priors for each tunable hyperparameter of the kernel all_priors = [] @@ -118,7 +119,7 @@ def _get_all_priors( if add_bound_priors: if add_soft_bounds: priors_for_hp.append( - smac.epm.gp_base_prior.SoftTopHatPrior( + smac.epm.gaussian_process.utils.prior.SoftTopHatPrior( lower_bound=bounds[i][0], upper_bound=bounds[i][1], rng=self.rng, @@ -127,7 +128,7 @@ def _get_all_priors( ) else: priors_for_hp.append( - smac.epm.gp_base_prior.TophatPrior( + smac.epm.gaussian_process.utils.prior.TophatPrior( lower_bound=bounds[i][0], upper_bound=bounds[i][1], rng=self.rng, @@ -157,3 +158,8 @@ def _impute_inactive(self, X: np.ndarray) -> np.ndarray: X = X.copy() X[~np.isfinite(X)] = -1 return X + + +from smac.epm.gaussian_process.gp import GaussianProcess # noqa + +__all__ = ["BaseModel", "GaussianProcess"] diff --git a/smac/epm/gaussian_process/augmented.py b/smac/epm/gaussian_process/augmented.py new file mode 100644 index 000000000..183000c9a --- /dev/null +++ b/smac/epm/gaussian_process/augmented.py @@ -0,0 +1,496 @@ +from typing import Dict, List, Optional, Tuple, Union + +from collections import OrderedDict + +import gpytorch +import numpy as np +import torch +from botorch.optim.numpy_converter import module_to_array, set_params_with_array +from botorch.optim.utils import _get_extra_mll_args +from gpytorch.constraints.constraints import Interval +from gpytorch.distributions import MultivariateNormal +from gpytorch.kernels import Kernel +from gpytorch.likelihoods import GaussianLikelihood +from gpytorch.means import ZeroMean +from gpytorch.mlls import ExactMarginalLogLikelihood +from gpytorch.models import ExactGP +from gpytorch.utils.errors import NanError +from scipy import optimize +from scipy.stats.qmc import LatinHypercube + +from smac.configspace import ConfigurationSpace +from smac.epm.gaussian_process.gpytorch import ExactGPModel, GPyTorchGaussianProcess +from 
smac.epm.gaussian_process.kernels.boing import FITCKernel, FITCMean +from smac.epm.utils import check_subspace_points + +gpytorch.settings.debug.off() + + +class AugmentedLocalGaussianProcess(ExactGP): + def __init__( + self, + X_in: torch.Tensor, + y_in: torch.Tensor, + X_out: torch.Tensor, + y_out: torch.Tensor, + likelihood: GaussianLikelihood, + base_covar_kernel: Kernel, + ): + """ + An Augmented Local GP, it is trained with the points inside a subregion while its prior is augemented by the + points outside the subregion (global configurations) + + Parameters + ---------- + X_in: torch.Tensor (N_in, D), + feature vector of the points inside the subregion + y_in: torch.Tensor (N_in, 1), + observation inside the subregion + X_out: torch.Tensor (N_out, D), + feature vector of the points outside the subregion + y_out:torch.Tensor (N_out, 1), + observation inside the subregion + likelihood: GaussianLikelihood, + likelihood of the GP (noise) + base_covar_kernel: Kernel, + Covariance Kernel + """ + X_in = X_in.unsqueeze(-1) if X_in.ndimension() == 1 else X_in + X_out = X_out.unsqueeze(-1) if X_out.ndimension() == 1 else X_out + assert X_in.shape[-1] == X_out.shape[-1] + + super(AugmentedLocalGaussianProcess, self).__init__(X_in, y_in, likelihood) + + self._mean_module = ZeroMean() + self.base_covar = base_covar_kernel + + self.X_out = X_out + self.y_out = y_out + self.augmented = False + + def set_augment_module(self, X_inducing: torch.Tensor) -> None: + """ + Set an augmentation module, which will be used later for inference + + Parameters + ---------- + X_inducing: torch.Tensor(N_inducing, D) + inducing points, it needs to have the same number of dimensions as X_in + """ + X_inducing = X_inducing.unsqueeze(-1) if X_inducing.ndimension() == 1 else X_inducing + # assert X_inducing.shape[-1] == self.X_out.shape[-1] + self.covar_module = FITCKernel( + self.base_covar, X_inducing=X_inducing, X_out=self.X_out, y_out=self.y_out, likelihood=self.likelihood + ) + self.mean_module = FITCMean(covar_module=self.covar_module) + self.augmented = True + + def forward(self, x: torch.Tensor) -> MultivariateNormal: + """ + Compute the prior values. If optimize_kernel_hps is set True in the training phases, this model degenerates to + a vanilla GP model with ZeroMean and base_covar as covariance matrix. Otherwise, we apply partial sparse GP + mean and kernels here. + """ + if not self.augmented: + # we only optimize for kernel hyperparameters + covar_x = self.base_covar(x) + mean_x = self._mean_module(x) + else: + covar_x = self.covar_module(x) + mean_x = self.mean_module(x) + return MultivariateNormal(mean_x, covar_x) + + +class VariationalGaussianProcess(gpytorch.models.ApproximateGP): + """ + A variational GP to compute the position of the inducing points. + We only optimize for the position of the continuous dimensions and keep the categorical dimensions constant. + """ + + def __init__(self, kernel: Kernel, X_inducing: torch.Tensor): + """ + Initialize a Variational GP + we set the lower bound and upper bounds of inducing points for numerical hyperparameters between 0 and 1, + that is, we constrain the inducing points to lay inside the subregion. 
+ + Parameters + ---------- + kernel: Kernel + kernel of the variational GP, its hyperparameter needs to be fixed when it is by LGPGA + X_inducing: torch.tensor (N_inducing, D) + inducing points + """ + variational_distribution = gpytorch.variational.TrilNaturalVariationalDistribution(X_inducing.size(0)) + variational_strategy = gpytorch.variational.VariationalStrategy( + self, X_inducing, variational_distribution, learn_inducing_locations=True + ) + super(VariationalGaussianProcess, self).__init__(variational_strategy) + self.mean_module = gpytorch.means.ZeroMean() + self.covar_module = kernel + + shape_X_inducing = X_inducing.shape + lower_X_inducing = torch.zeros([shape_X_inducing[-1]]).repeat(shape_X_inducing[0]) + upper_X_inducing = torch.ones([shape_X_inducing[-1]]).repeat(shape_X_inducing[0]) + + self.variational_strategy.register_constraint( + param_name="inducing_points", + constraint=Interval(lower_X_inducing, upper_X_inducing, transform=None), + ) + self.double() + + for p_name, t in self.named_hyperparameters(): + if p_name != "variational_strategy.inducing_points": + t.requires_grad = False + + def forward(self, x: torch.Tensor) -> MultivariateNormal: + """ + Pass the posterior mean and variance given input X + + Parameters + ---------- + x: torch.Tensor + Input data + Returns + ------- + """ + mean_x = self.mean_module(x) + covar_x = self.covar_module(x, cont_only=True) + return MultivariateNormal(mean_x, covar_x) + + +class GloballyAugmentedLocalGaussianProcess(GPyTorchGaussianProcess): + def __init__( + self, + configspace: ConfigurationSpace, + types: List[int], + bounds: List[Tuple[float, float]], + bounds_cont: np.ndarray, + bounds_cat: List[Tuple], + seed: int, + kernel: Kernel, + num_inducing_points: int = 2, + likelihood: Optional[GaussianLikelihood] = None, + normalize_y: bool = True, + n_opt_restarts: int = 10, + instance_features: Optional[np.ndarray] = None, + pca_components: Optional[int] = None, + ): + """ + The GP hyperparameters are obtained by optimizing the marginal log-likelihood and optimized with botorch + We train an LGPGA in two stages: + In the first stage, we only train the kernel hyperparameter and thus deactivate the gradient w.r.t the position + of the inducing points. + In the second stage, we use the kernel hyperparameter acquired in the first stage to initialize a new + variational Gaussian process and only optimize its inducing points' position with natural gradients. + Finally, we update the position of the inducing points and use it for evaluation. + + + Parameters + ---------- + bounds_cont: np.ndarray(N_cont, 2), + bounds of the continuous hyperparameters, store as [[0,1] * N_cont] + bounds_cat: List[Tuple], + bounds of categorical hyperparameters + kernel : gpytorch kernel object + Specifies the kernel that is used for all Gaussian Process + num_inducing_points: int + Number of inducing points + likelihood: Optional[GaussianLikelihood] + Likelihood values + normalize_y : bool + Zero mean unit variance normalization of the output values when the model is a partial sparse GP model. 
+ """ + super(GloballyAugmentedLocalGaussianProcess, self).__init__( + configspace=configspace, + types=types, + bounds=bounds, + seed=seed, + kernel=kernel, + likelihood=likelihood, + normalize_y=normalize_y, + n_opt_restarts=n_opt_restarts, + instance_features=instance_features, + pca_components=pca_components, + ) + self.cont_dims = np.where(np.array(types) == 0)[0] + self.cat_dims = np.where(np.array(types) != 0)[0] + self.bounds_cont = bounds_cont + self.bounds_cat = bounds_cat + self.num_inducing_points = num_inducing_points + + def update_attribute(self, **kwargs: Dict) -> None: + """We update the class attribute (for instance, number of inducing points)""" + for key in kwargs: + if not hasattr(self, key): + raise AttributeError(f"{self.__class__.__name__} has no attribute named {key}") + setattr(self, key, kwargs[key]) + + def _train( + self, X: np.ndarray, y: np.ndarray, do_optimize: bool = True + ) -> Union[AugmentedLocalGaussianProcess, GPyTorchGaussianProcess]: + """ + Update the hyperparameters of the partial sparse kernel. Depending on the number of inputs inside and + outside the subregion, we initialize a PartialSparseGaussianProcess or a GaussianProcessGPyTorch + + Parameters + ---------- + X: np.ndarray (N, D) + Input data points. The dimensionality of X is (N, D), + with N as the number of points and D is the number of features., N = N_in + N_out + y: np.ndarray (N,) + The corresponding target values. + do_optimize: boolean + If set to true, the hyperparameters are optimized otherwise, + the default hyperparameters of the kernel are used. + """ + X = self._impute_inactive(X) + if len(y.shape) == 1: + self.n_objectives_ = 1 + else: + self.n_objectives_ = y.shape[1] + if self.n_objectives_ == 1: + y = y.flatten() + + ss_data_indices = check_subspace_points( + X, + cont_dims=self.cont_dims, + cat_dims=self.cat_dims, + bounds_cont=self.bounds_cont, + bounds_cat=self.bounds_cat, + expand_bound=True, + ) + + if np.sum(ss_data_indices) > np.shape(y)[0] - self.num_inducing_points: + # we initialize a vanilla GaussianProcessGPyTorch + if self.normalize_y: + y = self._normalize_y(y) + self.num_points = np.shape(y)[0] + get_gp_kwargs = {"X_in": X, "y_in": y, "X_out": None, "y_out": None} + else: + # we initialize a PartialSparseGaussianProcess object + X_in = X[ss_data_indices] + y_in = y[ss_data_indices] + X_out = X[~ss_data_indices] + y_out = y[~ss_data_indices] + self.num_points = np.shape(y_in)[0] + if self.normalize_y: + y_in = self._normalize_y(y_in) + y_out = (y_out - self.mean_y_) / self.std_y_ + get_gp_kwargs = {"X_in": X_in, "y_in": y_in, "X_out": X_out, "y_out": y_out} + + n_tries = 10 + + for i in range(n_tries): + try: + self.gp = self._get_gp(**get_gp_kwargs) + break + except Exception as e: + if i == n_tries - 1: + raise RuntimeError(f"Fails to initialize a GP model, {e}") + + if do_optimize: + self.hypers = self._optimize() + self.gp = set_params_with_array(self.gp, self.hypers, self.property_dict) + if isinstance(self.gp.model, AugmentedLocalGaussianProcess): + # we optimize the position of the inducing points and thus needs to deactivate the gradient of kernel + # hyperparameters + lhd = LatinHypercube(d=X.shape[-1], seed=self.rng.randint(0, 1000000)) + + inducing_points = torch.from_numpy(lhd.random(n=self.num_inducing_points)) + + kernel = self.gp.model.base_covar + var_gp = VariationalGaussianProcess(kernel, X_inducing=inducing_points) + + X_out_ = torch.from_numpy(X_out) + y_out_ = torch.from_numpy(y_out) + + variational_ngd_optimizer = gpytorch.optim.NGD( + 
var_gp.variational_parameters(), num_data=y_out_.size(0), lr=0.1 + ) + + var_gp.train() + likelihood = GaussianLikelihood().double() + likelihood.train() + + mll_func = gpytorch.mlls.PredictiveLogLikelihood + + var_mll = mll_func(likelihood, var_gp, num_data=y_out_.size(0)) + + for t in var_gp.variational_parameters(): + t.requires_grad = False + + x0, property_dict, bounds = module_to_array(module=var_mll) + for t in var_gp.variational_parameters(): + t.requires_grad = True + bounds = np.asarray(bounds).transpose().tolist() + + start_points = [x0] + + inducing_idx = 0 + + inducing_size = X_out.shape[-1] * self.num_inducing_points + for p_name, attrs in property_dict.items(): + if p_name != "model.variational_strategy.inducing_points": + # Construct the new tensor + if len(attrs.shape) == 0: # deal with scalar tensors + inducing_idx = inducing_idx + 1 + else: + inducing_idx = inducing_idx + np.prod(attrs.shape) + else: + break + while len(start_points) < 3: + new_start_point = np.random.rand(*x0.shape) + new_inducing_points = torch.from_numpy(lhd.random(n=self.num_inducing_points)).flatten() + new_start_point[inducing_idx : inducing_idx + inducing_size] = new_inducing_points + start_points.append(new_start_point) + + def sci_opi_wrapper( + x: np.ndarray, + mll: gpytorch.module, + property_dict: Dict, + train_inputs: torch.Tensor, + train_targets: torch.Tensor, + ) -> Tuple[float, np.ndarray]: + """ + A modification of from botorch.optim.utils._scipy_objective_and_grad, the key difference is that + we do an additional natural gradient update before computing the gradient values + Parameters + ---------- + x: np.ndarray + optimizer input + mll: gpytorch.module + a gpytorch module whose hyperparameters are defined by x + property_dict: Dict + a dict describing how x is mapped to initialize mll + train_inputs: torch.Tensor (N_input, D) + input points of the GP model + train_targets: torch.Tensor (N_input, 1) + target value of the GP model + Returns + ---------- + loss: np.ndarray + loss value + grad: np.ndarray + gradient w.r.t. 
the inputs + ---------- + """ + # A modification of from botorch.optim.utils._scipy_objective_and_grad: + # https://botorch.org/api/_modules/botorch/optim/utils.html + # The key difference is that we do an additional natural gradient update here + variational_ngd_optimizer.zero_grad() + + mll = set_params_with_array(mll, x, property_dict) + mll.zero_grad() + try: # catch linear algebra errors in gpytorch + output = mll.model(train_inputs) + args = [output, train_targets] + _get_extra_mll_args(mll) + loss = -mll(*args).sum() + except RuntimeError as e: + if isinstance(e, NanError) or "singular" in e.args[0]: + return float("nan"), np.full_like(x, "nan") + else: + raise e # pragma: nocover + loss.backward() + variational_ngd_optimizer.step() + param_dict = OrderedDict(mll.named_parameters()) + grad = [] + for p_name in property_dict: + t = param_dict[p_name].grad + if t is None: + # this deals with parameters that do not affect the loss + grad.append(np.zeros(property_dict[p_name].shape.numel())) + else: + grad.append(t.detach().view(-1).cpu().double().clone().numpy()) + mll.zero_grad() + return loss.item(), np.concatenate(grad) + + theta_star = x0 + f_opt_star = np.inf + for start_point in start_points: + try: + theta, f_opt, res_dict = optimize.fmin_l_bfgs_b( + sci_opi_wrapper, + start_point, + args=(var_mll, property_dict, X_out_, y_out_), + bounds=bounds, + maxiter=50, + ) + if f_opt < f_opt_star: + f_opt_star = f_opt + theta_star = theta + except Exception as e: + self.logger.warning(f"An exception {e} occurs during the optimizaiton") + + start_idx = 0 + # modification on botorch.optim.numpy_converter.set_params_with_array as we only need to extract the + # positions of inducing points + for p_name, attrs in property_dict.items(): + if p_name != "model.variational_strategy.inducing_points": + # Construct the new tensor + if len(attrs.shape) == 0: # deal with scalar tensors + start_idx = start_idx + 1 + else: + start_idx = start_idx + np.prod(attrs.shape) + else: + end_idx = start_idx + np.prod(attrs.shape) + X_inducing = torch.tensor( + theta_star[start_idx:end_idx], dtype=attrs.dtype, device=attrs.device + ).view(*attrs.shape) + break + # set inducing points for covariance module here + self.gp_model.set_augment_module(X_inducing) + else: + self.hypers, self.property_dict, _ = module_to_array(module=self.gp) + + self.is_trained = True + return self + + def _get_gp( + self, + X_in: Optional[np.ndarray] = None, + y_in: Optional[np.ndarray] = None, + X_out: Optional[np.ndarray] = None, + y_out: Optional[np.ndarray] = None, + ) -> Optional[ExactMarginalLogLikelihood]: + """ + Construct a new GP model based on the inputs + If both in and out are None: return an empty model + If only in_x and in_y are given: return a vanilla GP model + If in_x, in_y, out_x, out_y are given: return a partial sparse GP model. + + Parameters + ---------- + X_in: Optional[np.ndarray (N_in, D)] + Input data points inside the subregion. The dimensionality of X_in is (N_in, D), + with N_in as the number of points inside the subregion and D is the number of features. If it is not given, + this function will return None to be compatible with the implementation of its parent class + y_in: Optional[np.ndarray (N_in,)] + The corresponding target values inside the subregion. + X_out: Optional[np.ndarray (N_out, D). + Input data points outside the subregion. The dimensionality of X_out is (N_out, D). 
If it is not given, this + function will return a vanilla Gaussian Process + y_out: Optional[np.ndarray (N_out)] + The corresponding target values outside the subregion. + + Returns + ------- + mll: ExactMarginalLogLikelihood + a gp module + """ + if X_in is None: + return None + + X_in = torch.from_numpy(X_in) + y_in = torch.from_numpy(y_in) + if X_out is None: + self.gp_model = ExactGPModel(X_in, y_in, likelihood=self.likelihood, base_covar_kernel=self.kernel).double() + else: + X_out = torch.from_numpy(X_out) + y_out = torch.from_numpy(y_out) + + self.gp_model = AugmentedLocalGaussianProcess( + X_in, y_in, X_out, y_out, likelihood=self.likelihood, base_covar_kernel=self.kernel # type:ignore + ).double() + mll = ExactMarginalLogLikelihood(self.likelihood, self.gp_model) + mll.double() + return mll diff --git a/smac/epm/gaussian_process.py b/smac/epm/gaussian_process/gp.py similarity index 99% rename from smac/epm/gaussian_process.py rename to smac/epm/gaussian_process/gp.py index 56fed50c9..cdfcf99f1 100644 --- a/smac/epm/gaussian_process.py +++ b/smac/epm/gaussian_process/gp.py @@ -8,8 +8,8 @@ from sklearn.gaussian_process.kernels import Kernel from smac.configspace import ConfigurationSpace -from smac.epm.base_gp import BaseModel -from smac.epm.gp_base_prior import Prior +from smac.epm.gaussian_process import BaseModel +from smac.epm.gaussian_process.utils.prior import Prior from smac.utils.constants import VERY_SMALL_NUMBER __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" diff --git a/smac/epm/gaussian_process/gpytorch.py b/smac/epm/gaussian_process/gpytorch.py new file mode 100644 index 000000000..2cf0f0539 --- /dev/null +++ b/smac/epm/gaussian_process/gpytorch.py @@ -0,0 +1,365 @@ +from typing import List, Optional, Tuple + +import warnings +from collections import OrderedDict + +import gpytorch +import numpy as np +import torch +from botorch.optim.numpy_converter import module_to_array, set_params_with_array +from botorch.optim.utils import _scipy_objective_and_grad +from gpytorch.constraints.constraints import Interval +from gpytorch.distributions.multivariate_normal import MultivariateNormal +from gpytorch.kernels import Kernel +from gpytorch.likelihoods import FixedNoiseGaussianLikelihood, GaussianLikelihood +from gpytorch.means import ZeroMean +from gpytorch.mlls import ExactMarginalLogLikelihood +from gpytorch.models import ExactGP +from gpytorch.priors import HorseshoePrior +from gpytorch.utils.errors import NotPSDError +from scipy import optimize + +from smac.configspace import ConfigurationSpace +from smac.epm.gaussian_process import BaseModel +from smac.utils.constants import VERY_SMALL_NUMBER + +warnings.filterwarnings("ignore", module="gpytorch") + + +class ExactGPModel(ExactGP): + """Exact GP model serves as a backbone of the class GaussianProcessGPyTorch""" + + def __init__( + self, train_X: torch.Tensor, train_y: torch.Tensor, base_covar_kernel: Kernel, likelihood: GaussianLikelihood + ): + """ + Initialization function + + Parameters + ---------- + train_X: torch.tenor + input feature + train_y: torch.tensor + input observations + base_covar_kernel: Kernel + covariance kernel used to compute covariance matrix + likelihood: GaussianLikelihood + GP likelihood + """ + super(ExactGPModel, self).__init__(train_X, train_y, likelihood) + # in our experiments we find that ZeroMean more robust than ConstantMean when y is normalized + self.mean_module = ZeroMean() + self.covar_module = base_covar_kernel + + def forward(self, x: torch.Tensor) -> 
MultivariateNormal: + """Compute the posterior mean and variance""" + mean_x = self.mean_module(x) + covar_x = self.covar_module(x) + return MultivariateNormal(mean_x, covar_x) + + +class GPyTorchGaussianProcess(BaseModel): + def __init__( + self, + configspace: ConfigurationSpace, + types: List[int], + bounds: List[Tuple[float, float]], + seed: int, + kernel: Kernel, + normalize_y: bool = True, + n_opt_restarts: int = 10, + likelihood: Optional[FixedNoiseGaussianLikelihood] = None, + instance_features: Optional[np.ndarray] = None, + pca_components: Optional[int] = None, + ): + """ + A Gaussian Process written with GPyTorch, its interface is written to be compatible with partial sparse gaussian + process + + Parameters + ---------- + configspace: ConfigurationSpace + Configuration space + types : List[int] + Specifies the number of categorical values of an input dimension where + the i-th entry corresponds to the i-th input dimension. Let's say we + have 2 dimensions where the first dimension consists of 3 different + categorical choices, and the second dimension is continuous than we + have to pass [3, 0]. Note that we count starting from 0. + bounds : List[Tuple[float, float]] + bounds of input dimensions: (lower, uppper) for continuous dims; (n_cat, np.nan) for categorical dims + seed : int + Model seed. + kernel : Kernel + Specifies the kernel that is used for all Gaussian Process + normalize_y : bool + Zero mean unit variance normalization of the output values + n_opt_restarts : int + Number of restarts for GP hyperparameter optimization + likelihood: Optional[FixedNoiseGaussianLikelihood] = None, + Gaussian Likelihood (or noise) + instance_features : np.ndarray (I, K) + Contains the K dimensional instance features of the I different instances + pca_components : float + The number of components to keep when using PCA to reduce dimensionality of instance features. Requires to + set n_feats (> pca_dims). + """ + super(GPyTorchGaussianProcess, self).__init__( + configspace, + types, + bounds, + seed, + kernel, + instance_features, + pca_components, + ) + if likelihood is None: + noise_prior = HorseshoePrior(0.1) + likelihood = GaussianLikelihood( + noise_prior=noise_prior, noise_constraint=Interval(np.exp(-25), np.exp(2), transform=None) + ).double() + self.likelihood = likelihood + + self.normalize_y = normalize_y + + n_opt_restarts = int(n_opt_restarts) + if n_opt_restarts <= 0: + raise ValueError(f"n_opt_restarts needs to be positive, however, it get {n_opt_restarts}") + self.n_opt_restarts = n_opt_restarts + + self.hypers = np.empty((0,)) + self.property_dict = OrderedDict() # type: OrderedDict + self.is_trained = False + + def _train(self, X: np.ndarray, y: np.ndarray, do_optimize: bool = True) -> "GPyTorchGaussianProcess": + """ + Computes the Cholesky decomposition of the covariance of X and + estimates the GP hyperparameters by optimizing the marginal + loglikelihood. The prior mean of the GP is set to the empirical + mean of X. + + Parameters + ---------- + X: np.ndarray (N, D) + Input data points. The dimensionality of X is (N, D), + with N as the number of points and D is the number of features. + y: np.ndarray (N,) + The corresponding target values, N as the number of points + do_optimize: boolean + If set to true, the hyperparameters are optimized otherwise + the default hyperparameters of the kernel are used. 
+ """ + X = self._impute_inactive(X) + if len(y.shape) == 1: + self.n_objectives_ = 1 + else: + self.n_objectives_ = y.shape[1] + if self.n_objectives_ == 1: + y = y.flatten() + + if self.normalize_y: + y = self._normalize_y(y) + + n_tries = 10 + for i in range(n_tries): + try: + self.gp = self._get_gp(X, y) + break + except Exception as e: + if i == n_tries - 1: + # To avoid Endless loop, we need to stop it when we have n_tries unsuccessful tries. + raise e + + if do_optimize: + self.hypers = self._optimize() + self.gp = set_params_with_array(self.gp, self.hypers, self.property_dict) + else: + self.hypers, self.property_dict, _ = module_to_array(module=self.gp) + self.is_trained = True + return self + + def _get_gp( + self, X: Optional[np.ndarray] = None, y: Optional[np.ndarray] = None + ) -> Optional[ExactMarginalLogLikelihood]: + """ + Get the GP model with the given X and y values. As GPyTorch requires the input data to initialize a new + model, we also pass X and y here. X and y are set optional to ensure compatibility. + + Parameters + ---------- + X: Optional[np.ndarray(N, D)] + input feature vectors, N is number of data points, and D is number of feature dimensions + y: Optional[np.ndarray(N,)] + input observations, N is number of data points + Returns + ------- + mll : Optional[ExactMarginalLogLikelihood] + a GPyTorch model with Zero Mean and user specified covariance + """ + if X is None: + # To be compatible with the base model + return None + + X = torch.from_numpy(X) + y = torch.from_numpy(y) + self.gp_model = ExactGPModel(X, y, likelihood=self.likelihood, base_covar_kernel=self.kernel).double() + + mll = ExactMarginalLogLikelihood(self.likelihood, self.gp_model) + mll.double() + return mll + + def _optimize(self) -> np.ndarray: + """ + Optimizes the marginal log likelihood and returns the best found + hyperparameter configuration theta. 
+ + Returns + ------- + theta : np.ndarray(H) + Hyperparameter vector that maximizes the marginal log likelihood + """ + x0, property_dict, bounds = module_to_array(module=self.gp) + + bounds = np.asarray(bounds).transpose().tolist() + + self.property_dict = property_dict + + p0 = [x0] + + # Avoid infinite sampling + n_tries = 5000 + for i in range(n_tries): + try: + gp_model = self.gp.pyro_sample_from_prior() + x_out = [] + for key in property_dict.keys(): + param = gp_model + param_names = key.split(".") + for name in param_names: + param = getattr(param, name) + x_out.append(param.detach().view(-1).cpu().double().clone().numpy()) + sample = np.concatenate(x_out) + p0.append(sample.astype(np.float64)) + except Exception as e: + if i == n_tries - 1: + self.logger.debug(f"Fails to sample new hyperparameters because of {e}") + raise e + continue + if len(p0) == self.n_opt_restarts: + break + + self.gp_model.train() + self.likelihood.train() + + theta_star = x0 + f_opt_star = np.inf + for i, start_point in enumerate(p0): + try: + theta, f_opt, _ = optimize.fmin_l_bfgs_b( + _scipy_objective_and_grad, + start_point, + args=(self.gp, property_dict), + bounds=bounds, + ) + except NotPSDError as e: + self.logger.warning(f"Fail to optimize the GP hyperparameters as an Error occurs: {e}") + f_opt = np.inf + theta = start_point + if f_opt < f_opt_star: + f_opt_star = f_opt + theta_star = theta + return theta_star + + def _predict( + self, X_test: np.ndarray, cov_return_type: Optional[str] = "diagonal_cov" + ) -> Tuple[np.ndarray, Optional[np.ndarray]]: + r""" + Returns the predictive mean and variance of the objective function at + the given test points. + + Parameters + ---------- + X_test: np.ndarray (N, D) + Input test points + cov_return_type: Optional[str] + Specifies what to return along with the mean. Refer ``predict()`` for more information. + + Returns + ------- + np.array(N,) + predictive mean + np.array(N,) or np.array(N, N) or None + predictive variance or standard deviation + + """ + if not self.is_trained: + raise Exception("Model has to be trained first!") + + X_test = torch.from_numpy(self._impute_inactive(X_test)) + self.likelihood.eval() + self.gp_model.eval() + + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + + observed_pred = self.likelihood(self.gp_model(X_test)) + + mu = observed_pred.mean.numpy() + if cov_return_type is None: + var = None + + if self.normalize_y: + mu = self._untransform_y(mu) + + else: + if cov_return_type != "full_cov": + var = observed_pred.stddev.numpy() + var = var**2 # since we get standard deviation for faster computation + else: + # output full covariance + var = observed_pred.covariance_matrix.numpy() + + # Clip negative variances and set them to the smallest + # positive float value + var = np.clip(var, VERY_SMALL_NUMBER, np.inf) + + if self.normalize_y: + mu, var = self._untransform_y(mu, var) + + if cov_return_type == "diagonal_std": + var = np.sqrt(var) # converting variance to std deviation if specified + + return mu, var + + def sample_functions(self, X_test: np.ndarray, n_funcs: int = 1) -> np.ndarray: + """ + Samples F function values from the current posterior at the N + specified test points. + + Parameters + ---------- + X_test: np.ndarray (N, D) + Input test points + n_funcs: int + The number of function values that are drawn at each test point. + + Returns + ------- + function_samples: np.array(F, N) + The F function values drawn at the N test points. 
+ """ + if not self.is_trained: + raise Exception("Model has to be trained first!") + self.likelihood.eval() + self.gp_model.eval() + + X_test = torch.from_numpy(self._impute_inactive(X_test)) + with torch.no_grad(): + funcs = self.likelihood(self.gp_model(X_test)).sample(torch.Size([n_funcs])).t().cpu().numpy() + + if self.normalize_y: + funcs = self._untransform_y(funcs) + + if len(funcs.shape) == 1: + return funcs[None, :] + else: + return funcs diff --git a/smac/epm/gp_kernels.py b/smac/epm/gaussian_process/kernels/__init__.py similarity index 99% rename from smac/epm/gp_kernels.py rename to smac/epm/gaussian_process/kernels/__init__.py index 2645cfc9a..9f9bea9c6 100644 --- a/smac/epm/gp_kernels.py +++ b/smac/epm/gaussian_process/kernels/__init__.py @@ -9,7 +9,7 @@ import scipy.special import sklearn.gaussian_process.kernels as kernels -from smac.epm.gp_base_prior import Prior +from smac.epm.gaussian_process.utils.prior import Prior __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" diff --git a/smac/epm/gaussian_process/kernels/boing.py b/smac/epm/gaussian_process/kernels/boing.py new file mode 100644 index 000000000..33d955e1a --- /dev/null +++ b/smac/epm/gaussian_process/kernels/boing.py @@ -0,0 +1,560 @@ +from typing import Any, Dict, Optional, Tuple, Union + +import copy +import math + +import gpytorch +import numpy as np +import torch +from gpytorch import settings +from gpytorch.kernels import Kernel, MaternKernel, ProductKernel, ScaleKernel +from gpytorch.lazy import ( + DiagLazyTensor, + MatmulLazyTensor, + PsdSumLazyTensor, + RootLazyTensor, + delazify, +) +from gpytorch.likelihoods import GaussianLikelihood +from gpytorch.means.mean import Mean +from gpytorch.utils.cholesky import psd_safe_cholesky +from sklearn.gaussian_process.kernels import Kernel as SKLKernels + +from smac.epm.gaussian_process.kernels import ConstantKernel, WhiteKernel + + +class MixedKernel(ProductKernel): + """ + A special form of ProductKernel. It is composed of a cont_kernel and a cat_kernel that work with continuous and + categorical parameters, respectively. Its forward pass allows an additional parameter to determine if only + cont_kernel is applied to the input. + """ + + def __init__(self, cont_kernel: Kernel, cat_kernel: Kernel): + kernels = cont_kernel.kernels if isinstance(cont_kernel, ProductKernel) else [cont_kernel] + kernels += cat_kernel.kernels if isinstance(cat_kernel, ProductKernel) else [cat_kernel] + super().__init__(*kernels) + self.cont_kernel = cont_kernel + self.cat_kernel = cat_kernel + + def forward( + self, x1: torch.Tensor, x2: torch.Tensor, diag: bool = False, cont_only: bool = False, **params: Any + ) -> gpytorch.lazy.LazyTensor: + """Compute kernel values, if cont_only is True, then the categorical kernel is omitted""" + if not cont_only: + return super().forward(x1, x2, diag, **params) + else: + return self.cont_kernel(x1, x2, diag, **params) + + +def construct_gp_kernel( + kernel_kwargs: Dict[str, Any], cont_dims: np.ndarray, cat_dims: np.ndarray +) -> Union[Kernel, SKLKernels]: + """ + Construct a GP kernel with the given kernel init argument, the cont_dims, and cat_dims of the problem. Since the + subspace might not have the same number of dimensions as the global search space. + We need to reconstruct the kernel every time when a new subspace is generated. + + Parameters + ---------- + kernel_kwargs: Dict[str, Any] + kernel kwargs. Arguments to initialize the kernels. 
It needs to contain the following items: + cont_kernel: type of continuous kernels + cont_kernel_kwargs: additional arguments for continuous kernels, for instance, length constraints and prior + cat_kernel: type of categorical kernels + cat_kernel_kwargs: additional arguments for categorical kernels, for instance, length constraints and prior + scale_kernel: type of scale kernels + scale_kernel_kwargs: additional arguments for scale kernels, for instance, length constraints and prior + cont_dims: np.ndarray + dimensions of continuous hyperparameters + cat_dims: np.ndarray + dimensions of categorical hyperparameters + Returns + ------- + kernel: Union[Kernel, SKLKernels] + constructed kernels + + """ + if len(cont_dims) > 0: + cont_kernel_class = kernel_kwargs.get("cont_kernel", MaternKernel) + cont_kernel_kwargs = kernel_kwargs.get("cont_kernel_kwargs", {}) + cont_kernel = cont_kernel_class( + ard_num_dims=cont_dims.shape[-1], active_dims=tuple(cont_dims), **cont_kernel_kwargs + ).double() + + if len(cat_dims) > 0: + cat_kernel_class = kernel_kwargs.get("cat_kernel", MaternKernel) + cat_kernel_kwargs = kernel_kwargs.get("cat_kernel_kwargs", {}) + cat_kernel = cat_kernel_class( + ard_num_dims=cat_dims.shape[-1], active_dims=tuple(cat_dims), **cat_kernel_kwargs + ).double() + + if len(cont_dims) > 0 and len(cat_dims) > 0: + if isinstance(cont_kernel, SKLKernels): + base_kernel = cont_kernel * cat_kernel + else: + base_kernel = MixedKernel(cont_kernel=cont_kernel, cat_kernel=cat_kernel) + elif len(cont_dims) > 0 and len(cat_dims) == 0: + base_kernel = cont_kernel + elif len(cont_dims) == 0 and len(cat_dims) > 0: + base_kernel = cat_kernel + else: + raise ValueError("Either cont_dims or cat_dims must exist!") + if isinstance(base_kernel, SKLKernels): + scale_kernel_class = kernel_kwargs.get("scale_kernel", ConstantKernel) + scale_kernel_kwargs = kernel_kwargs.get("scale_kernel_kwargs", {}) + scale_kernel = scale_kernel_class(**scale_kernel_kwargs) + + noise_kernel_class = kernel_kwargs.get("noise_kernel", WhiteKernel) + noise_kernel_kwargs = kernel_kwargs.get("noise_kernel_kwargs", {}) + noise_kernel = noise_kernel_class(**noise_kernel_kwargs) + + gp_kernel = scale_kernel * base_kernel + noise_kernel + else: + scale_kernel_class = kernel_kwargs.get("scale_kernel", ScaleKernel) + scale_kernel_kwargs = kernel_kwargs.get("scale_kernel_kwargs", {}) + gp_kernel = scale_kernel_class(base_kernel=base_kernel, **scale_kernel_kwargs) + return gp_kernel + + +class FITCKernel(Kernel): + def __init__( + self, + base_kernel: Kernel, + X_inducing: torch.Tensor, + likelihood: GaussianLikelihood, + X_out: torch.Tensor, + y_out: torch.Tensor, + active_dims: Optional[Tuple[int]] = None, + ): + r"""A reimplementation of FITC Kernel that computes the posterior explicitly for globally augmented local GP. + This should work exactly the same as a gpytorch.kernel.InducingPointKernel. + However, it takes much less time when combined with LGPGA. + References: Edward Snelson and Zoubin Ghahramani. Sparse Gaussian processes using pseudo-inputs. Advances in + Neural Information Processing Systems 18, Cambridge, Massachusetts, 2006. The MIT Press. 
+ https://papers.nips.cc/paper/2005/hash/4491777b1aa8b5b32c2e8666dbe1a495-Abstract.html + + Mean value is computed with: + \mathbf{\mu_{l'}} = \mathbf{K_{l',u} \Sigma K_{u,1} \Lambda}^{-1}\mathbf{y_g} \label{eq:mean_sgp} + and variance value: + \mathbf{\sigma}^2_{l'} = \mathbf{K_{l',l'}} - \mathbf{Q_{l', l'} + \mathbf{K_{l', u}\Sigma K_{u, l'}}} + \mathbf{\Sigma} = (\mathbf{K_{u,u}} + \mathbf{K_{u, g} \Lambda}^{-1}\mathbf{K_{g,u}})^{-1} + \mathbf{\Lambda} = diag[\mathbf{K_{g,g}-Q_{g,g}} + \sigma^2_{noise}\idenmat] + ---------- + base_kernel: Kernel + base kernel function + X_inducing: torch.Tensor (N_inducing, D) + inducing points, a torch tensor with shape (N_inducing, D), N_inducing is the number of the inducing points + likelihood: GaussianLikelihood + GP likelihood + X_out: torch.Tensor (N_out,D) + data features outside the subregion, it needs to be of size (N_out, D), N_out is the number of points + outside the subspace + y_out: torch.Tensor + data observations outside the subregion + active_dims: typing.Optional[typing.Tuple[int]] = None + Set this if you want to compute the covariance of only a few input dimensions. The ints + corresponds to the indices of the dimensions. Default: `None`. + """ + super(FITCKernel, self).__init__(active_dims=active_dims) + self.has_lengthscale = base_kernel.has_lengthscale + self.base_kernel = base_kernel + self.likelihood = likelihood + + if X_inducing.ndimension() == 1: + X_inducing = X_inducing.unsqueeze(-1) + + self.X_out = X_out + self.y_out = y_out + self.register_parameter(name="X_inducing", parameter=torch.nn.Parameter(X_inducing)) + + def _clear_cache(self) -> None: + if hasattr(self, "_cached_kernel_mat"): + del self._cached_kernel_mat + if hasattr(self, "_cached_inducing_sigma"): + del self._cached_inducing_sigma + if hasattr(self, "_cached_poster_mean_mat"): + del self._cached_poster_mean_mat + if hasattr(self, "_train_cached_k_u1"): + del self._train_cached_k_u1 + if hasattr(self, "_train_cached_lambda_diag_inv"): + del self._train_cached_lambda_diag_inv + if hasattr(self, "_train_cached_posterior_mean"): + del self._train_cached_posterior_mean + if hasattr(self, "_cached_kernel_inv_root"): + del self._cached_kernel_inv_root + + @property + def _inducing_mat(self) -> torch.Tensor: + """ + Computes inducing matrix, K(X_inducing, X_inducing) + + Returns + ------- + res: torch.Tensor (N_inducing, N_inducing) + K(X_inducing, X_inducing) + """ + if not self.training and hasattr(self, "_cached_kernel_mat"): + return self._cached_kernel_mat + else: + res = delazify(self.base_kernel(self.X_inducing, self.X_inducing)) + if not self.training: + self._cached_kernel_mat = res # type: torch.Tensor + return res + + @property + def _inducing_inv_root(self) -> torch.Tensor: + """ + Computes the inverse of the inducing matrix: K_inv(X_inducing, X_inducing) = K(X_inducing, X_inducing)^(-1) + + Returns + ------- + res: torch.Tensor (N_inducing, N_inducing) + K_inv(X_inducing, X_inducing) + """ + if not self.training and hasattr(self, "_cached_kernel_inv_root"): + return self._cached_kernel_inv_root + else: + chol = psd_safe_cholesky(self._inducing_mat, upper=True, jitter=settings.cholesky_jitter.value()) + eye = torch.eye(chol.size(-1), device=chol.device, dtype=chol.dtype) + inv_root = torch.triangular_solve(eye, chol)[0] + + res = inv_root + if not self.training: + self._cached_kernel_inv_root = res # type: torch.Tensor + return res + + @property + def _k_u1(self) -> torch.Tensor: + """ + Computes the covariance matrix between the X_inducing and X_out : 
K(X_inducing, X_out) + + Returns + ------- + res: torch.Tensor (N_inducing, N_out) + K(X_inducing, X_out) + """ + if not self.training and hasattr(self, "_cached_k_u1"): + return self._cached_k_u1 + else: + res = delazify(self.base_kernel(self.X_inducing, self.X_out)) + if not self.training: + self._cached_k_u1 = res # type: torch.Tensor + else: + self._train_cached_k_u1 = res # type: torch.Tensor + return res + + @property + def _lambda_diag_inv(self) -> torch.Tensor: + r"""Computes the inverse of lambda matrix, it is computed by + \Lambda = diag[\mathbf{K_{X_out,X_out}-Q_{X_out,X_out}} + \sigma^2_{noise}\idenmat] and + Q{X_out, X_out} = K(X_out, X_inducing) K^{-1}(X_inducing,X_inducing) K(X_inducing, X_out) + + Returns + ------- + res: torch.Tensor (N_out, N_out) + inverse of the diagonal matrix lambda + """ + if not self.training and hasattr(self, "_cached_lambda_diag_inv"): + return self._cached_lambda_diag_inv + else: + diag_k11 = delazify(self.base_kernel(self.X_out, diag=True)) + + diag_q11 = delazify(RootLazyTensor(self._k_u1.transpose(-1, -2).matmul(self._inducing_inv_root))).diag() + + # Diagonal correction for predictive posterior + correction = (diag_k11 - diag_q11).clamp(0, math.inf) + + sigma = self.likelihood._shaped_noise_covar(correction.shape).diag() + + res = delazify(DiagLazyTensor((correction + sigma).reciprocal())) + + if not self.training: + self._cached_lambda_diag_inv = res # type: torch.Tensor + else: + self._train_cached_lambda_diag_inv = res # type: torch.Tensor + return res + + @property + def _inducing_sigma(self) -> torch.Tensor: + r"""Computes the inverse of lambda matrix, it is computed by + \mathbf{\Sigma} = (\mathbf{K_{X_inducing,X_inducing}} + + \mathbf{K_{X_inducing, X_out} \Lambda}^{-1}\mathbf{K_{X_out,X_inducing}}) + + Returns + ------- + res: torch.Tensor (N_inducing, N_inducing) + \Sigma + """ + if not self.training and hasattr(self, "_cached_inducing_sigma"): + return self._cached_inducing_sigma + else: + k_u1 = self._k_u1 + res = PsdSumLazyTensor( + self._inducing_mat, + MatmulLazyTensor(k_u1, MatmulLazyTensor(self._lambda_diag_inv, k_u1.transpose(-1, -2))), + ) + res = delazify(res) + if not self.training: + self._cached_inducing_sigma = res # type: torch.Tensor + + return res + + @property + def _inducing_sigma_inv_root(self) -> torch.Tensor: + r"""Inverse of Sigma matrix: + + Returns + ------- + res: torch.Tensor (N_inducing, N_inducing) + \Sigma ^{-1} + """ + if not self.training and hasattr(self, "_cached_inducing_sigma_inv_root"): + return self._cached_inducing_sigma_inv_root + else: + chol = psd_safe_cholesky(self._inducing_sigma, upper=True, jitter=settings.cholesky_jitter.value()) + + eye = torch.eye(chol.size(-1), device=chol.device, dtype=chol.dtype) + inv_root = torch.triangular_solve(eye, chol)[0] + res = inv_root + if not self.training: + self._cached_inducing_sigma_inv_root = res # type: torch.Tensor + return res + + @property + def _poster_mean_mat(self) -> torch.Tensor: + r"""A cached value for computing the posterior mean of a sparse kernel it is defined by + \Sigma K_{u, 1} \Lambda}^{-1}\mathbf{y_out} + + Returns + ------- + res: torch.Tensor (N_inducing, 1) + cached posterior mean + """ + if not self.training and hasattr(self, "_cached_poster_mean_mat"): + return self._cached_poster_mean_mat + else: + inducing_sigma_inv_root = self._inducing_sigma_inv_root + sigma = RootLazyTensor(inducing_sigma_inv_root) + + k_u1 = self._k_u1 + lambda_diag_inv = self._lambda_diag_inv + + res_mat = delazify(MatmulLazyTensor(sigma, 
MatmulLazyTensor(k_u1, lambda_diag_inv)))
+
+            res = torch.matmul(res_mat, self.y_out)
+
+            if not self.training:
+                self._cached_poster_mean_mat = res  # type: torch.Tensor
+            return res
+
+    def _get_covariance(self, x1: torch.Tensor, x2: torch.Tensor) -> gpytorch.lazy.LazyTensor:
+        r"""Compute the posterior covariance matrix of a sparse kernel explicitly
+
+        Parameters
+        ----------
+        x1: torch.Tensor(N_x1, D)
+            first input of the FITC kernel
+        x2: torch.Tensor(N_x2, D)
+            second input of the FITC kernel
+
+        Returns
+        -------
+        res: PsdSumLazyTensor
+            the posterior covariance between x1 and x2, defined by
+            K_{x1,x2} - Q_{x1,x2} + K_{x1,u} \Sigma K_{u,x2}
+        """
+        k_x1x2 = self.base_kernel(x1, x2)
+        k_x1u = delazify(self.base_kernel(x1, self.X_inducing))
+        inducing_inv_root = self._inducing_inv_root
+        inducing_sigma_inv_root = self._inducing_sigma_inv_root
+        if torch.equal(x1, x2):
+            q_x1x2 = RootLazyTensor(k_x1u.matmul(inducing_inv_root))
+
+            s_x1x2 = RootLazyTensor(k_x1u.matmul(inducing_sigma_inv_root))
+        else:
+            k_x2u = delazify(self.base_kernel(x2, self.X_inducing))
+            q_x1x2 = MatmulLazyTensor(
+                k_x1u.matmul(inducing_inv_root), k_x2u.matmul(inducing_inv_root).transpose(-1, -2)
+            )
+            s_x1x2 = MatmulLazyTensor(
+                k_x1u.matmul(inducing_sigma_inv_root), k_x2u.matmul(inducing_sigma_inv_root).transpose(-1, -2)
+            )
+        covar = PsdSumLazyTensor(k_x1x2, -1.0 * q_x1x2, s_x1x2)
+
+        if self.training:
+            k_iu = self.base_kernel(x1, self.X_inducing)
+            sigma = RootLazyTensor(inducing_sigma_inv_root)
+
+            k_u1 = self._train_cached_k_u1 if hasattr(self, "_train_cached_k_u1") else self._k_u1
+            lambda_diag_inv = (
+                self._train_cached_lambda_diag_inv
+                if hasattr(self, "_train_cached_lambda_diag_inv")
+                else self._lambda_diag_inv
+            )
+
+            mean = torch.matmul(
+                delazify(MatmulLazyTensor(k_iu, MatmulLazyTensor(sigma, MatmulLazyTensor(k_u1, lambda_diag_inv)))),
+                self.y_out,
+            )
+
+            self._train_cached_posterior_mean = mean  # type: torch.Tensor
+        return covar
+
+    def posterior_mean(self, inputs: torch.Tensor) -> torch.Tensor:
+        """
+        The posterior mean of the FITC kernel, which will serve as the prior mean of the dense kernel.
+ + Parameters + ---------- + inputs: torch.Tensor(N_inputs, D) + input of the FITC kernel + + Returns + ------- + res: Torch.Tensor (N_inputs, 1) + The posterior mean of the FITC Kernel + """ + if self.training and hasattr(self, "_train_cached_posterior_mean"): + return self._train_cached_posterior_mean + if inputs.ndimension() == 1: + inputs = inputs.unsqueeze(1) + + k_iu = delazify(self.base_kernel(inputs, self.X_inducing, cont_only=True)) + poster_mean = self._poster_mean_mat + res = torch.matmul(k_iu, poster_mean) + return res + + def forward( + self, x1: torch.Tensor, x2: torch.Tensor, diag: bool = False, **kwargs: Dict + ) -> gpytorch.lazy.LazyTensor: + """Compute the kernel function""" + covar = self._get_covariance(x1, x2) + if self.training: + if not torch.equal(x1, x2): + raise RuntimeError("x1 should equal x2 in training mode") + + if diag: + return covar.diag() + else: + return covar + + def num_outputs_per_input(self, x1: torch.Tensor, x2: torch.Tensor) -> int: + """ + Number of outputs given the inputs + if x1 is of size `n x d` and x2 is size `m x d`, then the size of the kernel + will be `(n * num_outputs_per_input) x (m * num_outputs_per_input)` + + Parameters + ---------- + x1: torch.Tensor + the first input of the kernel + x2: torch.Tensor + the second input of the kernel + Returns + ------- + res: int + for base kernels such as matern or RBF kernels, this value needs to be 1. + """ + return self.base_kernel.num_outputs_per_input(x1, x2) + + def __deepcopy__(self, memo: Dict) -> "FITCKernel": + replace_inv_root = False + replace_kernel_mat = False + replace_k_u1 = False + replace_lambda_diag_inv = False + replace_inducing_sigma = False + replace_inducing_sigma_inv_root = False + replace_poster_mean = False + + if hasattr(self, "_cached_kernel_inv_root"): + replace_inv_root = True + kernel_inv_root = self._cached_kernel_inv_root + if hasattr(self, "_cached_kernel_mat"): + replace_kernel_mat = True + kernel_mat = self._cached_kernel_mat + if hasattr(self, "_cached_k_u1"): + replace_k_u1 = True + k_u1 = self._cached_k_u1 + if hasattr(self, "_cached_lambda_diag_inv"): + replace_lambda_diag_inv = True + lambda_diag_inv = self._cached_lambda_diag_inv + if hasattr(self, "_cached_inducing_sigma"): + replace_inducing_sigma = True + inducing_sigma = self._cached_inducing_sigma + if hasattr(self, "_cached_inducing_sigma_inv_root"): + replace_inducing_sigma_inv_root = True + inducing_sigma_inv_root = self._cached_inducing_sigma_inv_root + if hasattr(self, "_cached_poster_mean_mat"): + replace_poster_mean = True + poster_mean_mat = self._cached_poster_mean_mat + + cp = self.__class__( + base_kernel=copy.deepcopy(self.base_kernel), + X_inducing=copy.deepcopy(self.X_inducing), + X_out=self.X_out, + y_out=self.y_out, + likelihood=copy.deepcopy(self.likelihood), + active_dims=self.active_dims, + ) + + if replace_inv_root: + cp._cached_kernel_inv_root = kernel_inv_root + + if replace_kernel_mat: + cp._cached_kernel_mat = kernel_mat + + if replace_k_u1: + cp._cached_k_u1 = k_u1 + + if replace_lambda_diag_inv: + cp._cached_lambda_diag_inv = lambda_diag_inv + + if replace_inducing_sigma: + cp._cached_inducing_sigma = inducing_sigma + + if replace_inducing_sigma_inv_root: + cp._cached_inducing_sigma_inv_root = inducing_sigma_inv_root + + if replace_poster_mean: + cp._cached_poster_mean_mat = poster_mean_mat + + return cp + + +class FITCMean(Mean): + def __init__(self, covar_module: FITCKernel, batch_shape: torch.Size = torch.Size(), **kwargs: Dict): + """ + Read the posterior mean value of 
the given FITC kernel and use it as the prior mean value for the
+        second stage
+
+        Parameters
+        ----------
+        covar_module: FITCKernel
+            a FITC kernel
+        batch_shape: torch.Size
+            batch size
+        """
+        super(FITCMean, self).__init__()
+        self.covar_module = covar_module
+        self.batch_shape = batch_shape
+        self.covar_module = covar_module
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        """
+        Compute the posterior mean from the cached values of the FITC kernel
+
+        Parameters
+        ----------
+        input: torch.Tensor(N_xin, D)
+            input torch Tensor
+
+        Returns
+        -------
+        res: torch.Tensor(N_xin)
+            posterior mean value of the FITC GP model
+        """
+        # detach is applied here to avoid updating the same parameter twice in the same iteration,
+        # which might result in an error
+        res = self.covar_module.posterior_mean(input).detach()
+        return res
diff --git a/smac/epm/gaussian_process_mcmc.py b/smac/epm/gaussian_process/mcmc.py
similarity index 97%
rename from smac/epm/gaussian_process_mcmc.py
rename to smac/epm/gaussian_process/mcmc.py
index 5f3e21de4..e10694c53 100644
--- a/smac/epm/gaussian_process_mcmc.py
+++ b/smac/epm/gaussian_process/mcmc.py
@@ -10,9 +10,8 @@
 from sklearn.gaussian_process.kernels import Kernel
 
 from smac.configspace import ConfigurationSpace
-from smac.epm.base_gp import BaseModel
-from smac.epm.gaussian_process import GaussianProcess
-from smac.epm.gp_base_prior import Prior
+from smac.epm.gaussian_process import BaseModel, GaussianProcess
+from smac.epm.gaussian_process.utils.prior import Prior
 
 __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover"
 __license__ = "3-clause BSD"
@@ -21,11 +20,11 @@
 logger = logging.getLogger(__name__)
 
 
-class GaussianProcessMCMC(BaseModel):
+class MCMCGaussianProcess(BaseModel):
     """Gaussian process model.
 
-    The GP hyperparameters are integrated out by MCMC. If you use this class
-    make sure that you also use an integrated acquisition function to
+    The GP hyperparameters are integrated out by Markov chain Monte Carlo (MCMC). If you use this class,
+    make sure that you also use an integrated acquisition function to
     integrate over the GP's hyperparameter as proposed by Snoek et al.
 
     This code is based on the implementation of RoBO:
@@ -113,7 +112,7 @@ def __init__(
         # Internal statistics
         self._n_ll_evals = 0
 
-    def _train(self, X: np.ndarray, y: np.ndarray, do_optimize: bool = True) -> "GaussianProcessMCMC":
+    def _train(self, X: np.ndarray, y: np.ndarray, do_optimize: bool = True) -> "MCMCGaussianProcess":
         """Performs MCMC sampling to sample hyperparameter configurations from the
         likelihood and trains for each sample a GP on X and y.
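For reference, a minimal sketch of how the relocated Gaussian process classes are imported and assembled under the new smac.epm.gaussian_process layout. It mirrors the construction used in smac_bb_facade.py further below; the toy configuration space, the simple kernel, and the constructor keywords (seed, normalize_y) are illustrative assumptions based on the pre-1.4 signatures, not the facade defaults.

    import numpy as np
    from ConfigSpace import ConfigurationSpace
    from ConfigSpace.hyperparameters import UniformFloatHyperparameter

    # New module locations introduced by this patch (previously smac.epm.gaussian_process,
    # smac.epm.gp_kernels and smac.epm.util_funcs):
    from smac.epm.gaussian_process import GaussianProcess
    from smac.epm.gaussian_process.kernels import ConstantKernel, Matern, WhiteKernel
    from smac.epm.utils import get_types

    rng = np.random.RandomState(0)
    cs = ConfigurationSpace()
    cs.add_hyperparameter(UniformFloatHyperparameter("x", 0.0, 1.0))
    types, bounds = get_types(cs, instance_features=None)

    # A scaled Matern kernel plus white noise, loosely following smac_bb_facade.py
    kernel = ConstantKernel(2.0) * Matern(np.ones(1), nu=2.5) + WhiteKernel(1e-8)

    model = GaussianProcess(
        configspace=cs,
        types=types,
        bounds=bounds,
        seed=rng.randint(0, 2**20),
        kernel=kernel,
        normalize_y=True,  # assumed keyword, as set by the black-box facade
    )

    # Train on a few random points and query the posterior
    X = rng.rand(10, 1)
    y = np.sin(6 * X[:, 0])
    model.train(X, y)
    mu, var = model.predict(rng.rand(3, 1))

MCMCGaussianProcess (smac/epm/gaussian_process/mcmc.py) and the priors in smac/epm/gaussian_process/utils/prior.py are imported the same way, as the updated smac_bb_facade.py below shows.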
diff --git a/smac/optimizer/multi_objective/__init__.py b/smac/epm/gaussian_process/utils/__init__.py similarity index 100% rename from smac/optimizer/multi_objective/__init__.py rename to smac/epm/gaussian_process/utils/__init__.py diff --git a/smac/epm/gp_base_prior.py b/smac/epm/gaussian_process/utils/prior.py similarity index 100% rename from smac/epm/gp_base_prior.py rename to smac/epm/gaussian_process/utils/prior.py diff --git a/smac/epm/base_uncorrelated_mo_model.py b/smac/epm/multi_objective_epm.py similarity index 94% rename from smac/epm/base_uncorrelated_mo_model.py rename to smac/epm/multi_objective_epm.py index 44cec9d8e..f294854ae 100644 --- a/smac/epm/base_uncorrelated_mo_model.py +++ b/smac/epm/multi_objective_epm.py @@ -4,13 +4,13 @@ import numpy as np from smac.configspace import ConfigurationSpace -from smac.epm.base_epm import AbstractEPM +from smac.epm.base_epm import BaseEPM __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" -class UncorrelatedMultiObjectiveModel(AbstractEPM): +class MultiObjectiveEPM(BaseEPM): """Wrapper for the surrogate models to predict multiple targets. Only a list with the target names and the types array for the @@ -46,7 +46,7 @@ class UncorrelatedMultiObjectiveModel(AbstractEPM): target names num_targets: int number of targets - estimators: List[AbstractEPM] + estimators: List[BaseEPM] a list of estimators predicting different target values """ @@ -73,7 +73,7 @@ def __init__( model_kwargs = {} self.target_names = target_names self.num_targets = len(self.target_names) - self.estimators: List[AbstractEPM] = self.construct_estimators(configspace, types, bounds, model_kwargs) + self.estimators: List[BaseEPM] = self.construct_estimators(configspace, types, bounds, model_kwargs) @abstractmethod def construct_estimators( @@ -82,7 +82,7 @@ def construct_estimators( types: List[int], bounds: List[Tuple[float, float]], model_kwargs: Dict[str, Any], - ) -> List[AbstractEPM]: + ) -> List[BaseEPM]: """ Construct a list of estimators. The number of the estimators equals 'self.num_targets' Parameters @@ -101,12 +101,12 @@ def construct_estimators( model kwargs for initializing models Returns ------- - estimators: List[AbstractEPM] + estimators: List[BaseEPM] A list of estimators """ raise NotImplementedError - def _train(self, X: np.ndarray, Y: np.ndarray) -> "UncorrelatedMultiObjectiveModel": + def _train(self, X: np.ndarray, Y: np.ndarray) -> "MultiObjectiveEPM": """Trains the models on X and y. Parameters diff --git a/smac/epm/random_epm.py b/smac/epm/random_epm.py index 77d553a77..d93e8c365 100644 --- a/smac/epm/random_epm.py +++ b/smac/epm/random_epm.py @@ -3,7 +3,7 @@ import numpy as np from smac.configspace import ConfigurationSpace -from smac.epm.base_epm import AbstractEPM +from smac.epm.base_epm import BaseEPM __author__ = "Katharina Eggensperger" __copyright__ = "Copyright 2015, ML4AAD" @@ -13,7 +13,7 @@ __version__ = "0.0.1" -class RandomEPM(AbstractEPM): +class RandomEPM(BaseEPM): """EPM which returns random values on a call to ``fit``. 
Parameters diff --git a/smac/epm/base_rf.py b/smac/epm/random_forest/__init__.py similarity index 90% rename from smac/epm/base_rf.py rename to smac/epm/random_forest/__init__.py index 851444451..f106104ff 100644 --- a/smac/epm/base_rf.py +++ b/smac/epm/random_forest/__init__.py @@ -9,13 +9,13 @@ UniformFloatHyperparameter, UniformIntegerHyperparameter, ) -from smac.epm.base_epm import AbstractEPM +from smac.epm.base_epm import BaseEPM __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" -class BaseModel(AbstractEPM): +class BaseModel(BaseEPM): def __init__( self, configspace: ConfigurationSpace, @@ -62,3 +62,11 @@ def _impute_inactive(self, X: np.ndarray) -> np.ndarray: X[nonfinite_mask, idx] = self.impute_values[idx] return X + + +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances # noqa + +__all__ = [ + "BaseModel", + "RandomForestWithInstances", +] diff --git a/smac/epm/uncorrelated_mo_rf_with_instances.py b/smac/epm/random_forest/rf_mo.py similarity index 84% rename from smac/epm/uncorrelated_mo_rf_with_instances.py rename to smac/epm/random_forest/rf_mo.py index f628050d5..e7cf36f90 100644 --- a/smac/epm/uncorrelated_mo_rf_with_instances.py +++ b/smac/epm/random_forest/rf_mo.py @@ -1,15 +1,15 @@ from typing import Any, Dict, List, Tuple from smac.configspace import ConfigurationSpace -from smac.epm.base_epm import AbstractEPM -from smac.epm.base_uncorrelated_mo_model import UncorrelatedMultiObjectiveModel -from smac.epm.rf_with_instances import RandomForestWithInstances +from smac.epm.base_epm import BaseEPM +from smac.epm.multi_objective_epm import MultiObjectiveEPM +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" -class UncorrelatedMultiObjectiveRandomForestWithInstances(UncorrelatedMultiObjectiveModel): +class MultiObjectiveRandomForest(MultiObjectiveEPM): """Wrapper for the random forest to predict multiple targets. Only a list with the target names and the types array for the @@ -24,7 +24,7 @@ def construct_estimators( types: List[int], bounds: List[Tuple[float, float]], model_kwargs: Dict[str, Any], - ) -> List[AbstractEPM]: + ) -> List[BaseEPM]: """ Construct a list of estimators. 
The number of the estimators equals 'self.num_targets' Parameters @@ -43,7 +43,7 @@ def construct_estimators( model kwargs for initializing models Returns ------- - estimators: List[AbstractEPM] + estimators: List[BaseEPM] A list of Random Forests """ return [RandomForestWithInstances(configspace, types, bounds, **model_kwargs) for _ in range(self.num_targets)] diff --git a/smac/epm/rf_with_instances.py b/smac/epm/random_forest/rf_with_instances.py similarity index 99% rename from smac/epm/rf_with_instances.py rename to smac/epm/random_forest/rf_with_instances.py index 1deec6ea3..92647ad09 100644 --- a/smac/epm/rf_with_instances.py +++ b/smac/epm/random_forest/rf_with_instances.py @@ -4,7 +4,7 @@ from pyrfr import regression from smac.configspace import ConfigurationSpace -from smac.epm.base_rf import BaseModel +from smac.epm.random_forest import BaseModel from smac.utils.constants import N_TREES, VERY_SMALL_NUMBER __author__ = "Aaron Klein" diff --git a/smac/epm/rfr_imputator.py b/smac/epm/random_forest/rfr_imputator.py similarity index 98% rename from smac/epm/rfr_imputator.py rename to smac/epm/random_forest/rfr_imputator.py index e61ae1be4..0fe4d1282 100644 --- a/smac/epm/rfr_imputator.py +++ b/smac/epm/random_forest/rfr_imputator.py @@ -7,7 +7,7 @@ from scipy.stats import truncnorm import smac.epm.base_imputor -from smac.epm.base_epm import AbstractEPM +from smac.epm.base_epm import BaseEPM __author__ = "Katharina Eggensperger" __copyright__ = "Copyright 2015, ML4AAD" @@ -32,7 +32,7 @@ class RFRImputator(smac.epm.base_imputor.BaseImputor): Cutoff value for this scenario (upper runnning time limit) threshold : float Highest possible values (e.g. cutoff * parX). - model : AbstractEPM + model : BaseEPM Predictive model (i.e. RandomForestWithInstances) change_threshold : float Stop imputation if change is less than this. @@ -49,7 +49,7 @@ class RFRImputator(smac.epm.base_imputor.BaseImputor): threshold : float seed : int Created by drawing random int from rng - model : AbstractEPM + model : BaseEPM Predictive model (i.e. 
RandomForestWithInstances)
     var_threshold: float
     """
@@ -59,7 +59,7 @@ def __init__(
         rng: np.random.RandomState,
         cutoff: float,
         threshold: float,
-        model: AbstractEPM,
+        model: BaseEPM,
         change_threshold: float = 0.01,
         max_iter: int = 2,
     ):
diff --git a/smac/epm/util_funcs.py b/smac/epm/utils.py
similarity index 69%
rename from smac/epm/util_funcs.py
rename to smac/epm/utils.py
index ce78fe0fd..1f2aa8dca 100644
--- a/smac/epm/util_funcs.py
+++ b/smac/epm/utils.py
@@ -180,3 +180,76 @@ def get_rng(
         "run_id=%s of type %s" % (rng, type(rng), str(run_id), type(run_id))
     )
     return run_id_return, rng_return
+
+
+def check_subspace_points(
+    X: np.ndarray,
+    cont_dims: typing.Union[np.ndarray, typing.List] = [],
+    cat_dims: typing.Union[np.ndarray, typing.List] = [],
+    bounds_cont: typing.Optional[np.ndarray] = None,
+    bounds_cat: typing.Optional[typing.List[typing.Tuple]] = None,
+    expand_bound: bool = False,
+) -> np.ndarray:
+    """
+    Check which points are placed inside a given subspace
+
+    Parameters
+    ----------
+    X: np.ndarray(N, D)
+        points to be checked, where D = D_cont + D_cat
+    cont_dims: typing.Union[np.ndarray(D_cont), typing.List]
+        which dimensions represent continuous hyperparameters
+    cat_dims: typing.Union[np.ndarray(D_cat), typing.List]
+        which dimensions represent categorical hyperparameters
+    bounds_cont: typing.Optional[np.ndarray(D_cont, 2)]
+        subspace bounds of the continuous hyperparameters; its length is the number of continuous hyperparameters
+    bounds_cat: typing.Optional[typing.List[typing.Tuple]]
+        subspace bounds of the categorical hyperparameters; its length is the number of categorical hyperparameters
+    expand_bound: bool
+        if True, the continuous bounds are expanded to include a few points beyond those strictly inside the subregion
+
+    Returns
+    -------
+    indices_in_ss: np.ndarray(N)
+        boolean mask indicating which data points lie inside the subspace
+    """
+    if len(X.shape) == 1:
+        X = X[np.newaxis, :]
+    if len(cont_dims) == 0 and len(cat_dims) == 0:
+        return np.ones(X.shape[0], dtype=bool)
+
+    if len(cont_dims) > 0:
+        if bounds_cont is None:
+            raise ValueError("bounds_cont must be given if cont_dims is provided")
+
+        if len(bounds_cont.shape) != 2 or bounds_cont.shape[1] != 2 or bounds_cont.shape[0] != len(cont_dims):
+            raise ValueError(
+                f"bounds_cont (with shape {bounds_cont.shape}) should be an array with shape of "
+                f"({len(cont_dims)}, 2)"
+            )
+
+        data_in_ss = np.all(X[:, cont_dims] <= bounds_cont[:, 1], axis=1) & np.all(
+            X[:, cont_dims] >= bounds_cont[:, 0], axis=1
+        )
+
+        if expand_bound:
+            bound_left = bounds_cont[:, 0] - np.min(X[data_in_ss][:, cont_dims] - bounds_cont[:, 0], axis=0)
+            bound_right = bounds_cont[:, 1] + np.min(bounds_cont[:, 1] - X[data_in_ss][:, cont_dims], axis=0)
+            data_in_ss = np.all(X[:, cont_dims] <= bound_right, axis=1) & np.all(X[:, cont_dims] >= bound_left, axis=1)
+    else:
+        data_in_ss = np.ones(X.shape[0], dtype=bool)
+
+    if len(cat_dims) == 0:
+        return data_in_ss
+    if bounds_cat is None:
+        raise ValueError("bounds_cat must be given if cat_dims is provided")
+
+    if len(bounds_cat) != len(cat_dims):
+        raise ValueError(
+            f"bounds_cat ({len(bounds_cat)}) and cat_dims ({len(cat_dims)}) must have " f"the same number of elements"
+        )
+
+    for bound_cat, cat_dim in zip(bounds_cat, cat_dims):
+        data_in_ss &= np.in1d(X[:, cat_dim], bound_cat)
+
+    return data_in_ss
diff --git a/smac/facade/experimental/hydra_facade.py b/smac/facade/experimental/hydra_facade.py
index f5f20e693..ae9b0cc3b 100644
--- a/smac/facade/experimental/hydra_facade.py
+++
b/smac/facade/experimental/hydra_facade.py @@ -14,8 +14,8 @@ import numpy as np from ConfigSpace.configuration_space import Configuration -from smac.epm.util_funcs import get_rng -from smac.facade.experimental.psmac_facade import PSMAC +from smac.epm.utils import get_rng +from smac.facade.psmac_facade import PSMAC from smac.facade.smac_ac_facade import SMAC4AC from smac.optimizer.pSMAC import read from smac.runhistory.runhistory import RunHistory diff --git a/smac/facade/experimental/psmac_facade.py b/smac/facade/psmac_facade.py similarity index 50% rename from smac/facade/experimental/psmac_facade.py rename to smac/facade/psmac_facade.py index 3d1ca6526..cd688ff02 100644 --- a/smac/facade/experimental/psmac_facade.py +++ b/smac/facade/psmac_facade.py @@ -1,26 +1,27 @@ # type: ignore # mypy: ignore-errors -from typing import Dict, List, Optional, Type, Union +from typing import Any, Dict, List, Optional, Type, Union import copy import datetime import logging import os import time +from pathlib import Path import joblib import numpy as np from ConfigSpace.configuration_space import Configuration -from smac.epm.util_funcs import get_rng +from smac.epm.utils import get_rng from smac.facade.smac_ac_facade import SMAC4AC from smac.runhistory.runhistory import RunHistory from smac.scenario.scenario import Scenario from smac.tae.base import BaseRunner from smac.tae.execute_ta_run_hydra import ExecuteTARunOld -from smac.utils.constants import MAXINT from smac.utils.io.output_directory import create_output_directory +from smac.utils.io.result_merging import ResultMerger __author__ = "Andre Biedenkapp" __copyright__ = "Copyright 2018, ML4AAD" @@ -28,11 +29,12 @@ def optimize( - scenario: Type[Scenario], - tae: Type[BaseRunner], - tae_kwargs: Dict, + scenario: Scenario, + tae_runner: Type[BaseRunner], + tae_runner_kwargs: Dict, rng: Union[np.random.RandomState, int], output_dir: str, + facade_class: Optional[Type[SMAC4AC]] = None, **kwargs, ) -> Configuration: """ @@ -42,14 +44,14 @@ def optimize( ---------- scenario: Scenario smac.Scenario to initialize SMAC - tae: BaseRunner + tae_runner: BaseRunner Target Algorithm Runner (supports old and aclib format) tae_runner_kwargs: Optional[dict] arguments passed to constructor of '~tae' rng: int/np.random.RandomState The randomState/seed to pass to each smac run output_dir: str - The directory in which each smac run should write it's results + The directory in which each smac run should write its results Returns ------- @@ -57,7 +59,11 @@ def optimize( The incumbent configuration of this run """ - solver = SMAC4AC(scenario=scenario, tae_runner=tae, tae_runner_kwargs=tae_kwargs, rng=rng, **kwargs) + if facade_class is None: + facade_class = SMAC4AC + solver = facade_class( + scenario=scenario, tae_runner=tae_runner, tae_runner_kwargs=tae_runner_kwargs, rng=rng, **kwargs + ) solver.stats.start_timing() solver.stats.print_stats() @@ -71,34 +77,55 @@ def optimize( class PSMAC(object): """ - Facade to use PSMAC + Facade to use pSMAC [1]_ + + With pSMAC you can either run n distinct SMAC optimizations in parallel + (`shared_model=False`) or you can parallelize the target algorithm evaluations + (`shared_model=True`). + In the latter case all SMAC workers share one file directory and communicate via + the logfiles. You can specify the number of SMAC workers/optimizers with the + argument `n_workers`. + + You can pass all other kwargs for the SMAC4AC facade. + In addition, you can access the facade's attributes normally (e.g. 
smac.stats), + however each time a new SMAC object is built, reading the information from the + file system. + + + .. [1] Ramage, S. E. A. (2015). Advances in meta-algorithmic software libraries for + distributed automated algorithm configuration (T). University of British + Columbia. Retrieved from + https://open.library.ubc.ca/collections/ubctheses/24/items/1.0167184. Parameters ---------- scenario : ~smac.scenario.scenario.Scenario - Scenario object - n_optimizers: int + Scenario object. Note that the budget/number of evaluations (runcount-limit) is + used for each worker. So if you specify 40 evaluations and 3 workers, 120 + configurations will be evaluated in total. + n_workers: int Number of optimizers to run in parallel per round rng: int/np.random.RandomState The randomState/seed to pass to each smac run run_id: int run_id for this hydra run - tae: BaseRunner + tae_runner: BaseRunner Target Algorithm Runner (supports old and aclib format as well as AbstractTAFunc) - tae_kwargs: Optional[dict] - arguments passed to constructor of '~tae' + tae_runner_kwargs: Optional[dict] + arguments passed to constructor of '~tae_runner' shared_model: bool Flag to indicate whether information is shared between SMAC runs or not validate: bool / None Flag to indicate whether to validate the found configurations or to use the SMAC estimates None => neither and return the full portfolio - n_incs: int - Number of incumbents to return (n_incs <= 0 ==> all found configurations) val_set: List[str] List of instance-ids to validate on + **kwargs + Keyword arguments for the SMAC4AC facade Attributes ---------- + # TODO update attributes logger stats : Stats loggs information about used resources @@ -113,16 +140,16 @@ class PSMAC(object): def __init__( self, - scenario: Type[Scenario], + scenario: Scenario, rng: Optional[Union[np.random.RandomState, int]] = None, run_id: int = 1, - tae: Type[BaseRunner] = ExecuteTARunOld, - tae_kwargs: Union[dict, None] = None, + tae_runner: Type[BaseRunner] = ExecuteTARunOld, + tae_runner_kwargs: Union[dict, None] = None, shared_model: bool = True, + facade_class: Optional[Type[SMAC4AC]] = None, validate: bool = True, - n_optimizers: int = 2, + n_workers: int = 2, val_set: Union[List[str], None] = None, - n_incs: int = 1, **kwargs, ): self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__) @@ -131,30 +158,34 @@ def __init__( self.run_id, self.rng = get_rng(rng, run_id, logger=self.logger) self.kwargs = kwargs self.output_dir = None + if facade_class is None: + facade_class = SMAC4AC + self.facade_class = facade_class self.rh = RunHistory() - self._tae = tae - self._tae_kwargs = tae_kwargs - if n_optimizers <= 1: - self.logger.warning("Invalid value in %s: %d. Setting to 2", "n_optimizers", n_optimizers) - self.n_optimizers = max(n_optimizers, 2) + self._tae_runner = tae_runner + self._tae_runner_kwargs = tae_runner_kwargs + if n_workers <= 1: + self.logger.warning("Invalid value in %s: %d. 
Setting to 2", "n_workers", n_workers) + self.n_workers = max(n_workers, 2) + self.seeds = np.arange(0, self.n_workers, dtype=int) # seeds for the parallel runs self.validate = validate self.shared_model = shared_model - self.n_incs = min(max(1, n_incs), self.n_optimizers) if val_set is None: self.val_set = scenario.train_insts else: self.val_set = val_set - def optimize(self): + self.result_merger: Optional[ResultMerger] = None + self.n_incs: int = 1 + + def optimize(self) -> Configuration: """ Optimizes the algorithm provided in scenario (given in constructor) Returns ------- - incumbent(s) : Configuration / List[Configuration] / ndarray[Configuration] - Incumbent / Portfolio of incumbents - pid(s) : int / ndarray[ints] - Process ID(s) from which the configuration stems + incumbent : Configuration + Best configuration across all workers. """ # Setup output directory @@ -174,55 +205,47 @@ def optimize(self): self.logger.info("+" * 120) self.logger.info("PSMAC run") - incs = joblib.Parallel(n_jobs=self.n_optimizers)( + incs = joblib.Parallel(n_jobs=self.n_workers)( joblib.delayed(optimize)( - self.scenario, # Scenario object - self._tae, # type of tae to run target with - self._tae_kwargs, - p, # seed for the rng/run_id - self.output_dir, # directory to create outputs in + scenario=self.scenario, # Scenario object + tae_runner=self._tae_runner, # type of tae_runner to run target with + tae_runner_kwargs=self._tae_runner_kwargs, + rng=int(seed), # seed for the rng/run_id + output_dir=self.output_dir, # directory to create outputs in + facade_class=self.facade_class, **self.kwargs, ) - for p in range(self.n_optimizers) + for seed in self.seeds ) - if self.n_optimizers == self.n_incs: # no validation necessary just return all incumbents - return incs - else: - _, val_ids, _, est_ids = self.get_best_incumbents_ids(incs) # determine the best incumbents - if val_ids: - return [inc for i, inc in enumerate(incs) if i in val_ids] - return [inc for i, inc in enumerate(incs) if i in est_ids] + inc = self.get_best_incumbent(incs=incs) + return inc - def get_best_incumbents_ids(self, incs: List[Configuration]): + def get_best_incumbent(self, incs: List[Configuration]) -> Configuration: """ - Determines the IDs and costs of the best configurations + Determine ID and cost of best configuration (incumbent). Parameters ---------- incs : List[Configuration] - incumbents determined by all parallel SMAC runs + List of incumbents from the workers. Returns ------- - Dict(Config -> Dict(inst_id (str) -> cost (float))) (if real validation runs are performed) - List(ints) (indices of best configurations if validation runs are performed) - Dict(Config -> Dict(inst_id (str) -> cost (float))) (if performance is estimated) - List(ints) (indices of best configurations if performance is estimated) + Configuration + Best incumbent from all workers. 
""" if self.validate is True: mean_costs_conf_valid, cost_per_config_valid = self.validate_incs(incs) - val_ids = list(map(lambda x: x[0], sorted(enumerate(mean_costs_conf_valid), key=lambda y: y[1])))[ - : self.n_incs - ] + val_id = list(map(lambda x: x[0], sorted(enumerate(mean_costs_conf_valid), key=lambda y: y[1])))[0] + inc = incs[val_id] else: - cost_per_config_valid = val_ids = None - mean_costs_conf_estimate, cost_per_config_estimate = self._get_mean_costs(incs, self.rh) - est_ids = list(map(lambda x: x[0], sorted(enumerate(mean_costs_conf_estimate), key=lambda y: y[1])))[ - : self.n_incs - ] - return cost_per_config_valid, val_ids, cost_per_config_estimate, est_ids + mean_costs_conf_estimate, cost_per_config_estimate = self._get_mean_costs(incs, self.rh) + est_id = list(map(lambda x: x[0], sorted(enumerate(mean_costs_conf_estimate), key=lambda y: y[1])))[0] + inc = incs[est_id] + + return inc def _get_mean_costs(self, incs: List[Configuration], new_rh: RunHistory): """ @@ -253,9 +276,14 @@ def _get_mean_costs(self, incs: List[Configuration], new_rh: RunHistory): results.append(np.nan) return results, config_cost_per_inst + def _get_solver(self): + # TODO: specify one output dir or no output dir + solver = self.facade_class(scenario=self.scenario, rng=self.rng, run_id=None, **self.kwargs) + return solver + def validate_incs(self, incs: List[Configuration]): """Validation of the incumbents.""" - solver = SMAC4AC(scenario=self.scenario, rng=self.rng, run_id=MAXINT, **self.kwargs) + solver = self._get_solver() self.logger.info("*" * 120) self.logger.info("Validating") new_rh = solver.validate( @@ -263,6 +291,58 @@ def validate_incs(self, incs: List[Configuration]): instance_mode=self.val_set, repetitions=1, use_epm=False, - n_jobs=self.n_optimizers, + n_jobs=self.n_workers, ) return self._get_mean_costs(incs, new_rh) + + def write_run(self) -> None: + """ + Write all SMAC files to pSMAC dir + + Returns + ------- + None + + """ + # write runhistory + # write configspace file .pcs .json + # write trajectory traj.json + # write scenario .txt + # write stats + raise NotImplementedError + + def _check_result_merger(self): + if self.result_merger is None: + if self.output_dir is None: + raise ValueError( + "Cannot instantiate `ResultMerger` because `output_dir` " + "is None. In pSMAC `output_dir` is set after " + "`optimize()` has been called. If you already have " + "a pSMAC run or rundirs, please directly use " + "`smac.utils.io.result_merging.ResultMerger`." 
+ ) + self.result_merger = ResultMerger(output_dir=Path(self.output_dir).parent) + + def get_runhistory(self) -> Optional[RunHistory]: + """ + Get merged runhistory from pSMAC workers + + Returns + ------- + Optional[RunHistory] + + """ + self._check_result_merger() + return self.result_merger.get_runhistory() + + def get_trajectory(self) -> Optional[List[Dict[str, Any]]]: + """ + Get trajectory from merged runhistory + + Returns + ------- + Optional[List[Dict[str, Any]]] + + """ + self._check_result_merger() + return self.result_merger.get_trajectory() diff --git a/smac/facade/roar_facade.py b/smac/facade/roar_facade.py index 772673bbb..7657abb86 100644 --- a/smac/facade/roar_facade.py +++ b/smac/facade/roar_facade.py @@ -10,10 +10,13 @@ from smac.facade.smac_ac_facade import SMAC4AC from smac.initial_design.initial_design import InitialDesign from smac.intensification.abstract_racer import AbstractRacer -from smac.optimizer.ei_optimization import AcquisitionFunctionMaximizer, RandomSearch -from smac.optimizer.multi_objective.abstract_multi_objective_algorithm import ( +from smac.multi_objective.abstract_multi_objective_algorithm import ( AbstractMultiObjectiveAlgorithm, ) +from smac.optimizer.acquisition.maximizer import ( + AcquisitionFunctionMaximizer, + RandomSearch, +) from smac.runhistory.runhistory import RunHistory from smac.runhistory.runhistory2epm import ( AbstractRunHistory2EPM, diff --git a/smac/facade/smac_ac_facade.py b/smac/facade/smac_ac_facade.py index c6173fbab..557f4d9d0 100644 --- a/smac/facade/smac_ac_facade.py +++ b/smac/facade/smac_ac_facade.py @@ -8,13 +8,13 @@ import numpy as np from smac.configspace import Configuration -from smac.epm.base_epm import AbstractEPM -from smac.epm.base_uncorrelated_mo_model import UncorrelatedMultiObjectiveModel +from smac.epm.base_epm import BaseEPM +from smac.epm.multi_objective_epm import MultiObjectiveEPM # epm -from smac.epm.rf_with_instances import RandomForestWithInstances -from smac.epm.rfr_imputator import RFRImputator -from smac.epm.util_funcs import get_rng, get_types +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances +from smac.epm.random_forest.rfr_imputator import RFRImputator +from smac.epm.utils import get_rng, get_types from smac.initial_design.default_configuration_design import DefaultConfiguration from smac.initial_design.factorial_design import FactorialInitialDesign @@ -29,6 +29,13 @@ # intensification from smac.intensification.intensification import Intensifier from smac.intensification.successive_halving import SuccessiveHalving +from smac.multi_objective.abstract_multi_objective_algorithm import ( + AbstractMultiObjectiveAlgorithm, +) +from smac.multi_objective.aggregation_strategy import ( + AggregationStrategy, + MeanAggregationStrategy, +) from smac.optimizer.acquisition import ( EI, EIPS, @@ -37,21 +44,14 @@ LogEI, PriorAcquisitionFunction, ) -from smac.optimizer.ei_optimization import ( +from smac.optimizer.acquisition.maximizer import ( AcquisitionFunctionMaximizer, LocalAndSortedPriorRandomSearch, LocalAndSortedRandomSearch, ) -from smac.optimizer.multi_objective.abstract_multi_objective_algorithm import ( - AbstractMultiObjectiveAlgorithm, -) -from smac.optimizer.multi_objective.aggregation_strategy import ( - AggregationStrategy, - MeanAggregationStrategy, -) -from smac.optimizer.random_configuration_chooser import ( +from smac.optimizer.configuration_chooser.random_chooser import ( ChooserProb, - RandomConfigurationChooser, + RandomChooser, ) # optimizer @@ -135,7 
+135,7 @@ class SMAC4AC(object): Will use :class:`smac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch` if not set. acquisition_function_optimizer_kwargs: Optional[dict] Arguments passed to constructor of `~acquisition_function_optimizer` - model : AbstractEPM + model : BaseEPM Model that implements train() and predict(). Will use a :class:`~smac.epm.rf_with_instances.RandomForestWithInstances` if not set. model_kwargs : Optional[dict] @@ -171,6 +171,8 @@ class SMAC4AC(object): smbo_class : ~smac.optimizer.smbo.SMBO Class implementing the SMBO interface which will be used to instantiate the optimizer class. + smbo_kwargs : ~ Optional[Dict] + Arguments passed to the constructor of '~smbo' run_id : int (optional) Run ID will be used as subfolder for output_dir. If no ``run_id`` is given, a random ``run_id`` will be chosen. @@ -212,7 +214,7 @@ def __init__( user_prior_kwargs: Optional[Dict] = None, acquisition_function_optimizer: Optional[Type[AcquisitionFunctionMaximizer]] = None, acquisition_function_optimizer_kwargs: Optional[Dict] = None, - model: Optional[Type[AbstractEPM]] = None, + model: Optional[Type[BaseEPM]] = None, model_kwargs: Optional[Dict] = None, runhistory2epm: Optional[Type[AbstractRunHistory2EPM]] = None, runhistory2epm_kwargs: Optional[Dict] = None, @@ -225,8 +227,9 @@ def __init__( restore_incumbent: Optional[Configuration] = None, rng: Optional[Union[np.random.RandomState, int]] = None, smbo_class: Optional[Type[SMBO]] = None, + smbo_kwargs: Optional[Dict] = None, run_id: Optional[int] = None, - random_configuration_chooser: Optional[Type[RandomConfigurationChooser]] = None, + random_configuration_chooser: Optional[Type[RandomChooser]] = None, random_configuration_chooser_kwargs: Optional[Dict] = None, dask_client: Optional[dask.distributed.Client] = None, n_jobs: Optional[int] = 1, @@ -303,7 +306,7 @@ def __init__( random_configuration_chooser_instance = random_configuration_chooser( # type: ignore # noqa F821 **rand_conf_chooser_kwargs # type: ignore[arg-type] # noqa F821 ) - elif not isinstance(random_configuration_chooser, RandomConfigurationChooser): + elif not isinstance(random_configuration_chooser, RandomChooser): raise ValueError( "random_configuration_chooser has to be" " a class or object of RandomConfigurationChooser" ) @@ -340,7 +343,7 @@ def __init__( model_def_kwargs[key] = value model_def_kwargs["configspace"] = self.scenario.cs # type: ignore[attr-defined] # noqa F821 model_instance = RandomForestWithInstances( - **model_def_kwargs # type: ignore[arg-type] # noqa F821 # type: AbstractEPM + **model_def_kwargs # type: ignore[arg-type] # noqa F821 # type: BaseEPM ) elif inspect.isclass(model): model_def_kwargs["configspace"] = self.scenario.cs # type: ignore[attr-defined] # noqa F821 @@ -366,9 +369,7 @@ def __init__( "Argument acquisition_function must be None or an object implementing the " "AbstractAcquisitionFunction, not %s." % type(acquisition_function) ) - if isinstance(acquisition_function_instance, EIPS) and not isinstance( - model_instance, UncorrelatedMultiObjectiveModel - ): + if isinstance(acquisition_function_instance, EIPS) and not isinstance(model_instance, MultiObjectiveEPM): raise TypeError( "If the acquisition function is EIPS, the surrogate model must support multi-objective prediction!" 
) @@ -530,7 +531,7 @@ def __init__( if scenario.multi_objectives is not None and num_obj > 1: # type: ignore[attr-defined] # noqa F821 # define any defaults here - _multi_objective_kwargs = {"rng": rng, "num_obj": num_obj} + _multi_objective_kwargs = {"rng": rng} if multi_objective_kwargs is not None: _multi_objective_kwargs.update(multi_objective_kwargs) @@ -701,6 +702,8 @@ def __init__( "random_configuration_chooser": random_configuration_chooser_instance, "tae_runner": tae_runner_instance, } # type: Dict[str, Any] + if smbo_kwargs is not None: + smbo_args.update(smbo_kwargs) if smbo_class is None: self.solver = SMBO(**smbo_args) # type: ignore[arg-type] # noqa F821 diff --git a/smac/facade/smac_bb_facade.py b/smac/facade/smac_bb_facade.py index b5b52f44d..dc44ae9bc 100644 --- a/smac/facade/smac_bb_facade.py +++ b/smac/facade/smac_bb_facade.py @@ -2,11 +2,16 @@ import numpy as np -from smac.epm.base_gp import BaseModel -from smac.epm.gaussian_process_mcmc import GaussianProcess, GaussianProcessMCMC -from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior -from smac.epm.gp_kernels import ConstantKernel, HammingKernel, Matern, WhiteKernel -from smac.epm.util_funcs import get_rng, get_types +from smac.epm.gaussian_process import BaseModel, GaussianProcess +from smac.epm.gaussian_process.kernels import ( + ConstantKernel, + HammingKernel, + Matern, + WhiteKernel, +) +from smac.epm.gaussian_process.mcmc import MCMCGaussianProcess +from smac.epm.gaussian_process.utils.prior import HorseshoePrior, LognormalPrior +from smac.epm.utils import get_rng, get_types from smac.facade.smac_ac_facade import SMAC4AC from smac.initial_design.sobol_design import SobolDesign from smac.runhistory.runhistory2epm import RunHistory2EPM4Cost @@ -132,7 +137,7 @@ def __init__(self, model_type: str = "gp_mcmc", **kwargs: Any): model_kwargs["normalize_y"] = True model_kwargs["seed"] = rng.randint(0, 2**20) elif model_type == "gp_mcmc": - model_class = GaussianProcessMCMC + model_class = MCMCGaussianProcess kwargs["model"] = model_class kwargs["integrate_acquisition_function"] = True diff --git a/smac/facade/smac_boing_facade.py b/smac/facade/smac_boing_facade.py new file mode 100644 index 000000000..c9332a02b --- /dev/null +++ b/smac/facade/smac_boing_facade.py @@ -0,0 +1,111 @@ +from typing import Any + +import warnings + +import numpy as np +from botorch.models.kernels.categorical import CategoricalKernel +from gpytorch.constraints.constraints import Interval +from gpytorch.kernels import MaternKernel, ScaleKernel +from gpytorch.likelihoods.gaussian_likelihood import GaussianLikelihood +from gpytorch.priors import HorseshoePrior, LogNormalPrior + +from smac.epm.gaussian_process.augmented import GloballyAugmentedLocalGaussianProcess +from smac.facade.smac_hpo_facade import SMAC4HPO +from smac.optimizer.configuration_chooser.boing_chooser import BOinGChooser +from smac.runhistory.runhistory2epm_boing import RunHistory2EPM4ScaledLogCostWithRaw + + +class SMAC4BOING(SMAC4HPO): + """ + SMAC wrapper for BO inside Grove(BOinG): + Deng and Lindauer, Searching in the Forest for Local Bayesian Optimization + https://arxiv.org/abs/2111.05834 + + BOiNG is a two-stages optimizer: at the first stage, the global optimizer extracts the global optimum with a random + forest (RF) model. Then in the second stage, the optimizer constructs a local model in the vicinity of the + configuration suggested by the global surrogate model. 
+
+    Its hyperparameter settings follow the implementation from smac.facade.smac_bb_facade.SMAC4BB:
+    Hyperparameters are chosen according to the best configuration for Gaussian process maximum likelihood found in
+    "Towards Assessing the Impact of Bayesian Optimization's Own Hyperparameters" by Lindauer et al., presented at the
+    DSO workshop 2019 (https://arxiv.org/abs/1908.06674).
+    """
+
+    def __init__(self, **kwargs: Any):
+        kwargs["runhistory2epm"] = kwargs.get("runhistory2epm", RunHistory2EPM4ScaledLogCostWithRaw)
+        smbo_kwargs = kwargs.get("smbo_kwargs", {})
+        if smbo_kwargs is None:
+            smbo_kwargs = {"epm_chooser": BOinGChooser}
+        epm_chooser = smbo_kwargs.get("epm_chooser", BOinGChooser)
+        if epm_chooser != BOinGChooser:
+            warnings.warn("BOinG must have BOinGChooser as its EPM chooser!")
+            epm_chooser = BOinGChooser
+        smbo_kwargs["epm_chooser"] = epm_chooser
+        epm_chooser_kwargs = smbo_kwargs.get("epm_chooser_kwargs", None)
+
+        if epm_chooser_kwargs is None or epm_chooser_kwargs.get("model_local") is None:
+            # The lower and upper bounds are set to be identical to those of SMAC4BB
+            cont_kernel_kwargs = {
+                "lengthscale_constraint": Interval(
+                    np.exp(-6.754111155189306), np.exp(0.0858637988771976), transform=None, initial_value=1.0
+                ),
+            }
+            cat_kernel_kwargs = {
+                "lengthscale_constraint": Interval(
+                    np.exp(-6.754111155189306), np.exp(0.0858637988771976), transform=None, initial_value=1.0
+                ),
+            }
+            scale_kernel_kwargs = {
+                "outputscale_constraint": Interval(np.exp(-10.0), np.exp(2.0), transform=None, initial_value=2.0),
+                "outputscale_prior": LogNormalPrior(0.0, 1.0),
+            }
+
+            kernel_kwargs = {
+                "cont_kernel": MaternKernel,
+                "cont_kernel_kwargs": cont_kernel_kwargs,
+                "cat_kernel": CategoricalKernel,
+                "cat_kernel_kwargs": cat_kernel_kwargs,
+                "scale_kernel": ScaleKernel,
+                "scale_kernel_kwargs": scale_kernel_kwargs,
+            }
+
+            # setting a lower bound on the noise_constraint makes the model more stable
+            noise_prior = HorseshoePrior(0.1)
+            likelihood = GaussianLikelihood(
+                noise_prior=noise_prior, noise_constraint=Interval(1e-5, np.exp(2), transform=None)
+            ).double()
+
+            if epm_chooser_kwargs is None:
+                smbo_kwargs["epm_chooser_kwargs"] = {
+                    "model_local": GloballyAugmentedLocalGaussianProcess,
+                    "model_local_kwargs": dict(kernel_kwargs=kernel_kwargs, likelihood=likelihood),
+                }
+            else:
+                smbo_kwargs["epm_chooser_kwargs"].update(
+                    {
+                        "model_local": GloballyAugmentedLocalGaussianProcess,
+                        "model_local_kwargs": dict(kernel_kwargs=kernel_kwargs, likelihood=likelihood),
+                    }
+                )
+        kwargs["smbo_kwargs"] = smbo_kwargs
+
+        if kwargs.get("random_configuration_chooser") is None:
+            # follows SMAC4BB
+            random_config_chooser_kwargs = (
+                kwargs.get(
+                    "random_configuration_chooser_kwargs",
+                    dict(),
+                )
+                or dict()
+            )
+            random_config_chooser_kwargs["prob"] = random_config_chooser_kwargs.get("prob", 0.08447232371720552)
+            kwargs["random_configuration_chooser_kwargs"] = random_config_chooser_kwargs
+
+        super().__init__(**kwargs)
+
+        if self.solver.scenario.n_features > 0:
+            raise NotImplementedError("BOinG cannot handle instances")
+
+        self.solver.scenario.acq_opt_challengers = 1000  # type: ignore[attr-defined] # noqa F821
+        # activate predict incumbent
+        self.solver.epm_chooser.predict_x_best = True
diff --git a/smac/facade/smac_hpo_facade.py b/smac/facade/smac_hpo_facade.py
index 89c80066d..edc7cc191 100644
--- a/smac/facade/smac_hpo_facade.py
+++ b/smac/facade/smac_hpo_facade.py
@@ -1,6 +1,6 @@
 from typing import Any
 
-from smac.epm.rf_with_instances import RandomForestWithInstances
+from
smac.epm.random_forest.rf_with_instances import RandomForestWithInstances from smac.facade.smac_ac_facade import SMAC4AC from smac.initial_design.sobol_design import SobolDesign from smac.optimizer.acquisition import LogEI diff --git a/smac/initial_design/initial_design.py b/smac/initial_design/initial_design.py index b27a14856..1fdb211a5 100644 --- a/smac/initial_design/initial_design.py +++ b/smac/initial_design/initial_design.py @@ -98,6 +98,8 @@ def __init__( def select_configurations(self) -> List[Configuration]: """Selects the initial configurations.""" + if self.init_budget == 0: + return [] if self.configs is None: self.configs = self._select_configurations() diff --git a/smac/intensification/abstract_racer.py b/smac/intensification/abstract_racer.py index 574c8722d..b1c40bf75 100644 --- a/smac/intensification/abstract_racer.py +++ b/smac/intensification/abstract_racer.py @@ -8,7 +8,7 @@ import numpy as np from smac.configspace import Configuration -from smac.optimizer.epm_configuration_chooser import EPMChooser +from smac.optimizer.configuration_chooser.epm_chooser import EPMChooser from smac.runhistory.runhistory import RunHistory, RunInfo, RunValue from smac.stats.stats import Stats from smac.utils.io.traj_logging import TrajLogger @@ -89,7 +89,6 @@ def __init__( maxR: int = 2000, adaptive_capping_slackfactor: float = 1.2, min_chall: int = 1, - num_obj: int = 1, ): self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__) @@ -129,12 +128,6 @@ def __init__( # to mark the end of an iteration self.iteration_done = False - if num_obj > 1: - raise ValueError( - "Intensifiers only support single objective optimization. For multi-objective problems," - "please refer to multi-objective intensifiers" - ) - def get_next_run( self, challengers: Optional[List[Configuration]], @@ -303,9 +296,10 @@ def _adapt_cutoff(self, challenger: Configuration, run_history: RunHistory, inc_ # reasons) chall_inst_seeds = run_history.get_runs_for_config(challenger, only_max_observed_budget=True) chal_sum_cost = run_history.sum_cost( - config=challenger, - instance_seed_budget_keys=chall_inst_seeds, + config=challenger, instance_seed_budget_keys=chall_inst_seeds, normalize=True ) + assert type(chal_sum_cost) == float + cutoff = min(curr_cutoff, inc_sum_cost * self.adaptive_capping_slackfactor - chal_sum_cost) return cutoff @@ -348,8 +342,11 @@ def _compare_configs( # performance on challenger runs, the challenger only becomes incumbent # if it dominates the incumbent - chal_perf = run_history.average_cost(challenger, to_compare_runs) - inc_perf = run_history.average_cost(incumbent, to_compare_runs) + chal_perf = run_history.average_cost(challenger, to_compare_runs, normalize=True) + inc_perf = run_history.average_cost(incumbent, to_compare_runs, normalize=True) + + assert type(chal_perf) == float + assert type(inc_perf) == float # Line 15 if np.any(chal_perf > inc_perf) and len(chall_runs) >= self.minR: diff --git a/smac/intensification/hyperband.py b/smac/intensification/hyperband.py index c80fda4b8..e56a58b88 100644 --- a/smac/intensification/hyperband.py +++ b/smac/intensification/hyperband.py @@ -8,7 +8,7 @@ from smac.intensification.abstract_racer import AbstractRacer, RunInfoIntent from smac.intensification.parallel_scheduling import ParallelScheduler from smac.intensification.successive_halving import _SuccessiveHalving -from smac.optimizer.epm_configuration_chooser import EPMChooser +from smac.optimizer.configuration_chooser.epm_chooser import EPMChooser from 
smac.runhistory.runhistory import ( # noqa: F401 RunHistory, RunInfo, @@ -98,7 +98,6 @@ def __init__( min_chall: int = 1, incumbent_selection: str = "highest_executed_budget", identifier: int = 0, - num_obj: int = 1, ) -> None: super().__init__( @@ -119,7 +118,6 @@ def __init__( adaptive_capping_slackfactor=adaptive_capping_slackfactor, min_chall=min_chall, incumbent_selection=incumbent_selection, - num_obj=num_obj, ) self.identifier = identifier diff --git a/smac/intensification/intensification.py b/smac/intensification/intensification.py index 119367571..83322d3a3 100644 --- a/smac/intensification/intensification.py +++ b/smac/intensification/intensification.py @@ -12,7 +12,7 @@ RunInfoIntent, _config_to_run_type, ) -from smac.optimizer.epm_configuration_chooser import EPMChooser +from smac.optimizer.configuration_chooser.epm_chooser import EPMChooser from smac.runhistory.runhistory import ( InstSeedBudgetKey, RunHistory, @@ -144,7 +144,6 @@ def __init__( maxR: int = 2000, adaptive_capping_slackfactor: float = 1.2, min_chall: int = 2, - num_obj: int = 1, ): super().__init__( stats=stats, @@ -159,7 +158,6 @@ def __init__( maxR=maxR, adaptive_capping_slackfactor=adaptive_capping_slackfactor, min_chall=min_chall, - num_obj=num_obj, ) self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__) @@ -795,6 +793,7 @@ def _process_racer_results( """ chal_runs = run_history.get_runs_for_config(challenger, only_max_observed_budget=True) chal_perf = run_history.get_cost(challenger) + # if all have been run, compare challenger performance if not self.to_run: new_incumbent = self._compare_configs( @@ -896,11 +895,8 @@ def _get_instances_to_run( # because of efficiency computed here inst_seed_pairs = list(inc_inst_seeds - set(missing_runs)) # cost used by incumbent for going over all runs in inst_seed_pairs - inc_sum_cost = run_history.sum_cost( - config=incumbent, - instance_seed_budget_keys=inst_seed_pairs, - ) - + inc_sum_cost = run_history.sum_cost(config=incumbent, instance_seed_budget_keys=inst_seed_pairs, normalize=True) + assert type(inc_sum_cost) == float return to_run, inc_sum_cost def get_next_challenger( diff --git a/smac/intensification/parallel_scheduling.py b/smac/intensification/parallel_scheduling.py index 4c15173f3..ec01143e0 100644 --- a/smac/intensification/parallel_scheduling.py +++ b/smac/intensification/parallel_scheduling.py @@ -6,7 +6,7 @@ from smac.configspace import Configuration from smac.intensification.abstract_racer import AbstractRacer, RunInfoIntent -from smac.optimizer.epm_configuration_chooser import EPMChooser +from smac.optimizer.configuration_chooser.epm_chooser import EPMChooser from smac.runhistory.runhistory import RunHistory, RunInfo, RunValue from smac.stats.stats import Stats from smac.utils.io.traj_logging import TrajLogger @@ -89,7 +89,6 @@ def __init__( inst_seed_pairs: Optional[List[Tuple[str, int]]] = None, min_chall: int = 1, incumbent_selection: str = "highest_executed_budget", - num_obj: int = 1, ) -> None: super().__init__( @@ -103,7 +102,6 @@ def __init__( run_obj_time=run_obj_time, adaptive_capping_slackfactor=adaptive_capping_slackfactor, min_chall=min_chall, - num_obj=num_obj, ) # We have a pool of instances that yield configurations ot run diff --git a/smac/intensification/simple_intensifier.py b/smac/intensification/simple_intensifier.py index fe8d6d1d2..491449f88 100644 --- a/smac/intensification/simple_intensifier.py +++ b/smac/intensification/simple_intensifier.py @@ -4,7 +4,7 @@ from smac.configspace import 
Configuration from smac.intensification.abstract_racer import AbstractRacer, RunInfoIntent -from smac.optimizer.epm_configuration_chooser import EPMChooser +from smac.optimizer.configuration_chooser.epm_chooser import EPMChooser from smac.runhistory.runhistory import RunHistory, RunInfo, RunValue from smac.stats.stats import Stats from smac.utils.constants import MAXINT @@ -46,7 +46,6 @@ def __init__( cutoff: Optional[float] = None, deterministic: bool = False, run_obj_time: bool = True, - num_obj: int = 1, **kwargs: Any, ) -> None: @@ -61,10 +60,7 @@ def __init__( run_obj_time=run_obj_time, adaptive_capping_slackfactor=1.0, min_chall=1, - num_obj=num_obj, ) - # Simple intensifier does not require comparing run results, thus we could simply ignore num_obj here - # We want to control the number of runs that are sent to # the workers. At any time, we want to make sure that if there # are just W workers, there should be at max W active runs diff --git a/smac/intensification/successive_halving.py b/smac/intensification/successive_halving.py index 749fbcf8f..7738d1304 100644 --- a/smac/intensification/successive_halving.py +++ b/smac/intensification/successive_halving.py @@ -8,7 +8,7 @@ from smac.configspace import Configuration from smac.intensification.abstract_racer import AbstractRacer, RunInfoIntent from smac.intensification.parallel_scheduling import ParallelScheduler -from smac.optimizer.epm_configuration_chooser import EPMChooser +from smac.optimizer.configuration_chooser.epm_chooser import EPMChooser from smac.runhistory.runhistory import RunHistory, RunInfo, RunValue from smac.stats.stats import Stats from smac.tae import StatusType @@ -137,7 +137,6 @@ def __init__( min_chall: int = 1, incumbent_selection: str = "highest_executed_budget", identifier: int = 0, - num_obj: int = 1, ) -> None: super().__init__( stats=stats, @@ -150,7 +149,6 @@ def __init__( run_obj_time=run_obj_time, adaptive_capping_slackfactor=adaptive_capping_slackfactor, min_chall=min_chall, - num_obj=num_obj, ) self.identifier = identifier @@ -629,14 +627,16 @@ def get_next_run( # - during the 1st intensify run, the incumbent shouldn't be capped after being compared against itself if incumbent and incumbent != challenger: inc_runs = run_history.get_runs_for_config(incumbent, only_max_observed_budget=True) - inc_sum_cost = run_history.sum_cost(config=incumbent, instance_seed_budget_keys=inc_runs) + inc_sum_cost = run_history.sum_cost(config=incumbent, instance_seed_budget_keys=inc_runs, normalize=True) else: inc_sum_cost = np.inf if self.first_run: self.logger.info("First run, no incumbent provided; challenger is assumed to be the incumbent") incumbent = challenger - # selecting instance-seed subset for this budget, depending on the kind of budget + assert type(inc_sum_cost) == float + + # Selecting instance-seed subset for this budget, depending on the kind of budget if self.instance_as_budget: prev_budget = int(self.all_budgets[self.stage - 1]) if self.stage > 0 else 0 curr_insts = self.inst_seed_pairs[int(prev_budget) : int(curr_budget)] diff --git a/smac/multi_objective/__init__.py b/smac/multi_objective/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/smac/optimizer/multi_objective/abstract_multi_objective_algorithm.py b/smac/multi_objective/abstract_multi_objective_algorithm.py similarity index 75% rename from smac/optimizer/multi_objective/abstract_multi_objective_algorithm.py rename to smac/multi_objective/abstract_multi_objective_algorithm.py index 6ce18d141..3d321772d 100644 --- 
a/smac/optimizer/multi_objective/abstract_multi_objective_algorithm.py +++ b/smac/multi_objective/abstract_multi_objective_algorithm.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from abc import ABC from typing import Optional @@ -10,10 +12,8 @@ class AbstractMultiObjectiveAlgorithm(ABC): It can be applied to rh2epm or epmchooser. """ - def __init__(self, num_obj: int, rng: Optional[np.random.RandomState] = None): - + def __init__(self, rng: Optional[np.random.RandomState] = None): if rng is None: rng = np.random.RandomState(0) - self.num_obj = num_obj self.rng = rng diff --git a/smac/optimizer/multi_objective/aggregation_strategy.py b/smac/multi_objective/aggregation_strategy.py similarity index 59% rename from smac/optimizer/multi_objective/aggregation_strategy.py rename to smac/multi_objective/aggregation_strategy.py index d860583e8..b5b9fac46 100644 --- a/smac/optimizer/multi_objective/aggregation_strategy.py +++ b/smac/multi_objective/aggregation_strategy.py @@ -1,30 +1,34 @@ +from __future__ import annotations + from abc import abstractmethod import numpy as np -from smac.optimizer.multi_objective.abstract_multi_objective_algorithm import ( +from smac.multi_objective.abstract_multi_objective_algorithm import ( AbstractMultiObjectiveAlgorithm, ) class AggregationStrategy(AbstractMultiObjectiveAlgorithm): """ - An abstract class to aggregate multi-objective losses to a single objective losses, which can then be utilized - by the single-objective optimizer. + An abstract class to aggregate multi-objective losses to a single objective loss, + which can then be utilized by the single-objective optimizer. """ @abstractmethod - def __call__(self, values: np.ndarray) -> float: + def __call__(self, values: list[float]) -> float: """ Transform a multi-objective loss to a single loss. Parameters ---------- - values: np.ndarray[num_evaluations, num_obj]. + values : list[float] + Normalized values. Returns ------- - cost: float. + cost : float + Combined cost. """ raise NotImplementedError @@ -35,16 +39,18 @@ class MeanAggregationStrategy(AggregationStrategy): which can then be utilized by the single-objective optimizer. """ - def __call__(self, values: np.ndarray) -> float: + def __call__(self, values: list[float]) -> float: """ Transform a multi-objective loss to a single loss. Parameters ---------- - values (np.ndarray): Normalized values. + values : list[float] + Normalized values. Returns ------- - cost (float): Combined cost. + cost : float + Combined cost. 
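+
+        Examples
+        --------
+        A short illustrative sketch (two already-normalized objective values):
+
+        >>> float(MeanAggregationStrategy()([0.3, 0.7]))
+        0.5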
""" - return np.mean(values, axis=1) + return np.mean(values, axis=0) diff --git a/smac/optimizer/multi_objective/parego.py b/smac/multi_objective/parego.py similarity index 57% rename from smac/optimizer/multi_objective/parego.py rename to smac/multi_objective/parego.py index 90b75efa5..94439292c 100644 --- a/smac/optimizer/multi_objective/parego.py +++ b/smac/multi_objective/parego.py @@ -1,39 +1,41 @@ +from __future__ import annotations + from typing import Optional import numpy as np -from smac.optimizer.multi_objective.aggregation_strategy import AggregationStrategy +from smac.multi_objective.aggregation_strategy import AggregationStrategy class ParEGO(AggregationStrategy): def __init__( self, - num_obj: int, rng: Optional[np.random.RandomState] = None, rho: float = 0.05, ): - super(ParEGO, self).__init__(num_obj=num_obj, rng=rng) + super(ParEGO, self).__init__(rng=rng) self.rho = rho - def __call__(self, values: np.ndarray) -> float: + def __call__(self, values: list[float]) -> float: """ Transform a multi-objective loss to a single loss. Parameters ---------- - values (np.ndarray): Normalized values. + values : list[float] + Normalized values. Returns ------- - cost (float): Combined cost. + cost : float + Combined cost. """ # Then we have to compute the weight - theta = self.rng.rand(self.num_obj) + theta = self.rng.rand(len(values)) # Normalize st all theta values sum up to 1 theta = theta / (np.sum(theta) + 1e-10) # Weight the values theta_f = theta * values - - return np.max(theta_f, axis=1) + self.rho * np.sum(theta_f, axis=1) + return np.max(theta_f, axis=0) + self.rho * np.sum(theta_f, axis=0) diff --git a/smac/multi_objective/utils.py b/smac/multi_objective/utils.py new file mode 100644 index 000000000..a88eb397f --- /dev/null +++ b/smac/multi_objective/utils.py @@ -0,0 +1,40 @@ +from __future__ import annotations + + +def normalize_costs(values: list[float], bounds: list[tuple[float, float]] | None = None) -> list[float]: + """ + Normalizes a list of floats with corresponding bounds. + + Parameters + ---------- + values : list[float] + List of costs to be normalized. + bounds : list[tuple[float, float]] | None, optional + List of tuple of bounds. By default None. If no bounds are passed, the values are returned + unnormalized. + + Returns + ------- + normalized_costs : list[float] + Normalized costs based on the bounds. If no bounds are given, the values are returned unnormalized. + Also, if min and max bounds are the same, the value of the corresponding objective is set to 1. 
+ """ + if bounds is None: + return values + + if len(values) != len(bounds): + raise ValueError("Number of values and bounds must be equal.") + + costs = [] + for v, b in zip(values, bounds): + assert type(v) != list + p = v - b[0] + q = b[1] - b[0] + + if q < 1e-10: + cost = 1.0 + else: + cost = p / q + costs += [cost] + + return costs diff --git a/smac/optimizer/acquisition.py b/smac/optimizer/acquisition/__init__.py similarity index 96% rename from smac/optimizer/acquisition.py rename to smac/optimizer/acquisition/__init__.py index 04e799258..1753a5c8e 100644 --- a/smac/optimizer/acquisition.py +++ b/smac/optimizer/acquisition/__init__.py @@ -10,7 +10,7 @@ from smac.configspace import Configuration from smac.configspace.util import convert_configurations_to_array -from smac.epm.base_epm import AbstractEPM +from smac.epm.base_epm import BaseEPM from smac.utils.logging import PickableLoggerAdapter __author__ = "Aaron Klein, Marius Lindauer" @@ -23,7 +23,7 @@ class AbstractAcquisitionFunction(object, metaclass=abc.ABCMeta): Parameters ---------- - model : AbstractEPM + model : BaseEPM Models the objective function. Attributes @@ -32,7 +32,7 @@ class AbstractAcquisitionFunction(object, metaclass=abc.ABCMeta): logger """ - def __init__(self, model: AbstractEPM): + def __init__(self, model: BaseEPM): self.model = model self._required_updates = ("model",) # type: Tuple[str, ...] self.logger = PickableLoggerAdapter(self.__module__ + "." + self.__class__.__name__) @@ -113,12 +113,12 @@ class IntegratedAcquisitionFunction(AbstractAcquisitionFunction): for further details. """ - def __init__(self, model: AbstractEPM, acquisition_function: AbstractAcquisitionFunction, **kwargs: Any): + def __init__(self, model: BaseEPM, acquisition_function: AbstractAcquisitionFunction, **kwargs: Any): """Constructor. Parameters ---------- - model : AbstractEPM + model : BaseEPM The model needs to implement an additional attribute ``models`` which contains the different models to integrate over. kwargs @@ -141,7 +141,7 @@ def update(self, **kwargs: Any) -> None: Parameters ---------- - model : AbstractEPM + model : BaseEPM The model needs to implement an additional attribute ``models`` which contains the different models to integrate over. kwargs @@ -184,7 +184,7 @@ class PriorAcquisitionFunction(AbstractAcquisitionFunction): def __init__( self, - model: AbstractEPM, + model: BaseEPM, acquisition_function: AbstractAcquisitionFunction, decay_beta: float, prior_floor: float = 1e-12, @@ -196,7 +196,7 @@ def __init__( Parameters ---------- - model : AbstractEPM + model : BaseEPM Models the objective function. decay_beta: Decay factor on the user prior - defaults to n_iterations / 10 if not specifed otherwise. @@ -350,12 +350,12 @@ class EI(AbstractAcquisitionFunction): with :math:`f(X^+)` as the best location. """ - def __init__(self, model: AbstractEPM, par: float = 0.0): + def __init__(self, model: BaseEPM, par: float = 0.0): """Constructor. Parameters ---------- - model : AbstractEPM + model : BaseEPM A model that implements at least - predict_marginalized_over_instances(X) par : float, default=0.0 @@ -419,7 +419,7 @@ def calculate_f(): class EIPS(EI): - def __init__(self, model: AbstractEPM, par: float = 0.0): + def __init__(self, model: BaseEPM, par: float = 0.0): r"""Computes for a given x the expected improvement as acquisition value. 
:math:`EI(X) := \frac{\mathbb{E}\left[\max\{0,f(\mathbf{X^+})-f_{t+1}(\mathbf{X})-\xi\right]\}]}{np.log(r(x))}`, @@ -427,7 +427,7 @@ def __init__(self, model: AbstractEPM, par: float = 0.0): Parameters ---------- - model : AbstractEPM + model : BaseEPM A model that implements at least - predict_marginalized_over_instances(X) returning a tuples of predicted cost and running time @@ -501,13 +501,13 @@ def calculate_f(): class LogEI(AbstractAcquisitionFunction): - def __init__(self, model: AbstractEPM, par: float = 0.0): + def __init__(self, model: BaseEPM, par: float = 0.0): r"""Computes for a given x the logarithm expected improvement as acquisition value. Parameters ---------- - model : AbstractEPM + model : BaseEPM A model that implements at least - predict_marginalized_over_instances(X) par : float, default=0.0 @@ -574,7 +574,7 @@ def calculate_log_ei(): class PI(AbstractAcquisitionFunction): - def __init__(self, model: AbstractEPM, par: float = 0.0): + def __init__(self, model: BaseEPM, par: float = 0.0): r"""Computes the probability of improvement for a given x over the best so far value as acquisition value. :math:`P(f_{t+1}(\mathbf{X})\geq f(\mathbf{X^+}))` :math:`:= \Phi(\\frac{ \mu(\mathbf{X})-f(\mathbf{X^+}) } @@ -582,7 +582,7 @@ def __init__(self, model: AbstractEPM, par: float = 0.0): Parameters ---------- - model : AbstractEPM + model : BaseEPM A model that implements at least - predict_marginalized_over_instances(X) par : float, default=0.0 @@ -623,7 +623,7 @@ def _compute(self, X: np.ndarray) -> np.ndarray: class LCB(AbstractAcquisitionFunction): - def __init__(self, model: AbstractEPM, par: float = 1.0): + def __init__(self, model: BaseEPM, par: float = 1.0): r"""Computes the lower confidence bound for a given x over the best so far value as acquisition value. @@ -633,7 +633,7 @@ def __init__(self, model: AbstractEPM, par: float = 1.0): Parameters ---------- - model : AbstractEPM + model : BaseEPM A model that implements at least - predict_marginalized_over_instances(X) par : float, default=1.0 @@ -674,7 +674,7 @@ def _compute(self, X: np.ndarray) -> np.ndarray: class TS(AbstractAcquisitionFunction): - def __init__(self, model: AbstractEPM, par: float = 0.0): + def __init__(self, model: BaseEPM, par: float = 0.0): r"""Do a Thompson Sampling for a given x over the best so far value as acquisition value. 
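For orientation, a minimal usage sketch of the renamed acquisition-function interface (the names `model`, `best_observed_cost` and `configs` below are placeholders, not part of this patch): an acquisition function is constructed with a trained BaseEPM, refreshed via ``update`` and then evaluated on candidate configurations.

    from smac.optimizer.acquisition import EI

    ei = EI(model=model)                            # model: any trained BaseEPM
    ei.update(model=model, eta=best_observed_cost)  # eta: cost of the current incumbent
    acq_values = ei(configs)                        # configs: List[Configuration]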
@@ -690,7 +690,7 @@ def __init__(self, model: AbstractEPM, par: float = 0.0): Parameters ---------- - model : AbstractEPM + model : BaseEPM A model that implements at least - predict_marginalized_over_instances(X) par : float, default=0.0 diff --git a/smac/optimizer/ei_optimization.py b/smac/optimizer/acquisition/maximizer.py similarity index 89% rename from smac/optimizer/ei_optimization.py rename to smac/optimizer/acquisition/maximizer.py index d0e7029ce..6c9fa446f 100644 --- a/smac/optimizer/ei_optimization.py +++ b/smac/optimizer/acquisition/maximizer.py @@ -16,9 +16,9 @@ get_one_exchange_neighbourhood, ) from smac.optimizer.acquisition import AbstractAcquisitionFunction -from smac.optimizer.random_configuration_chooser import ( +from smac.optimizer.configuration_chooser.random_chooser import ( ChooserNoCoolDown, - RandomConfigurationChooser, + RandomChooser, ) from smac.runhistory.runhistory import RunHistory from smac.stats.stats import Stats @@ -69,7 +69,7 @@ def maximize( runhistory: RunHistory, stats: Stats, num_points: int, - random_configuration_chooser: Optional[RandomConfigurationChooser] = None, + random_configuration_chooser: Optional[RandomChooser] = None, ) -> Iterator[Configuration]: """Maximize acquisition function using ``_maximize``. @@ -245,52 +245,81 @@ def _get_initial_points( # initiate local search configs_previous_runs = runhistory.get_all_configs() - # configurations with the highest previous EI - configs_previous_runs_sorted = self._sort_configs_by_acq_value(configs_previous_runs) - configs_previous_runs_sorted = [conf[1] for conf in configs_previous_runs_sorted[:num_points]] - - # configurations with the lowest predictive cost, check for None to make unit tests work - if self.acquisition_function.model is not None: - conf_array = convert_configurations_to_array(configs_previous_runs) - costs = self.acquisition_function.model.predict_marginalized_over_instances(conf_array)[0] - assert len(conf_array) == len(costs), (conf_array.shape, costs.shape) - - # In case of the predictive model returning the prediction for more than one objective per configuration - # (for example multi-objective or EIPS) it is not immediately clear how to sort according to the cost - # of a configuration. Therefore, we simply follow the ParEGO approach and use a random scalarization. - if len(costs.shape) == 2 and costs.shape[1] > 1: - weights = np.array([self.rng.rand() for _ in range(costs.shape[1])]) - weights = weights / np.sum(weights) - costs = costs @ weights - - # From here - # http://stackoverflow.com/questions/20197990/how-to-make-argsort-result-to-be-random-between-equal-values - random = self.rng.rand(len(costs)) - # Last column is primary sort key! 
- indices = np.lexsort((random.flatten(), costs.flatten())) - - # Cannot use zip here because the indices array cannot index the - # rand_configs list, because the second is a pure python list - configs_previous_runs_sorted_by_cost = [configs_previous_runs[ind] for ind in indices][:num_points] - else: - configs_previous_runs_sorted_by_cost = [] + init_points = self._get_init_points_from_previous_configs( + num_points, configs_previous_runs, additional_start_points + ) + return init_points - if additional_start_points is not None: - additional_start_points = [asp[1] for asp in additional_start_points[:num_points]] - else: - additional_start_points = [] - - init_points = [] - init_points_as_set = set() # type: Set[Configuration] - for cand in itertools.chain( - configs_previous_runs_sorted, - configs_previous_runs_sorted_by_cost, - additional_start_points, - ): - if cand not in init_points_as_set: - init_points.append(cand) - init_points_as_set.add(cand) + def _get_init_points_from_previous_configs( + self, + num_points: int, + configs_previous_runs: List[Configuration], + additional_start_points: Optional[List[Tuple[float, Configuration]]], + ) -> List[Configuration]: + """ + A function that generates a set of initial points from the previous configurations and additional points (if + applicable). The idea is to decouple runhistory from the local search model and replace it with a more genreal + form (List[Configuration]). + Parameters + ---------- + num_points: int + Number of initial points to be generated + configs_previous_runs: List[Configuration] + previous configuration from runhistory + additional_start_points: Optional[List[Tuple[float, Configuration]]] + if we want to specify another set of points as initial points + + Returns + ------- + init_points: List[Configuration] + a set of initial points + """ + # configurations with the highest previous EI + configs_previous_runs_sorted = self._sort_configs_by_acq_value(configs_previous_runs) + configs_previous_runs_sorted = [conf[1] for conf in configs_previous_runs_sorted[:num_points]] + + # configurations with the lowest predictive cost, check for None to make unit tests work + if self.acquisition_function.model is not None: + conf_array = convert_configurations_to_array(configs_previous_runs) + costs = self.acquisition_function.model.predict_marginalized_over_instances(conf_array)[0] + assert len(conf_array) == len(costs), (conf_array.shape, costs.shape) + + # In case of the predictive model returning the prediction for more than one objective per configuration + # (for example multi-objective or EIPS) it is not immediately clear how to sort according to the cost + # of a configuration. Therefore, we simply follow the ParEGO approach and use a random scalarization. + if len(costs.shape) == 2 and costs.shape[1] > 1: + weights = np.array([self.rng.rand() for _ in range(costs.shape[1])]) + weights = weights / np.sum(weights) + costs = costs @ weights + + # From here + # http://stackoverflow.com/questions/20197990/how-to-make-argsort-result-to-be-random-between-equal-values + random = self.rng.rand(len(costs)) + # Last column is primary sort key! 
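+            # (np.lexsort treats the last key as the primary one: candidates are ordered
+            #  by predicted cost, and exact ties are broken by the random vector above)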
+ indices = np.lexsort((random.flatten(), costs.flatten())) + + # Cannot use zip here because the indices array cannot index the + # rand_configs list, because the second is a pure python list + configs_previous_runs_sorted_by_cost = [configs_previous_runs[ind] for ind in indices][:num_points] + else: + configs_previous_runs_sorted_by_cost = [] + + if additional_start_points is not None: + additional_start_points = [asp[1] for asp in additional_start_points[:num_points]] + else: + additional_start_points = [] + + init_points = [] + init_points_as_set = set() # type: Set[Configuration] + for cand in itertools.chain( + configs_previous_runs_sorted, + configs_previous_runs_sorted_by_cost, + additional_start_points, + ): + if cand not in init_points_as_set: + init_points.append(cand) + init_points_as_set.add(cand) return init_points def _do_search( @@ -802,7 +831,7 @@ def __init__( self, challenger_callback: Callable, configuration_space: ConfigurationSpace, - random_configuration_chooser: Optional[RandomConfigurationChooser] = ChooserNoCoolDown(modulus=2.0), + random_configuration_chooser: Optional[RandomChooser] = ChooserNoCoolDown(modulus=2.0), ): self.challengers_callback = challenger_callback self.challengers = None # type: Optional[List[Configuration]] diff --git a/smac/optimizer/configuration_chooser/__init__.py b/smac/optimizer/configuration_chooser/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/smac/optimizer/configuration_chooser/boing_chooser.py b/smac/optimizer/configuration_chooser/boing_chooser.py new file mode 100644 index 000000000..865911033 --- /dev/null +++ b/smac/optimizer/configuration_chooser/boing_chooser.py @@ -0,0 +1,658 @@ +from typing import Dict, Iterator, List, Optional, Tuple, Type, Union + +import copy +from itertools import chain + +import numpy as np +from ConfigSpace.hyperparameters import NumericalHyperparameter + +from smac.configspace import Configuration +from smac.epm.base_epm import BaseEPM +from smac.epm.gaussian_process.augmented import GloballyAugmentedLocalGaussianProcess +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances +from smac.epm.utils import get_types +from smac.optimizer.acquisition import EI, TS, AbstractAcquisitionFunction +from smac.optimizer.acquisition.maximizer import AcquisitionFunctionMaximizer +from smac.optimizer.configuration_chooser.epm_chooser import EPMChooser +from smac.optimizer.configuration_chooser.random_chooser import ( + ChooserNoCoolDown, + RandomChooser, +) +from smac.optimizer.subspaces.boing_subspace import BOinGSubspace +from smac.optimizer.subspaces.turbo_subspace import TuRBOSubSpace +from smac.runhistory.runhistory import RunHistory +from smac.runhistory.runhistory2epm_boing import RunHistory2EPM4CostWithRaw +from smac.scenario.scenario import Scenario +from smac.stats.stats import Stats +from smac.utils.constants import MAXINT + + +class BOinGChooser(EPMChooser): + """ + Interface to train the EPM and generate next configurations with both global and local models. 
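+
+    The global model (a random forest) is trained on all observations and its acquisition
+    optimizer proposes a challenger; a subspace around that challenger is then extracted and
+    a local model (by default a globally augmented local Gaussian process) suggests the next
+    configuration inside it. If ``do_switching`` is set, the chooser may alternate between
+    this BOinG scheme and a TuRBO-style local search.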
+ + Parameters + ---------- + runhistory2epm: RunHistory2EPM4CostWithRaw, + a transformer to transform rh to vectors, different from the rh2epm used in vanilla EPMChooser, this rh2epm + object needs to provide the raw values for optimizer in different stages + model: smac.epm.rf_with_instances.RandomForestWithInstances + empirical performance model (right now, we support only RandomForestWithInstances) as a global model + acq_optimizer: smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer + Optimizer of acquisition function of global models + model_local: BaseEPM, + local empirical performance model, used in subspace + model_local_kwargs: Optional[Dict] = None, + parameters for initializing a local model + acquisition_func_local: AbstractAcquisitionFunction, + local acquisition function, used in subspace + acquisition_func_local_kwargs: Optional[Dict] = None, + parameters for initializing a local acquisition function optimizer + acq_optimizer_local: Optional[AcquisitionFunctionMaximizer] = None, + Optimizer of acquisition function of local models + acq_optimizer_local_kwargs: typing: Optional[Dict] = None, + parameters for the optimizer of acquisition function of local models + max_configs_local_fracs : float + The maximal number of fractions of samples to be included in the subspace. If the number of samples in the + subspace is greater than this value and n_min_config_inner, the subspace will be cropped to fit the requirement + min_configs_local: int, + Minimum number of samples included in the inner loop model + do_switching: bool + if we want to switch between turbo and boing or do a pure BOinG search + turbo_kwargs: Optional[Dict] = None + parameters for building a turbo optimizer + """ + + def __init__( + self, + scenario: Scenario, + stats: Stats, + runhistory: RunHistory, + runhistory2epm: RunHistory2EPM4CostWithRaw, + model: RandomForestWithInstances, + acq_optimizer: AcquisitionFunctionMaximizer, + acquisition_func: AbstractAcquisitionFunction, + rng: np.random.RandomState, + restore_incumbent: Configuration = None, + random_configuration_chooser: RandomChooser = ChooserNoCoolDown(2.0), + predict_x_best: bool = True, + min_samples_model: int = 1, + model_local: Union[BaseEPM, Type[BaseEPM]] = GloballyAugmentedLocalGaussianProcess, + acquisition_func_local: Union[AbstractAcquisitionFunction, Type[AbstractAcquisitionFunction]] = EI, + model_local_kwargs: Optional[Dict] = None, + acquisition_func_local_kwargs: Optional[Dict] = None, + acq_optimizer_local: Optional[AcquisitionFunctionMaximizer] = None, + acq_optimizer_local_kwargs: Optional[Dict] = None, + max_configs_local_fracs: float = 0.5, + min_configs_local: Optional[int] = None, + do_switching: bool = False, + turbo_kwargs: Optional[Dict] = None, + ): + # initialize the original EPM_Chooser + super(BOinGChooser, self).__init__( + scenario=scenario, + stats=stats, + runhistory=runhistory, + runhistory2epm=runhistory2epm, + model=model, + acq_optimizer=acq_optimizer, + acquisition_func=acquisition_func, + rng=rng, + restore_incumbent=restore_incumbent, + random_configuration_chooser=random_configuration_chooser, + predict_x_best=predict_x_best, + min_samples_model=min_samples_model, + ) + if not isinstance(self.model, RandomForestWithInstances): + raise ValueError("BOinG only supports RandomForestWithInstances as its global optimizer") + if not isinstance(self.rh2EPM, RunHistory2EPM4CostWithRaw): + raise ValueError("BOinG only supports RunHistory2EPM4CostWithRaw as its rh transformer") + + cs = self.scenario.cs # type: 
ignore + + self.subspace_info = { + "model_local": model_local, + "model_local_kwargs": model_local_kwargs, + "acq_func_local": acquisition_func_local, + "acq_func_local_kwargs": acquisition_func_local_kwargs, + "acq_optimizer_local": acq_optimizer_local, + "acq_optimizer_local_kwargs": acq_optimizer_local_kwargs, + } + + self.max_configs_local_fracs = max_configs_local_fracs + self.min_configs_local = ( + min_configs_local if min_configs_local is not None else 5 * len(cs.get_hyperparameters()) + ) + + types, bounds = get_types(cs, instance_features=None) + + self.types = types + self.bounds = bounds + self.cat_dims = np.where(np.array(types) != 0)[0] + self.cont_dims = np.where(np.array(types) == 0)[0] + self.config_space = cs + + self.frac_to_start_bi = 0.8 + self.split_count = np.zeros(len(types)) + self.do_switching = do_switching + self.random_search_upper_log = 1 + + self.optimal_value = np.inf + self.optimal_config = None + + self.ss_threshold = 0.1 ** len(cs.get_hyperparameters()) + if self.do_switching: + # If we want to switch between BOinG and TurBO + self.run_TuRBO = False + self.failcount_BOinG = 0 + self.failcount_TurBO = 0 + + turbo_model = copy.deepcopy(model_local) + turbo_acq = TS + turbo_opt_kwargs = dict( + config_space=cs, + bounds=bounds, + hps_types=types, + model_local=turbo_model, + model_local_kwargs=copy.deepcopy(model_local_kwargs), + acq_func_local=turbo_acq, + rng=rng, + length_min=2e-4, + ) + self.turbo_kwargs = turbo_opt_kwargs + if turbo_kwargs is not None: + turbo_opt_kwargs.update(turbo_kwargs) + self.turbo_optimizer = TuRBOSubSpace(**turbo_opt_kwargs) + + def restart_TuRBOinG(self, X: np.ndarray, Y: np.ndarray, Y_raw: np.ndarray, train_model: bool = False) -> None: + """ + Restart a new TurBO Optimizer, the bounds of the TurBO Optimizer is determined by a RF, we randomly sample 20 + points and extract subspaces that contain at least self.min_configs_local points, and we select the subspace + with the largest volume to construct a turbo optimizer + Parameters + ---------- + X: np.ndarray (N, D) + previous evaluated configurations + Y: np.ndarray (N,) + performances of previous evaluated configurations (transformed by rh2epm transformer) + Y_raw: np.ndarray (N,) + performances of previous evaluated configurations (raw values, not transformed) + train_model: bool + if we retrain the model with the given X and Y + """ + if train_model: + self.model.train(X, Y) + num_samples = 20 + union_ss = [] + union_indices = [] + rand_samples = self.config_space.sample_configuration(num_samples) + for sample in rand_samples: + sample_array = sample.get_array() + union_bounds_cont, _, ss_data_indices = subspace_extraction( + X=X, + challenger=sample_array, + model=self.model, + num_min=self.min_configs_local, + num_max=MAXINT, + bounds=self.bounds, + cont_dims=self.cont_dims, + cat_dims=self.cat_dims, + ) + union_ss.append(union_bounds_cont) + union_indices.append(ss_data_indices) + union_ss = np.asarray(union_ss) + volume_ss = np.product(union_ss[:, :, 1] - union_ss[:, :, 0], axis=1) # type: ignore + ss_idx = np.argmax(volume_ss) + ss_turbo = union_ss[ss_idx] + ss_data_indices = union_indices[ss_idx] + + # we only consider numerical(continuous) hyperparameters here + self.turbo_optimizer = TuRBOSubSpace( + **self.turbo_kwargs, # type: ignore + bounds_ss_cont=ss_turbo, # type: ignore + initial_data=(X[ss_data_indices], Y_raw[ss_data_indices]), # type: ignore + ) + self.turbo_optimizer.add_new_observations(X[ss_data_indices], Y_raw[ss_data_indices]) + + def 
choose_next(self, incumbent_value: float = None) -> Iterator[Configuration]: + """ + Choose next candidate solution with Bayesian optimization. We use TurBO optimizer or BOinG to suggest + the next configuration. + If we switch local model between TurBO and BOinG, we gradually increase the probability to switch to another + optimizer if we cannot make further process. (Or if TurBO find a new incumbent, we will switch to BOinG to do + further exploitation) + + Parameters + ---------- + incumbent_value: float + Cost value of incumbent configuration (required for acquisition function); + If not given, it will be inferred from runhistory or predicted; + if not given and runhistory is empty, it will raise a ValueError. + + Returns + ------- + Iterator + """ + # we also need the untransformed raw y values to used for local models + X, Y, Y_raw, X_configurations = self._collect_all_data_to_train_model() + if self.do_switching: + if self.run_TuRBO: + X, Y, Y_raw, X_configurations = self._collect_all_data_to_train_model() + + num_new_bservations = 1 # here we only consider batch_size ==1 + + new_observations = Y_raw[-num_new_bservations:] + + # give new suggestions from initialized values in TurBO + if len(self.turbo_optimizer.init_configs) > 0: + self.turbo_optimizer.add_new_observations(X[-num_new_bservations:], Y_raw[-num_new_bservations:]) + return self.turbo_optimizer.generate_challengers() + + self.turbo_optimizer.adjust_length(new_observations) + + # if we need to restart TurBO, we first check if we want to switch to BOinG + if self.turbo_optimizer.length < self.turbo_optimizer.length_min: + optimal_turbo = np.min(self.turbo_optimizer.ss_y) + + self.logger.debug(f"Best Found value by TuRBO: {optimal_turbo}") + + increment = optimal_turbo - self.optimal_value + + if increment < 0: + min_idx = np.argmin(Y_raw) + self.optimal_value = Y_raw[min_idx].item() + # compute the distance between the previous incumbent and new incumbent + cfg_diff = X[min_idx] - self.optimal_config + self.optimal_config = X[min_idx] + # we avoid sticking to a local minimum too often, e.g. either we have a relative much better + # configuration or the new configuration is a little bit far away from the current incumbent + if ( + increment < -1e-3 * np.abs(self.optimal_value) + or np.abs(np.product(cfg_diff)) >= self.ss_threshold + ): + self.failcount_TurBO -= 1 + # switch to BOinG as TurBO found a better model and we could do exploration + # also we halve the failcount of BOinG to avoid switching to TurBO too frequently + self.failcount_BOinG = self.failcount_BOinG // 2 + self.run_TuRBO = False + self.logger.debug("Optimizer switches to BOinG!") + + else: + self.failcount_TurBO += 1 + + # The probability is a linear curve. 
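+                    # Each TuRBO restart that did not improve the incumbent raises the switch
+                    # probability by 0.1, so after one such restart we move to BOinG with
+                    # probability 0.1 and after ten the fallback is certain.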
+ prob_to_BOinG = 0.1 * self.failcount_TurBO + self.logger.debug(f"failure_count TuRBO :{self.failcount_TurBO}") + rand_value = self.rng.random() + + if rand_value < prob_to_BOinG: + self.failcount_BOinG = self.failcount_BOinG // 2 + self.run_TuRBO = False + self.logger.debug("Optimizer switches to BOinG!") + else: + self.restart_TuRBOinG(X=X, Y=Y, Y_raw=Y_raw, train_model=True) + return self.turbo_optimizer.generate_challengers() + + self.turbo_optimizer.add_new_observations(X[-num_new_bservations:], Y_raw[-num_new_bservations:]) + + return self.turbo_optimizer.generate_challengers() + + if X.shape[0] == 0: + # Only return a single point to avoid an overly high number of + # random search iterations + return self._random_search.maximize(runhistory=self.runhistory, stats=self.stats, num_points=1) + # if the number of points is not big enough, we simply build one subspace (the raw configuration space) and + # the local model becomes global model + if X.shape[0] < (self.min_configs_local / self.frac_to_start_bi): + if len(self.config_space.get_conditions()) == 0: + self.model.train(X, Y) + cs = self.scenario.cs # type: ignore + ss = BOinGSubspace( + config_space=cs, + bounds=self.bounds, + hps_types=self.types, + rng=self.rng, + initial_data=(X, Y_raw), + incumbent_array=None, + model_local=self.subspace_info["model_local"], # type: ignore + model_local_kwargs=self.subspace_info["model_local_kwargs"], # type: ignore + acq_func_local=self.subspace_info["acq_func_local"], # type: ignore + acq_func_local_kwargs=self.subspace_info["acq_func_local_kwargs"], # type: ignore + acq_optimizer_local=self.acq_optimizer, + ) + return ss.generate_challengers() + + # train the outer model + self.model.train(X, Y) + + if incumbent_value is not None: + best_observation = incumbent_value + x_best_array = None # type: Optional[np.ndarray] + else: + if self.runhistory.empty(): + raise ValueError("Runhistory is empty and the cost value of " "the incumbent is unknown.") + x_best_array, best_observation = self._get_x_best(self.predict_x_best, X_configurations) + + self.acquisition_func.update( + model=self.model, + eta=best_observation, + incumbent_array=x_best_array, + num_data=len(self._get_evaluated_configs()), + X=X_configurations, + ) + + if self.do_switching: + # check if we need to switch to turbo + # same as above + self.failcount_BOinG += 1 + increment = Y_raw[-1].item() - self.optimal_value + if increment < 0: + if self.optimal_config is not None: + cfg_diff = X[-1] - self.optimal_config + if ( + increment < -1e-2 * np.abs(self.optimal_value) + or np.abs(np.product(cfg_diff)) >= self.ss_threshold + ): + self.failcount_BOinG -= X.shape[-1] + self.optimal_value = Y_raw[-1].item() + self.optimal_config = X[-1] + else: + # restart + idx_min = np.argmin(Y_raw) + self.logger.debug("Better value found by BOinG, continue BOinG") + self.optimal_value = Y_raw[idx_min].item() + self.optimal_config = X[idx_min] + self.failcount_BOinG = 0 + + # similar to TurBO, we do a judgement every n_dimension times + amplify_param = self.failcount_BOinG // (X.shape[-1] * 1) + + if self.failcount_BOinG % (X.shape[-1] * 1) == 0: + prob_to_TurBO = 0.1 * amplify_param + rand_value = self.rng.random() + + if rand_value < prob_to_TurBO: + self.run_TuRBO = True + self.logger.debug("Switch To TuRBO") + self.failcount_TurBO = self.failcount_TurBO // 2 + self.restart_TuRBOinG(X=X, Y=Y, Y_raw=Y_raw, train_model=False) + + challengers_global = self.acq_optimizer.maximize( + runhistory=self.runhistory, + stats=self.stats, + 
num_points=self.scenario.acq_opt_challengers, # type: ignore[attr-defined] # noqa F821 + random_configuration_chooser=self.random_configuration_chooser, + ) + + if ( + X.shape[0] < (self.min_configs_local / self.frac_to_start_bi) + and len(self.config_space.get_conditions()) == 0 + ): + return challengers_global + + cfg_challenger_global_first = next(challengers_global) + array_challenger_global_first = cfg_challenger_global_first.get_array() # type: np.ndarray + + num_max_configs = int(X.shape[0] * self.max_configs_local_fracs) + + # to avoid the case that num_max_configs is only a little larger than self.min_configs_local + num_max = MAXINT if num_max_configs <= 2 * self.min_configs_local else num_max_configs + + if len(self.config_space.get_conditions()) > 0: + challanger_activate_hps = np.isfinite(array_challenger_global_first).astype(int) + rh_activate_hps = np.isfinite(X).astype(int) + indices_X_in_same_hierarchy = np.all((challanger_activate_hps - rh_activate_hps) == 0, axis=1) + num_indices_X_in_same_hierarchy = sum(indices_X_in_same_hierarchy) + + if num_indices_X_in_same_hierarchy == 0: + return chain([cfg_challenger_global_first], challengers_global) + + activate_dims = [] + hps = self.config_space.get_hyperparameters() + for idx_hp in np.where(challanger_activate_hps > 0)[0]: + if isinstance(hps[idx_hp], NumericalHyperparameter): + activate_dims.append(idx_hp) + else: + indices_X_in_same_hierarchy = indices_X_in_same_hierarchy & ( + X[:, idx_hp] == array_challenger_global_first[idx_hp] + ) + num_indices_X_in_same_hierarchy = sum(indices_X_in_same_hierarchy) + + X = X[indices_X_in_same_hierarchy] + Y_raw = Y_raw[indices_X_in_same_hierarchy] + + if len(activate_dims) == 0 or num_indices_X_in_same_hierarchy <= max(5, len(activate_dims)): + return chain([cfg_challenger_global_first], challengers_global) + n_min_configs_inner = self.min_configs_local // len(hps) * len(activate_dims) + else: + n_min_configs_inner = self.min_configs_local + activate_dims = np.arange(len(self.config_space.get_hyperparameters())) + + bounds_ss_cont, bounds_ss_cat, ss_data_indices = subspace_extraction( + X=X, + challenger=array_challenger_global_first, + model=self.model, + num_min=n_min_configs_inner, + num_max=num_max, + bounds=self.bounds, + cont_dims=self.cont_dims, + cat_dims=self.cat_dims, + ) + + self.logger.debug("contained {0} data of {1}".format(sum(ss_data_indices), Y_raw.size)) + + ss = BOinGSubspace( + config_space=self.scenario.cs, # type: ignore + bounds=self.bounds, + hps_types=self.types, + bounds_ss_cont=bounds_ss_cont, # type: ignore[arg-type] + bounds_ss_cat=bounds_ss_cat, + rng=self.rng, + initial_data=(X, Y_raw), + incumbent_array=array_challenger_global_first, # type: ignore[arg-type] + activate_dims=activate_dims, + **self.subspace_info, # type: ignore[arg-type] + ) + return ss.generate_challengers() + + def _collect_all_data_to_train_model(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Similar to the implementaiton of EPMChooser, however, we also return the raw values here.""" + # if we use a float value as a budget, we want to train the model only on the highest budget + available_budgets = [] + for run_key in self.runhistory.data.keys(): + available_budgets.append(run_key.budget) + + # Sort available budgets from highest to lowest budget + available_budgets = sorted(list(set(available_budgets)), reverse=True) + + # Get #points per budget and if there are enough samples, then build a model + for b in available_budgets: + X, Y, Y_raw = 
self.rh2EPM.transform_with_raw( # type: ignore[attr-defined] + self.runhistory, + budget_subset=[ + b, + ], + ) # type: ignore + if X.shape[0] >= self.min_samples_model: + self.currently_considered_budgets = [ + b, + ] + configs_array = self.rh2EPM.get_configurations( + self.runhistory, budget_subset=self.currently_considered_budgets + ) + return X, Y, Y_raw, configs_array + + return ( + np.empty(shape=[0, 0]), + np.empty( + shape=[ + 0, + ] + ), + np.empty( + shape=[ + 0, + ] + ), + np.empty(shape=[0, 0]), + ) + + +def subspace_extraction( + X: np.ndarray, + challenger: np.ndarray, + model: RandomForestWithInstances, + num_min: int, + num_max: int, + bounds: Union[np.ndarray, List[Tuple]], + cat_dims: np.ndarray, + cont_dims: np.ndarray, +) -> Tuple[np.ndarray, List[Tuple], np.ndarray]: + """ + Extract a subspace that contains at least num_min points but no more than num_max points + + Parameters + ---------- + X: np.ndarray (N, D) + points used to train the model + challenger: np.ndarray (1, D) + the challenger where the subspace would grow + model: RandomForestWithInstances + a rf model + num_min: int + minimal number of points to be included in the subspace + num_max: int + maximal number of points to be included in the subspace + bounds: np.ndarray(D, 2) + bounds of the entire space, D = D_cat + D_cont + cat_dims: np.ndarray (D_cat) + categorical dimensions + cont_dims: np.ndarray(D_cont) + continuous dimensions + + Returns + ------- + union_bounds_cont: np.ndarray(D_cont, 2), + the continuous bounds of the subregion + union_bounds_cat, List[Tuple], + the categorical bounds of the subregion + in_ss_dims: + indices of the points that lie inside the subregion + """ + trees = model.rf.get_all_trees() + trees = [tree for tree in trees] + num_trees = len(trees) + node_indices = [0] * num_trees + + indices_trees = np.arange(num_trees) + np.random.shuffle(indices_trees) + ss_indices = np.full(X.shape[0], True) # type: np.ndarray + + stop_update = [False] * num_trees + + ss_bounds = np.array(bounds) + + cont_dims = np.array(cont_dims) + cat_dims = np.array(cat_dims) + + if len(cat_dims) == 0: + ss_bounds_cat = [()] + else: + ss_bounds_cat = [() for _ in range(len(cat_dims))] + for i, cat_dim in enumerate(cat_dims): + ss_bounds_cat[i] = np.arange(ss_bounds[cat_dim][0]) + + if len(cont_dims) == 0: + ss_bounds_cont = np.array([]) # type: np.ndarray + else: + ss_bounds_cont = ss_bounds[cont_dims] + + def traverse_forest(check_num_min: bool = True) -> None: + nonlocal ss_indices + np.random.shuffle(indices_trees) + for i in indices_trees: + if stop_update[i]: + continue + tree = trees[int(i)] + node_idx = node_indices[i] + node = tree.get_node(node_idx) + + if node.is_a_leaf(): + stop_update[i] = True + continue + + feature_idx = node.get_feature_index() + cont_feature_idx = np.where(feature_idx == cont_dims)[0] + if cont_feature_idx.size == 0: + # This node split the subspace w.r.t. 
the categorical hyperparameters + cat_feature_idx = np.where(feature_idx == cat_dims)[0][0] + split_value = node.get_cat_split() + intersect = np.intersect1d(ss_bounds_cat[cat_feature_idx], split_value, assume_unique=True) + + if len(intersect) == len(ss_bounds_cat[cat_feature_idx]): + # will fall into the left child + temp_child_idx = 0 + node_indices[i] = node.get_child_index(temp_child_idx) + elif len(intersect) == 0: + # will fall into the left child + temp_child_idx = 1 + node_indices[i] = node.get_child_index(temp_child_idx) + else: + if challenger[feature_idx] in intersect: + temp_child_idx = 0 + temp_node_indices = ss_indices & np.in1d(X[:, feature_idx], split_value) + temp_bound_ss = intersect + else: + temp_child_idx = 1 + temp_node_indices = ss_indices & np.in1d(X[:, feature_idx], split_value, invert=True) + temp_bound_ss = np.setdiff1d(ss_bounds_cat[cat_feature_idx], split_value) + if sum(temp_node_indices) > num_min: + # number of points inside subspace is still greater than num_min, we could go deeper + ss_bounds_cat[cat_feature_idx] = temp_bound_ss + ss_indices = temp_node_indices + node_indices[i] = node.get_child_index(temp_child_idx) + else: + if check_num_min: + stop_update[i] = True + else: + # if we don't check the num_min, we will stay go deeper into the child nodes without + # splitting the subspace + node_indices[i] = node.get_child_index(temp_child_idx) + else: + # This node split the subspace w.r.t. the continuous hyperparameters + split_value = node.get_num_split_value() + cont_feature_idx = cont_feature_idx.item() + if ss_bounds_cont[cont_feature_idx][0] <= split_value <= ss_bounds_cont[cont_feature_idx][1]: + # the subspace can be further split + if challenger[feature_idx] >= split_value: + temp_bound_ss = np.array([split_value, ss_bounds_cont[cont_feature_idx][1]]) + temp_node_indices = ss_indices & (X[:, feature_idx] >= split_value) + temp_child_idx = 1 + else: + temp_bound_ss = np.array([ss_bounds_cont[cont_feature_idx][0], split_value]) + temp_node_indices = ss_indices & (X[:, feature_idx] <= split_value) + temp_child_idx = 0 + if sum(temp_node_indices) > num_min: + # number of points inside subspace is still greater than num_min + ss_bounds_cont[cont_feature_idx] = temp_bound_ss + ss_indices = temp_node_indices + node_indices[i] = node.get_child_index(temp_child_idx) + else: + if check_num_min: + stop_update[i] = True + else: + node_indices[i] = node.get_child_index(temp_child_idx) + else: + temp_child_idx = 1 if challenger[feature_idx] >= split_value else 0 + node_indices[i] = node.get_child_index(temp_child_idx) + + while sum(stop_update) < num_trees: + traverse_forest() + + if sum(ss_indices) > num_max: + # number of points inside the subregion have a larger value than num_max + stop_update = [False] * num_trees + while sum(stop_update) < num_trees: + traverse_forest(False) + + return ss_bounds_cont, ss_bounds_cat, ss_indices # type: ignore[return-value] diff --git a/smac/optimizer/epm_configuration_chooser.py b/smac/optimizer/configuration_chooser/epm_chooser.py similarity index 92% rename from smac/optimizer/epm_configuration_chooser.py rename to smac/optimizer/configuration_chooser/epm_chooser.py index d01e94d72..89f14a4df 100644 --- a/smac/optimizer/epm_configuration_chooser.py +++ b/smac/optimizer/configuration_chooser/epm_chooser.py @@ -1,4 +1,4 @@ -from typing import Iterator, List, Optional, Tuple +from typing import Any, Iterator, List, Optional, Tuple import logging @@ -6,12 +6,15 @@ from smac.configspace import Configuration from 
smac.configspace.util import convert_configurations_to_array -from smac.epm.rf_with_instances import RandomForestWithInstances +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances from smac.optimizer.acquisition import AbstractAcquisitionFunction -from smac.optimizer.ei_optimization import AcquisitionFunctionMaximizer, RandomSearch -from smac.optimizer.random_configuration_chooser import ( +from smac.optimizer.acquisition.maximizer import ( + AcquisitionFunctionMaximizer, + RandomSearch, +) +from smac.optimizer.configuration_chooser.random_chooser import ( ChooserNoCoolDown, - RandomConfigurationChooser, + RandomChooser, ) from smac.runhistory.runhistory import RunHistory from smac.runhistory.runhistory2epm import AbstractRunHistory2EPM @@ -22,8 +25,8 @@ __license__ = "3-clause BSD" -class EPMChooser(object): - """Interface to train the EPM and generate next configurations. +class EPMChooser: + """Interface to train the EPM and generate/choose next configurations. Parameters ---------- @@ -51,6 +54,8 @@ class EPMChooser(object): Choose x_best for computing the acquisition function via the model instead of via the observations. min_samples_model: int Minimum number of samples to build a model + epm_chooser_kwargs: Any: + additional arguments passed to EPMChooser (Might be used by its subclasses) """ def __init__( @@ -64,9 +69,10 @@ def __init__( acquisition_func: AbstractAcquisitionFunction, rng: np.random.RandomState, restore_incumbent: Configuration = None, - random_configuration_chooser: RandomConfigurationChooser = ChooserNoCoolDown(modulus=2.0), + random_configuration_chooser: RandomChooser = ChooserNoCoolDown(modulus=2.0), predict_x_best: bool = True, min_samples_model: int = 1, + **epm_chooser_kwargs: Any, ): self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__) self.incumbent = restore_incumbent diff --git a/smac/optimizer/random_configuration_chooser.py b/smac/optimizer/configuration_chooser/random_chooser.py similarity index 95% rename from smac/optimizer/random_configuration_chooser.py rename to smac/optimizer/configuration_chooser/random_chooser.py index 617848322..6fc990376 100644 --- a/smac/optimizer/random_configuration_chooser.py +++ b/smac/optimizer/configuration_chooser/random_chooser.py @@ -13,7 +13,7 @@ __version__ = "0.0.1" -class RandomConfigurationChooser(ABC): +class RandomChooser(ABC): """Abstract base of helper classes to configure interleaving of random configurations in a list of challengers. """ @@ -32,7 +32,7 @@ def check(self, iteration: int) -> bool: pass -class ChooserNoCoolDown(RandomConfigurationChooser): +class ChooserNoCoolDown(RandomChooser): """Interleave a random configuration after a constant number of configurations found by Bayesian optimization. @@ -59,7 +59,7 @@ def check(self, iteration: int) -> bool: return iteration % self.modulus < 1 -class ChooserLinearCoolDown(RandomConfigurationChooser): +class ChooserLinearCoolDown(RandomChooser): """Interleave a random configuration, decreasing the fraction of random configurations over time. @@ -107,7 +107,7 @@ def check(self, iteration: int) -> bool: return False -class ChooserProb(RandomConfigurationChooser): +class ChooserProb(RandomChooser): """Interleave a random configuration according to a given probability. 
Parameters @@ -134,7 +134,7 @@ def check(self, iteration: int) -> bool: return False -class ChooserProbCoolDown(RandomConfigurationChooser): +class ChooserProbCoolDown(RandomChooser): """Interleave a random configuration according to a given probability which is decreased over time. @@ -165,7 +165,7 @@ def check(self, iteration: int) -> bool: return False -class ChooserCosineAnnealing(RandomConfigurationChooser): +class ChooserCosineAnnealing(RandomChooser): """Interleave a random configuration according to a given probability which is decreased according to a cosine annealing schedule. diff --git a/smac/optimizer/configuration_chooser/turbo_chooser.py b/smac/optimizer/configuration_chooser/turbo_chooser.py new file mode 100644 index 000000000..443981181 --- /dev/null +++ b/smac/optimizer/configuration_chooser/turbo_chooser.py @@ -0,0 +1,131 @@ +import typing + +import numpy as np + +from smac.configspace import Configuration +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances +from smac.epm.utils import get_types +from smac.optimizer.acquisition import TS, AbstractAcquisitionFunction +from smac.optimizer.acquisition.maximizer import AcquisitionFunctionMaximizer +from smac.optimizer.configuration_chooser.epm_chooser import EPMChooser +from smac.optimizer.configuration_chooser.random_chooser import ( + ChooserNoCoolDown, + RandomChooser, +) +from smac.optimizer.subspaces.turbo_subspace import TuRBOSubSpace +from smac.runhistory.runhistory import RunHistory +from smac.runhistory.runhistory2epm import AbstractRunHistory2EPM +from smac.scenario.scenario import Scenario +from smac.stats.stats import Stats + + +class TurBOChooser(EPMChooser): + """ + Interface to train the EPM and generate next configurations with TurBO: + D. Eriksson et al. Scalable Global Optimization via Local Bayesian Optimization + https://papers.nips.cc/paper/2019/file/6c990b7aca7bc7058f5e98ea909e924b-Paper.pdf + + Parameters + ---------- + length_init: float + Initialized length after restarting + length_min: float + If the subspace length is smaller than length_min, TurBO will restart + length_max: float + The maximum length of subspace + success_tol: int + Number of successful suggestions (suggested points become incumbent) required for expanding subspace + failure_tol_min: int + The minimum number of failure suggestions (suggested points fails to become incumbent) required for shrinking + subspace + n_init_x_params: int + how many configurations will be used at most in the initial design (X*D). 
Used for restarting the subspace + n_candidate_max: int + Maximal Number of points used as candidates + """ + + def __init__( + self, + scenario: Scenario, + stats: Stats, + runhistory: RunHistory, + runhistory2epm: AbstractRunHistory2EPM, + model: RandomForestWithInstances, + acq_optimizer: AcquisitionFunctionMaximizer, + acquisition_func: AbstractAcquisitionFunction, + rng: np.random.RandomState, + restore_incumbent: Configuration = None, + random_configuration_chooser: RandomChooser = ChooserNoCoolDown(2.0), + predict_x_best: bool = False, + min_samples_model: int = 1, + length_init: float = 0.8, + length_min: float = 0.5**8, + length_max: float = 1.6, + success_tol: int = 3, + failure_tol_min: int = 4, + n_init_x_params: int = 2, + n_candidate_max: int = 5000, + ): + super(TurBOChooser, self).__init__( + scenario=scenario, + stats=stats, + runhistory=runhistory, + runhistory2epm=runhistory2epm, + model=model, + acquisition_func=acquisition_func, + acq_optimizer=acq_optimizer, + restore_incumbent=restore_incumbent, + rng=rng, + random_configuration_chooser=random_configuration_chooser, + predict_x_best=predict_x_best, + min_samples_model=min_samples_model, + ) + cs = self.scenario.cs # type: ignore + types, bounds = get_types(cs, instance_features=None) + + self.turbo = TuRBOSubSpace( + config_space=cs, + bounds=bounds, + hps_types=types, + model_local=model, + acq_func_local=TS, + length_init=length_init, + length_min=length_min, + length_max=length_max, + success_tol=success_tol, + failure_tol_min=failure_tol_min, + n_init_x_params=n_init_x_params, + n_candidate_max=n_candidate_max, + ) + + def choose_next(self, incumbent_value: float = None) -> typing.Iterator[Configuration]: + """ + Choose next candidate solution with TuRBO + + Parameters + ---------- + incumbent_value: float + Cost value of incumbent configuration (required for acquisition function); + If not given, it will be inferred from runhistory or predicted; + if not given and runhistory is empty, it will raise a ValueError. 
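A hedged sketch of how this chooser can be activated. The scenario and target function are assumed to exist, and forwarding smbo_kwargs through the SMAC4BB facade follows the example scripts added by this patch; treat the exact keyword names as assumptions rather than a documented API:

import numpy as np

from smac.facade.smac_bb_facade import SMAC4BB
from smac.optimizer.configuration_chooser.turbo_chooser import TurBOChooser

# `scenario` (a smac Scenario) and `objective` (the target function) are
# assumed to be defined elsewhere. The chooser class and its kwargs are
# forwarded by the facade to SMBO's new epm_chooser/epm_chooser_kwargs.
smac = SMAC4BB(
    scenario=scenario,
    tae_runner=objective,
    rng=np.random.RandomState(0),
    model_type="gp",
    smbo_kwargs={
        "epm_chooser": TurBOChooser,
        "epm_chooser_kwargs": {"length_init": 0.8, "length_min": 0.5**8},
    },
)
incumbent = smac.optimize()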
+ + Returns + ------- + Iterator + """ + self.logger.debug("Search for next configuration") + X, Y, X_configurations = self._collect_data_to_train_model() + + num_new_observations = 1 # here we only consider batch size = 1 + + new_observations = Y[-num_new_observations:] + if len(self.turbo.init_configs) > 0: + self.turbo.add_new_observations(X[-num_new_observations:], Y[-num_new_observations:]) + return self.turbo.generate_challengers() + + self.turbo.adjust_length(new_observations) + + self.turbo.add_new_observations(X[-num_new_observations:], Y[-num_new_observations:]) + + challengers = self.turbo.generate_challengers() + return challengers diff --git a/smac/optimizer/multi_objective/README.MD b/smac/optimizer/multi_objective/README.MD deleted file mode 100644 index 584198990..000000000 --- a/smac/optimizer/multi_objective/README.MD +++ /dev/null @@ -1 +0,0 @@ -Intensifiers for multi-objective tasks \ No newline at end of file diff --git a/smac/optimizer/smbo.py b/smac/optimizer/smbo.py index 32c190cd6..a3cfd34d8 100644 --- a/smac/optimizer/smbo.py +++ b/smac/optimizer/smbo.py @@ -8,16 +8,16 @@ from smac.callbacks import IncorporateRunResultCallback from smac.configspace import Configuration -from smac.epm.base_epm import AbstractEPM +from smac.epm.base_epm import BaseEPM from smac.initial_design.initial_design import InitialDesign from smac.intensification.abstract_racer import AbstractRacer, RunInfoIntent from smac.optimizer import pSMAC from smac.optimizer.acquisition import AbstractAcquisitionFunction -from smac.optimizer.ei_optimization import AcquisitionFunctionMaximizer -from smac.optimizer.epm_configuration_chooser import EPMChooser -from smac.optimizer.random_configuration_chooser import ( +from smac.optimizer.acquisition.maximizer import AcquisitionFunctionMaximizer +from smac.optimizer.configuration_chooser.epm_chooser import EPMChooser +from smac.optimizer.configuration_chooser.random_chooser import ( ChooserNoCoolDown, - RandomConfigurationChooser, + RandomChooser, ) from smac.runhistory.runhistory import RunHistory, RunInfo, RunValue from smac.runhistory.runhistory2epm import AbstractRunHistory2EPM @@ -55,7 +55,7 @@ class SMBO(object): (probably with some kind of racing on the instances) num_run: int id of this run (used for pSMAC) - model: AbstractEPM + model: BaseEPM empirical performance model acq_optimizer: AcquisitionFunctionMaximizer Optimizer of acquisition function. @@ -75,6 +75,8 @@ class SMBO(object): Choose x_best for computing the acquisition function via the model instead of via the observations. min_samples_model: int Minimum number of samples to build a model. + epm_chooser_kwargs: typing.Optional[typing.Dict] + Additional arguments passed to the EPM chooser Attributes ---------- @@ -102,15 +104,17 @@ def __init__( runhistory2epm: AbstractRunHistory2EPM, intensifier: AbstractRacer, num_run: int, - model: AbstractEPM, + model: BaseEPM, acq_optimizer: AcquisitionFunctionMaximizer, acquisition_func: AbstractAcquisitionFunction, rng: np.random.RandomState, tae_runner: BaseRunner, restore_incumbent: Configuration = None, - random_configuration_chooser: RandomConfigurationChooser = ChooserNoCoolDown(modulus=2.0), + random_configuration_chooser: RandomChooser = ChooserNoCoolDown(modulus=2.0), predict_x_best: bool = True, min_samples_model: int = 1, + epm_chooser: Type[EPMChooser] = EPMChooser, + epm_chooser_kwargs: Optional[Dict] = None, ): self.logger = logging.getLogger(self.__module__ + "."
+ self.__class__.__name__) @@ -129,8 +133,10 @@ def __init__( self.initial_design_configs = [] # type: List[Configuration] + if epm_chooser_kwargs is None: + epm_chooser_kwargs = {} # TODO: consider if we need an additional EPMChooser for multi-objective optimization - self.epm_chooser = EPMChooser( + self.epm_chooser = epm_chooser( scenario=scenario, stats=stats, runhistory=runhistory, @@ -143,6 +149,7 @@ def __init__( random_configuration_chooser=random_configuration_chooser, predict_x_best=predict_x_best, min_samples_model=min_samples_model, + **epm_chooser_kwargs, ) # Internal variable - if this is set to True it will gracefully stop SMAC @@ -295,7 +302,6 @@ def run(self) -> Configuration: # Check if there is any result, or else continue for run_info, result in self.tae_runner.get_finished_runs(): - # Add the results of the run to the run history # Additionally check for new incumbent self._incorporate_run_results(run_info, result, time_left) diff --git a/smac/optimizer/subspaces/__init__.py b/smac/optimizer/subspaces/__init__.py new file mode 100644 index 000000000..66b305a92 --- /dev/null +++ b/smac/optimizer/subspaces/__init__.py @@ -0,0 +1,656 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union + +import copy +import inspect +import logging +import math + +import numpy as np +from ConfigSpace.forbidden import ( + AbstractForbiddenComponent, + ForbiddenAndConjunction, + MultipleValueForbiddenClause, +) +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + Constant, + Hyperparameter, + NumericalHyperparameter, + OrdinalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) + +from smac.configspace import Configuration, ConfigurationSpace +from smac.epm.base_epm import BaseEPM +from smac.epm.gaussian_process.augmented import GloballyAugmentedLocalGaussianProcess +from smac.epm.gaussian_process.kernels.boing import construct_gp_kernel +from smac.epm.utils import check_subspace_points +from smac.optimizer.acquisition import EI, AbstractAcquisitionFunction + + +class LocalSubspace(ABC): + """ + A subspace that is designed for local Bayesian Optimization. If bounds_ss_cont and bounds_ss_cat are not given, + this subspace is equivalent to the original configuration space. Additionally, this subspace + supports local BO that only works with a subset of the dimensions, where the missing values are filled by the + corresponding values from incumbent_array. 
+ + Parameters + ---------- + config_space: ConfigurationSpace + raw Configuration space + bounds: List[Tuple[float, float]] + raw bounds of the Configuration space, notice that here bounds denotes the bounds of the entire space + hps_types: List[int], + types of the hyperparameters + bounds_ss_cont: np.ndarray(D_cont, 2) + subspaces bounds of continuous hyperparameters, its length is the number of continuous hyperparameters + bounds_ss_cat: List[Tuple] + subspaces bounds of categorical hyperparameters, its length is the number of categorical hyperparameters + rng: np.random.RandomState + random state + model_local: ~smac.epm.base_epm.BaseEPM + model in subspace + model_local_kwargs: Optional[Dict] + argument for subspace model + acq_func_local: ~smac.optimizer.ei_optimization.AbstractAcquisitionFunction + local acquisition function + acq_func_local_kwargs: Optional[Dict] + argument for acquisition function + activate_dims: Optional[np.ndarray] + activate dimensions in the subspace, if it is None, we preserve all the dimensions + incumbent_array: Optional[np.ndarray] + incumbent array, used when activate_dims has less dimension and this value is used to complementary the + resulted configurations + """ + + def __init__( + self, + config_space: ConfigurationSpace, + bounds: List[Tuple[float, float]], + hps_types: List[int], + bounds_ss_cont: Optional[np.ndarray] = None, + bounds_ss_cat: Optional[List[Tuple]] = None, + model_local: Union[BaseEPM, Type[BaseEPM]] = GloballyAugmentedLocalGaussianProcess, + model_local_kwargs: Dict = {}, + acq_func_local: Union[AbstractAcquisitionFunction, Type[AbstractAcquisitionFunction]] = EI, + acq_func_local_kwargs: Optional[Dict] = None, + rng: Optional[np.random.RandomState] = None, + initial_data: Optional[Tuple[np.ndarray, np.ndarray]] = None, + activate_dims: Optional[np.ndarray] = None, + incumbent_array: Optional[np.ndarray] = None, + ): + self.logger = logging.getLogger(self.__module__ + "." 
+ self.__class__.__name__) + self.cs_global = config_space + if rng is None: + self.rng = np.random.RandomState(1) + else: + self.rng = np.random.RandomState(rng.randint(0, 2**20)) + + n_hypers = len(config_space.get_hyperparameters()) + model_types = copy.deepcopy(hps_types) + model_bounds = copy.deepcopy(bounds) + + cat_dims = np.where(np.array(hps_types) != 0)[0] + cont_dims = np.where(np.array(hps_types) == 0)[0] + + if activate_dims is None: + activate_dims = np.arange(n_hypers) + activate_dims_cont = cont_dims + activate_dims_cat = cat_dims + self.activate_dims = activate_dims + activate_dims_cont_ss = np.arange(len(activate_dims_cont)) + activate_dims_cat_ss = np.arange(len(activate_dims_cat)) + else: + activate_dims_cont, _, activate_dims_cont_ss = np.intersect1d( + activate_dims, cont_dims, assume_unique=True, return_indices=True + ) + activate_dims_cat, _, activate_dims_cat_ss = np.intersect1d( + activate_dims, cat_dims, assume_unique=True, return_indices=True + ) + self.activate_dims = activate_dims + + self.activate_dims_cont = activate_dims_cont_ss + self.activate_dims_cat = activate_dims_cat_ss + + lbs = np.full(n_hypers, 0.0) + scales = np.full(n_hypers, 1.0) + + if bounds_ss_cont is None and bounds_ss_cat is None: + # cs_inner is cs + self.cs_local = config_space + self.new_config_space = False + self.bounds_ss_cont = np.tile([0.0, 1.0], [len(self.activate_dims_cont), 1]) + self.bounds_ss_cat = [] # type: Optional[List[Tuple]] + self.lbs = lbs + self.scales = scales + self.new_config = False + + else: + self.new_config = True + # we normalize the non-CategoricalHyperparameter by x = (x-lb)*scale + + hps = config_space.get_hyperparameters() + + # deal with categorical hyperparameters + for i, cat_idx in enumerate(activate_dims_cat): + hp_cat = hps[cat_idx] # type: CategoricalHyperparameter + parents = config_space.get_parents_of(hp_cat.name) + if len(parents) == 0: + can_be_inactive = False + else: + can_be_inactive = True + if bounds_ss_cat is None: + n_cats = len(hp_cat.choices) + else: + n_cats = len(bounds_ss_cat[i]) + if can_be_inactive: + n_cats = n_cats + 1 + model_types[cat_idx] = n_cats + model_bounds[cat_idx] = (int(n_cats), np.nan) + + # store the dimensions of numerical hyperparameters, UniformFloatHyperparameter and + # UniformIntegerHyperparameter + dims_cont_num = [] + idx_cont_num = [] + dims_cont_ord = [] + idx_cont_ord = [] + ord_hps = {} + + # deal with ordinal hyperparameters + for i, cont_idx in enumerate(activate_dims_cont): + param = hps[cont_idx] + if isinstance(param, OrdinalHyperparameter): + parents = config_space.get_parents_of(param.name) + if len(parents) == 0: + can_be_inactive = False + else: + can_be_inactive = True + if bounds_ss_cont is None: + n_seqs = len(param.sequence) + else: + n_seqs = bounds_ss_cont[i][1] - bounds_ss_cont[i][0] + 1 + if can_be_inactive: + model_bounds[cont_idx] = (0, int(n_seqs)) + else: + model_bounds[cont_idx] = (0, int(n_seqs) - 1) + if bounds_ss_cont is None: + lbs[cont_idx] = 0 # in subspace, it should start from 0 + ord_hps[param.name] = (0, int(n_seqs)) + else: + lbs[cont_idx] = bounds_ss_cont[i][0] # in subspace, it starts from the subspace's lower bound + ord_hps[param.name] = bounds_ss_cont[i] + dims_cont_ord.append(cont_idx) + idx_cont_ord.append(i) + else: + dims_cont_num.append(cont_idx) + idx_cont_num.append(i) + + if bounds_ss_cat is not None: + self.bounds_ss_cat = [bounds_ss_cat[act_dims_cat_ss] for act_dims_cat_ss in activate_dims_cat_ss] + else: + self.bounds_ss_cat = None + self.bounds_ss_cont =
bounds_ss_cont[activate_dims_cont_ss] if bounds_ss_cont is not None else None + + if bounds_ss_cont is None: + lbs[dims_cont_num] = 0.0 + scales[dims_cont_num] = 1.0 + else: + lbs[dims_cont_num] = bounds_ss_cont[idx_cont_num, 0] + # rescale numerical hyperparameters to [0., 1.] + scales[dims_cont_num] = 1.0 / (bounds_ss_cont[idx_cont_num, 1] - bounds_ss_cont[idx_cont_num, 0]) + + self.lbs = lbs[activate_dims] + self.scales = scales[activate_dims] + + self.cs_local = ConfigurationSpace() + hp_list = [] + idx_cont = 0 + idx_cat = 0 + + hps = config_space.get_hyperparameters() + + for idx in self.activate_dims: + param = hps[idx] + if isinstance(param, CategoricalHyperparameter): + if bounds_ss_cat is None: + hp_new = copy.deepcopy(param) + idx_cat += 1 + else: + choices = [param.choices[int(choice_idx)] for choice_idx in bounds_ss_cat[idx_cat]] + # cat_freq_arr = np.array((cats_freq[idx_cat])) + # weights = cat_freq_arr / np.sum(cat_freq_arr) + hp_new = CategoricalHyperparameter(param.name, choices=choices) # , weights=weights) + idx_cat += 1 + + elif isinstance(param, OrdinalHyperparameter): + param_seq = ord_hps.get(param.name) + raw_seq = param.sequence + ord_indices = np.arange(*param_seq) + new_seq = [raw_seq[int(round(idx))] for idx in ord_indices] + hp_new = OrdinalHyperparameter(param.name, sequence=new_seq) + idx_cont += 1 + + elif isinstance(param, Constant): + hp_new = copy.deepcopy(param) + elif isinstance(param, (UniformFloatHyperparameter, UniformIntegerHyperparameter)): + if bounds_ss_cont is None: + hp_new = copy.deepcopy(param) + idx_cont += 1 + else: + if isinstance(param, UniformFloatHyperparameter): + lower = param.lower + upper = param.upper + if param.log: + lower_log = np.log(lower) + upper_log = np.log(upper) + hp_new_lower = np.exp((upper_log - lower_log) * bounds_ss_cont[idx_cont][0] + lower_log) + hp_new_upper = np.exp((upper_log - lower_log) * bounds_ss_cont[idx_cont][1] + lower_log) + hp_new = UniformFloatHyperparameter( + name=param.name, + lower=max(hp_new_lower, lower), + upper=min(hp_new_upper, upper), + log=True, + ) + else: + hp_new_lower = (upper - lower) * bounds_ss_cont[idx_cont][0] + lower + hp_new_upper = (upper - lower) * bounds_ss_cont[idx_cont][1] + lower + hp_new = UniformFloatHyperparameter( + name=param.name, + lower=max(hp_new_lower, lower), + upper=min(hp_new_upper, upper), + log=False, + ) + idx_cont += 1 + elif isinstance(param, UniformIntegerHyperparameter): + lower = param.lower + upper = param.upper + if param.log: + lower_log = np.log(lower) + upper_log = np.log(upper) + hp_new_lower = int( + math.floor( + np.exp((upper_log - lower_log) * bounds_ss_cont[idx_cont][0] + lower_log) + ) + ) + hp_new_upper = int( + math.ceil(np.exp((upper_log - lower_log) * bounds_ss_cont[idx_cont][1] + lower_log)) + ) + + hp_new_lower_log = np.log(hp_new_lower) + hp_new_upper_log = np.log(hp_new_upper) + new_scale = (upper_log - lower_log) / (hp_new_upper_log - hp_new_lower_log) + new_lb = (hp_new_lower_log - lower_log) / (hp_new_upper_log - hp_new_lower_log) + + self.scales[idx] = new_scale + self.lbs[idx] = new_lb + + hp_new = UniformIntegerHyperparameter( + name=param.name, + lower=max(hp_new_lower, lower), + upper=min(hp_new_upper, upper), + log=True, + ) + else: + hp_new_lower = int(math.floor((upper - lower) * bounds_ss_cont[idx_cont][0])) + lower + hp_new_upper = int(math.ceil((upper - lower) * bounds_ss_cont[idx_cont][1])) + lower + + new_scale = (upper - lower) / (hp_new_upper - hp_new_lower) + new_lb = (hp_new_lower - lower) / (hp_new_upper - 
hp_new_lower) + self.scales[idx] = new_scale + self.lbs[idx] = new_lb + + hp_new = UniformIntegerHyperparameter( + name=param.name, + lower=max(hp_new_lower, lower), + upper=min(hp_new_upper, upper), + log=False, + ) + + idx_cont += 1 + else: + raise ValueError(f"Unsupported type of Hyperparameter: {type(param)}") + hp_list.append(hp_new) + + # We only consider plain hyperparameters + self.cs_local.add_hyperparameters(hp_list) + forbiddens_ss = [] + forbiddens = config_space.get_forbiddens() + for forbidden in forbiddens: + forbiden_ss = self.fit_forbidden_to_ss(cs_local=self.cs_local, forbidden=forbidden) + if forbiden_ss is not None: + forbiddens_ss.append(forbiden_ss) + if len(forbiddens_ss) > 0: + self.cs_local.add_forbidden_clauses(forbiddens_ss) + + model_kwargs = dict( + configspace=self.cs_local, + types=[model_types[activate_dim] for activate_dim in activate_dims] if model_types is not None else None, + bounds=[model_bounds[activate_dim] for activate_dim in activate_dims] if model_bounds is not None else None, + bounds_cont=np.array([[0, 1.0] for _ in range(len(activate_dims_cont))]), + bounds_cat=self.bounds_ss_cat, + seed=self.rng.randint(0, 2**20), + ) + + if inspect.isclass(model_local): + model_local_kwargs_copy = copy.deepcopy(model_local_kwargs) + if "kernel_kwargs" in model_local_kwargs_copy: + kernel_kwargs = model_local_kwargs_copy["kernel_kwargs"] + kernel = construct_gp_kernel(kernel_kwargs, activate_dims_cont_ss, activate_dims_cat_ss) + del model_local_kwargs_copy["kernel_kwargs"] + model_local_kwargs_copy["kernel"] = kernel + + if model_local_kwargs is not None: + model_kwargs.update(model_local_kwargs_copy) + + all_arguments = inspect.signature(model_local).parameters.keys() + if "bounds_cont" not in all_arguments: + del model_kwargs["bounds_cont"] + if "bounds_cat" not in all_arguments: + del model_kwargs["bounds_cat"] + model = model_local(**model_kwargs) # type: ignore + else: + model = model_local + + self.model = model + + if inspect.isclass(acq_func_local): + acq_func_kwargs = {"model": self.model} + if acq_func_local_kwargs is not None: + acq_func_kwargs.update(acq_func_local_kwargs) + acquisition_function = acq_func_local(**acq_func_kwargs) # type: ignore + else: + acquisition_function = acq_func_local + + self.acquisition_function = acquisition_function + + self.incumbent_array = incumbent_array + + self.model_x = np.empty([0, len(activate_dims)]) + self.ss_x = np.empty([0, len(activate_dims)]) + self.model_y = np.empty([0, 1]) + self.ss_y = np.empty([0, 1]) + + if initial_data is not None: + X = initial_data[0] + y = initial_data[1] + + self.add_new_observations(X, y) + + self.config_origin = "subspace" + + @staticmethod + def fit_forbidden_to_ss( + cs_local: ConfigurationSpace, forbidden: AbstractForbiddenComponent + ) -> Optional[AbstractForbiddenComponent]: + """ + Fit the forbidden to subspaces. If the target forbidden can be added to subspace, we return a new forbidden + with exactly the same type of the input forbidden. Otherwise, None is returned. 
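For illustration, a hedged sketch of how the static helper above treats a forbidden clause whose value does or does not fall into a restricted subspace; the hyperparameter name, bounds, and values are made up for this example:

from ConfigSpace import ConfigurationSpace
from ConfigSpace.forbidden import ForbiddenEqualsClause
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

from smac.optimizer.subspaces import LocalSubspace

# Global space: x in [0, 1]; local subspace: x restricted to [0.2, 0.6].
x_global = UniformFloatHyperparameter("x", lower=0.0, upper=1.0)
cs_global = ConfigurationSpace()
cs_global.add_hyperparameter(x_global)

cs_local = ConfigurationSpace()
cs_local.add_hyperparameter(UniformFloatHyperparameter("x", lower=0.2, upper=0.6))

# A clause whose value lies inside the local range is rebuilt on cs_local ...
kept = LocalSubspace.fit_forbidden_to_ss(cs_local, ForbiddenEqualsClause(x_global, 0.5))
assert kept is not None

# ... while a clause outside the local range is dropped (None is returned).
dropped = LocalSubspace.fit_forbidden_to_ss(cs_local, ForbiddenEqualsClause(x_global, 0.9))
assert dropped is None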
+ + Parameters + ---------- + cs_local: ConfigurationSpace + local configuration space of the subspace + forbidden: AbstractForbiddenComponent + forbidden to check + Returns + ------- + forbidden_ss: Optional[AbstractForbiddenComponent] + forbidden in subspaces + + """ + if isinstance(forbidden, ForbiddenAndConjunction): + forbidden_ss_components = [] + for forbid in forbidden.components: + # If any component of the conjunction is not supported by the subspace, we discard the whole conjunction + forbid_ss = LocalSubspace.fit_forbidden_to_ss(cs_local, forbid) + if forbid_ss is None: + return None + forbidden_ss_components.append(forbid_ss) + return type(forbidden)(*forbidden_ss_components) + else: + forbidden_hp_name = forbidden.hyperparameter.name + if forbidden_hp_name not in cs_local: + return None + hp_ss = cs_local.get_hyperparameter(forbidden_hp_name) + + def is_value_in_hp(value: Any, hp: Hyperparameter) -> bool: + """Check if the value is in the range of the hp.""" + if isinstance(hp, NumericalHyperparameter): + return hp.lower <= value <= hp.upper + elif isinstance(hp, OrdinalHyperparameter): + return value in hp.sequence + elif isinstance(hp, CategoricalHyperparameter): + return value in hp.choices + else: + raise NotImplementedError("Unsupported type of hyperparameter!") + + if isinstance(forbidden, MultipleValueForbiddenClause): + forbidden_values = forbidden.values + for forbidden_value in forbidden_values: + if not is_value_in_hp(forbidden_value, hp_ss): + return None + return type(forbidden)(hp_ss, forbidden_values) + else: + forbidden_value = forbidden.value + if is_value_in_hp(forbidden_value, hp_ss): + return type(forbidden)(hp_ss, forbidden_value) + return None + + def update_model(self, predict_x_best: bool = True, update_incumbent_array: bool = False) -> None: + """ + Update the model and acquisition function parameters + + Parameters + ---------- + predict_x_best: bool, + if the incumbent is acquired by the predicted mean of a surrogate model + update_incumbent_array: bool + if the incumbent_array of this subspace is replaced with the newly updated incumbent + """ + acq_func_kwargs = {"model": self.model, "num_data": len(self.ss_x)} + + if predict_x_best: + try: + mu, _ = self.model.predict(self.ss_x) + except Exception as e: + # Sometimes it could occur that LGPGA fails to predict the mean value of ss_x because of + # numerical issues + self.logger.warning(f"Failed to predict ss_x due to {e}") + mu = self.ss_y + idx_eta = np.argmin(mu) + incumbent_array = self.ss_x[idx_eta] + acq_func_kwargs.update({"incumbent_array": incumbent_array, "eta": mu[idx_eta]}) + else: + idx_eta = np.argmin(self.ss_y) + incumbent_array = self.ss_x[idx_eta] + acq_func_kwargs.update({"incumbent_array": incumbent_array, "eta": self.ss_y[idx_eta]}) + if update_incumbent_array: + if self.incumbent_array is None: + self.incumbent_array = self.ss_x[idx_eta] + else: + self.incumbent_array[self.activate_dims] = self.ss_x[idx_eta] + + self.acquisition_function.update(**acq_func_kwargs) + + def add_new_observations(self, X: np.ndarray, y: np.ndarray) -> None: + """ + Add new observations to the subspace + + Parameters + ---------- + X: np.ndarray(N,D), + new feature vector of the observations, constructed by the global configuration space + y: np.ndarray(N) + new performances of the observations + Return + ---------- + indices_in_ss:np.ndarray(N) + indices of the data points that fall inside the subspace + """ + if len(X.shape) == 1: + X = X[np.newaxis, :] + if len(y.shape) == 1: + y = y[:, np.newaxis] + + X = X[:,
self.activate_dims] + + ss_indices = check_subspace_points( + X=X, + cont_dims=self.activate_dims_cont, + cat_dims=self.activate_dims_cat, + bounds_cont=self.bounds_ss_cont, + bounds_cat=self.bounds_ss_cat, + ) + + X = self.normalize_input(X=X) + + self.model_x = np.vstack([self.model_x, X]) + self.model_y = np.vstack([self.model_y, y]) + + self.ss_x = np.vstack([self.ss_x, X[ss_indices]]) + self.ss_y = np.vstack([self.ss_y, y[ss_indices]]) + + def update_incumbent_array(self, new_incumbent: np.ndarray) -> None: + """ + Update a new incumbent array. The array is generated from the global configuration + + Parameters + ---------- + new_incumbent: np.ndarray(D) + new incumbent, which correspondences to the global configuration + """ + self.incumbent_array = self.normalize_input(X=new_incumbent) + + def generate_challengers(self, **optimizer_kwargs: Any) -> Iterator: + """ + Generate a list of challengers that will be transformed into the global configuration space + + Parameters + ---------- + optimizer_kwargs: Any + additional configurations passed to 'self._generate_challengers' + + Returns + ------- + A list of challengers in the global configuration space + + """ + challengers = self._generate_challengers(**optimizer_kwargs) + return ChallengerListLocal( + cs_local=self.cs_local, + cs_global=self.cs_global, + challengers=challengers, + config_origin=self.config_origin, + incumbent_array=self.incumbent_array, + ) + + @abstractmethod + def _generate_challengers(self, **optimizer_kwargs: Dict) -> List[Tuple[float, Configuration]]: + """Generate new challengers list for this subspace""" + raise NotImplementedError + + def normalize_input(self, X: np.ndarray) -> np.ndarray: + """ + Normalize X to fit the local configuration space + + Parameters + ---------- + X: np.ndarray(N,D) + input X, configurations arrays + Returns + ------- + X_normalized: np.ndarray(N,D) + normalized input X + """ + if not self.new_config: + return X + + if len(X.shape) == 1: + X = X[np.newaxis, :] + + # normalize X + X_normalized = (X - self.lbs) * self.scales + if self.bounds_ss_cat is not None: + # normalize categorical function, for instance, if bounds_subspace[i] is a categorical bound contains + # elements [1, 3, 5], then we map 1->0, 3->1, 5->2 + for cat_idx, cat_bound in zip(self.activate_dims_cat, self.bounds_ss_cat): + X_i = X_normalized[:, cat_idx] + cond_list = [X_i == cat for cat in cat_bound] + choice_list = np.arange(len(cat_bound)) + X_i = np.select(cond_list, choice_list) + X_normalized[:, cat_idx] = X_i + + return X_normalized + + +class ChallengerListLocal(Iterator): + def __init__( + self, + cs_local: ConfigurationSpace, + cs_global: ConfigurationSpace, + challengers: List[Tuple[float, Configuration]], + config_origin: str, + incumbent_array: Optional[np.ndarray] = None, + ): + """ + A Challenger list to convert the configuration from the local configuration space to the global configuration + space + + Parameters + ---------- + cs_local: ConfigurationSpace + local configuration space + cs_global: ConfigurationSpace + global configuration space + challengers: List[Tuple[float, Configuration]], + challenger lists + config_origin: str + configuration origin + incumbent_array: Optional[np.ndarray] = None, + global incumbent array, used when cs_local and cs_global have different number of dimensions and we need to + supplement the missing values. 
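A minimal NumPy sketch of the rescaling performed by normalize_input above, assuming one continuous dimension with subspace bounds [0.2, 0.6] and one categorical dimension whose subspace bound is (1, 3, 5); the arrays are illustrative only:

import numpy as np

# Continuous part: x_norm = (x - lb) * scale with scale = 1 / (ub - lb).
lb, ub = 0.2, 0.6
scale = 1.0 / (ub - lb)
x_cont = np.array([0.2, 0.4, 0.6])
print((x_cont - lb) * scale)               # -> [0.  0.5 1. ]

# Categorical part: remap the surviving choices (1, 3, 5) to indices (0, 1, 2),
# mirroring the np.select construction in normalize_input.
cat_bound = (1, 3, 5)
x_cat = np.array([5, 1, 3])
cond_list = [x_cat == cat for cat in cat_bound]
choice_list = np.arange(len(cat_bound))
print(np.select(cond_list, choice_list))   # -> [2 0 1]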
+ """ + self.cs_local = cs_local + self.challengers = challengers + self.cs_global = cs_global + self._index = 0 + self.config_origin = config_origin + # In case cs_in and cs_out have different dimensions + self.expand_dims = len(cs_global.get_hyperparameters()) != len(cs_local.get_hyperparameters()) + self.incumbent_array = incumbent_array + + if self.expand_dims and self.incumbent_array is None: + raise ValueError( + "Incumbent array must be provided if the global configuration space has more " + "hyperparameters then the local configuration space" + ) + + def __next__(self) -> Configuration: + if self.challengers is not None and self._index == len(self.challengers): + raise StopIteration + challenger = self.challengers[self._index][1] + self._index += 1 + value = challenger.get_dictionary() + if self.expand_dims: + incumbent_array = Configuration( + configuration_space=self.cs_global, vector=self.incumbent_array + ).get_dictionary() + # we replace the cooresponding value in incumbent array with the value suggested by our optimizer + for k in value.keys(): + incumbent_array[k] = value[k] + config = Configuration(configuration_space=self.cs_global, values=incumbent_array) + else: + config = Configuration(configuration_space=self.cs_global, values=value) + if self.config_origin is not None: + config.origin = self.config_origin + else: + config.origin = challenger.origin + return config + + def __len__(self) -> int: + if self.challengers is None: + self.challengers = [] + return len(self.challengers) - self._index diff --git a/smac/optimizer/subspaces/boing_subspace.py b/smac/optimizer/subspaces/boing_subspace.py new file mode 100644 index 000000000..b9c55fd12 --- /dev/null +++ b/smac/optimizer/subspaces/boing_subspace.py @@ -0,0 +1,175 @@ +from typing import Dict, List, Optional, Tuple, Type, Union + +import inspect + +import numpy as np +from ConfigSpace import ConfigurationSpace + +from smac.configspace import Configuration +from smac.epm.base_epm import BaseEPM +from smac.epm.gaussian_process.augmented import GloballyAugmentedLocalGaussianProcess +from smac.optimizer.acquisition import EI, AbstractAcquisitionFunction +from smac.optimizer.acquisition.maximizer import ( + AcquisitionFunctionMaximizer, + LocalAndSortedRandomSearch, +) +from smac.optimizer.subspaces import LocalSubspace + + +class BOinGSubspace(LocalSubspace): + """ + Subspace for BOinG optimizer. Each time we create a new epm model for the subspace and optimize to maximize the + acquisition function inside this subregion. + + Parameters + ---------- + acq_optimizer_local: Optional[AcquisitionFunctionMaximizer] + Subspace optimizer, used to give a set of suggested points. Unlike the optimizer implemented in epm_chooser, + this optimizer does not require runhistory objects. 
+ acq_optimizer_local_kwargs + Parameters for acq_optimizer_local + """ + + def __init__( + self, + config_space: ConfigurationSpace, + bounds: List[Tuple[float, float]], + hps_types: List[int], + bounds_ss_cont: Optional[np.ndarray] = None, + bounds_ss_cat: Optional[List[Tuple]] = None, + model_local: Union[BaseEPM, Type[BaseEPM]] = GloballyAugmentedLocalGaussianProcess, + model_local_kwargs: Dict = {}, + acq_func_local: Union[AbstractAcquisitionFunction, Type[AbstractAcquisitionFunction]] = EI, + acq_func_local_kwargs: Optional[Dict] = None, + rng: Optional[np.random.RandomState] = None, + initial_data: Optional[Tuple[np.ndarray, np.ndarray]] = None, + activate_dims: Optional[np.ndarray] = None, + incumbent_array: Optional[np.ndarray] = None, + acq_optimizer_local: Optional[AcquisitionFunctionMaximizer] = None, + acq_optimizer_local_kwargs: Optional[dict] = None, + ): + super(BOinGSubspace, self).__init__( + config_space=config_space, + bounds=bounds, + hps_types=hps_types, + bounds_ss_cont=bounds_ss_cont, + bounds_ss_cat=bounds_ss_cat, + model_local=model_local, + model_local_kwargs=model_local_kwargs, + acq_func_local=acq_func_local, + acq_func_local_kwargs=acq_func_local_kwargs, + rng=rng, + initial_data=initial_data, + activate_dims=activate_dims, + incumbent_array=incumbent_array, + ) + if bounds_ss_cont is None and bounds_ss_cat is None: + self.config_origin = None # type: ignore + else: + self.config_origin = "BOinG" + if isinstance(self.model, GloballyAugmentedLocalGaussianProcess): + num_inducing_points = min(max(min(2 * len(self.activate_dims_cont), 10), self.model_x.shape[0] // 20), 50) + self.model.update_attribute(num_inducing_points=num_inducing_points) + + subspace_acq_func_opt_kwargs = { + "acquisition_function": self.acquisition_function, + "config_space": self.cs_local, # type: ignore[attr-defined] # noqa F821 + "rng": np.random.RandomState(self.rng.randint(1, 2**20)), + } + + if isinstance(acq_optimizer_local, AcquisitionFunctionMaximizer): + # we copy the attribute of the local acquisition function optimizer but replace it with our local model + # setting. This helps a better exploration in the beginning. 
+ for key in inspect.signature(acq_optimizer_local.__init__).parameters.keys(): # type: ignore[misc] + if key == "self": + continue + elif key in subspace_acq_func_opt_kwargs: + continue + elif hasattr(acq_optimizer_local, key): + subspace_acq_func_opt_kwargs[key] = getattr(acq_optimizer_local, key) + self.acq_optimizer_local = type(acq_optimizer_local)(**subspace_acq_func_opt_kwargs) + else: + if acq_optimizer_local is None: + acq_optimizer_local = LocalAndSortedRandomSearch # type: ignore + if acq_optimizer_local_kwargs is not None: + subspace_acq_func_opt_kwargs.update(acq_optimizer_local_kwargs) + else: + # Here are the settings used by squirrel-optimizer + # https://github.com/automl/Squirrel-Optimizer-BBO-NeurIPS20-automlorg/blob/main/squirrel-optimizer/smac_optim.py + n_sls_iterations = { + 1: 10, + 2: 10, + 3: 10, + 4: 10, + 5: 10, + 6: 10, + 7: 8, + 8: 6, + }.get(len(self.cs_local.get_hyperparameters()), 5) + + subspace_acq_func_opt_kwargs.update( + {"n_steps_plateau_walk": 5, "n_sls_iterations": n_sls_iterations} + ) + + elif inspect.isclass(acq_optimizer_local) and issubclass(acq_optimizer_local, AcquisitionFunctionMaximizer): + if acq_optimizer_local_kwargs is not None: + subspace_acq_func_opt_kwargs.update(acq_optimizer_local_kwargs) + else: + raise TypeError( + f"acq_optimizer_local must be None or an object implementing the " + f"AcquisitionFunctionMaximizer interface, but is '{acq_optimizer_local}'" + ) + + self.acq_optimizer_local = acq_optimizer_local(**subspace_acq_func_opt_kwargs) # type: ignore + + def _generate_challengers(self, **optimizer_kwargs: Dict) -> List[Tuple[float, Configuration]]: + """ + Generate new challengers list for this subspace. This optimizer is similar to + smac.optimizer.acquisition.maximizer.LocalAndSortedRandomSearch except that we do not read past evaluations + from the runhistory but instead use the data stored inside this subspace directly. + """ + self.model.train(self.model_x, self.model_y) + self.update_model(predict_x_best=True, update_incumbent_array=True) + num_points_rs = 1000 + + if isinstance(self.acq_optimizer_local, LocalAndSortedRandomSearch): + next_configs_random = self.acq_optimizer_local.random_search._maximize( + runhistory=None, # type: ignore + stats=None, # type: ignore + num_points=num_points_rs, + _sorted=True, + ) + if len(self.ss_x) == 0: + init_points_local = self.cs_local.sample_configuration(size=self.acq_optimizer_local.n_sls_iterations) + else: + previous_configs = [Configuration(configuration_space=self.cs_local, vector=ss_x) for ss_x in self.ss_x] + init_points_local = self.acq_optimizer_local.local_search._get_init_points_from_previous_configs( + self.acq_optimizer_local.n_sls_iterations, previous_configs, next_configs_random + ) + + configs_acq_local = self.acq_optimizer_local.local_search._do_search(init_points_local) + + # shuffle for random tie-break + self.rng.shuffle(configs_acq_local) + + # sort according to acq value + configs_acq_local.sort(reverse=True, key=lambda x: x[0]) + + for _, inc in configs_acq_local: + inc.origin = "Local Search" + + # Having the configurations from random search, sorted by their + # acquisition function value is important for the first few iterations + # of SMAC. As long as the random forest predicts constant value, we + # want to use only random configurations. Having them at the beginning of
Having them at the begging of + # the list ensures this (even after adding the configurations by local + # search, and then sorting them) + next_configs_by_acq_value = next_configs_random + configs_acq_local + + next_configs_by_acq_value.sort(reverse=True, key=lambda x: x[0]) + self.logger.debug( + "First 5 acq func (origin) values of selected configurations: %s", + str([[_[0], _[1].origin] for _ in next_configs_by_acq_value[:5]]), + ) + return next_configs_by_acq_value + else: + return self.acq_optimizer_local._maximize(None, None, num_points_rs) # type: ignore diff --git a/smac/optimizer/subspaces/turbo_subspace.py b/smac/optimizer/subspaces/turbo_subspace.py new file mode 100644 index 000000000..ddacebf46 --- /dev/null +++ b/smac/optimizer/subspaces/turbo_subspace.py @@ -0,0 +1,319 @@ +from typing import Dict, List, Optional, Tuple, Type, Union + +import math +import warnings + +import numpy as np +from ConfigSpace.hyperparameters import NumericalHyperparameter +from ConfigSpace.util import deactivate_inactive_hyperparameters +from scipy.stats.qmc import LatinHypercube, Sobol + +from smac.configspace import Configuration, ConfigurationSpace +from smac.epm.base_epm import BaseEPM +from smac.epm.gaussian_process import GaussianProcess +from smac.epm.gaussian_process.augmented import GloballyAugmentedLocalGaussianProcess +from smac.epm.gaussian_process.gpytorch import GPyTorchGaussianProcess +from smac.epm.gaussian_process.mcmc import MCMCGaussianProcess +from smac.optimizer.acquisition import TS, AbstractAcquisitionFunction +from smac.optimizer.subspaces import LocalSubspace + +warnings.filterwarnings("ignore", message="The balance properties of Sobol' points require" " n to be a power of 2.") + + +class TuRBOSubSpace(LocalSubspace): + """ + Subspace designed for TurBO: + D. Eriksson et al. Scalable Global Optimization via Local Bayesian Optimization + https://proceedings.neurips.cc/paper/2019/hash/6c990b7aca7bc7058f5e98ea909e924b-Abstract.html + + The hyperparameters follow the illustration under supplementary D, `TuRBO details`. + + Parameters + ---------- + length_init: float + initialized length of subspace + length_min: float + the minimal length of subspace, if the subspace has a length smaller than this value, turbo will restart + length_max: float + the maximal length of subspace + success_tol: float + the number of successive successful evaluations required for expanding the subregion + failure_tol_min: float + the minimal number of successive successful evaluations required for shrinking the subregion (otherwise + this value is set as number of feature dimensions) + n_init_x_params: int + how many configurations will be used at most in the initial design (X*D). 
Used for restarting the subspace + n_candidate_max: int + The maximal Number of points used as candidates + """ + + def __init__( + self, + config_space: ConfigurationSpace, + bounds: List[Tuple[float, float]], + hps_types: List[int], + bounds_ss_cont: Optional[np.ndarray] = None, + bounds_ss_cat: Optional[List[Tuple]] = None, + model_local: Union[BaseEPM, Type[BaseEPM]] = GPyTorchGaussianProcess, + model_local_kwargs: Dict = {}, + acq_func_local: Union[AbstractAcquisitionFunction, Type[AbstractAcquisitionFunction]] = TS, + acq_func_local_kwargs: Optional[Dict] = None, + rng: Optional[np.random.RandomState] = None, + initial_data: Optional[Tuple[np.ndarray, np.ndarray]] = None, + activate_dims: Optional[np.ndarray] = None, + incumbent_array: Optional[np.ndarray] = None, + length_init: float = 0.8, + length_min: float = 0.5**7, + length_max: float = 1.6, + success_tol: int = 3, + failure_tol_min: int = 4, + n_init_x_params: int = 2, + n_candidate_max: int = 5000, + ): + self.num_valid_observations = 0 + super(TuRBOSubSpace, self).__init__( + config_space=config_space, + bounds=bounds, + hps_types=hps_types, + bounds_ss_cont=bounds_ss_cont, + bounds_ss_cat=bounds_ss_cat, + model_local=model_local, + model_local_kwargs=model_local_kwargs, + acq_func_local=acq_func_local, + acq_func_local_kwargs=acq_func_local_kwargs, + rng=rng, + initial_data=initial_data, + activate_dims=activate_dims, + incumbent_array=incumbent_array, + ) + hps = config_space.get_hyperparameters() + for hp in hps: + if not isinstance(hp, NumericalHyperparameter): + raise ValueError("Current TurBO Optimizer only supports Numerical Hyperparameters") + if len(config_space.get_conditions()) > 0 or len(config_space.get_forbiddens()) > 0: + raise ValueError("Currently TurBO does not support Conditional or Forbidden Hyperparameters") + + n_hps = len(self.activate_dims) + self.n_dims = n_hps + self.n_init = n_init_x_params * self.n_dims + self.n_candidates = min(100 * n_hps, n_candidate_max) + + self.failure_tol = max(failure_tol_min, n_hps) + self.success_tol = success_tol + self.length = length_init + self.length_init = length_init + self.length_min = length_min + self.length_max = length_max + self._restart_turbo(n_init_points=self.n_init) + + if initial_data is not None: + self.add_new_observations(initial_data[0], initial_data[1]) + self.init_configs = [] # type: List[Configuration] + + self.lb = np.zeros(self.n_dims) + self.ub = np.ones(self.n_dims) + self.config_origin = "TuRBO" + + def _restart_turbo( + self, + n_init_points: int, + ) -> None: + """ + Restart TurBO with a certain number of initialized points. 
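Both samplers used by this subspace come from scipy.stats.qmc: _restart_turbo below draws its initial points from a Latin hypercube, and _generate_challengers further down rescales a scrambled Sobol sequence into the current trust region. A stand-alone sketch with illustrative dimensions and bounds:

import numpy as np
from scipy.stats.qmc import LatinHypercube, Sobol

n_dims = 3

# Latin hypercube points in [0, 1]^d, as drawn when (re)starting a subspace.
init_points = LatinHypercube(d=n_dims, seed=42).random(n=2 * n_dims)

# Scrambled Sobol candidates, rescaled into a trust region around a center.
center = np.full(n_dims, 0.5)
length = 0.4
lb = np.clip(center - length / 2, 0.0, 1.0)
ub = np.clip(center + length / 2, 0.0, 1.0)
sobol = Sobol(d=n_dims, scramble=True, seed=42).random(8)   # 8 is a power of two
candidates = sobol * (ub - lb) + lb   # every candidate lies inside [lb, ub]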
New points are initialized with latin hypercube + + Parameters + ---------- + n_init_points: int + number of points required for initializing a new subspace + """ + self.logger.debug("Current length is smaller than the minimal value, a new TuRBO restarts") + self.success_count = 0 + self.failure_count = 0 + + self.num_eval_this_round = 0 + self.last_incumbent_value = np.inf + self.length = self.length_init + + self.num_valid_observations = 0 + + init_vectors = LatinHypercube(d=self.n_dims, seed=np.random.seed(self.rng.randint(1, 2**20))).random( + n=n_init_points + ) + + self.init_configs = [Configuration(self.cs_local, vector=init_vector) for init_vector in init_vectors] + + def adjust_length(self, new_observation: Union[float, np.ndarray]) -> None: + """ + Adjust the subspace length according to the performance of the latest suggested values + Parameters + ---------- + new_observation: Union[float, np.ndarray] + new observations + """ + # see Section 2: 'Trust regions' + optim_observation = new_observation if np.isscalar(new_observation) else np.min(new_observation) + + # We define a ``success'' as a candidate that improves upon $\xbest$, and a ``failure'' as a candidate that + # does not. + if optim_observation < np.min(self.model_y) - 1e-3 * math.fabs(np.min(self.model_y)): + self.logger.debug("New suggested value is better than the incumbent, success_count increases") + self.success_count += 1 + self.failure_count = 0 + else: + self.logger.debug("New suggested value is worse than the incumbent, failure_count increases") + self.success_count = 0 + self.failure_count += 1 + + # After $\tau_{\text{succ}}$ consecutive successes, we double the size of the TR, + # i.e., $\len \gets \min\{\len_{\textrm{max}}, 2\len\}$. + if self.success_count == self.success_tol: # Expand trust region + self.length = min([2.0 * self.length, self.length_max]) + self.success_count = 0 + self.logger.debug(f"Subspace length expands to {self.length}") + # After $\tau_{\text{fail}}$ consecutive failures, we halve the size of the TR: $\len \gets \len/2$. + # We reset the success and failure counters to zero after we change the size of the TR. 
+ elif self.failure_count == self.failure_tol: # Shrink trust region + self.length /= 2.0 + self.failure_count = 0 + self.logger.debug(f"Subspace length shrinks to {self.length}") + + def _generate_challengers( # type: ignore[override] + self, _sorted: bool = True + ) -> List[Tuple[float, Configuration]]: + """ + Generate new challengers list for this subspace + + Parameters + ---------- + _sorted: bool + if the generated challengers are sorted by their acquisition function values + """ + if len(self.init_configs) > 0: + config_next = self.init_configs.pop() + return [(0, config_next)] + + if self.length < self.length_min: + self._restart_turbo(n_init_points=self.n_init) + config_next = self.init_configs.pop() + return [(0, config_next)] + + self.model.train(self.model_x[-self.num_valid_observations :], self.model_y[-self.num_valid_observations :]) + self.update_model(predict_x_best=False, update_incumbent_array=True) + + sobol_gen = Sobol(d=self.n_dims, scramble=True, seed=self.rng.randint(low=0, high=10000000)) + sobol_seq = sobol_gen.random(self.n_candidates) + + # adjust length according to kernel length + if isinstance( + self.model, + (GaussianProcess, MCMCGaussianProcess, GloballyAugmentedLocalGaussianProcess, GPyTorchGaussianProcess), + ): + if isinstance(self.model, GaussianProcess): + kernel_length = np.exp(self.model.hypers[1:-1]) + elif isinstance(self.model, MCMCGaussianProcess): + kernel_length = np.exp(np.mean((np.array(self.model.hypers)[:, 1:-1]), axis=0)) + elif isinstance(self.model, (GPyTorchGaussianProcess, GloballyAugmentedLocalGaussianProcess)): + kernel_length = self.model.kernel.base_kernel.lengthscale.cpu().detach().numpy() + + # See section 'Trust regions' of section 2 + # $\len_i = \lambda_i L / (\prod_{j=1}^d \lambda_j)^{1/d}$, + # We now have weights.prod() = 1 + # This makes the result more stable + subspace_scale = kernel_length / np.prod(np.power(kernel_length, 1.0 / self.n_dims)) + + subspace_length = self.length * subspace_scale + + subspace_lb = np.clip(self.incumbent_array - subspace_length * 0.5, 0.0, 1.0) + subspace_ub = np.clip(self.incumbent_array + subspace_length * 0.5, 0.0, 1.0) + sobol_seq = sobol_seq * (subspace_ub - subspace_lb) + subspace_lb + + prob_perturb = min(20.0 / self.n_dims, 1.0) + design = self._perturb_samples(prob_perturb, sobol_seq) + + # Only numerical hyperpameters are considered for TuRBO, we don't need to transfer the vectors to fit the + # requirements of other sorts of hyperparameters + configs = [] + for vector in design: + conf = deactivate_inactive_hyperparameters( + configuration=None, configuration_space=self.cs_local, vector=vector + ) + configs.append(conf) + + if _sorted: + return self._sort_configs_by_acq_value(configs) + else: + return [(0, configs[i]) for i in range(len(configs))] + + def _perturb_samples(self, prob_perturb: float, design: np.ndarray) -> np.ndarray: + """ + See Supplementary D, 'TuRBO details': + In order to not perturb all coordinates at once, we use the value in the Sobol sequence + with probability min{1,20/d} for a given candidate and dimension, and the value of the center otherwise + + perturb the generated design with the incumbent array accordingly + + Parameters + ---------- + prob_perturb: float + probability that a design is perturbed by the incumbent value + design: np.ndarray(self.n_candidates, self.n_dims) + design array to be perturbed + Returns + ------- + design_perturbed: np.ndarray(self.n_candidates, self.n_dims) + perturbed design array + """ + # we will use masked array, thus 
the indices that will be replaced will be marked with True + mask = self.rng.rand(self.n_candidates, self.n_dims) > prob_perturb + + ind = np.where(np.sum(mask, axis=1) == self.n_dims)[0] + if self.n_dims == 1: + mask[ind, 0] = 0 + else: + # ensure that no candidate will be completely replaced by the incumbent value + mask[ind, self.rng.randint(0, self.n_dims, size=len(ind))] = 0 + return np.ma.array(design, mask=mask, fill_value=self.incumbent_array).filled() + + def add_new_observations(self, X: np.ndarray, y: np.ndarray) -> None: + """ + Add new observations to the subspace, meanwhile, we add the number of valid observation to ensure that the + subspace could be scaled properly. + + Parameters + ---------- + X: np.ndarray(N,D), + new feature vector of the observations, constructed by the global configuration space + y: np.ndarray(N) + new performances of the observations + Return + ---------- + indices_in_ss:np.ndarray(N) + indices of data that included in subspaces + """ + super(TuRBOSubSpace, self).add_new_observations(X, y) + self.num_valid_observations += len(y) + + def _sort_configs_by_acq_value(self, configs: List[Configuration]) -> List[Tuple[float, Configuration]]: + """Sort the given configurations by acquisition value + comes from smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer + + Parameters + ---------- + configs : list(Configuration) + + Returns + ------- + list: (acquisition value, Candidate solutions), + ordered by their acquisition function value + """ + acq_values = self.acquisition_function(configs) + + # From here + # http://stackoverflow.com/questions/20197990/how-to-make-argsort-result-to-be-random-between-equal-values + random = self.rng.rand(len(acq_values)) + # Last column is primary sort key! + indices = np.lexsort((random.flatten(), acq_values.flatten())) + + # Cannot use zip here because the indices array cannot index the + # rand_configs list, because the second is a pure python list + return [(acq_values[ind][0], configs[ind]) for ind in indices[::-1]] diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py index a84a06a7c..75a44d292 100644 --- a/smac/runhistory/runhistory.py +++ b/smac/runhistory/runhistory.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import ( Any, Dict, @@ -19,9 +21,9 @@ import numpy as np from smac.configspace import Configuration, ConfigurationSpace +from smac.multi_objective.utils import normalize_costs from smac.tae import StatusType from smac.utils.logging import PickableLoggerAdapter -from smac.utils.multi_objective import normalize_costs __author__ = "Marius Lindauer" __copyright__ = "Copyright 2015, ML4AAD" @@ -30,6 +32,8 @@ __email__ = "lindauer@cs.uni-freiburg.de" __version__ = "0.0.1" +logger = PickableLoggerAdapter(__name__) + # NOTE class instead of collection to have a default value for budget in RunKey class RunKey(collections.namedtuple("RunKey", ["config_id", "instance_id", "seed", "budget"])): @@ -192,9 +196,9 @@ def __init__( self._n_id = 0 # Stores cost for each configuration ID - self._cost_per_config = {} # type: Dict[int, float] + self._cost_per_config = {} # type: Dict[int, float | list[float]] # Stores min cost across all budgets for each configuration ID - self._min_cost_per_config = {} # type: Dict[int, float] + self._min_cost_per_config = {} # type: Dict[int, float | list[float]] # runs_per_config maps the configuration ID to the number of runs for that configuration # and is necessary for computing the moving average self.num_runs_per_config = {} # type: 
Dict[int, int] @@ -207,111 +211,35 @@ def __init__( self.num_obj = -1 # type: int self.objective_bounds = [] # type: List[Tuple[float, float]] - def add( - self, - config: Configuration, - cost: Union[int, float, list, np.ndarray], - time: float, - status: StatusType, - instance_id: Optional[str] = None, - seed: Optional[int] = None, - budget: float = 0.0, - starttime: float = 0.0, - endtime: float = 0.0, - additional_info: Optional[Dict] = None, - origin: DataOrigin = DataOrigin.INTERNAL, - force_update: bool = False, - ) -> None: - """Adds a data of a new target algorithm (TA) run; it will update data if the same key - values are used (config, instance_id, seed) - - Parameters - ---------- - config : dict (or other type -- depending on config space module) - Parameter configuration - cost: Union[int, float, list, np.ndarray] - Cost of TA run (will be minimized) - time: float - Runtime of TA run - status: str - Status in {SUCCESS, TIMEOUT, CRASHED, ABORT, MEMOUT} - instance_id: str - String representing an instance (default: None) - seed: int - Random seed used by TA (default: None) - budget: float - budget (cutoff) used in intensifier to limit TA (default: 0) - starttime: float - starting timestamp of TA evaluation - endtime: float - ending timestamp of TA evaluation - additional_info: dict - Additional run infos (could include further returned - information from TA or fields such as start time and host_id) - origin: DataOrigin - Defines how data will be used. - force_update: bool (default: False) - Forces the addition of a config to the history - """ - if config is None: - raise TypeError("Configuration to add to the runhistory must not be None") - elif not isinstance(config, Configuration): - raise TypeError( - "Configuration to add to the runhistory is not of type Configuration, but %s" % type(config) - ) + def __contains__(self, k: object) -> bool: + """Dictionary semantics for `k in runhistory`""" + return k in self.data - # Squeeze is important to reduce arrays with one element - # to scalars. - cost = np.asarray(cost).squeeze() + def __getitem__(self, k: RunKey) -> RunValue: + """Dictionary semantics for `v = runhistory[k]`""" + return self.data[k] - # Get the config id - config_id_tmp = self.config_ids.get(config) - if config_id_tmp is None: - self._n_id += 1 - self.config_ids[config] = self._n_id - config_id = cast(int, self.config_ids.get(config)) - self.ids_config[self._n_id] = config - else: - config_id = cast(int, config_id_tmp) + def __iter__(self) -> Iterator[RunKey]: + """Dictionary semantics for `for k in runhistory.keys()`, enables. 
- if self.num_obj == -1: - self.num_obj = np.size(cost) - else: - if np.size(cost) != self.num_obj: - raise ValueError( - f"Cost is not of the same length ({np.size(cost)}) as the number " f"of objectives ({self.num_obj})" - ) + .items() + """ + return iter(self.data.keys()) - k = RunKey(config_id, instance_id, seed, budget) - v = RunValue(cost.tolist(), time, status, starttime, endtime, additional_info) + def __len__(self) -> int: + """Enables the `len(runhistory)`""" + return len(self.data) - # Construct keys and values for the data dictionary - for key, value in ( - ("config", config.get_dictionary()), - ("config_id", config_id), - ("instance_id", instance_id), - ("seed", seed), - ("budget", budget), - ("cost", cost.tolist()), - ("time", time), - ("status", status), - ("starttime", starttime), - ("endtime", endtime), - ("additional_info", additional_info), - ("origin", config.origin), - ): - self._check_json_serializable(key, value, EnumEncoder, k, v) + def empty(self) -> bool: + """Check whether or not the RunHistory is empty. - # Each runkey is supposed to be used only once. Repeated tries to add - # the same runkey will be ignored silently if not capped. - if self.overwrite_existing_runs or force_update or self.data.get(k) is None: - self._add(k, v, status, origin) - elif status != StatusType.CAPPED and self.data[k].status == StatusType.CAPPED: - # overwrite capped runs with uncapped runs - self._add(k, v, status, origin) - elif status == StatusType.CAPPED and self.data[k].status == StatusType.CAPPED and cost > self.data[k].cost: - # overwrite if censored with a larger cutoff - self._add(k, v, status, origin) + Returns + ------- + emptiness: bool + True if no runs have been added to the RunHistory, + False otherwise + """ + return len(self.data) == 0 def _check_json_serializable( self, @@ -384,24 +312,174 @@ def _add(self, k: RunKey, v: RunValue, status: StatusType, origin: DataOrigin) - # append new budget to existing inst-seed-key dict self._configid_to_inst_seed_budget[k.config_id][is_k].append(k.budget) - # Update costs in multi-objective setting s.t. all costs are - # normalized the same. - # TODO: This is only a temporary solution because the caching is not used.
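The dunder methods added above give RunHistory a read-only mapping interface; a short usage sketch (the runhistory is assumed to be filled by SMAC during an optimization run, so the loop body only executes once runs exist):

from smac.runhistory.runhistory import RunHistory

rh = RunHistory()
print(rh.empty())             # True: no runs have been added yet

for run_key in rh:            # __iter__ yields RunKey entries
    run_value = rh[run_key]   # __getitem__ returns the matching RunValue
    print(run_key.config_id, run_value.cost, run_value.status)

print(len(rh))                # __len__: number of stored runs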
- if self.num_obj > 1: - self.update_all_costs() + # if budget is used, then update cost instead of incremental updates + if not self.overwrite_existing_runs and k.budget == 0: + # assumes an average across runs as cost function aggregation, this is used for + # algorithm configuration (incremental updates are used to save time as getting the + # cost for > 100 instances is high) + self.incremental_update_cost(self.ids_config[k.config_id], v.cost) else: - # if budget is used, then update cost instead of incremental updates - if not self.overwrite_existing_runs and k.budget == 0: - # assumes an average across runs as cost function aggregation, this is used for - # algorithm configuration (incremental updates are used to save time as getting the - # cost for > 100 instances is high) - self.incremental_update_cost(self.ids_config[k.config_id], v.cost) - else: - # this is when budget > 0 (only successive halving and hyperband so far) - self.update_cost(config=self.ids_config[k.config_id]) - if k.budget > 0: - if self.num_runs_per_config[k.config_id] != 1: # This is updated in update_cost - raise ValueError("This should not happen!") + # this is when budget > 0 (only successive halving and hyperband so far) + self.update_cost(config=self.ids_config[k.config_id]) + if k.budget > 0: + if self.num_runs_per_config[k.config_id] != 1: # This is updated in update_cost + raise ValueError("This should not happen!") + + def _cost( + self, + config: Configuration, + instance_seed_budget_keys: Optional[Iterable[InstSeedBudgetKey]] = None, + ) -> list[list[float] | list[list[float]]]: + """Returns a list of all costs for the given config for further calculations. + The costs are directly taken from the runhistory data. + + Parameters + ---------- + config : Configuration + Configuration to calculate objective for. + instance_seed_budget_keys : list, optional (default=None) + List of tuples of instance-seeds-budget keys. If None, the run_history is + queried for all runs of the given configuration. + + Returns + ------- + Costs: list[list[float] | list[list[float]]] + List of all found costs. In case of multi-objective, the list contains lists. 
+ """ + try: + id_ = self.config_ids[config] + except KeyError: # challenger was not running so far + return [] + + if instance_seed_budget_keys is None: + instance_seed_budget_keys = self.get_runs_for_config(config, only_max_observed_budget=True) + + costs = [] + for i, r, b in instance_seed_budget_keys: + k = RunKey(id_, i, r, b) + costs.append(self.data[k].cost) + + return costs + + def add( + self, + config: Configuration, + cost: Union[int, float, list, np.ndarray], + time: float, + status: StatusType, + instance_id: Optional[str] = None, + seed: Optional[int] = None, + budget: float = 0.0, + starttime: float = 0.0, + endtime: float = 0.0, + additional_info: Optional[Dict] = None, + origin: DataOrigin = DataOrigin.INTERNAL, + force_update: bool = False, + ) -> None: + """Adds a data of a new target algorithm (TA) run; it will update data if the same key + values are used (config, instance_id, seed) + + Parameters + ---------- + config : dict (or other type -- depending on config space module) + Parameter configuration + cost: Union[int, float, list, np.ndarray] + Cost of TA run (will be minimized) + time: float + Runtime of TA run + status: str + Status in {SUCCESS, TIMEOUT, CRASHED, ABORT, MEMOUT} + instance_id: str + String representing an instance (default: None) + seed: int + Random seed used by TA (default: None) + budget: float + budget (cutoff) used in intensifier to limit TA (default: 0) + starttime: float + starting timestamp of TA evaluation + endtime: float + ending timestamp of TA evaluation + additional_info: dict + Additional run infos (could include further returned + information from TA or fields such as start time and host_id) + origin: DataOrigin + Defines how data will be used. + force_update: bool (default: False) + Forces the addition of a config to the history + """ + if config is None: + raise TypeError("Configuration to add to the runhistory must not be None") + elif not isinstance(config, Configuration): + raise TypeError( + "Configuration to add to the runhistory is not of type Configuration, but %s" % type(config) + ) + + # Squeeze is important to reduce arrays with one element + # to scalars. + cost_array = np.asarray(cost).squeeze() + num_obj = np.size(cost_array) + + # Get the config id + config_id_tmp = self.config_ids.get(config) + if config_id_tmp is None: + self._n_id += 1 + self.config_ids[config] = self._n_id + config_id = cast(int, self.config_ids.get(config)) + self.ids_config[self._n_id] = config + else: + config_id = cast(int, config_id_tmp) + + if self.num_obj == -1: + self.num_obj = num_obj + elif self.num_obj != num_obj: + raise ValueError( + f"Cost is not of the same length ({num_obj}) as the number " f"of objectives ({self.num_obj})" + ) + + # Let's always work with floats; Makes it easier to deal with later on + # array.tolist() returns a scalar if the array has one element. 
+ c = cost_array.tolist() + if self.num_obj == 1: + c = float(c) + else: + c = [float(i) for i in c] + + k = RunKey(config_id, instance_id, seed, budget) + v = RunValue(c, time, status, starttime, endtime, additional_info) + + # Construct keys and values for the data dictionary + for key, value in ( + ("config", config.get_dictionary()), + ("config_id", config_id), + ("instance_id", instance_id), + ("seed", seed), + ("budget", budget), + ("cost", c), + ("time", time), + ("status", status), + ("starttime", starttime), + ("endtime", endtime), + ("additional_info", additional_info), + ("origin", config.origin), + ): + self._check_json_serializable(key, value, EnumEncoder, k, v) + + # Each runkey is supposed to be used only once. Repeated tries to add + # the same runkey will be ignored silently if not capped. + if self.overwrite_existing_runs or force_update or self.data.get(k) is None: + self._add(k, v, status, origin) + elif status != StatusType.CAPPED and self.data[k].status == StatusType.CAPPED: + # overwrite capped runs with uncapped runs + self._add(k, v, status, origin) + elif status == StatusType.CAPPED and self.data[k].status == StatusType.CAPPED: + if self.num_obj > 1: + raise RuntimeError("Not supported yet.") + + # Overwrite if censored with a larger cutoff + if cost > self.data[k].cost: + self._add(k, v, status, origin) + else: + logger.info("Entry was not added to the runhistory because existing runs will not overwritten.") def update_cost(self, config: Configuration) -> None: """Stores the performance of a configuration across the instances in self.cost_per_config @@ -417,7 +495,8 @@ def update_cost(self, config: Configuration) -> None: configuration to update cost based on all runs in runhistory """ config_id = self.config_ids[config] - # removing duplicates while keeping the order + + # Removing duplicates while keeping the order inst_seed_budgets = list(dict.fromkeys(self.get_runs_for_config(config, only_max_observed_budget=True))) self._cost_per_config[config_id] = self.average_cost(config, inst_seed_budgets) self.num_runs_per_config[config_id] = len(inst_seed_budgets) @@ -425,11 +504,6 @@ def update_cost(self, config: Configuration) -> None: all_inst_seed_budgets = list(dict.fromkeys(self.get_runs_for_config(config, only_max_observed_budget=False))) self._min_cost_per_config[config_id] = self.min_cost(config, all_inst_seed_budgets) - def update_all_costs(self) -> None: - """Update all costs in the runhistory.""" - for config in self.ids_config.values(): - self.update_cost(config) - def incremental_update_cost(self, config: Configuration, cost: Union[np.ndarray, list, float, int]) -> None: """Incrementally updates the performance of a configuration by using a moving average. 
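# Illustrative sketch (not part of the patch above): the multi-objective cost
# handling in `add`. The first call fixes `num_obj`; costs are stored as a list
# of floats, and later calls with a different number of objectives are rejected.
from smac.configspace import ConfigurationSpace, UniformFloatHyperparameter
from smac.runhistory.runhistory import RunHistory
from smac.tae import StatusType

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter("x", 0.0, 1.0))
config = cs.sample_configuration()

rh = RunHistory()
rh.add(config=config, cost=[0.3, 12.0], time=0.2, status=StatusType.SUCCESS, seed=1)
assert rh.num_obj == 2

try:
    # A scalar cost no longer matches the two objectives seen above.
    rh.add(config=config, cost=0.3, time=0.2, status=StatusType.SUCCESS, seed=2)
except ValueError:
    pass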
@@ -442,12 +516,18 @@ def incremental_update_cost(self, config: Configuration, cost: Union[np.ndarray, """ config_id = self.config_ids[config] n_runs = self.num_runs_per_config.get(config_id, 0) - old_cost = self._cost_per_config.get(config_id, 0.0) if self.num_obj > 1: - cost = self.average_cost(config) + costs = np.array(cost) + old_costs = self._cost_per_config.get(config_id, np.array([0.0 for _ in range(self.num_obj)])) + old_costs = np.array(old_costs) + + new_costs = ((old_costs * n_runs) + costs) / (n_runs + 1) # type: ignore + self._cost_per_config[config_id] = new_costs.tolist() + else: + old_cost = self._cost_per_config.get(config_id, 0.0) + self._cost_per_config[config_id] = ((old_cost * n_runs) + cost) / (n_runs + 1) # type: ignore - self._cost_per_config[config_id] = ((old_cost * n_runs) + cost) / (n_runs + 1) # type: ignore self.num_runs_per_config[config_id] = n_runs + 1 def get_cost(self, config: Configuration) -> float: @@ -464,96 +544,290 @@ def get_cost(self, config: Configuration) -> float: Computed cost for configuration """ config_id = self.config_ids.get(config) - return self._cost_per_config.get(config_id, np.nan) # type: ignore[arg-type] # noqa F821 - def get_runs_for_config(self, config: Configuration, only_max_observed_budget: bool) -> List[InstSeedBudgetKey]: - """Return all runs (instance seed pairs) for a configuration. + # Cost is always a single value (Single objective) or a list of values (Multi-objective) + # For example, _cost_per_config always holds the value on the highest budget + cost = self._cost_per_config.get(config_id, np.nan) # type: ignore[arg-type] # noqa F821 - Note - ---- - This method ignores capped runs. + if self.num_obj > 1: + assert type(cost) == list + # We have to normalize the costs here + costs = normalize_costs(cost, self.objective_bounds) + return float(np.mean(costs)) + + assert type(cost) == float + return float(cost) + + def get_min_cost(self, config: Configuration) -> float: + """Returns the lowest empirical cost for a configuration, across all runs (budgets) + + See the class docstring for how the costs are computed. The costs are not re-computed, + but are read from cache. Parameters ---------- - config : Configuration from ConfigSpace - Parameter configuration - only_max_observed_budget : bool - Select only the maximally observed budget run for this configuration + config: Configuration + Returns ------- - instance_seed_budget_pairs : list + min_cost: float + Computed cost for configuration """ config_id = self.config_ids.get(config) - runs = self._configid_to_inst_seed_budget.get(config_id, {}).copy() # type: ignore[arg-type] # noqa F821 - - # select only the max budget run if specified - if only_max_observed_budget: - for k, v in runs.items(): - runs[k] = [max(v)] + cost = self._min_cost_per_config.get(config_id, np.nan) # type: ignore[arg-type] # noqa F821 - # convert to inst-seed-budget key - rval = [InstSeedBudgetKey(k.instance, k.seed, budget) for k, v in runs.items() for budget in v] - return rval + if self.num_obj > 1: + assert type(cost) == list + costs = normalize_costs(cost, self.objective_bounds) - def get_all_configs(self) -> List[Configuration]: - """Return all configurations in this RunHistory object. 
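# Worked example (not part of the patch above) of the moving-average update in
# `incremental_update_cost`, single-objective case; the multi-objective branch
# applies the same formula element-wise per objective.
old_cost, n_runs, new_cost = 4.0, 3, 8.0
updated = (old_cost * n_runs + new_cost) / (n_runs + 1)
assert updated == 5.0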
+ # Note: We have to mean here because we already got the min cost + return float(np.mean(costs)) - Returns - ------- - parameter configurations: list - """ - return list(self.config_ids.keys()) + assert type(cost) == float + return float(cost) - def get_all_configs_per_budget( + def average_cost( self, - budget_subset: Optional[List] = None, - ) -> List[Configuration]: - """Return all configs in this RunHistory object that have been run on one of these budgets. + config: Configuration, + instance_seed_budget_keys: Optional[Iterable[InstSeedBudgetKey]] = None, + normalize: bool = False, + ) -> float | list[float]: + """Return the average cost of a configuration. This is the mean of costs of all instance- + seed pairs. Parameters ---------- - budget_subset: list - + config : Configuration + Configuration to calculate objective for. + instance_seed_budget_keys : list, optional (default=None) + List of tuples of instance-seeds-budget keys. If None, the run_history is + queried for all runs of the given configuration. + normalize : bool, optional (default=False) + Normalizes the costs wrt objective bounds in the multi-objective setting. + Only a float is returned if normalize is True. Warning: The value can change + over time because the objective bounds are changing. + Returns ------- - parameter configurations: list + Cost: float | list[float] + Average cost. In case of multiple objectives, the mean of each objective is returned. """ - if budget_subset is None: - return self.get_all_configs() - configs = [] - for c, i, s, b in self.data.keys(): - if b in budget_subset: - configs.append(self.ids_config[c]) - return configs + costs = self._cost(config, instance_seed_budget_keys) + if costs: + if self.num_obj > 1: + # Each objective is averaged separately + # [[100, 200], [0, 0]] -> [50, 100] + averaged_costs = np.mean(costs, axis=0).tolist() - def get_min_cost(self, config: Configuration) -> float: - """Returns the lowest empirical cost for a configuration, across all runs (budgets) + if normalize: + normalized_costs = normalize_costs(averaged_costs, self.objective_bounds) + return float(np.mean(normalized_costs)) + else: + return averaged_costs - See the class docstring for how the costs are computed. The costs are not re-computed, - but are read from cache. + return float(np.mean(costs)) + + return np.nan + + def sum_cost( + self, + config: Configuration, + instance_seed_budget_keys: Optional[Iterable[InstSeedBudgetKey]] = None, + normalize: bool = False, + ) -> float | list[float]: + """Return the sum of costs of a configuration. This is the sum of costs of all instance-seed + pairs. Parameters ---------- - config: Configuration + config : Configuration + Configuration to calculate objective for. + instance_seed_budget_keys : list, optional (default=None) + List of tuples of instance-seeds-budget keys. If None, the run_history is + queried for all runs of the given configuration. + normalize : bool, optional (default=False) + Normalizes the costs wrt objective bounds in the multi-objective setting. + Only a float is returned if normalize is True. Warning: The value can change + over time because the objective bounds are changing. Returns ------- - min_cost: float - Computed cost for configuration + sum_cost: float | list[float] + Sum of costs of config. In case of multiple objectives, the costs are summed up for each + objective individually. 
+ """ + costs = self._cost(config, instance_seed_budget_keys) + if costs: + if self.num_obj > 1: + # Each objective is summed separately + # [[100, 200], [20, 10]] -> [120, 210] + summed_costs = np.sum(costs, axis=0).tolist() + + if normalize: + normalized_costs = normalize_costs(summed_costs, self.objective_bounds) + return float(np.mean(normalized_costs)) + else: + return summed_costs + + return float(np.sum(costs)) + + def min_cost( + self, + config: Configuration, + instance_seed_budget_keys: Optional[Iterable[InstSeedBudgetKey]] = None, + normalize: bool = False, + ) -> float | list[float]: + """Return the minimum cost of a configuration. + + This is the minimum cost of all instance-seed pairs. + + Warning + ------- + In the case of multi-fidelity, the minimum cost per objectives is returned. + + Parameters + ---------- + config : Configuration + Configuration to calculate objective for. + instance_seed_budget_keys : list, optional (default=None) + List of tuples of instance-seeds-budget keys. If None, the run_history is + queried for all runs of the given configuration. + + Returns + ------- + min_cost: float | list[float] + Minimum cost of the config. In case of multi-objective, the minimum cost per objective + is returned. + """ + costs = self._cost(config, instance_seed_budget_keys) + if costs: + if self.num_obj > 1: + # Each objective is viewed separately + # [[100, 200], [20, 500]] -> [20, 200] + min_costs = np.min(costs, axis=0).tolist() + + if normalize: + normalized_costs = normalize_costs(min_costs, self.objective_bounds) + return float(np.mean(normalized_costs)) + else: + return min_costs + + return float(np.min(costs)) + + return np.nan + + def compute_all_costs(self, instances: Optional[List[str]] = None) -> None: + """Computes the cost of all configurations from scratch and overwrites self.cost_perf_config + and self.runs_per_config accordingly. + + Note + ---- + This method is only used for ``merge_foreign_data`` and should be removed. + + Parameters + ---------- + instances: List[str] + List of instances; if given, cost is only computed wrt to this instance set. + """ + self._cost_per_config = {} + self.num_runs_per_config = {} + for config, config_id in self.config_ids.items(): + # removing duplicates while keeping the order + inst_seed_budgets = list(dict.fromkeys(self.get_runs_for_config(config, only_max_observed_budget=True))) + if instances is not None: + inst_seed_budgets = list(filter(lambda x: x.instance in cast(List, instances), inst_seed_budgets)) + + if inst_seed_budgets: # can be empty if never saw any runs on + self._cost_per_config[config_id] = self.average_cost(config, inst_seed_budgets) + self._min_cost_per_config[config_id] = self.min_cost(config, inst_seed_budgets) + self.num_runs_per_config[config_id] = len(inst_seed_budgets) + + def get_instance_costs_for_config(self, config: Configuration) -> Dict[str, List[float]]: + """Returns the average cost per instance (across seeds) for a configuration. If the + runhistory contains budgets, only the highest budget for a configuration is returned. + + Note + ---- + This is used by the pSMAC facade to determine the incumbent after the evaluation. 
+ + Parameters + ---------- + config : Configuration from ConfigSpace + Parameter configuration + + Returns + ------- + cost_per_inst: Dict, cost> + """ + runs_ = self.get_runs_for_config(config, only_max_observed_budget=True) + cost_per_inst = {} # type: Dict[str, List[float]] + for inst, seed, budget in runs_: + cost_per_inst[inst] = cost_per_inst.get(inst, []) + rkey = RunKey(self.config_ids[config], inst, seed, budget) + vkey = self.data[rkey] + cost_per_inst[inst].append(vkey.cost) + cost_per_inst = dict([(inst, np.mean(costs)) for inst, costs in cost_per_inst.items()]) + return cost_per_inst + + def get_runs_for_config(self, config: Configuration, only_max_observed_budget: bool) -> List[InstSeedBudgetKey]: + """Return all runs (instance seed pairs) for a configuration. + + Note + ---- + This method ignores capped runs. + + Parameters + ---------- + config : Configuration from ConfigSpace + Parameter configuration + only_max_observed_budget : bool + Select only the maximally observed budget run for this configuration + Returns + ------- + instance_seed_budget_pairs : list """ config_id = self.config_ids.get(config) - return self._min_cost_per_config.get(config_id, np.nan) # type: ignore[arg-type] # noqa F821 + runs = self._configid_to_inst_seed_budget.get(config_id, {}).copy() # type: ignore[arg-type] # noqa F821 - def empty(self) -> bool: - """Check whether or not the RunHistory is empty. + # select only the max budget run if specified + if only_max_observed_budget: + for k, v in runs.items(): + runs[k] = [max(v)] + + # convert to inst-seed-budget key + rval = [InstSeedBudgetKey(k.instance, k.seed, budget) for k, v in runs.items() for budget in v] + return rval + + def get_all_configs(self) -> List[Configuration]: + """Return all configurations in this RunHistory object. Returns ------- - emptiness: bool - True if runs have been added to the RunHistory, - False otherwise + parameter configurations: list """ - return len(self.data) == 0 + return list(self.config_ids.keys()) + + def get_all_configs_per_budget( + self, + budget_subset: Optional[List] = None, + ) -> List[Configuration]: + """Return all configs in this RunHistory object that have been run on one of these budgets. + + Parameters + ---------- + budget_subset: list + + Returns + ------- + parameter configurations: list + """ + if budget_subset is None: + return self.get_all_configs() + configs = [] + for c, i, s, b in self.data.keys(): + if b in budget_subset: + configs.append(self.ids_config[c]) + return configs def save_json(self, fn: str = "runhistory.json", save_external: bool = False) -> None: """Saves runhistory on disk. @@ -716,205 +990,3 @@ def update( additional_info=additional_info, origin=origin, ) - - def _cost( - self, - config: Configuration, - instance_seed_budget_keys: Optional[Iterable[InstSeedBudgetKey]] = None, - ) -> List[np.ndarray]: - """Return array of all costs for the given config for further calculations. - - Parameters - ---------- - config : Configuration - Configuration to calculate objective for. - instance_seed_budget_keys : list, optional (default=None) - List of tuples of instance-seeds-budget keys. If None, the run_history is - queried for all runs of the given configuration. - - Returns - ------- - Costs: list - Array of all costs. 
- """ - try: - id_ = self.config_ids[config] - except KeyError: # challenger was not running so far - return [] - - if instance_seed_budget_keys is None: - instance_seed_budget_keys = self.get_runs_for_config(config, only_max_observed_budget=True) - - costs = [] - for i, r, b in instance_seed_budget_keys: - k = RunKey(id_, i, r, b) - costs.append(self.data[k].cost) - - return costs - - def average_cost( - self, - config: Configuration, - instance_seed_budget_keys: Optional[Iterable[InstSeedBudgetKey]] = None, - ) -> float: - """Return the average cost of a configuration. This is the mean of costs of all instance- - seed pairs. - - Parameters - ---------- - config : Configuration - Configuration to calculate objective for. - instance_seed_budget_keys : list, optional (default=None) - List of tuples of instance-seeds-budget keys. If None, the run_history is - queried for all runs of the given configuration. - - Returns - ------- - Cost: float - Average cost - """ - costs = self._cost(config, instance_seed_budget_keys) - if costs: - if self.num_obj > 1: - # Normalize costs - costs = normalize_costs(costs, self.objective_bounds) # type: ignore[assignment] - - return float(np.mean(costs)) - - return np.nan - - def sum_cost( - self, - config: Configuration, - instance_seed_budget_keys: Optional[Iterable[InstSeedBudgetKey]] = None, - ) -> float: - """Return the sum of costs of a configuration. This is the sum of costs of all instance-seed - pairs. - - Parameters - ---------- - config : Configuration - Configuration to calculate objective for. - instance_seed_budget_keys : list, optional (default=None) - List of tuples of instance-seeds-budget keys. If None, the run_history is - queried for all runs of the given configuration. - - Returns - ------- - sum_cost: float - Sum of costs of config. - """ - costs = self._cost(config, instance_seed_budget_keys) - if costs: - if self.num_obj > 1: - # Normalize costs - costs = normalize_costs(costs, self.objective_bounds) # type: ignore[assignment] - costs = np.mean(costs, axis=1) - - return float(np.sum(costs)) - - def min_cost( - self, - config: Configuration, - instance_seed_budget_keys: Optional[Iterable[InstSeedBudgetKey]] = None, - ) -> float: - """Return the minimum cost of a configuration. - - This is the minimum cost of all instance-seed pairs. - Warning: In the case of multi-fidelity, the minimum cost per objectives is returned. - - Parameters - ---------- - config : Configuration - Configuration to calculate objective for. - instance_seed_budget_keys : list, optional (default=None) - List of tuples of instance-seeds-budget keys. If None, the run_history is - queried for all runs of the given configuration. - - Returns - ------- - min_cost: float - minimum cost of config. - """ - costs = self._cost(config, instance_seed_budget_keys) - if costs: - if self.num_obj > 1: - # Normalize costs - costs = normalize_costs(costs, self.objective_bounds) # type: ignore[assignment] - costs = np.mean(costs, axis=1) - - return float(np.min(costs)) - - return np.nan - - def compute_all_costs(self, instances: Optional[List[str]] = None) -> None: - """Computes the cost of all configurations from scratch and overwrites self.cost_perf_config - and self.runs_per_config accordingly. - - Note - ---- - This method is only used for ``merge_foreign_data`` and should be removed. - - Parameters - ---------- - instances: List[str] - List of instances; if given, cost is only computed wrt to this instance set. 
- """ - self._cost_per_config = {} - self.num_runs_per_config = {} - for config, config_id in self.config_ids.items(): - # removing duplicates while keeping the order - inst_seed_budgets = list(dict.fromkeys(self.get_runs_for_config(config, only_max_observed_budget=True))) - if instances is not None: - inst_seed_budgets = list(filter(lambda x: x.instance in cast(List, instances), inst_seed_budgets)) - - if inst_seed_budgets: # can be empty if never saw any runs on - self._cost_per_config[config_id] = self.average_cost(config, inst_seed_budgets) - self._min_cost_per_config[config_id] = self.min_cost(config, inst_seed_budgets) - self.num_runs_per_config[config_id] = len(inst_seed_budgets) - - def get_instance_costs_for_config(self, config: Configuration) -> Dict[str, List[float]]: - """Returns the average cost per instance (across seeds) for a configuration. If the - runhistory contains budgets, only the highest budget for a configuration is returned. - - Note - ---- - This is used by the pSMAC facade to determine the incumbent after the evaluation. - - Parameters - ---------- - config : Configuration from ConfigSpace - Parameter configuration - - Returns - ------- - cost_per_inst: Dict, cost> - """ - runs_ = self.get_runs_for_config(config, only_max_observed_budget=True) - cost_per_inst = {} # type: Dict[str, List[float]] - for inst, seed, budget in runs_: - cost_per_inst[inst] = cost_per_inst.get(inst, []) - rkey = RunKey(self.config_ids[config], inst, seed, budget) - vkey = self.data[rkey] - cost_per_inst[inst].append(vkey.cost) - cost_per_inst = dict([(inst, np.mean(costs)) for inst, costs in cost_per_inst.items()]) - return cost_per_inst - - def __contains__(self, k: object) -> bool: - """Dictionary semantics for `k in runhistory`""" - return k in self.data - - def __getitem__(self, k: RunKey) -> RunValue: - """Dictionary semantics for `v = runhistory[k]`""" - return self.data[k] - - def __iter__(self) -> Iterator[RunKey]: - """Dictionary semantics for `for k in runhistory.keys()`, enables. 
- - .items() - """ - return iter(self.data.keys()) - - def __len__(self) -> int: - """Enables the `len(runhistory)`""" - return len(self.data) diff --git a/smac/runhistory/runhistory2epm.py b/smac/runhistory/runhistory2epm.py index 10c72cf48..2bf85d979 100644 --- a/smac/runhistory/runhistory2epm.py +++ b/smac/runhistory/runhistory2epm.py @@ -7,12 +7,12 @@ from smac.configspace import convert_configurations_to_array from smac.epm.base_imputor import BaseImputor -from smac.optimizer.multi_objective.aggregation_strategy import AggregationStrategy +from smac.multi_objective.aggregation_strategy import AggregationStrategy +from smac.multi_objective.utils import normalize_costs from smac.runhistory.runhistory import RunHistory, RunKey, RunValue from smac.scenario.scenario import Scenario from smac.tae import StatusType from smac.utils import constants -from smac.utils.multi_objective import normalize_costs __author__ = "Katharina Eggensperger" __copyright__ = "Copyright 2015, ML4AAD" @@ -473,7 +473,7 @@ def _build_matrix( # Let's normalize y here # We use the objective_bounds calculated by the runhistory - y_ = normalize_costs([run.cost], runhistory.objective_bounds) + y_ = normalize_costs(run.cost, runhistory.objective_bounds) y_agg = self.multi_objective_algorithm(y_) y[row] = y_agg else: @@ -489,7 +489,6 @@ def _build_matrix( self.max_y = np.max(y, axis=0) y = self.transform_response_values(values=y) - return X, y def transform_response_values(self, values: np.ndarray) -> np.ndarray: @@ -698,7 +697,7 @@ def _build_matrix( # Let's normalize y here # We use the objective_bounds calculated by the runhistory - y_ = normalize_costs([run.cost], runhistory.objective_bounds) + y_ = normalize_costs(run.cost, runhistory.objective_bounds) y_agg = self.multi_objective_algorithm(y_) y[row, 0] = y_agg else: diff --git a/smac/runhistory/runhistory2epm_boing.py b/smac/runhistory/runhistory2epm_boing.py new file mode 100644 index 000000000..009abf0e8 --- /dev/null +++ b/smac/runhistory/runhistory2epm_boing.py @@ -0,0 +1,92 @@ +import typing + +import copy + +import numpy as np + +from smac.runhistory.runhistory import RunHistory +from smac.runhistory.runhistory2epm import ( + RunHistory2EPM4Cost, + RunHistory2EPM4LogScaledCost, +) + + +class RunHistory2EPM4CostWithRaw(RunHistory2EPM4Cost): + """ + A transformer that transform RunHistroy to vectors, this set of classes will return the raw cost values in + addition to the transformed cost values. The raw cost values can then be applied for local BO approaches. + """ + + def transform_with_raw( + self, + runhistory: RunHistory, + budget_subset: typing.Optional[typing.List] = None, + ) -> typing.Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Returns vector representation of runhistory; if imputation is + disabled, censored (TIMEOUT with time < cutoff) will be skipped. 
This function returns both the raw + and transformed cost values + + Parameters + ---------- + runhistory : smac.runhistory.runhistory.RunHistory + Runhistory containing all evaluated configurations/instances + budget_subset : list of budgets to consider + + Returns + ------- + X: numpy.ndarray + configuration vector x instance features + Y: numpy.ndarray + cost values + Y_raw: numpy.ndarray + cost values before transformation + """ + X, Y_raw = RunHistory2EPM4Cost.transform(self, runhistory, budget_subset) + Y = copy.deepcopy(Y_raw) + Y = self.transform_raw_values(Y) + return X, Y, Y_raw + + def transform_response_values(self, values: np.ndarray) -> np.ndarray: + """Transform function response values. Returns the input values. + + Parameters + ---------- + values : np.ndarray + Response values to be transformed. + + Returns + ------- + np.ndarray + """ + # otherwise it will be overwritten by its superclass + return values + + def transform_raw_values(self, values: np.ndarray) -> np.ndarray: + """Transform function response values. Returns the raw input values before transformation + + Parameters + ---------- + values : np.ndarray + Response values to be transformed. + + Returns + ------- + np.ndarray + """ + return values + + +class RunHistory2EPM4ScaledLogCostWithRaw(RunHistory2EPM4CostWithRaw, RunHistory2EPM4LogScaledCost): + def transform_raw_values(self, values: np.ndarray) -> np.ndarray: + """Transform function response values. Returns the raw input values before transformation + + Parameters + ---------- + values : np.ndarray + Response values to be transformed. + + Returns + ------- + np.ndarray + """ + return RunHistory2EPM4LogScaledCost.transform_response_values(self, values) diff --git a/smac/smac_cli.py b/smac/smac_cli.py index b11205584..d92e7c9b9 100644 --- a/smac/smac_cli.py +++ b/smac/smac_cli.py @@ -10,9 +10,7 @@ from smac.facade.experimental.hydra_facade import ( # type: ignore[attr-defined] # noqa F821 Hydra, ) -from smac.facade.experimental.psmac_facade import ( # type: ignore[attr-defined] # noqa F821 - PSMAC, -) +from smac.facade.psmac_facade import PSMAC # type: ignore[attr-defined] # noqa F821 from smac.facade.roar_facade import ROAR from smac.facade.smac_ac_facade import SMAC4AC from smac.facade.smac_bb_facade import SMAC4BB diff --git a/smac/utils/io/result_merging.py b/smac/utils/io/result_merging.py new file mode 100644 index 000000000..dd13968ab --- /dev/null +++ b/smac/utils/io/result_merging.py @@ -0,0 +1,207 @@ +from typing import Any, Dict, List, Optional, Sequence, Union + +import json +import os +from pathlib import Path + +import numpy as np +import regex as re +from ConfigSpace.read_and_write import json as csjson + +from smac.runhistory.runhistory import RunHistory + + +def glob_re(pattern: str, strings: List[str]) -> filter: + """ + Filter strings according to pattern. + + Parameters + ---------- + pattern: str + Regex pattern + strings: List[str] + List of strings to filter. + + Returns + ------- + filter[str] + """ + return filter(re.compile(pattern).match, strings) + + +def get_rundirs(pattern: str, path: Union[str, Path]) -> Sequence[str]: + """ + Get SMAC run dirs, often starting with `run_`. + + Parameters + ---------- + pattern: str + Regex expresssion. 
+ path: Union[str, Path] + Path to folder containing single SMAC rundirs + + Returns + ------- + Sequence[str] + Single SMAC rundirs + + """ + subdirs = list(glob_re(pattern, os.listdir(path))) + rundirs = [os.path.join(path, sd) for sd in subdirs] + return rundirs + + +class ResultMerger: + def __init__( + self, + output_dir: Optional[Union[str, Path]] = None, + rundir_pattern: str = r"run_*\d$", + rundirs: Optional[List[Union[str, Path]]] = None, + ): + """ + Merge runhistories from different SMAC runs. + + Parameters + ---------- + output_dir : Optional[Union[str, Path]] + Output directory containing single SMAC run folders. The rundirs are inside + and collected via the pattern `rundir_pattern`. + rundir_pattern : str + Regex expression to find single rundirs in `output_dir`. + rundirs : Optional[List[Union[str, Path]]] + Paths to all SMAC output folders. + If not specified, please specify `output_dir`. + """ + self.output_dir = output_dir + self.run_dirs: Sequence[Union[str, Path]] + if rundirs: + self.run_dirs = rundirs + else: + if self.output_dir is None: + raise ValueError("Please provide either `rundirs` or `output_dir` with" " an optional pattern.") + self.run_dirs = get_rundirs(pattern=rundir_pattern, path=self.output_dir) + + cs_fn = Path(self.run_dirs[0]) / "configspace.json" + with open(cs_fn, "r") as fh: + json_string = fh.read() + self.configuration_space = csjson.read(json_string) + + def get_runhistory(self) -> RunHistory: + """ + Get runhistory + + For this, merge all runhistories in pSMAC subfolders. + + Returns + ------- + RunHistory + Empty, if `self.run_dirs` is None. + + """ + runhistory = RunHistory() + if self.run_dirs: + runhistory_filenames = [os.path.join(d, "runhistory.json") for d in self.run_dirs] + for fn in runhistory_filenames: + runhistory.update_from_json(fn=fn, cs=self.configuration_space) + return runhistory + + def get_trajectory(self) -> Optional[List[Dict[str, Any]]]: + """ + Get trajectory + + For this, extract trajectory from merged runhistories. + Return trajectories in json format. + + Returns + ------- + Optional[List[Dict[str, Any]] + - None, if `self.run_dirs` is None. + - List of trajectory entries. Each trajectory entry is a dict with keys + ['cpu_time', 'wallclock_time', 'evaluations', 'cost', 'incumbent', 'budget', 'origin']. 
+ + """ + trajectory = None + if self.run_dirs is None: + return trajectory + rh = self.get_runhistory() + + # Sort configurations chronologically by starttime + rvals = rh.values() + starttimes = np.array([rv.starttime for rv in rvals]) + ids = np.argsort(starttimes) + rhitems = list(rh.items()) + rhitems = [rhitems[i] for i in ids] + + # Find incumbents + # Incumbent = cost is lower than alltime cost + trajectory = [] + + # Inject first trajectory entry from file from first rundir + rundir = self.run_dirs[0] + traj_fn = Path(rundir) / "traj.json" + with open(traj_fn, "r") as file: + line = file.readline() + traj_entry = json.loads(line) + trajectory.append(traj_entry) + + # Populate from merged runhistory + cost = np.inf + for i, (rk, rv) in enumerate(rhitems): + if rv.cost < cost: + cost = rv.cost + # traj_entry = TrajEntry( + # rv.cost, # train_perf + # rk.config_id, # incumbent_id + # rh.ids_config[rk.config_id], # incumbent + # i + 1, # ta_runs + # rv.time, # ta_time_used + # rv.starttime, # wallclock_time + # rk.budget, # budget + # ) # TODO return traj_entry as TrajEntry and convert to json for write_trajectory + incumbent = rh.ids_config[rk.config_id] + traj_entry = { + "cpu_time": rv.time, + "wallclock_time": rv.starttime, + "evaluations": i + 1, + "cost": rv.cost, + "incumbent": incumbent.get_dictionary(), + "budget": rk.budget, + "origin": incumbent.origin, + } + trajectory.append(traj_entry) + + return trajectory + + def write_trajectory(self) -> None: + """ + Write trajectory to traj.json + + Returns + ------- + None + + """ + if self.output_dir is not None: + traj_fn = Path(self.output_dir) / "traj.json" + traj = self.get_trajectory() + + traj_fn.open("w") + if traj is not None: + for traj_entry in traj: + with open(traj_fn, "a") as fp: # TODO: write or append? + json.dump(traj_entry, fp) + fp.write("\n") + + def write_runhistory(self) -> None: + """ + Write runhistory to runhistory.json + + Returns + ------- + None + + """ + if self.output_dir is not None: + rh_fn = Path(self.output_dir) / "runhistory.json" + rh = self.get_runhistory() + rh.save_json(fn=str(rh_fn), save_external=True) diff --git a/smac/utils/multi_objective.py b/smac/utils/multi_objective.py deleted file mode 100644 index 9a5f7f7b5..000000000 --- a/smac/utils/multi_objective.py +++ /dev/null @@ -1,66 +0,0 @@ -from typing import List, Optional, Tuple, Union - -import numpy as np - - -def normalize_costs( - values: Union[np.ndarray, List, List[List], List[np.ndarray]], - bounds: Optional[List[Tuple[float, float]]] = None, -) -> np.ndarray: - """Normalizes the costs to be between 0 and 1 if no bounds are given. Otherwise, the costs are - normalized according to the bounds. - - Example - ------- - - [0, 10, 5] -> [[0], [1], [0.5]] - [[0], [10], [5]] -> [[0], [1], [0.5]] - [[0, 0], [10, 50], [5, 200]] -> [[0, 0], [1, 0.25], [0.5, 1]] - - Parameters - ---------- - values : Union[np.ndarray, List, List[List]] - Cost values which should be normalized. - If array/list is one-dimensional, it is expanded by one dimension. - bounds : Optional[List[Tuple[float, float]]], optional - Min and max bounds which should be applied to the values, by default None. - If bounds are None the min and max values from the data are used. - - Returns - ------- - np.ndarray - Normalized costs. 
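# Illustrative sketch (not part of the patch above): merging pSMAC runs with the
# new ResultMerger. The directory layout "psmac_out/run_1", "psmac_out/run_2", ...
# is an assumption for this example; subfolders are matched by the default
# pattern r"run_*\d$".
from smac.utils.io.result_merging import ResultMerger

merger = ResultMerger(output_dir="psmac_out")
merged_rh = merger.get_runhistory()   # merged RunHistory across all run folders
trajectory = merger.get_trajectory()  # incumbent trace rebuilt from the merged runs
merger.write_runhistory()             # writes psmac_out/runhistory.json
merger.write_trajectory()             # writes psmac_out/traj.json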
- """ - _values: np.ndarray - if isinstance(values, list): - _values = np.array(values) - else: - _values = values - - if len(_values.shape) == 1: - _values = np.expand_dims(_values, axis=-1) - - normalized_values = [] - for col in range(_values.shape[1]): - data = _values[:, col].astype(float) - - if bounds is not None: - assert len(bounds) == _values.shape[1] - - min_value = bounds[col][0] - max_value = bounds[col][1] - else: - min_value = np.min(data) - max_value = np.max(data) - - denominator = max_value - min_value - - # Prevent divide by zero - if denominator < 1e-10: - # Return ones - normalized_values.append(np.ones_like(data)) - else: - numerator = data - min_value - normalized_values.append(numerator / denominator) - - return np.swapaxes(np.array(normalized_values), 0, 1) diff --git a/smac/utils/validate.py b/smac/utils/validate.py index 653661d14..2ef1beffb 100644 --- a/smac/utils/validate.py +++ b/smac/utils/validate.py @@ -8,9 +8,9 @@ from joblib import Parallel, delayed from smac.configspace import Configuration, convert_configurations_to_array -from smac.epm.rf_with_instances import RandomForestWithInstances -from smac.epm.rfr_imputator import RFRImputator -from smac.epm.util_funcs import get_types +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances +from smac.epm.random_forest.rfr_imputator import RFRImputator +from smac.epm.utils import get_types from smac.runhistory.runhistory import RunHistory, RunInfo, RunKey, RunValue, StatusType from smac.runhistory.runhistory2epm import RunHistory2EPM4Cost from smac.scenario.scenario import Scenario diff --git a/tests/test_cli/random_configuration_chooser_impl.py b/tests/test_cli/random_configuration_chooser_impl.py index abf7fcf13..d47175768 100644 --- a/tests/test_cli/random_configuration_chooser_impl.py +++ b/tests/test_cli/random_configuration_chooser_impl.py @@ -1,4 +1,4 @@ -from smac.optimizer.random_configuration_chooser import ChooserNoCoolDown +from smac.optimizer.configuration_chooser.random_chooser import ChooserNoCoolDown __author__ = "Aaron Kimmig" __copyright__ = "Copyright 2015, ML4AAD" diff --git a/tests/test_cli/test_deterministic_smac.py b/tests/test_cli/test_deterministic_smac.py index 1d45832ce..28c329177 100644 --- a/tests/test_cli/test_deterministic_smac.py +++ b/tests/test_cli/test_deterministic_smac.py @@ -38,7 +38,7 @@ def ignore_timestamps(self, rh): rh["data"][i][1] = [v for j, v in enumerate(val) if j not in [3, 4]] # 3, 4 are start and end timestamps return rh - @unittest.mock.patch("smac.optimizer.ei_optimization.get_one_exchange_neighbourhood") + @unittest.mock.patch("smac.optimizer.acquisition.maximizer.get_one_exchange_neighbourhood") def test_deterministic(self, patch): """ Testing deterministic behaviour. 
diff --git a/tests/test_epm/test_base_epm.py b/tests/test_epm/test_base_epm.py index 790201688..333461f8a 100644 --- a/tests/test_epm/test_base_epm.py +++ b/tests/test_epm/test_base_epm.py @@ -6,8 +6,8 @@ import smac import smac.configspace -from smac.epm.base_epm import AbstractEPM -from smac.epm.util_funcs import get_types +from smac.epm.base_epm import BaseEPM +from smac.epm.utils import get_types __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" @@ -33,12 +33,12 @@ def get_X_y(num_samples, num_instance_features): y = np.random.rand(num_samples) return X, y - with unittest.mock.patch.object(AbstractEPM, "_train"): - with unittest.mock.patch.object(AbstractEPM, "_predict") as predict_mock: + with unittest.mock.patch.object(BaseEPM, "_train"): + with unittest.mock.patch.object(BaseEPM, "_predict") as predict_mock: predict_mock.side_effect = lambda x, _: (x, x) - epm = AbstractEPM( + epm = BaseEPM( configspace=cs, types=types, bounds=bounds, diff --git a/tests/test_epm/test_boing_kernel.py b/tests/test_epm/test_boing_kernel.py new file mode 100644 index 000000000..cbee007f5 --- /dev/null +++ b/tests/test_epm/test_boing_kernel.py @@ -0,0 +1,217 @@ +import copy +import unittest.mock + +import gpytorch +import numpy as np +import torch +from gpytorch.constraints.constraints import Interval +from gpytorch.distributions import MultivariateNormal +from gpytorch.kernels import MaternKernel, ScaleKernel +from gpytorch.kernels.inducing_point_kernel import InducingPointKernel +from gpytorch.lazy import LazyEvaluatedKernelTensor, delazify +from gpytorch.likelihoods import GaussianLikelihood +from gpytorch.means.zero_mean import ZeroMean +from gpytorch.models.exact_gp import ExactGP + +from smac.epm.gaussian_process.kernels.boing import FITCKernel, FITCMean +from smac.epm.utils import check_subspace_points + + +class FITC(ExactGP): + def __init__(self, train_x, train_y, likelihood, base_kernel, inducing_points): + super().__init__(train_x, train_y, likelihood) + self.mean_module = ZeroMean() + self.covar_module = InducingPointKernel(base_kernel, inducing_points, likelihood) + self.prediction_strategy = self.covar_module.prediction_strategy + + def forward(self, x): + mean = self.mean_module(x) + covar = self.covar_module(x) + return MultivariateNormal(mean, covar) + + +def generate_test_data( + rs: np.random.RandomState, num_inducing=4, num_points_in=8, num_points_out=10, num_dims=5, expand_bound=True +): + X_out = rs.rand(num_points_out, num_dims) + Y_out = rs.rand(num_points_out, 1) + # X \in [-0.6, 1.4] and the bound is [0, 1] + X_out = (X_out - 0.3) * 2 + + X_in = rs.rand(num_points_in, num_dims) + Y_in = rs.rand(num_points_in, 1) + + X_inducing = rs.rand(num_inducing, num_dims) + + X = np.vstack([X_in, X_out]) + Y = np.vstack([Y_in, Y_out]) + + Y = np.squeeze(Y) + + data_indices = np.arange((num_points_in + num_points_out)) + + rs.shuffle(data_indices) + + X = X[data_indices] + Y = Y[data_indices] + + ss_in = check_subspace_points( + X, cont_dims=np.arange(num_dims), bounds_cont=np.tile([0.0, 1.0], [num_dims, 1]), expand_bound=expand_bound + ) + + X_in = X[ss_in] + Y_in = Y[ss_in] + X_out = X[~ss_in] + Y_out = Y[~ss_in] + + return X_in, Y_in, X_out, Y_out, X_inducing + + +def generate_kernel(n_dimensions): + exp_kernel = MaternKernel( + 2.5, + lengthscale_constraint=Interval( + torch.tensor(np.exp(-6.754111155189306).repeat(n_dimensions)), + torch.tensor(np.exp(0.0858637988771976).repeat(n_dimensions)), + transform=None, + initial_value=1.0, + ), + 
ard_num_dims=n_dimensions, + active_dims=torch.arange(n_dimensions), + ).double() + + kernel = ScaleKernel( + exp_kernel, outputscale_constraint=Interval(np.exp(-10.0), np.exp(2.0), transform=None, initial_value=2.0) + ).double() + return kernel + + +class TestFITCKernel(unittest.TestCase): + def setUp(self) -> None: + rs = np.random.RandomState(1) + num_dims = 5 + self.likelihood = GaussianLikelihood().double() + X_in, Y_in, X_out, Y_out, X_inducing = generate_test_data(rs, num_dims=num_dims) + self.kernel = generate_kernel(num_dims) + self.X_in = torch.from_numpy(X_in) + self.Y_in = torch.from_numpy(Y_in) + self.X_out = torch.from_numpy(X_out) + self.Y_out = torch.from_numpy(Y_out) + self.X_inducing = torch.from_numpy(X_inducing) + self.ga_kernel = FITCKernel( + base_kernel=self.kernel, + X_inducing=self.X_inducing, + likelihood=self.likelihood, + X_out=self.X_out, + y_out=self.Y_out, + ).double() + self.ga_mean = FITCMean(covar_module=self.ga_kernel) + self.fitc = FITC( + train_x=self.X_out, + train_y=self.Y_out, + likelihood=self.likelihood, + base_kernel=self.kernel, + inducing_points=self.X_inducing, + ) + + self.fitc_eval_cache = { + "_cached_kernel_mat": "_inducing_mat", + "_cached_inducing_sigma": "_inducing_sigma", + "_cached_poster_mean_mat": "_poster_mean_mat", + "_cached_kernel_inv_root": "_inducing_inv_root", + } + self.fitc_train_cache = { + "_train_cached_k_u1": "_k_u1", + "_train_cached_lambda_diag_inv": "_lambda_diag_inv", + "_train_cached_posterior_mean": "posterior_mean", + } + + def test_init(self): + ga_kernel = FITCKernel( + base_kernel=self.kernel, X_inducing=torch.from_numpy(np.empty(2)), likelihood=None, X_out=None, y_out=None + ) + self.assertTrue(hasattr(ga_kernel, "X_inducing")) + self.assertEqual(len(ga_kernel.X_inducing.shape), 2) + self.assertTrue("X_inducing" in dict(ga_kernel.named_parameters())) + self.assertTrue(self.ga_mean.covar_module is self.ga_kernel) + + def test_forward(self): + ga_covar = self.ga_kernel(self.X_in) + ga_mean = delazify(self.ga_mean(self.X_in)) + ga_covar_diag = self.ga_kernel(self.X_in).diag() + + self.assertIsInstance(ga_covar, LazyEvaluatedKernelTensor) + ga_covar = delazify(ga_covar) + self.fitc.eval() + + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + fitc_output = self.fitc(self.X_in) + fitc_mean = fitc_output.mean + fitc_covar = fitc_output.covariance_matrix + + torch.testing.assert_allclose(fitc_mean, ga_mean) + torch.testing.assert_allclose(ga_covar, fitc_covar) + torch.testing.assert_allclose(ga_covar_diag, fitc_output.variance) + + with self.assertRaises(RuntimeError): + delazify(self.ga_kernel(self.X_in, self.X_out)) + + self.ga_kernel.eval() + ga_covar_train_test = delazify(self.ga_kernel(self.X_in, self.X_out)) + + self.assertTrue(ga_covar_train_test.shape == (len(self.X_in), len(self.X_out))) + + def test_cache(self): + self.ga_kernel.train() + delazify(self.ga_kernel(self.X_in)) + for cache in self.fitc_eval_cache.keys(): + self.assertFalse(hasattr(self.ga_kernel, cache)) + # Make sure that all the cached values are successfully stored + for cache, value in self.fitc_train_cache.items(): + self.assertTrue(hasattr(self.ga_kernel, cache)) + if cache == "_train_cached_posterior_mean": + torch.testing.assert_allclose(getattr(self.ga_kernel, cache), getattr(self.ga_kernel, value)(self.X_in)) + else: + torch.testing.assert_allclose(getattr(self.ga_kernel, cache), getattr(self.ga_kernel, value)) + + self.ga_kernel.eval() + delazify(self.ga_kernel(self.X_in)) + delazify(self.ga_mean(self.X_in)) + for cache, 
value in self.fitc_eval_cache.items(): + self.assertTrue(hasattr(self.ga_kernel, cache)) + self.assertTrue(getattr(self.ga_kernel, cache) is getattr(self.ga_kernel, value)) + for cache in self.fitc_train_cache: + self.assertFalse(hasattr(self.ga_kernel, cache)) + + self.ga_kernel.train() + for cache in self.fitc_eval_cache: + self.assertFalse(hasattr(self.ga_kernel, cache)) + for cache in self.fitc_train_cache: + self.assertFalse(hasattr(self.ga_kernel, cache)) + + def test_copy(self): + # clear all the cache + self.ga_kernel.train() + gp_kernel_copy_1 = copy.deepcopy(self.ga_kernel) + # ga_kernel does not have the cached values thus they should all be empty + for cache in self.fitc_train_cache.keys(): + self.assertFalse(hasattr(gp_kernel_copy_1, cache)) + for cache in self.fitc_eval_cache.keys(): + self.assertFalse(hasattr(gp_kernel_copy_1, cache)) + + delazify(self.ga_kernel(self.X_in)) + gp_kernel_copy_2 = copy.deepcopy(self.ga_kernel) + for cache in self.fitc_train_cache.keys(): + self.assertFalse(hasattr(gp_kernel_copy_2, cache)) + for cache in self.fitc_eval_cache.keys(): + self.assertFalse(hasattr(gp_kernel_copy_2, cache)) + + self.ga_kernel.eval() + delazify(self.ga_kernel(self.X_in)) + delazify(self.ga_mean(self.X_in)) + + gp_kernel_copy_3 = copy.deepcopy(self.ga_kernel) + for cache, value in self.fitc_eval_cache.items(): + self.assertTrue(hasattr(gp_kernel_copy_3, cache)) + for cache in self.fitc_train_cache: + self.assertFalse(hasattr(gp_kernel_copy_3, cache)) diff --git a/tests/test_epm/test_gp.py b/tests/test_epm/test_gp.py index 4f227ba9c..5ed6624c3 100644 --- a/tests/test_epm/test_gp.py +++ b/tests/test_epm/test_gp.py @@ -13,14 +13,14 @@ convert_configurations_to_array, ) from smac.epm.gaussian_process import GaussianProcess -from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior +from smac.epm.gaussian_process.utils.prior import HorseshoePrior, LognormalPrior __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" def get_gp(n_dimensions, rs, noise=1e-3, normalize_y=True) -> GaussianProcess: - from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel + from smac.epm.gaussian_process.kernels import ConstantKernel, Matern, WhiteKernel cov_amp = ConstantKernel( 2.0, @@ -76,7 +76,12 @@ def get_cat_data(rs): def get_mixed_gp(cat_dims, cont_dims, rs, noise=1e-3, normalize_y=True): - from smac.epm.gp_kernels import ConstantKernel, HammingKernel, Matern, WhiteKernel + from smac.epm.gaussian_process.kernels import ( + ConstantKernel, + HammingKernel, + Matern, + WhiteKernel, + ) cat_dims = np.array(cat_dims, dtype=np.int) cont_dims = np.array(cont_dims, dtype=np.int) diff --git a/tests/test_epm/test_gp_gpytorch.py b/tests/test_epm/test_gp_gpytorch.py new file mode 100644 index 000000000..6cc5bfe4d --- /dev/null +++ b/tests/test_epm/test_gp_gpytorch.py @@ -0,0 +1,341 @@ +import unittest.mock + +import numpy as np +import pyro +import torch +from botorch.models.kernels.categorical import CategoricalKernel +from gpytorch.constraints.constraints import Interval +from gpytorch.kernels import MaternKernel, ProductKernel, ScaleKernel +from gpytorch.likelihoods.gaussian_likelihood import GaussianLikelihood +from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood +from gpytorch.models.exact_gp import ExactGP +from gpytorch.priors import HorseshoePrior, LogNormalPrior, UniformPrior +from gpytorch.utils.errors import NotPSDError + +from smac.configspace import ( + CategoricalHyperparameter, + 
ConfigurationSpace, + UniformFloatHyperparameter, +) +from smac.epm.gaussian_process.gpytorch import GPyTorchGaussianProcess + +from .test_gp import TestGP, get_cat_data, get_cont_data + +torch.manual_seed(0) +pyro.set_rng_seed(0) + + +def get_gp(n_dimensions, rs, noise=None, normalize_y=True) -> GPyTorchGaussianProcess: + exp_kernel = MaternKernel( + 2.5, + lengthscale_constraint=Interval( + torch.tensor(np.exp(-6.754111155189306).repeat(n_dimensions)), + torch.tensor(np.exp(0.0858637988771976).repeat(n_dimensions)), + transform=None, + initial_value=1.0, + ), + ard_num_dims=n_dimensions, + active_dims=torch.arange(n_dimensions), + lengthscale_prior=UniformPrior(np.exp(-6.754111155189306), np.exp(0.858637988771976)), + ).double() + + kernel = ScaleKernel( + exp_kernel, + outputscale_constraint=Interval(np.exp(-10.0), np.exp(2.0), transform=None, initial_value=2.0), + outputscale_prior=LogNormalPrior(0.0, 1.0), + ).double() + if noise is None: + likelihood = None + else: + noise_prior = HorseshoePrior(0.1) + likelihood = GaussianLikelihood( + noise_prior=noise_prior, noise_constraint=Interval(np.exp(-25), np.exp(2), transform=None) + ).double() + likelihood.initialize(noise=noise) + + bounds = [(0.0, 1.0) for _ in range(n_dimensions)] + types = np.zeros(n_dimensions) + + configspace = ConfigurationSpace() + for i in range(n_dimensions): + configspace.add_hyperparameter(UniformFloatHyperparameter("x%d" % i, 0, 1)) + + model = GPyTorchGaussianProcess( + configspace=configspace, + bounds=bounds, + types=types, + kernel=kernel, + seed=rs.randint(low=1, high=10000), + normalize_y=normalize_y, + likelihood=likelihood, + n_opt_restarts=2, + ) + return model + + +def get_mixed_gp(cat_dims, cont_dims, rs, normalize_y=True): + cat_dims = np.array(cat_dims, dtype=np.int) + cont_dims = np.array(cont_dims, dtype=np.int) + n_dimensions = len(cat_dims) + len(cont_dims) + exp_kernel = MaternKernel( + 2.5, + lengthscale_constraint=Interval( + torch.tensor(np.exp(-6.754111155189306).repeat(cont_dims.shape[-1])), + torch.tensor(np.exp(0.0858637988771976).repeat(cont_dims.shape[-1])), + transform=None, + initial_value=1.0, + ), + ard_num_dims=cont_dims.shape[-1], + active_dims=tuple(cont_dims), + lengthscale_prior=UniformPrior(np.exp(-6.754111155189306), np.exp(0.858637988771976)), + ).double() + + ham_kernel = CategoricalKernel( + lengthscale_constraint=Interval( + torch.tensor(np.exp(-6.754111155189306).repeat(cat_dims.shape[-1])), + torch.tensor(np.exp(0.0858637988771976).repeat(cat_dims.shape[-1])), + transform=None, + initial_value=1.0, + ), + ard_num_dims=cat_dims.shape[-1], + active_dims=tuple(cat_dims), + lengthscale_prior=UniformPrior(np.exp(-6.754111155189306), np.exp(0.858637988771976)), + ).double() + + kernel = ProductKernel(exp_kernel, ham_kernel) + + kernel = ScaleKernel( + kernel, + outputscale_constraint=Interval(np.exp(-10.0), np.exp(2.0), transform=None, initial_value=2.0), + outputscale_prior=LogNormalPrior(0.0, 1.0), + ) + + bounds = [0] * n_dimensions + types = np.zeros(n_dimensions) + for c in cont_dims: + bounds[c] = (0.0, 1.0) + for c in cat_dims: + types[c] = 3 + bounds[c] = (3, np.nan) + + cs = ConfigurationSpace() + for c in cont_dims: + cs.add_hyperparameter(UniformFloatHyperparameter("X%d" % c, 0, 1)) + for c in cat_dims: + cs.add_hyperparameter(CategoricalHyperparameter("X%d" % c, [0, 1, 2, 3])) + + model = GPyTorchGaussianProcess( + configspace=cs, + bounds=bounds, + types=types, + kernel=kernel, + seed=rs.randint(low=1, high=10000), + normalize_y=normalize_y, + ) + return 
model + + +class TestGPGPyTorch(TestGP): + def test_gp_model(self): + rs = np.random.RandomState(1) + X, Y, n_dims = get_cont_data(rs) + model = get_gp(n_dims, rs, normalize_y=True) + self.assertTrue(model.normalize_y) + self.assertIsNone(model.gp) + self.assertEqual(np.shape(model.hypers), (0,)) + self.assertEqual(model.is_trained, False) + self.assertEqual(bool(model.property_dict), False) + + mll = model._get_gp(X, Y) + self.assertIsInstance(mll, ExactMarginalLogLikelihood) + self.assertIsInstance(mll.model, ExactGP) + + def test_likelihood(self): + rs = np.random.RandomState(1) + X, Y, n_dims = get_cont_data(rs) + model = get_gp(n_dims, rs) + self.assertIsInstance(model.likelihood, GaussianLikelihood) + for prior in model.likelihood.named_priors(): + self.assertIsInstance(prior[1].noise_prior, HorseshoePrior) + + for constraint_name, constraint in model.likelihood.named_constraints(): + self.assertIsInstance(constraint, Interval) + np.testing.assert_almost_equal(constraint.lower_bound.numpy(), torch.tensor(np.exp(-25)).numpy()) + np.testing.assert_almost_equal(constraint.upper_bound.numpy(), torch.tensor(np.exp(2)).numpy()) + + self.assertEqual(torch.tensor([0.0]), model.likelihood.raw_noise.data) + noise_level = 1e-3 + model = get_gp(n_dims, rs, noise=1e-3) + self.assertEqual(torch.tensor([noise_level]), model.likelihood.raw_noise.data) + + def test_predict(self): + rs = np.random.RandomState(1) + # cont + X, Y, n_dims = get_cont_data(rs) + # cat + X, Y, cat_dims, cont_dims = get_cat_data(rs) + + for model in (get_gp(n_dims, rs), get_mixed_gp(cat_dims, cont_dims, rs)): + model.train(X[:10], Y[:10]) + m_hat, v_hat = model.predict(X[10:]) + self.assertEqual(m_hat.shape, (10, 1)) + self.assertEqual(v_hat.shape, (10, 1)) + + def test_train_do_optimize(self): + # Check that do_optimize does not mess with the kernel hyperparameters given to the Gaussian process! 
+ rs = np.random.RandomState(1) + X, Y, n_dims = get_cont_data(rs) + + model = get_gp(n_dims, rs) + model._train(X[:10], Y[:10], do_optimize=False) + + fixture = np.array([0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]) + hypers = model.hypers + + np.testing.assert_array_almost_equal(hypers, fixture) + + model._train(X[:10], Y[:10], do_optimize=True) + hypers = model.hypers + self.assertFalse(np.any(hypers == fixture)) + + @unittest.mock.patch("gpytorch.module.Module.pyro_sample_from_prior") + def test_exception_1(self, fit_mock): + # Check that training will not continue sampling if pyro raises an error + class Dummy1: + counter = 0 + + def __call__(self): + self.counter += 1 + raise RuntimeError("Unable to sample new cfgs") + + fit_mock.side_effect = Dummy1() + + rs = np.random.RandomState(1) + X, Y, n_dims = get_cont_data(rs) + + model = get_gp(n_dims, rs) + with self.assertRaises(RuntimeError): + model._train(X[:10], Y[:10], do_optimize=True) + + @unittest.mock.patch("gpytorch.models.exact_gp.ExactGP.__init__") + def test_exception_2(self, fit_mock): + class Dummy2: + counter = 0 + + def __call__(self, train_inputs, train_targets, likelihood): + self.counter += 1 + raise RuntimeError("Unable to initialize a new GP") + + fit_mock.side_effect = Dummy2() + rs = np.random.RandomState(1) + X, Y, n_dims = get_cont_data(rs) + + model = get_gp(n_dims, rs) + with self.assertRaises(RuntimeError): + model._train(X[:10], Y[:10], do_optimize=False) + with self.assertRaises(RuntimeError): + model._get_gp(X[:10], Y[:10]) + + @unittest.mock.patch("gpytorch.mlls.exact_marginal_log_likelihood.ExactMarginalLogLikelihood.forward") + def test_train_continue_on_linalg_error(self, fit_mock): + # Check that training does not stop on a NotPSDError error, but that uncertainty is increased! 
+ class Dummy: + counter = 0 + + def __call__(self, function_dist, target, *params): + if self.counter >= 13: + return None + else: + self.counter += 1 + raise NotPSDError + + fit_mock.side_effect = Dummy() + + rs = np.random.RandomState(1) + X, Y, n_dims = get_cont_data(rs) + + model = get_gp(n_dims, rs) + model._train(X[:10], Y[:10], do_optimize=True) + + fixture = np.array([0.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]) + + hypers = model.hypers + + np.testing.assert_array_almost_equal(hypers, fixture) + + def test_predict_with_actual_values(self): + X = np.array( + [ + [0.0, 0.0, 0.0], + [0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [0.0, 1.0, 1.0], + [1.0, 0.0, 0.0], + [1.0, 0.0, 1.0], + [1.0, 1.0, 0.0], + [1.0, 1.0, 1.0], + ], + dtype=np.float64, + ) + y = np.array([[0.1], [0.2], [9], [9.2], [100.0], [100.2], [109.0], [109.2]], dtype=np.float64) + rs = np.random.RandomState(1) + model = get_gp(3, rs) + model.train(np.vstack((X, X, X, X, X, X, X, X)), np.vstack((y, y, y, y, y, y, y, y))) + + self.assertEqual(model.is_trained, True) + + mu_hat, var_hat = model.predict(X) + for y_i, y_hat_i, mu_hat_i in zip( + y.reshape((1, -1)).flatten(), + mu_hat.reshape((1, -1)).flatten(), + var_hat.reshape((1, -1)).flatten(), + ): + self.assertAlmostEqual(y_hat_i, y_i, delta=2) + self.assertAlmostEqual(mu_hat_i, 0, delta=2) + + # Regression test that performance does not drastically decrease in the near future + mu_hat, var_hat = model.predict(np.array([[10.0, 10.0, 10.0]])) + + self.assertAlmostEqual(mu_hat[0][0], 54.612500000000004) + # There's a slight difference between my local installation and travis + self.assertLess(abs(var_hat[0][0] - 1017.1374468449195), 15) + + # test other covariance results + _, var_fc = model.predict(X, cov_return_type="full_cov") + self.assertEqual(var_fc.shape, (8, 8)) + _, var_sd = model.predict(X, cov_return_type="diagonal_std") + self.assertEqual(var_sd.shape, (8, 1)) + _, var_no = model.predict(np.array([[10.0, 10.0, 10.0]]), cov_return_type=None) + self.assertIsNone(var_no) + # check values + _, var_fc = model.predict(np.array([[10.0, 10.0, 10.0]]), cov_return_type="full_cov") + self.assertAlmostEqual(var_fc[0][0], var_hat[0][0]) + _, var_sd = model.predict(np.array([[10.0, 10.0, 10.0]]), cov_return_type="diagonal_std") + self.assertAlmostEqual(var_sd[0][0] ** 2, var_hat[0][0]) + + _, var_fc = model.predict(np.array([[10.0, 10.0, 10.0], [5.0, 5.0, 5.0]]), cov_return_type="full_cov") + self.assertEqual(var_fc.shape, (2, 2)) + + def test_normalization(self): + super(TestGPGPyTorch, self).test_normalization() + + def test_sampling_shape(self): + X = np.arange(-5, 5, 0.1).reshape((-1, 1)) + X_test = np.arange(-5.05, 5.05, 0.1).reshape((-1, 1)) + for shape in (None, (-1, 1)): + + if shape is None: + y = np.sin(X).flatten() + else: + y = np.sin(X).reshape(shape) + + rng = np.random.RandomState(1) + for gp in ( + get_gp(n_dimensions=1, rs=rng, noise=1e-10, normalize_y=False), + get_gp(n_dimensions=1, rs=rng, noise=1e-10, normalize_y=True), + ): + gp._train(X, y) + func = gp.sample_functions(X_test=X_test, n_funcs=1) + self.assertEqual(func.shape, (101, 1), msg=shape) + func = gp.sample_functions(X_test=X_test, n_funcs=2) + self.assertEqual(func.shape, (101, 2)) diff --git a/tests/test_epm/test_gp_mcmc.py b/tests/test_epm/test_gp_mcmc.py index 9f0afad8f..7d18bf853 100644 --- a/tests/test_epm/test_gp_mcmc.py +++ b/tests/test_epm/test_gp_mcmc.py @@ -5,15 +5,15 @@ import sklearn.model_selection from smac.configspace import ConfigurationSpace, 
UniformFloatHyperparameter -from smac.epm.gaussian_process_mcmc import GaussianProcessMCMC -from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior +from smac.epm.gaussian_process.mcmc import MCMCGaussianProcess +from smac.epm.gaussian_process.utils.prior import HorseshoePrior, LognormalPrior __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" def get_gp(n_dimensions, rs, noise=1e-3, normalize_y=True, average_samples=False, n_iter=50): - from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel + from smac.epm.gaussian_process.kernels import ConstantKernel, Matern, WhiteKernel cov_amp = ConstantKernel( 2.0, @@ -44,7 +44,7 @@ def get_gp(n_dimensions, rs, noise=1e-3, normalize_y=True, average_samples=False for i in range(n_dimensions): configspace.add_hyperparameter(UniformFloatHyperparameter("x%d" % i, 0, 1)) - model = GaussianProcessMCMC( + model = MCMCGaussianProcess( configspace=configspace, types=types, bounds=bounds, @@ -124,7 +124,7 @@ def test_predict(self): self.assertEqual(m_hat.shape, (10, 1)) self.assertEqual(v_hat.shape, (10, 1)) - @unittest.mock.patch.object(GaussianProcessMCMC, "predict") + @unittest.mock.patch.object(MCMCGaussianProcess, "predict") def test_predict_marginalized_over_instances_no_features(self, rf_mock): """The GP should fall back to the regular predict() method.""" diff --git a/tests/test_epm/test_gp_priors.py b/tests/test_epm/test_gp_priors.py index 86cd345b3..e03105647 100644 --- a/tests/test_epm/test_gp_priors.py +++ b/tests/test_epm/test_gp_priors.py @@ -4,7 +4,7 @@ import numpy as np import scipy.optimize -from smac.epm.gp_base_prior import ( +from smac.epm.gaussian_process.utils.prior import ( GammaPrior, HorseshoePrior, LognormalPrior, diff --git a/tests/test_epm/test_lgpga.py b/tests/test_epm/test_lgpga.py new file mode 100644 index 000000000..8c79aeffb --- /dev/null +++ b/tests/test_epm/test_lgpga.py @@ -0,0 +1,240 @@ +from typing import Tuple + +import unittest.mock + +import gpytorch +import numpy as np +import pyro +import torch +from gpytorch.constraints.constraints import Interval +from gpytorch.likelihoods.gaussian_likelihood import GaussianLikelihood +from gpytorch.priors import HorseshoePrior + +from smac.configspace import ConfigurationSpace, UniformFloatHyperparameter +from smac.epm.gaussian_process.augmented import ( + AugmentedLocalGaussianProcess, + GloballyAugmentedLocalGaussianProcess, +) +from smac.epm.gaussian_process.gpytorch import ExactGPModel + +from .test_boing_kernel import generate_kernel, generate_test_data +from .test_gp_gpytorch import TestGPGPyTorch + +torch.manual_seed(0) +pyro.set_rng_seed(0) + + +def generate_lgpga( + kernel, n_dimensions, rs, noise=None, num_inducing=2, normalize_y=True +) -> Tuple[GloballyAugmentedLocalGaussianProcess, ConfigurationSpace]: + if noise is None: + likelihood = None + else: + noise_prior = HorseshoePrior(0.1) + likelihood = GaussianLikelihood( + noise_prior=noise_prior, noise_constraint=Interval(np.exp(-25), np.exp(2), transform=None) + ).double() + likelihood.initialize(noise=noise) + + bounds = [(0.0, 1.0) for _ in range(n_dimensions)] + types = np.zeros(n_dimensions) + + configspace = ConfigurationSpace() + for i in range(n_dimensions): + configspace.add_hyperparameter(UniformFloatHyperparameter("x%d" % i, 0, 1)) + + model = GloballyAugmentedLocalGaussianProcess( + configspace=configspace, + bounds=bounds, + types=types, + bounds_cont=np.array(bounds), + bounds_cat=[], + kernel=kernel, + seed=rs.randint(low=1, high=10000), + 
normalize_y=normalize_y, + likelihood=likelihood, + n_opt_restarts=2, + num_inducing_points=num_inducing, + ) + return model, configspace + + +class TestLGPGA(TestGPGPyTorch): + def setUp(self) -> None: + rs = np.random.RandomState(1) + num_dims = 5 + X_in, Y_in, X_out, Y_out, _ = generate_test_data(rs, num_dims=num_dims, expand_bound=True) + self.kernel = generate_kernel(num_dims) + self.num_dims = num_dims + self.X_in = X_in + self.Y_in = Y_in + self.X_out = X_out + self.Y_out = Y_out + + self.X_all = np.vstack([self.X_in, self.X_out]) + self.Y_all = np.hstack([self.Y_in, self.Y_out]) + self.gp_model, self.cs = generate_lgpga(self.kernel, n_dimensions=num_dims, rs=rs) + + def test_init(self): + np.testing.assert_equal(self.gp_model.cont_dims, np.arange(len(self.cs.get_hyperparameters()))) + np.testing.assert_equal(self.gp_model.cat_dims, np.array([])) + + def test_update_attribute(self): + rs = np.random.RandomState(1) + num_dims = 5 + num_inducing = 2 + gp_model, _ = generate_lgpga(self.kernel, n_dimensions=num_dims, num_inducing=2, rs=rs) + self.assertTrue(gp_model.num_inducing_points, num_inducing) + num_inducing = 4 + + gp_model.update_attribute(num_inducing_points=num_inducing) + self.assertTrue(gp_model.num_inducing_points, num_inducing) + + with self.assertRaises(AttributeError): + gp_model.update_attribute(unknown_param=1) + + def test_get_gp(self): + self.assertIsNone(self.gp_model.gp) + self.gp_model._get_gp(self.X_in, self.Y_in) + self.assertIsInstance(self.gp_model.gp_model, ExactGPModel) + + self.gp_model._get_gp(self.X_in, self.Y_in, self.X_out, self.Y_out) + self.assertIsInstance(self.gp_model.gp_model, AugmentedLocalGaussianProcess) + + # num_outer is not enough, we return to a vanilla GP model + self.gp_model._train(self.X_in, self.Y_in, do_optimize=False) + self.assertIsInstance(self.gp_model.gp_model, ExactGPModel) + + self.gp_model._train(self.X_all, self.Y_all, do_optimize=False) + self.assertIsInstance(self.gp_model.gp_model, AugmentedLocalGaussianProcess) + self.assertFalse(self.gp_model.gp_model.augmented) + self.assertFalse(hasattr(self.gp_model.gp_model, "covar_module")) + + def test_normalize(self): + self.gp_model._train(self.X_all, self.Y_all, do_optimize=False) + y_in_mean = np.mean(self.Y_in) + y_in_std = np.std(self.Y_in) + np.testing.assert_allclose(self.gp_model.gp_model.y_out.numpy(), (self.Y_out - y_in_mean) / y_in_std) + + rs = np.random.RandomState(1) + model_unnormalize, configspace = generate_lgpga(self.kernel, self.num_dims, rs, normalize_y=False) + model_unnormalize._train(self.X_all, self.Y_all, do_optimize=False) + np.testing.assert_allclose(model_unnormalize.gp_model.y_out.numpy(), self.Y_out) + + def test_augmented_gp(self): + X_in = torch.from_numpy(self.X_in) + Y_in = torch.from_numpy(self.Y_in) + X_out = torch.from_numpy(self.X_out) + Y_out = torch.from_numpy(self.Y_out) + + augmented_gp = AugmentedLocalGaussianProcess( + X_in, Y_in, X_out, Y_out, self.gp_model.likelihood, self.kernel + ).double() + exact_gp = ExactGPModel(X_in, Y_in, self.kernel, self.gp_model.likelihood).double() + + # if augmented_gp.augmented is false, it should behave the same as an exact gp + output_agp = augmented_gp(X_in) + output_exact_gp = exact_gp(X_in) + torch.testing.assert_allclose(output_agp.mean, output_exact_gp.mean) + torch.testing.assert_allclose(output_agp.covariance_matrix, output_exact_gp.covariance_matrix) + + augmented_gp.eval() + exact_gp.eval() + output_agp = augmented_gp(X_out) + output_exact_gp = exact_gp(X_out) + + 
torch.testing.assert_allclose(output_agp.mean, output_exact_gp.mean) + torch.testing.assert_allclose(output_agp.covariance_matrix, output_exact_gp.covariance_matrix) + + # now augmented_gp is augmented with inducing points, so it no longer provides the same output as the exact gp + augmented_gp.set_augment_module(X_inducing=torch.ones([1, self.num_dims])) + augmented_gp.eval() + output_agp = augmented_gp(X_out) + self.assertFalse(torch.equal(output_agp.mean, output_exact_gp.mean)) + self.assertFalse(torch.equal(output_agp.covariance_matrix, output_exact_gp.covariance_matrix)) + + @unittest.mock.patch("gpytorch.models.exact_gp.ExactGP.__init__") + def test_exception(self, fit_mock): + # Check that training will not continue sampling if pyro raises an error + class Dummy: + counter = 0 + + def __call__(self): + self.counter += 1 + raise RuntimeError("Unable to sample new cfgs") + + fit_mock.side_effect = Dummy() + + with self.assertRaises(RuntimeError): + self.gp_model._train(self.X_all, self.Y_all, do_optimize=True) + + def test_predict_with_actual_values(self): + self.gp_model._train(self.X_all, self.Y_all, do_optimize=False) + self.assertFalse(hasattr(self.gp_model.gp_model, "covar_module")) + + self.gp_model._train(self.X_all, self.Y_all, do_optimize=True) + self.assertTrue(hasattr(self.gp_model.gp_model, "covar_module")) + + X = np.array( + [ + [0.0, 0.0, 0.0], + [0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [0.0, 1.0, 1.0], + [1.0, 0.0, 0.0], + [1.0, 0.0, 1.0], + [1.0, 1.0, 0.0], + [1.0, 1.0, 1.0], + [-1.0, -1.0, -1.0], + [-1.0, -1.0, 2.0], + [-1.0, 2.0, -1.0], + [-1.0, 2.0, 2.0], + [2.0, -1.0, -1.0], + [2.0, -1.0, 2.0], + [2.0, 2.0, -1.0], + [2.0, 2.0, 2.0], + ], + dtype=np.float64, + ) + y = np.array( + [ + [0.1], + [0.2], + [9], + [9.2], + [100.0], + [100.2], + [109.0], + [109.2], + [1.0], + [1.2], + [14.0], + [14.2], + [110.0], + [111.2], + [129.0], + [129.2], + ], + dtype=np.float64, + ) + rs = np.random.RandomState(1) + num_inducing = 4 + model, _ = generate_lgpga(kernel=generate_kernel(3), n_dimensions=3, rs=rs, num_inducing=num_inducing) + model.train(np.vstack((X, X, X)), np.vstack((y, y, y))) + + self.assertEqual(model.is_trained, True) + + self.assertTrue(hasattr(self.gp_model.gp_model, "covar_module")) + mu_hat, var_hat = model.predict(np.array([[0.5, 0.5, 0.5]])) + + self.assertAlmostEqual(mu_hat[0][0], 54.612500000000004) + # There's a slight difference between my local installation and travis + self.assertLess(abs(var_hat[0][0] - 1026.149240121437), 15) + + def test_variational_inference(self): + # test that variational inference is actually called + # https://github.com/cornellius-gp/gpytorch/blob/master/test/kernels/test_inducing_point_kernel.py#L45 + _wrapped_ps = unittest.mock.MagicMock(wraps=gpytorch.variational.TrilNaturalVariationalDistribution) + with unittest.mock.patch("gpytorch.variational.TrilNaturalVariationalDistribution", new=_wrapped_ps) as rf_mock: + self.gp_model._train(self.X_all, self.Y_all, do_optimize=True) + self.assertTrue(rf_mock.called) diff --git a/tests/test_epm/test_rf_with_instances.py b/tests/test_epm/test_rf_with_instances.py index f545858f6..931d1c4d1 100644 --- a/tests/test_epm/test_rf_with_instances.py +++ b/tests/test_epm/test_rf_with_instances.py @@ -12,8 +12,8 @@ import smac import smac.configspace -from smac.epm.rf_with_instances import RandomForestWithInstances -from smac.epm.util_funcs import get_types +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances +from smac.epm.utils import get_types __copyright__ = "Copyright
2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" diff --git a/tests/test_epm/test_uncorrelated_mo_rf_with_instances.py b/tests/test_epm/test_uncorrelated_mo_rf_with_instances.py index e3d44cee1..890b16bf0 100644 --- a/tests/test_epm/test_uncorrelated_mo_rf_with_instances.py +++ b/tests/test_epm/test_uncorrelated_mo_rf_with_instances.py @@ -4,10 +4,8 @@ import numpy as np import smac.configspace -from smac.epm.rf_with_instances import RandomForestWithInstances -from smac.epm.uncorrelated_mo_rf_with_instances import ( - UncorrelatedMultiObjectiveRandomForestWithInstances, -) +from smac.epm.random_forest.rf_mo import MultiObjectiveRandomForest +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" @@ -24,7 +22,7 @@ def test_train_and_predict_with_rf(self): rs = np.random.RandomState(1) X = rs.rand(20, 10) Y = rs.rand(10, 2) - model = UncorrelatedMultiObjectiveRandomForestWithInstances( + model = MultiObjectiveRandomForest( configspace=self._get_cs(10), target_names=["cost", "ln(runtime)"], types=np.zeros((10,), dtype=np.uint), @@ -74,7 +72,7 @@ def __call__(self, X): rs = np.random.RandomState(1) X = rs.rand(20, 10) Y = rs.rand(10, 3) - model = UncorrelatedMultiObjectiveRandomForestWithInstances( + model = MultiObjectiveRandomForest( target_names=["cost", "ln(runtime)", "foo"], configspace=self._get_cs(10), types=np.zeros((10,), dtype=np.uint), diff --git a/tests/test_epm/test_util_funcs.py b/tests/test_epm/test_util_funcs.py index 1eeef1b31..476ddc589 100644 --- a/tests/test_epm/test_util_funcs.py +++ b/tests/test_epm/test_util_funcs.py @@ -10,7 +10,7 @@ UniformIntegerHyperparameter, ) -from smac.epm.util_funcs import get_types +from smac.epm.utils import check_subspace_points, get_types __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" @@ -61,3 +61,55 @@ def test_get_types_with_inactive(self): np.testing.assert_array_equal(bounds[4], [0, 2]) self.assertEqual(bounds[5][0], 3) self.assertFalse(np.isfinite(bounds[5][1])) + + def test_check_subspace_points(self): + # 1D array + np.testing.assert_equal([True], check_subspace_points(np.array([0.5, 0.5]))) + bounds_cont_base = np.array([0.0, 1.0]) + X_cont = np.array([[0.5, 0.8], [-0.2, 0.2], [-0.7, 0.3]]) + np.testing.assert_equal(check_subspace_points(X_cont), [True] * len(X_cont)) + cont_dims = np.arange(X_cont.shape[-1]) + + with self.assertRaises(ValueError): + # bounds_cont missing + check_subspace_points(X_cont, cont_dims=cont_dims) + + with self.assertRaises(ValueError): + # bounds_cont does not match + check_subspace_points(X_cont, cont_dims=cont_dims, bounds_cont=bounds_cont_base) + + bounds_cont = np.tile(bounds_cont_base, [X_cont.shape[-1], 1]) + + np.testing.assert_equal( + check_subspace_points(X_cont, cont_dims=cont_dims, bounds_cont=bounds_cont), [True, False, False] + ) + np.testing.assert_equal( + check_subspace_points(X_cont, cont_dims=cont_dims, bounds_cont=bounds_cont, expand_bound=True), + [True, True, False], + ) + + # categorical hps + X_cat = np.array([[0, 1], [2, 1], [1, 4]]) + cat_dims = np.arange(X_cat.shape[-1]) + + bounds_cat = [(0, 2), (1, 4)] + + with self.assertRaises(ValueError): + # bounds_cont missing + check_subspace_points(X_cat, cat_dims=cat_dims) + + with self.assertRaises(ValueError): + # bounds_cat doe not match + check_subspace_points(X_cat, cat_dims=cat_dims, bounds_cat=[(0, 1)]) + + np.testing.assert_equal( + 
check_subspace_points(X_cat, cat_dims=cat_dims, bounds_cat=bounds_cat), [True, True, False] + ) + + # cat + cont + X_mix = np.hstack([X_cont, X_cat]) + cat_dims += len(cont_dims) + ss_mix = check_subspace_points( + X_mix, cont_dims=cont_dims, cat_dims=cat_dims, bounds_cont=bounds_cont, bounds_cat=bounds_cat + ) + np.testing.assert_equal(ss_mix, [True, False, False]) diff --git a/tests/test_facade/test_boing_facade.py b/tests/test_facade/test_boing_facade.py new file mode 100644 index 000000000..07658e137 --- /dev/null +++ b/tests/test_facade/test_boing_facade.py @@ -0,0 +1,61 @@ +import shutil +import unittest +from contextlib import suppress + +from ConfigSpace import ConfigurationSpace +from ConfigSpace.conditions import EqualsCondition +from ConfigSpace.forbidden import ( + ForbiddenAndConjunction, + ForbiddenEqualsClause, + ForbiddenInClause, +) +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, +) + +from smac.facade.smac_boing_facade import SMAC4BOING +from smac.optimizer.configuration_chooser.boing_chooser import BOinGChooser +from smac.scenario.scenario import Scenario + + +def rosenbrock_2d(x): + x0 = x["x0"] + x1 = x["x1"] + return 100.0 * (x1 - x0**2.0) ** 2.0 + (1 - x0) ** 2.0 + + +class TestSMAC4BOinGFacade(unittest.TestCase): + def setUp(self) -> None: + cs = ConfigurationSpace() + x0 = UniformFloatHyperparameter("x0", -5, 10, default_value=-3) + x1 = UniformFloatHyperparameter("x1", -5, 10, default_value=-4) + x2 = CategoricalHyperparameter("x2", [0, 1], default_value=0) + x3 = UniformFloatHyperparameter("x3", -5, 10, default_value=-4) + cs.add_hyperparameters([x0, x1, x2, x3]) + cs.add_condition(EqualsCondition(x3, x2, 0)) + cs.add_forbidden_clause(ForbiddenAndConjunction(ForbiddenInClause(x2, [0, 1]), ForbiddenEqualsClause(x0, 0.1))) + # Scenario object + scenario = Scenario({"run_obj": "quality", "runcount-limit": 10, "cs": cs, "deterministic": "true"}) + self.scenario = scenario + self.output_dirs = [] + + def tearDown(self): + shutil.rmtree("run_1", ignore_errors=True) + for i in range(20): + with suppress(Exception): + dirname = "run_1" + (".OLD" * i) + shutil.rmtree(dirname) + for output_dir in self.output_dirs: + if output_dir: + shutil.rmtree(output_dir, ignore_errors=True) + + def test_smac4boing(self): + + smac = SMAC4BOING( + scenario=self.scenario, + tae_runner=rosenbrock_2d, + ) + smac.optimize() + self.assertIsInstance(smac.solver.epm_chooser, BOinGChooser) + self.output_dirs.append(smac.scenario.output_dir) diff --git a/tests/test_facade/test_hydra_facade.py b/tests/test_facade/test_hydra_facade.py index f0c405557..c97ab2926 100644 --- a/tests/test_facade/test_hydra_facade.py +++ b/tests/test_facade/test_hydra_facade.py @@ -27,7 +27,7 @@ def __init__(self, **kwargs): MOCKCALLS += 1 def optimize(self): - return np.array(self.scenario.cs.sample_configuration(self.n_optimizers)) + return np.array(self.scenario.cs.sample_configuration(self.n_workers)) def get_best_incumbents_ids(self, incs): cost_per_conf_v = cost_per_conf_e = {} diff --git a/tests/test_facade/test_psmac_facade.py b/tests/test_facade/test_psmac_facade.py index b369e523f..0ac3b2558 100644 --- a/tests/test_facade/test_psmac_facade.py +++ b/tests/test_facade/test_psmac_facade.py @@ -1,3 +1,4 @@ +import os import glob import shutil import unittest @@ -22,30 +23,36 @@ def run(self): # mock call such that we don't have to test with real algorithm class TestPSMACFacade(unittest.TestCase): def setUp(self): + base_directory = os.path.split(__file__)[0] + 
base_directory = os.path.abspath(os.path.join(base_directory, "../../tests", "..")) + os.chdir(base_directory) self.output_dirs = [] fn = "tests/test_files/spear_hydra_test_scenario.txt" + fn = "tests/test_files/test_deterministic_scenario.txt" self.scenario = Scenario(fn) self.scenario.limit_resources = True @patch("smac.facade.smac_ac_facade.SMBO", new=MockSMBO) def test_psmac(self): - # TODO: Fix tests - """ import joblib - from smac.facade.experimental.psmac_facade import PSMAC - - with joblib.parallel_backend("multiprocessing", n_jobs=1): - optimizer = PSMAC(self.scenario, n_optimizers=3, n_incs=2, validate=False) - incs = optimizer.optimize() - self.assertEqual(len(incs), 2) - optimizer = PSMAC(self.scenario, n_optimizers=1, n_incs=4, validate=False) - incs = optimizer.optimize() - self.assertEqual(len(incs), 2) - optimizer = PSMAC(self.scenario, n_optimizers=5, n_incs=4, validate=False) - incs = optimizer.optimize() - self.assertEqual(len(incs), 4) - """ - ... + from smac.facade.psmac_facade import PSMAC + from smac.facade.smac_ac_facade import SMAC4AC + from smac.facade.smac_bb_facade import SMAC4BB + from smac.facade.smac_hpo_facade import SMAC4HPO + from smac.facade.smac_mf_facade import SMAC4MF + + facades = [None, SMAC4AC, SMAC4BB, SMAC4HPO, SMAC4MF] + n_workers_list = [1, 2, 3, 4] + n_facades = len(facades) + target = {"x1": 7.290709845323256, "x2": 10.285684762665337} + for i, facade in enumerate(facades): + for j, n_workers in enumerate(n_workers_list): + idx = n_facades * i + j + with self.subTest(i=idx): + with joblib.parallel_backend("multiprocessing", n_jobs=1): + optimizer = PSMAC(self.scenario, facade_class=facade, n_workers=n_workers, validate=False) + inc = optimizer.optimize() + self.assertDictEqual(target, dict(inc)) def tearDown(self): hydras = glob.glob1(".", "psmac*") diff --git a/tests/test_facade/test_smac_facade.py b/tests/test_facade/test_smac_facade.py index 39fa75f40..51fe23758 100644 --- a/tests/test_facade/test_smac_facade.py +++ b/tests/test_facade/test_smac_facade.py @@ -11,11 +11,9 @@ from smac.callbacks import IncorporateRunResultCallback from smac.configspace import ConfigurationSpace from smac.epm.random_epm import RandomEPM -from smac.epm.rf_with_instances import RandomForestWithInstances -from smac.epm.uncorrelated_mo_rf_with_instances import ( - UncorrelatedMultiObjectiveRandomForestWithInstances, -) -from smac.epm.util_funcs import get_rng +from smac.epm.random_forest.rf_mo import MultiObjectiveRandomForest +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances +from smac.epm.utils import get_rng from smac.facade.smac_ac_facade import SMAC4AC from smac.initial_design.default_configuration_design import DefaultConfiguration from smac.initial_design.factorial_design import FactorialInitialDesign @@ -27,7 +25,10 @@ from smac.intensification.intensification import Intensifier from smac.intensification.successive_halving import SuccessiveHalving from smac.optimizer.acquisition import EI, EIPS, LCB -from smac.optimizer.random_configuration_chooser import ChooserNoCoolDown, ChooserProb +from smac.optimizer.configuration_chooser.random_chooser import ( + ChooserNoCoolDown, + ChooserProb, +) from smac.runhistory.runhistory import RunHistory from smac.runhistory.runhistory2epm import ( RunHistory2EPM4Cost, @@ -316,19 +317,19 @@ def test_init_EIPS_as_arguments(self): self.scenario.run_obj = objective smbo = SMAC4AC( self.scenario, - model=UncorrelatedMultiObjectiveRandomForestWithInstances, + model=MultiObjectiveRandomForest, 
model_kwargs={"target_names": ["a", "b"], "model_kwargs": {"seed": 1}}, acquisition_function=EIPS, runhistory2epm=RunHistory2EPM4EIPS, ).solver self.assertIsInstance( smbo.epm_chooser.model, - UncorrelatedMultiObjectiveRandomForestWithInstances, + MultiObjectiveRandomForest, ) self.assertIsInstance(smbo.epm_chooser.acquisition_func, EIPS) self.assertIsInstance( smbo.epm_chooser.acquisition_func.model, - UncorrelatedMultiObjectiveRandomForestWithInstances, + MultiObjectiveRandomForest, ) self.assertIsInstance(smbo.epm_chooser.rh2EPM, RunHistory2EPM4EIPS) @@ -404,7 +405,7 @@ def test_check_random_states(self, patch): self.assertEqual(run_id, 2505) self.assertIs(rng_1, rs) - @unittest.mock.patch("smac.optimizer.ei_optimization.get_one_exchange_neighbourhood") + @unittest.mock.patch("smac.optimizer.acquisition.maximizer.get_one_exchange_neighbourhood") def test_check_deterministic_rosenbrock(self, patch): # Make SMAC a bit faster diff --git a/tests/test_files/example_run/configspace.json b/tests/test_files/example_run/configspace.json new file mode 100644 index 000000000..ebe9cb128 --- /dev/null +++ b/tests/test_files/example_run/configspace.json @@ -0,0 +1,24 @@ +{ + "hyperparameters": [ + { + "name": "x0", + "type": "uniform_float", + "log": false, + "lower": -5.0, + "upper": 10.0, + "default": -3.0 + }, + { + "name": "x1", + "type": "uniform_float", + "log": false, + "lower": -5.0, + "upper": 10.0, + "default": -4.0 + } + ], + "conditions": [], + "forbiddens": [], + "python_module_version": "0.5.0", + "json_format_version": 0.3 +} \ No newline at end of file diff --git a/tests/test_files/example_run/configspace.pcs b/tests/test_files/example_run/configspace.pcs new file mode 100644 index 000000000..149264b36 --- /dev/null +++ b/tests/test_files/example_run/configspace.pcs @@ -0,0 +1,2 @@ +x0 real [-5.0, 10.0] [-3.0] +x1 real [-5.0, 10.0] [-4.0] \ No newline at end of file diff --git a/tests/test_files/example_run/runhistory.json b/tests/test_files/example_run/runhistory.json new file mode 100644 index 000000000..8d03b198d --- /dev/null +++ b/tests/test_files/example_run/runhistory.json @@ -0,0 +1,467 @@ +{ + "data": [ + [ + [ + 1, + null, + 0, + 0.0 + ], + [ + 35306.427514114344, + 7.3909759521484375e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580052.286752, + 1656580052.2867658, + {} + ] + ], + [ + [ + 2, + null, + 0, + 0.0 + ], + [ + 4790.9918833769925, + 9.298324584960938e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580052.2892885, + 1656580052.2893078, + {} + ] + ], + [ + [ + 4, + null, + 0, + 0.0 + ], + [ + 16916.0, + 8.821487426757812e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580052.2927234, + 1656580052.2927403, + {} + ] + ], + [ + [ + 6, + null, + 0, + 0.0 + ], + [ + 10608.016312533568, + 1.2874603271484375e-05, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580052.2975984, + 1656580052.2976253, + {} + ] + ], + [ + [ + 8, + null, + 0, + 0.0 + ], + [ + 353670.81639177905, + 8.58306884765625e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580052.3019018, + 1656580052.30192, + {} + ] + ], + [ + [ + 9, + null, + 0, + 0.0 + ], + [ + 586130.2616971313, + 7.867813110351562e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580052.3053734, + 1656580052.3053882, + {} + ] + ], + [ + [ + 11, + null, + 0, + 0.0 + ], + [ + 3069.4098218212166, + 7.62939453125e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580053.0159395, + 1656580053.0159583, + {} + ] + ], + [ + [ + 13, + null, + 0, + 0.0 + ], + [ + 3069.4378507187926, + 
7.867813110351562e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580053.0301464, + 1656580053.0301633, + {} + ] + ], + [ + [ + 14, + null, + 0, + 0.0 + ], + [ + 22453.13089714366, + 9.5367431640625e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580054.1407666, + 1656580054.1407883, + {} + ] + ], + [ + [ + 17, + null, + 0, + 0.0 + ], + [ + 1122.0405239677336, + 1.239776611328125e-05, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580055.4874706, + 1656580055.4875, + {} + ] + ], + [ + [ + 18, + null, + 0, + 0.0 + ], + [ + 1114.7602703228965, + 1.4066696166992188e-05, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580055.5124798, + 1656580055.51251, + {} + ] + ], + [ + [ + 19, + null, + 0, + 0.0 + ], + [ + 1125.9942517107133, + 7.152557373046875e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580055.5299509, + 1656580055.529967, + {} + ] + ], + [ + [ + 20, + null, + 0, + 0.0 + ], + [ + 276.63099176419354, + 7.3909759521484375e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580056.6161146, + 1656580056.6161325, + {} + ] + ], + [ + [ + 22, + null, + 0, + 0.0 + ], + [ + 292.9345783569001, + 1.2874603271484375e-05, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580056.638499, + 1656580056.6385262, + {} + ] + ], + [ + [ + 23, + null, + 0, + 0.0 + ], + [ + 16.631655546321536, + 8.58306884765625e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580057.808115, + 1656580057.8081357, + {} + ] + ], + [ + [ + 25, + null, + 0, + 0.0 + ], + [ + 16.655482927330727, + 7.62939453125e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580057.8390152, + 1656580057.8390324, + {} + ] + ], + [ + [ + 26, + null, + 0, + 0.0 + ], + [ + 5921.25150782499, + 7.62939453125e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580059.0303519, + 1656580059.0303702, + {} + ] + ], + [ + [ + 30, + null, + 0, + 0.0 + ], + [ + 37.17181126173809, + 7.62939453125e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580060.185514, + 1656580060.185532, + {} + ] + ], + [ + [ + 32, + null, + 0, + 0.0 + ], + [ + 5052.653708552866, + 6.9141387939453125e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580061.4077194, + 1656580061.407737, + {} + ] + ], + [ + [ + 35, + null, + 0, + 0.0 + ], + [ + 499.63250389257433, + 7.152557373046875e-06, + { + "__enum__": "StatusType.SUCCESS" + }, + 1656580062.6834888, + 1656580062.6835065, + {} + ] + ] + ], + "config_origins": { + "1": "Sobol", + "2": "Sobol", + "6": "Sobol", + "8": "Sobol", + "9": "Sobol", + "11": "Local Search", + "13": "Local Search", + "14": "Local Search", + "17": "Local Search", + "18": "Local Search", + "19": "Local Search", + "20": "Local Search", + "22": "Local Search", + "23": "Local Search", + "25": "Local Search", + "26": "Local Search", + "30": "Local Search", + "32": "Local Search", + "35": "Local Search" + }, + "configs": { + "1": { + "x0": 3.9592544734477997, + "x1": -3.1119782524183393 + }, + "2": { + "x0": 0.18435552716255188, + "x1": 6.955205500125885 + }, + "4": { + "x0": -3.0, + "x1": -4.0 + }, + "6": { + "x0": -3.3045367896556854, + "x1": 0.6294399499893188 + }, + "8": { + "x0": 7.916896343231201, + "x1": 3.2110410928726196 + }, + "9": { + "x0": 8.741189241409302, + "x1": -0.1468414068222046 + }, + "11": { + "x0": -0.7338459842998875, + "x1": -4.998981786821483 + }, + "13": { + "x0": -0.7350118937940833, + "x1": -4.997290887842019 + }, + "14": { + "x0": -4.996274385480495, + "x1": 9.99039132530636 + }, + "17": { + "x0": 2.50241005426406, + "x1": 2.9157397646922227 + }, + "18": { + "x0": 2.501623772758304, + 
"x1": 2.922697394910098 + }, + "19": { + "x0": 2.5033081137958684, + "x1": 2.9143368469972177 + }, + "20": { + "x0": -2.894621968919399, + "x1": 9.995817825477454 + }, + "22": { + "x0": -2.8858075002583674, + "x1": 9.994723488300119 + }, + "23": { + "x0": -0.9143679946679786, + "x1": 1.1961639660928434 + }, + "25": { + "x0": -0.9140660380949015, + "x1": 1.1959585915023512 + }, + "26": { + "x0": 1.5181997972097419, + "x1": 9.999723004029732 + }, + "30": { + "x0": 2.6077548954523264, + "x1": 7.388492179125169 + }, + "32": { + "x0": 2.277884729585529, + "x1": -1.918294499713205 + }, + "35": { + "x0": 0.8904953064181615, + "x1": 3.0282011475923767 + } + } +} \ No newline at end of file diff --git a/tests/test_files/example_run/scenario.txt b/tests/test_files/example_run/scenario.txt new file mode 100644 index 000000000..9086d36c5 --- /dev/null +++ b/tests/test_files/example_run/scenario.txt @@ -0,0 +1,14 @@ +execdir = . +deterministic = True +run_obj = quality +multi_objectives = cost +overall_obj = par10 +save_results_instantly = True +par_factor = 10 +cost_for_crash = 2147483647.0 +algo_runs_timelimit = inf +wallclock_limit = inf +always_race_default = False +ta_run_limit = 20.0 +initial_incumbent = DEFAULT +pcs_fn = psmac3-output_2022-06-30_11:07:32_274878/run_0/configspace.json diff --git a/tests/test_files/example_run/stats.json b/tests/test_files/example_run/stats.json new file mode 100644 index 000000000..7cc47ef5e --- /dev/null +++ b/tests/test_files/example_run/stats.json @@ -0,0 +1 @@ +{"submitted_ta_runs": 20, "finished_ta_runs": 20, "n_configs": 35, "wallclock_time_used": 10.399263620376587, "ta_time_used": 0.000179290771484375, "inc_changed": 7, "_n_configs_per_intensify": 0, "_n_calls_of_intensify": 14, "_ema_n_configs_per_intensifiy": 0.0, "_EMA_ALPHA": 0.2} \ No newline at end of file diff --git a/tests/test_files/example_run/traj.json b/tests/test_files/example_run/traj.json new file mode 100644 index 000000000..90e9fdac4 --- /dev/null +++ b/tests/test_files/example_run/traj.json @@ -0,0 +1,8 @@ +{"cpu_time": 0.0, "wallclock_time": 0.0006117820739746094, "evaluations": 0, "cost": 2147483648.0, "incumbent": {"x0": 3.9592544734477997, "x1": -3.1119782524183393}, "budget": 0, "origin": "Sobol"} +{"cpu_time": 7.3909759521484375e-06, "wallclock_time": 0.0018901824951171875, "evaluations": 1, "cost": 35306.4275, "incumbent": {"x0": 3.9592544734477997, "x1": -3.1119782524183393}, "budget": 0, "origin": "Sobol"} +{"cpu_time": 1.6689300537109375e-05, "wallclock_time": 0.004310131072998047, "evaluations": 2, "cost": 4790.9919, "incumbent": {"x0": 0.18435552716255188, "x1": 6.955205500125885}, "budget": 0, "origin": "Sobol"} +{"cpu_time": 6.246566772460938e-05, "wallclock_time": 0.7310254573822021, "evaluations": 7, "cost": 3069.4098, "incumbent": {"x0": -0.7338459842998875, "x1": -4.998981786821483}, "budget": 0, "origin": "Local Search"} +{"cpu_time": 9.226799011230469e-05, "wallclock_time": 3.2085988521575928, "evaluations": 10, "cost": 1122.0405, "incumbent": {"x0": 2.50241005426406, "x1": 2.9157397646922227}, "budget": 0, "origin": "Local Search"} +{"cpu_time": 0.00010633468627929688, "wallclock_time": 3.2276248931884766, "evaluations": 11, "cost": 1114.7603, "incumbent": {"x0": 2.501623772758304, "x1": 2.922697394910098}, "budget": 0, "origin": "Local Search"} +{"cpu_time": 0.00012087821960449219, "wallclock_time": 4.331298589706421, "evaluations": 13, "cost": 276.631, "incumbent": {"x0": -2.894621968919399, "x1": 9.995817825477454}, "budget": 0, "origin": "Local Search"} 
+{"cpu_time": 0.0001423358917236328, "wallclock_time": 5.523259878158569, "evaluations": 15, "cost": 16.6317, "incumbent": {"x0": -0.9143679946679786, "x1": 1.1961639660928434}, "budget": 0, "origin": "Local Search"} diff --git a/tests/test_files/example_run/traj_aclib2.json b/tests/test_files/example_run/traj_aclib2.json new file mode 100644 index 000000000..f8bd47c30 --- /dev/null +++ b/tests/test_files/example_run/traj_aclib2.json @@ -0,0 +1,8 @@ +{"cpu_time": 0.0, "wallclock_time": 0.0006117820739746094, "evaluations": 0, "cost": 2147483648.0, "incumbent": ["x0='3.9592544734477997'", "x1='-3.1119782524183393'"], "origin": "Sobol"} +{"cpu_time": 7.3909759521484375e-06, "wallclock_time": 0.0018901824951171875, "evaluations": 1, "cost": 35306.4275, "incumbent": ["x0='3.9592544734477997'", "x1='-3.1119782524183393'"], "origin": "Sobol"} +{"cpu_time": 1.6689300537109375e-05, "wallclock_time": 0.004310131072998047, "evaluations": 2, "cost": 4790.9919, "incumbent": ["x0='0.18435552716255188'", "x1='6.955205500125885'"], "origin": "Sobol"} +{"cpu_time": 6.246566772460938e-05, "wallclock_time": 0.7310254573822021, "evaluations": 7, "cost": 3069.4098, "incumbent": ["x0='-0.7338459842998875'", "x1='-4.998981786821483'"], "origin": "Local Search"} +{"cpu_time": 9.226799011230469e-05, "wallclock_time": 3.2085988521575928, "evaluations": 10, "cost": 1122.0405, "incumbent": ["x0='2.50241005426406'", "x1='2.9157397646922227'"], "origin": "Local Search"} +{"cpu_time": 0.00010633468627929688, "wallclock_time": 3.2276248931884766, "evaluations": 11, "cost": 1114.7603, "incumbent": ["x0='2.501623772758304'", "x1='2.922697394910098'"], "origin": "Local Search"} +{"cpu_time": 0.00012087821960449219, "wallclock_time": 4.331298589706421, "evaluations": 13, "cost": 276.631, "incumbent": ["x0='-2.894621968919399'", "x1='9.995817825477454'"], "origin": "Local Search"} +{"cpu_time": 0.0001423358917236328, "wallclock_time": 5.523259878158569, "evaluations": 15, "cost": 16.6317, "incumbent": ["x0='-0.9143679946679786'", "x1='1.1961639660928434'"], "origin": "Local Search"} diff --git a/tests/test_files/example_run/traj_old.csv b/tests/test_files/example_run/traj_old.csv new file mode 100644 index 000000000..e664f399f --- /dev/null +++ b/tests/test_files/example_run/traj_old.csv @@ -0,0 +1,9 @@ +"CPU Time Used","Estimated Training Performance","Wallclock Time","Incumbent ID","Automatic Configurator (CPU) Time","Configuration..." 
+0.000000, 2147483648.000000, 0.000612, 1, 0.000612, x0='3.9592544734477997',x1='-3.1119782524183393' +0.000007, 35306.427500, 0.001890, 1, 0.001883, x0='3.9592544734477997',x1='-3.1119782524183393' +0.000017, 4790.991900, 0.004310, 2, 0.004293, x0='0.18435552716255188',x1='6.955205500125885' +0.000062, 3069.409800, 0.731025, 3, 0.730963, x0='-0.7338459842998875',x1='-4.998981786821483' +0.000092, 1122.040500, 3.208599, 4, 3.208507, x0='2.50241005426406',x1='2.9157397646922227' +0.000106, 1114.760300, 3.227625, 5, 3.227519, x0='2.501623772758304',x1='2.922697394910098' +0.000121, 276.631000, 4.331299, 6, 4.331178, x0='-2.894621968919399',x1='9.995817825477454' +0.000142, 16.631700, 5.523260, 7, 5.523118, x0='-0.9143679946679786',x1='1.1961639660928434' diff --git a/tests/test_local_bo/__init__.py b/tests/test_local_bo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_local_bo/test_abstract_subspace.py b/tests/test_local_bo/test_abstract_subspace.py new file mode 100644 index 000000000..afb8d1117 --- /dev/null +++ b/tests/test_local_bo/test_abstract_subspace.py @@ -0,0 +1,470 @@ +import unittest + +import numpy as np +from ConfigSpace import Configuration, ConfigurationSpace +from ConfigSpace.forbidden import ( + ForbiddenAndConjunction, + ForbiddenEqualsClause, + ForbiddenInClause, +) +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + OrdinalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) + +from smac.epm.utils import check_subspace_points, get_types +from smac.optimizer.subspaces import ChallengerListLocal, LocalSubspace + + +def generate_cont_hps(): + hp1 = UniformIntegerHyperparameter("non_log_uniform_int", lower=0, upper=100, log=False) + hp2 = UniformIntegerHyperparameter("log_uniform_int", lower=1, upper=100, log=True) + + hp3 = UniformFloatHyperparameter("non_log_uniform_float", lower=0.0, upper=100.0, log=False) + hp4 = UniformFloatHyperparameter("log_uniform_float", lower=1.0, upper=100.0, log=True) + return [hp1, hp2, hp3, hp4] + + +def generate_ord_hps(): + hp1 = OrdinalHyperparameter("ord_hp_1", sequence=[1, 2, 3, 4, 5, 6, 7]) + return [hp1] + + +def generate_cat_hps(num_hps: int = 1): + hps = [] + for i in range(num_hps): + hp = CategoricalHyperparameter(f"cat_hp_{i}", choices=["a", "b", "c", "d"]) + hps.append(hp) + return hps + + +def generate_ss_bounds(cs): + bounds_ss_cont = [] + for hp in cs.get_hyperparameters(): + if isinstance(hp, (UniformIntegerHyperparameter, UniformFloatHyperparameter)): + if hp.log: + bounds_ss_cont.append((0.1, 0.9)) + else: + bounds_ss_cont.append((0.05, 0.95)) + if isinstance(hp, OrdinalHyperparameter): + bounds_ss_cont.append((1, 4)) + return bounds_ss_cont + + +@unittest.mock.patch.multiple(LocalSubspace, __abstractmethods__=set()) +class TestAbstachSubSpace(unittest.TestCase): + def test_cs_subspace_1(self): + cs = ConfigurationSpace() + hps = generate_cont_hps() + hps.extend(generate_ord_hps()) + hps.extend(generate_cat_hps()) + cs.add_hyperparameters(hps) + + types, bounds = get_types(cs) + bounds_ss_cont = np.array(generate_ss_bounds(cs)) + + bounds_ss_cat = [(1, 3)] + subspace = LocalSubspace( + config_space=cs, + bounds=bounds, + hps_types=types, + bounds_ss_cont=bounds_ss_cont, + bounds_ss_cat=bounds_ss_cat, + model_local=None, + ) + + for hp in subspace.cs_local.get_hyperparameters(): + if isinstance(hp, CategoricalHyperparameter): + # for categorical hps, we set bound as 1, 3, i.e., the 2nd and 4th element should be selected for + # building a 
subspace + self.assertTrue(hp.choices == ("b", "d")) + + elif isinstance(hp, OrdinalHyperparameter): + # for ordinal hps, we set bound as (1,3), i.e., we select the 2nd, 3rd, 4th values of + # the ordinal sequence + self.assertTrue(hp.sequence, (2, 3, 4)) + elif isinstance(hp, UniformFloatHyperparameter): + if hp.log: + raw_hp_range = (1, 100) + # we map from [0., 1.] to [0.1, 0.9] + raw_hp_range_log = np.log(raw_hp_range) + new_hp_range_log_lower = 0.1 * (raw_hp_range_log[1] - raw_hp_range_log[0]) + raw_hp_range_log[0] + new_hp_range_log_upper = 0.9 * (raw_hp_range_log[1] - raw_hp_range_log[0]) + raw_hp_range_log[0] + + new_range = np.exp([new_hp_range_log_lower, new_hp_range_log_upper]) + np.testing.assert_almost_equal([hp.lower, hp.upper], new_range) + else: + raw_hp_range = (0, 100) + new_hp_range_lower = 0.05 * (raw_hp_range[1] - raw_hp_range[0]) + raw_hp_range[0] + new_hp_range_upper = 0.95 * (raw_hp_range[1] - raw_hp_range[0]) + raw_hp_range[0] + new_range = [new_hp_range_lower, new_hp_range_upper] + + np.testing.assert_almost_equal([hp.lower, hp.upper], new_range) + elif isinstance(hp, UniformIntegerHyperparameter): + if hp.log: + raw_hp_range = (1, 100) + raw_hp_range_log = np.log(raw_hp_range) + new_hp_range_log_lower = 0.1 * (raw_hp_range_log[1] - raw_hp_range_log[0]) + raw_hp_range_log[0] + new_hp_range_log_upper = 0.9 * (raw_hp_range_log[1] - raw_hp_range_log[0]) + raw_hp_range_log[0] + new_range[0] = np.floor(np.exp(new_hp_range_log_lower)) + new_range[1] = np.ceil(np.exp(new_hp_range_log_upper)) + new_range = np.asarray(new_range, dtype=np.int32) + np.testing.assert_equal([hp.lower, hp.upper], new_range) + else: + raw_hp_range = (0, 100) + new_hp_range_lower = 0.05 * (raw_hp_range[1] - raw_hp_range[0]) + raw_hp_range[0] + new_hp_range_upper = 0.95 * (raw_hp_range[1] - raw_hp_range[0]) + raw_hp_range[0] + new_range[0] = np.floor(new_hp_range_lower) + new_range[1] = np.ceil(new_hp_range_upper) + new_range = np.asarray(new_range, dtype=np.int32) + np.testing.assert_equal([hp.lower, hp.upper], new_range) + + def test_cs_subspace_2(self): + # check act_dims + cs = ConfigurationSpace() + hps = generate_cont_hps() + hps.extend(generate_ord_hps()) + hps.extend(generate_cat_hps(2)) + + cs.add_hyperparameters(hps) + + types, bounds = get_types(cs) + bounds_ss_cont = np.array(generate_ss_bounds(cs)) + + bounds_ss_cont = np.array(bounds_ss_cont) + + bounds_ss_cat = [(1, 3), (0, 2)] + + activ_dims = [0, 2, 6] + + subspace = LocalSubspace( + config_space=cs, + bounds=bounds, + hps_types=types, + bounds_ss_cont=bounds_ss_cont, + bounds_ss_cat=bounds_ss_cat, + model_local=None, + activate_dims=activ_dims, + ) + np.testing.assert_equal(subspace.activate_dims_cat, [0]) + np.testing.assert_equal(subspace.activate_dims_cont, [0, 4]) + + hps_global = cs.get_hyperparameters() + hps_local = subspace.cs_local.get_hyperparameters() + for dim_idx, act_dim in enumerate(activ_dims): + self.assertTrue(hps_local[dim_idx].__class__ == hps_global[act_dim].__class__) + + def test_cs_subspace_3(self): + # check that None bounds are handled correctly + # check act_dims + cs = ConfigurationSpace() + hps = generate_cont_hps() + hps.extend(generate_ord_hps()) + hps.extend(generate_cat_hps(2)) + + cs.add_hyperparameters(hps) + + types, bounds = get_types(cs) + bounds_ss_cont = np.array(generate_ss_bounds(cs)) + + bounds_ss_cont = np.array(bounds_ss_cont) + bounds_ss_cat = [(1, 3), (0, 2)] + hps_global = cs.get_hyperparameters() + + subspace = LocalSubspace( + config_space=cs, + bounds=bounds, + hps_types=types, +
bounds_ss_cont=None, + bounds_ss_cat=bounds_ss_cat, + model_local=None, + ) + hps_local = subspace.cs_local.get_hyperparameters() + for hp_local, hp_global in zip(hps_global, hps_local): + if isinstance(hp_local, CategoricalHyperparameter): + self.assertTrue(hp_local != hp_global) + else: + self.assertTrue(hp_local == hp_global) + + subspace = LocalSubspace( + config_space=cs, + bounds=bounds, + hps_types=types, + bounds_ss_cont=bounds_ss_cont, + bounds_ss_cat=None, + model_local=None, + ) + hps_local = subspace.cs_local.get_hyperparameters() + for hp_local, hp_global in zip(hps_global, hps_local): + if isinstance(hp_local, CategoricalHyperparameter): + self.assertTrue(hp_local == hp_global) + else: + self.assertTrue(hp_local != hp_global) + + subspace = LocalSubspace( + config_space=cs, bounds=bounds, hps_types=types, bounds_ss_cont=None, bounds_ss_cat=None, model_local=None + ) + hps_local = subspace.cs_local.get_hyperparameters() + for hp_local, hp_global in zip(hps_global, hps_local): + self.assertTrue(hp_local == hp_global) + + def test_ss_normalization(self): + cs_global = ConfigurationSpace(1) + hps = generate_cont_hps() + hps.extend(generate_cat_hps(1)) + cs_global.add_hyperparameters(hps) + + types, bounds = get_types(cs_global) + bounds_ss_cont = np.array(generate_ss_bounds(cs_global)) + + bounds_ss_cat = [(1, 3)] + + subspace = LocalSubspace( + config_space=cs_global, + bounds=bounds, + hps_types=types, + bounds_ss_cont=bounds_ss_cont, + bounds_ss_cat=bounds_ss_cat, + model_local=None, + ) + + cs_local = subspace.cs_local + samples_global = cs_global.sample_configuration(20) + X_samples = np.array([sample.get_array() for sample in samples_global]) + X_normalized = subspace.normalize_input(X_samples) + + ss_indices = check_subspace_points( + X=X_normalized, + cont_dims=subspace.activate_dims_cont, + cat_dims=subspace.activate_dims_cat, + bounds_cont=subspace.bounds_ss_cont, + bounds_cat=subspace.bounds_ss_cat, + ) + + ss_indices = np.where(ss_indices)[0] + for ss_idx in ss_indices: + x_normalized = X_normalized[ss_idx] + + sample_local = Configuration(cs_local, vector=x_normalized).get_dictionary() + sample_global = samples_global[ss_idx].get_dictionary() + for key in sample_local.keys(): + if "int" in key: + # There is some numerical issues here for int hps + self.assertLess(sample_local[key] - sample_global[key], 3) + else: + self.assertAlmostEqual(sample_local[key], sample_global[key]) + + def test_add_new_observations(self): + cs_global = ConfigurationSpace(1) + hps = generate_cont_hps() + hps.extend(generate_cat_hps(1)) + cs_global.add_hyperparameters(hps) + + types, bounds = get_types(cs_global) + bounds_ss_cont = np.array(generate_ss_bounds(cs_global)) + + bounds_ss_cat = [(1, 3)] + + subspace = LocalSubspace( + config_space=cs_global, + bounds=bounds, + hps_types=types, + bounds_ss_cont=bounds_ss_cont, + bounds_ss_cat=bounds_ss_cat, + model_local=None, + ) + + samples_global = cs_global.sample_configuration(20) + X_samples = np.array([sample.get_array() for sample in samples_global]) + y_samples = np.ones(np.shape(X_samples)[0]) + + ss_indices = check_subspace_points( + X=X_samples, + cont_dims=subspace.activate_dims_cont, + cat_dims=subspace.activate_dims_cat, + bounds_cont=subspace.bounds_ss_cont, + bounds_cat=subspace.bounds_ss_cat, + ) + + subspace.add_new_observations(X_samples, y_samples) + self.assertEqual(sum(ss_indices), len(subspace.ss_x)) + self.assertEqual(sum(ss_indices), len(subspace.ss_y)) + + self.assertEqual(len(X_samples), len(subspace.model_x)) + 
self.assertEqual(len(y_samples), len(subspace.model_y)) + + # test if initialization works + subspace_1 = LocalSubspace( + config_space=cs_global, + bounds=bounds, + hps_types=types, + bounds_ss_cont=bounds_ss_cont, + bounds_ss_cat=bounds_ss_cat, + model_local=None, + initial_data=(X_samples, y_samples), + ) + + np.testing.assert_allclose(subspace.ss_x, subspace_1.ss_x) + np.testing.assert_allclose(subspace.ss_y, subspace_1.ss_y) + np.testing.assert_allclose(subspace.model_x, subspace_1.model_x) + np.testing.assert_allclose(subspace.model_y, subspace_1.model_y) + + +@unittest.mock.patch.multiple(LocalSubspace, __abstractmethods__=set()) +class TestChallengerListLocal(unittest.TestCase): + def test_challenger_list_local_full(self): + # check act_dims + cs_global = ConfigurationSpace(1) + hps = generate_cont_hps() + hps.extend(generate_ord_hps()) + hps.extend(generate_cat_hps(2)) + + cs_global.add_hyperparameters(hps) + + types, bounds = get_types(cs_global) + bounds_ss_cont = np.array(generate_ss_bounds(cs_global)) + + bounds_ss_cont = np.array(bounds_ss_cont) + + bounds_ss_cat = [(1, 3), (0, 2)] + + subspace = LocalSubspace( + config_space=cs_global, + bounds=bounds, + hps_types=types, + bounds_ss_cont=bounds_ss_cont, + bounds_ss_cat=bounds_ss_cat, + model_local=None, + ) + cs_local = subspace.cs_local + + num_data = 10 + + rs = np.random.RandomState(1) + + challengers = cs_local.sample_configuration(num_data) + challengers = [(rs.rand(), challenger) for challenger in challengers] + + cl = ChallengerListLocal(cs_local, cs_global, challengers, config_origin="test", incumbent_array=None) + + self.assertEqual(len(cl), num_data) + new_challenger = next(cl).get_dictionary() + challenger_local = challengers[0][1].get_dictionary() + + for key in new_challenger.keys(): + if "int" in key: + # There is some numerical issues here for int hps + self.assertLess(new_challenger[key] - challenger_local[key], 3) + else: + self.assertAlmostEqual(new_challenger[key], challenger_local[key]) + + self.assertEqual(next(cl).origin, "test") + + def test_challenger_list_local_reduced(self): + # check act_dims + cs_global = ConfigurationSpace(1) + hps = generate_cont_hps() + hps.extend(generate_ord_hps()) + hps.extend(generate_cat_hps(2)) + + cs_global.add_hyperparameters(hps) + + types, bounds = get_types(cs_global) + bounds_ss_cont = np.array(generate_ss_bounds(cs_global)) + + bounds_ss_cont = np.array(bounds_ss_cont) + + bounds_ss_cat = [(1, 3), (0, 2)] + + activ_dims = [0, 2, 6] + + subspace = LocalSubspace( + config_space=cs_global, + bounds=bounds, + hps_types=types, + bounds_ss_cont=bounds_ss_cont, + bounds_ss_cat=bounds_ss_cat, + model_local=None, + activate_dims=activ_dims, + ) + + cs_local = subspace.cs_local + + incumbent_array = cs_global.sample_configuration(1).get_array() + + num_data = 10 + + rs = np.random.RandomState(1) + + challengers = cs_local.sample_configuration(num_data) + challengers = [(rs.rand(), challenger) for challenger in challengers] + + cl = ChallengerListLocal( + cs_local, cs_global, challengers, config_origin="test", incumbent_array=incumbent_array + ) + + new_challenger = next(cl) + + for i, hp in enumerate((cs_global.get_hyperparameters())): + if i not in activ_dims: + self.assertAlmostEqual(incumbent_array[i], new_challenger.get_array()[i]) + else: + self.assertTrue(new_challenger.get_dictionary()[hp.name] == challengers[0][1].get_dictionary()[hp.name]) + + def test_exception(self): + cs_local = ConfigurationSpace(1) + hps = generate_cont_hps() + cs_local.add_hyperparameters(hps) 
+ + cs_global = ConfigurationSpace(1) + hps.extend(generate_cat_hps()) + cs_global.add_hyperparameters(hps) + + challengers = cs_local.sample_configuration(5) + challengers = [(0.0, challenger) for challenger in challengers] + self.assertRaisesRegex( + ValueError, + "Incumbent array must be provided if the global configuration space has more " + "hyperparameters then the local configuration space", + ChallengerListLocal, + cs_local, + cs_global, + challengers, + "test", + ) + + def test_add_forbidden_ss(self): + f0 = UniformFloatHyperparameter("f0", 0.0, 100.0) + c0 = CategoricalHyperparameter("c0", [0, 1, 2]) + o0 = OrdinalHyperparameter("o0", [1, 2, 3]) + + i0 = UniformIntegerHyperparameter("i0", 0, 100) + + forbid_1 = ForbiddenEqualsClause(c0, 0) + forbid_2 = ForbiddenInClause(o0, [1, 2]) + + forbid_3 = ForbiddenEqualsClause(f0, 0.3) + forbid_4 = ForbiddenEqualsClause(f0, 59.0) + + forbid_5 = ForbiddenAndConjunction(forbid_2, forbid_3) + forbid_6 = ForbiddenAndConjunction(forbid_1, forbid_4) + forbid_7 = ForbiddenEqualsClause(i0, 10) + + cs_local = ConfigurationSpace() + f0_ss = UniformFloatHyperparameter("f0", 0.0, 50.0) + c0_ss = CategoricalHyperparameter("c0", [0, 1, 2]) + o0_ss = OrdinalHyperparameter("o0", [1, 2, 3]) + cs_local.add_hyperparameters([f0_ss, c0_ss, o0_ss]) + + self.assertIsNotNone(LocalSubspace.fit_forbidden_to_ss(cs_local, forbid_1)) + self.assertIsNotNone(LocalSubspace.fit_forbidden_to_ss(cs_local, forbid_2)) + + self.assertIsNotNone(LocalSubspace.fit_forbidden_to_ss(cs_local, forbid_3)) + self.assertIsNone(LocalSubspace.fit_forbidden_to_ss(cs_local, forbid_4)) + + self.assertIsNotNone(LocalSubspace.fit_forbidden_to_ss(cs_local, forbid_5)) + self.assertIsNone(LocalSubspace.fit_forbidden_to_ss(cs_local, forbid_6)) + + self.assertIsNone(LocalSubspace.fit_forbidden_to_ss(cs_local, forbid_7)) diff --git a/tests/test_local_bo/test_epm_chooser_boing.py b/tests/test_local_bo/test_epm_chooser_boing.py new file mode 100644 index 000000000..f33547e07 --- /dev/null +++ b/tests/test_local_bo/test_epm_chooser_boing.py @@ -0,0 +1,283 @@ +import unittest + +import numpy as np +import torch +from ConfigSpace import ( + CategoricalHyperparameter, + ConfigurationSpace, + UniformFloatHyperparameter, +) +from gpytorch.constraints.constraints import Interval +from gpytorch.kernels import MaternKernel, ScaleKernel +from gpytorch.likelihoods.gaussian_likelihood import GaussianLikelihood +from gpytorch.priors import HorseshoePrior, LogNormalPrior + +from smac.epm.gaussian_process.augmented import GloballyAugmentedLocalGaussianProcess +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances +from smac.epm.utils import check_subspace_points, get_types +from smac.facade.smac_bb_facade import SMAC4BB +from smac.facade.smac_hpo_facade import SMAC4HPO +from smac.optimizer.configuration_chooser.boing_chooser import ( + BOinGChooser, + subspace_extraction, +) +from smac.runhistory.runhistory import RunHistory +from smac.runhistory.runhistory2epm_boing import RunHistory2EPM4ScaledLogCostWithRaw +from smac.scenario.scenario import Scenario +from smac.tae import StatusType +from smac.utils import test_helpers + + +class TestEPMChooserBOinG(unittest.TestCase): + def setUp(self): + self.scenario = Scenario( + {"cs": test_helpers.get_branin_config_space(), "run_obj": "quality", "output_dir": "data-test_epmchooser"} + ) + self.output_dirs = [] + self.output_dirs.append(self.scenario.output_dir) + + exp_kernel = MaternKernel( + 2.5, + lengthscale_constraint=Interval( + 
torch.tensor(np.exp(-6.754111155189306).repeat(2)), + torch.tensor(np.exp(0.0858637988771976).repeat(2)), + transform=None, + initial_value=1.0, + ), + ard_num_dims=2, + active_dims=(0, 1), + ).double() + + noise_prior = HorseshoePrior(0.1) + likelihood = GaussianLikelihood( + noise_prior=noise_prior, noise_constraint=Interval(np.exp(-25), np.exp(2), transform=None) + ).double() + + kernel = ScaleKernel( + exp_kernel, + outputscale_constraint=Interval(np.exp(-10.0), np.exp(2.0), transform=None, initial_value=2.0), + outputscale_prior=LogNormalPrior(0.0, 1.0), + ) + + self.model_kwargs = dict(kernel=kernel, likelihood=likelihood) + + def test_init(self): + seed = 42 + config = self.scenario.cs.sample_configuration() + rh = RunHistory() + rh.add(config, 10, 10, StatusType.SUCCESS) + + epm_chooser_kwargs = { + "model_local": GloballyAugmentedLocalGaussianProcess, + "model_local_kwargs": self.model_kwargs, + } + + smbo_kwargs = {"epm_chooser": BOinGChooser, "epm_chooser_kwargs": epm_chooser_kwargs} + + self.assertRaisesRegex( + ValueError, + "BOinG only supports RandomForestWithInstances as its global optimizer", + SMAC4BB, + scenario=self.scenario, + rng=seed, + runhistory=rh, + smbo_kwargs=smbo_kwargs, + runhistory2epm=RunHistory2EPM4ScaledLogCostWithRaw, + ) + self.assertRaisesRegex( + ValueError, + "BOinG only supports RunHistory2EPM4CostWithRaw as its rh transformer", + SMAC4HPO, + scenario=self.scenario, + rng=seed, + runhistory=rh, + smbo_kwargs=smbo_kwargs, + ) + + epm_chooser = SMAC4HPO( + scenario=self.scenario, + rng=seed, + runhistory=rh, + smbo_kwargs=smbo_kwargs, + runhistory2epm=RunHistory2EPM4ScaledLogCostWithRaw, + ).solver.epm_chooser + self.assertFalse(hasattr(epm_chooser, "turbo_optimizer")) + + epm_chooser_kwargs.update({"do_switching": True}) + epm_chooser = SMAC4HPO( + scenario=self.scenario, + rng=seed, + runhistory=rh, + smbo_kwargs=smbo_kwargs, + runhistory2epm=RunHistory2EPM4ScaledLogCostWithRaw, + ).solver.epm_chooser + self.assertTrue(hasattr(epm_chooser, "turbo_optimizer")) + + def test_choose_next(self): + seed = 42 + config = self.scenario.cs.sample_configuration() + rh = RunHistory() + rh.add(config, 10, 10, StatusType.SUCCESS) + + epm_chooser_kwargs = { + "model_local": GloballyAugmentedLocalGaussianProcess, + "model_local_kwargs": self.model_kwargs, + } + + smbo_kwargs = {"epm_chooser": BOinGChooser, "epm_chooser_kwargs": epm_chooser_kwargs} + + epm_chooser = SMAC4HPO( + scenario=self.scenario, + rng=seed, + runhistory=rh, + smbo_kwargs=smbo_kwargs, + runhistory2epm=RunHistory2EPM4ScaledLogCostWithRaw, + ).solver.epm_chooser + x = next(epm_chooser.choose_next()) + # when number of points is not large enough for building a subspace, GP works locally + self.assertEqual(x.origin, "Local Search") + for i in range(15): + config = self.scenario.cs.sample_configuration() + rh.add(config, 10, 10, StatusType.SUCCESS) + + x = next(epm_chooser.choose_next()) + # when number of points is already large enough for building a subspace, BOinG takes over + self.assertEqual(x.origin, "BOinG") + + epm_chooser_kwargs.update({"do_switching": True}) + epm_chooser = SMAC4HPO( + scenario=self.scenario, + rng=seed, + runhistory=rh, + smbo_kwargs=smbo_kwargs, + runhistory2epm=RunHistory2EPM4ScaledLogCostWithRaw, + ).solver.epm_chooser + epm_chooser.run_TuRBO = True + x = next(epm_chooser.choose_next()) + self.assertEqual(x.origin, "TuRBO") + + def test_do_switching(self): + seed = 42 + + config = self.scenario.cs.sample_configuration() + rh = RunHistory() + rh.add(config, 10, 10, 
StatusType.SUCCESS) + + epm_chooser_kwargs = { + "model_local": GloballyAugmentedLocalGaussianProcess, + "model_local_kwargs": self.model_kwargs, + "do_switching": True, + } + turbo_kwargs = {"failure_tol_min": 1, "length_min": 0.6} + epm_chooser_kwargs.update({"turbo_kwargs": turbo_kwargs}) + + smbo_kwargs = {"epm_chooser": BOinGChooser, "epm_chooser_kwargs": epm_chooser_kwargs} + + epm_chooser = SMAC4HPO( + scenario=self.scenario, + rng=seed, + runhistory=rh, + smbo_kwargs=smbo_kwargs, + runhistory2epm=RunHistory2EPM4ScaledLogCostWithRaw, + ).solver.epm_chooser + + for i in range(15): + config = self.scenario.cs.sample_configuration() + rh.add(config, 10, 10, StatusType.SUCCESS) + config = self.scenario.cs.sample_configuration() + # ensure config is the incumbent + rh.add(config, 9.99, 10, StatusType.SUCCESS) + next(epm_chooser.choose_next()) + + # init an optimal config + np.testing.assert_allclose(config.get_array(), epm_chooser.optimal_config) + self.assertAlmostEqual(9.99, epm_chooser.optimal_value) + self.assertEqual(0, epm_chooser.failcount_BOinG) + + epm_chooser.failcount_BOinG = 19 + # in this case, prob_to_TurBO becomes 1 + with unittest.mock.patch("smac.optimizer.configuration_chooser.boing_chooser.BOinGChooser." + "restart_TuRBOinG") as mk: + next(epm_chooser.choose_next()) + self.assertTrue(epm_chooser.run_TuRBO) + self.assertTrue(mk.called) + + # switch to TuRBO + for i in range(1000): + next(epm_chooser.choose_next()) + if not epm_chooser.run_TuRBO: + break + # TuRBO will be replaced with BOinG if it cannot find a better value conintuously + self.assertLess(i, 999) + + epm_chooser.failcount_BOinG = 19 + next(epm_chooser.choose_next()) + + config = self.scenario.cs.sample_configuration() + rh.add(config, 9.5, 10, StatusType.SUCCESS) + epm_chooser.turbo_optimizer.init_configs = [] + + for i in range(10): + next(epm_chooser.choose_next()) + if not epm_chooser.run_TuRBO: + break + # one time success and two times failure totally 3 times evaluations and in this case we have i==2 + self.assertEqual(i, 2) + + +class TestSubSpaceExtraction(unittest.TestCase): + def test_subspace_extraction(self): + cs = ConfigurationSpace(0) + cs.add_hyperparameter(UniformFloatHyperparameter("x0", 0.0, 1.0)) + cs.add_hyperparameter(CategoricalHyperparameter("x1", [0, 1, 2, 3, 4, 5])) + + types, bounds = get_types(cs) + rf = RandomForestWithInstances( + cs, + types=types, + bounds=bounds, + seed=0, + num_trees=10, + ratio_features=1.0, + min_samples_split=2, + min_samples_leaf=1, + ) + + X = np.array([[0.0, 0], [0.2, 1], [0.3, 2], [0.7, 5], [0.6, 3]]) + + Y = np.array([0.1, 0.2, 0.7, 0.6, 0.5]) + + X_inc = np.array([0.4, 3]) + rf.train(X, Y) + + ss_extraction_kwargs = dict(X=X, challenger=X_inc, model=rf, bounds=bounds, cat_dims=[1], cont_dims=[0]) + + num_min = 2 + num_max = 5 + + ss_bounds_cont, ss_bounds_cat, ss_indices = subspace_extraction( + num_min=num_min, num_max=np.inf, **ss_extraction_kwargs + ) + self.assertTrue(num_min <= sum(ss_indices)) + x_in_ss = check_subspace_points(X_inc, [0], [1], ss_bounds_cont, ss_bounds_cat) + self.assertTrue(x_in_ss[0]) + ss_indices_re_exam = check_subspace_points(X, [0], [1], ss_bounds_cont, ss_bounds_cat) + self.assertEqual(sum(ss_indices), sum(ss_indices_re_exam)) + + ss_bounds_cont, ss_bounds_cat, ss_indices = subspace_extraction( + num_min=num_min, num_max=num_max, **ss_extraction_kwargs + ) + self.assertTrue(num_min <= sum(ss_indices) <= num_max) + x_in_ss = check_subspace_points(X_inc, [0], [1], ss_bounds_cont, ss_bounds_cat) + 
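+        # Descriptive comment: even when num_max caps the subspace size, the challenger
+        # itself must stay inside the extracted bounds, and re-checking all training
+        # points against those bounds must reproduce the returned subspace indices.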
self.assertTrue(x_in_ss[0]) + ss_indices_re_exam = check_subspace_points(X, [0], [1], ss_bounds_cont, ss_bounds_cat) + self.assertEqual(sum(ss_indices), sum(ss_indices_re_exam)) + + num_max = 3 + ss_bounds_cont, ss_bounds_cat, ss_indices = subspace_extraction( + num_min=num_min, num_max=num_max, **ss_extraction_kwargs + ) + self.assertTrue(num_min <= sum(ss_indices) <= num_max) + self.assertTrue(x_in_ss[0]) + ss_indices_re_exam = check_subspace_points(X, [0], [1], ss_bounds_cont, ss_bounds_cat) + self.assertEqual(sum(ss_indices), sum(ss_indices_re_exam)) diff --git a/tests/test_local_bo/test_epm_chooser_turbo.py b/tests/test_local_bo/test_epm_chooser_turbo.py new file mode 100644 index 000000000..11994fb9e --- /dev/null +++ b/tests/test_local_bo/test_epm_chooser_turbo.py @@ -0,0 +1,41 @@ +import unittest + +import numpy as np + +from smac.facade.smac_bb_facade import SMAC4BB +from smac.optimizer.configuration_chooser.turbo_chooser import TurBOChooser +from smac.runhistory.runhistory import RunHistory +from smac.scenario.scenario import Scenario +from smac.tae import StatusType +from smac.utils import test_helpers + + +class TestEPMChooserTuRBO(unittest.TestCase): + def setUp(self): + self.scenario = Scenario( + {"cs": test_helpers.get_branin_config_space(), "run_obj": "quality", "output_dir": "data-test_epmchooser"} + ) + self.output_dirs = [] + self.output_dirs.append(self.scenario.output_dir) + + def test_choose_next(self): + config = self.scenario.cs.sample_configuration() + rh = RunHistory() + rh.add(config, 10, 10, StatusType.SUCCESS) + smbo = SMAC4BB( + scenario=self.scenario, + rng=np.random.RandomState(42), + model_type="gp", + smbo_kwargs={"epm_chooser": TurBOChooser}, + initial_design_kwargs={"init_budget": 0}, + runhistory=rh, + ).solver + + x = next(smbo.epm_chooser.choose_next()).get_array() + self.assertEqual(x.shape, (2,)) + + # remove the init configs + smbo.epm_chooser.turbo.init_configs = [] + x = next(smbo.epm_chooser.choose_next()).get_array() + + self.assertEqual(x.shape, (2,)) diff --git a/tests/test_local_bo/test_rh2epm_boing.py b/tests/test_local_bo/test_rh2epm_boing.py new file mode 100644 index 000000000..8c7959e79 --- /dev/null +++ b/tests/test_local_bo/test_rh2epm_boing.py @@ -0,0 +1,62 @@ +import numpy as np + +from smac.runhistory.runhistory2epm import ( + RunHistory2EPM4Cost, + RunHistory2EPM4LogScaledCost, +) +from smac.runhistory.runhistory2epm_boing import ( + RunHistory2EPM4CostWithRaw, + RunHistory2EPM4ScaledLogCostWithRaw, +) +from smac.tae import StatusType + +from tests.test_runhistory.test_runhistory2epm import RunhistoryTest + + +class TestRH2EPMBOinG(RunhistoryTest): + def test_cost_without_imputation(self): + rh2epm_kwargs = dict( + num_params=2, + success_states=[StatusType.SUCCESS, StatusType.CRASHED, StatusType.MEMOUT], + impute_censored_data=False, + scenario=self.scen, + ) + rh2epm = RunHistory2EPM4Cost(**rh2epm_kwargs) + rh2epm_log = RunHistory2EPM4LogScaledCost(**rh2epm_kwargs) + + rh2epm_with_raw = RunHistory2EPM4CostWithRaw(**rh2epm_kwargs) + + rh2epm_log_with_raw = RunHistory2EPM4ScaledLogCostWithRaw(**rh2epm_kwargs) + + self.rh.add( + config=self.config1, + cost=1, + time=1, + status=StatusType.SUCCESS, + instance_id=23, + seed=None, + additional_info=None, + ) + + # rh2epm should use cost and not time field later + self.rh.add( + config=self.config3, + cost=200, + time=20, + status=StatusType.SUCCESS, + instance_id=1, + seed=None, + additional_info=None, + ) + + _, y = rh2epm.transform(self.rh) + _, y_log = 
rh2epm_log.transform(self.rh) + _, y_raw_transformed, y_raw_ = rh2epm_with_raw.transform_with_raw(self.rh) + _, y_log_transformed, y_log_raw = rh2epm_log_with_raw.transform_with_raw(self.rh) + # all are the raw runhistory values + + raw_values = [y_raw_transformed, y_raw_, y_log_raw] + for raw_value in raw_values: + np.testing.assert_array_equal(raw_value, y) + + np.testing.assert_equal(y_log, y_log_transformed) diff --git a/tests/test_local_bo/test_subspace_boing.py b/tests/test_local_bo/test_subspace_boing.py new file mode 100644 index 000000000..f8de2926c --- /dev/null +++ b/tests/test_local_bo/test_subspace_boing.py @@ -0,0 +1,68 @@ +import unittest + +import numpy as np +from ConfigSpace import ConfigurationSpace +from ConfigSpace.hyperparameters import UniformFloatHyperparameter +from gpytorch.kernels import MaternKernel, ScaleKernel + +from smac.epm.gaussian_process.augmented import GloballyAugmentedLocalGaussianProcess +from smac.epm.utils import get_types +from smac.optimizer.acquisition import EI +from smac.optimizer.acquisition.maximizer import LocalAndSortedRandomSearch +from smac.optimizer.subspaces import ChallengerListLocal +from smac.optimizer.subspaces.boing_subspace import BOinGSubspace + + +def generate_data(num_data, rs: np.random.RandomState): + x = rs.rand(num_data, 1) + y = rs.rand(num_data) + return x, y + + +class TestBOinGSubspace(unittest.TestCase): + def setUp(self) -> None: + self.cs = ConfigurationSpace() + self.cs.add_hyperparameter(UniformFloatHyperparameter("x0", 0, 1, 0.5)) + + self.model_local = GloballyAugmentedLocalGaussianProcess + cont_dims = [0] + exp_kernel = MaternKernel(2.5, ard_num_dims=1, active_dims=tuple(cont_dims)).double() + + kernel = ScaleKernel(exp_kernel) + self.model_local_kwargs = {"kernel": kernel} + self.types, self.bounds = get_types(self.cs) + self.acq_local = EI + self.ss_kwargs = dict( + config_space=self.cs, + bounds=self.bounds, + hps_types=self.types, + model_local=self.model_local, + model_local_kwargs=self.model_local_kwargs, + ) + + def test_init(self): + boing_ss_1 = BOinGSubspace(**self.ss_kwargs) + self.assertEqual(boing_ss_1.model.num_inducing_points, 2) + self.assertIsInstance(boing_ss_1.acq_optimizer_local, LocalAndSortedRandomSearch) + self.assertEqual(boing_ss_1.acq_optimizer_local.n_sls_iterations, 10) + self.assertEqual(boing_ss_1.acq_optimizer_local.local_search.n_steps_plateau_walk, 5) + + acq_optimiozer = LocalAndSortedRandomSearch( + acquisition_function=None, config_space=self.cs, n_steps_plateau_walk=10, n_sls_iterations=10 + ) + + boing_ss_2 = BOinGSubspace(**self.ss_kwargs, acq_optimizer_local=acq_optimiozer) + self.assertEqual(boing_ss_2.acq_optimizer_local.n_sls_iterations, 10) + self.assertEqual(boing_ss_2.acq_optimizer_local.local_search.n_steps_plateau_walk, 10) + + def test_generate_challangers(self): + rs = np.random.RandomState(1) + init_data = generate_data(10, rs) + boing_ss = BOinGSubspace(**self.ss_kwargs, initial_data=init_data) + challenge = boing_ss.generate_challengers() + self.assertIsInstance(challenge, ChallengerListLocal) + eval_next = next(challenge) + acq_value_challenge = boing_ss.acquisition_function([eval_next]) + acq_value_init_points = boing_ss.acquisition_function._compute(init_data[0]) + for acq_init in acq_value_init_points: + self.assertLess(acq_init, acq_value_challenge) diff --git a/tests/test_local_bo/test_turbo_subspace.py b/tests/test_local_bo/test_turbo_subspace.py new file mode 100644 index 000000000..de057d40c --- /dev/null +++ 
b/tests/test_local_bo/test_turbo_subspace.py @@ -0,0 +1,185 @@ +import copy +import unittest + +import numpy as np +from ConfigSpace import ConfigurationSpace +from ConfigSpace.conditions import GreaterThanCondition +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, +) + +from smac.epm.gaussian_process import GaussianProcess +from smac.epm.gaussian_process.kernels import ConstantKernel, Matern, WhiteKernel +from smac.epm.utils import get_types +from smac.optimizer.acquisition import TS +from smac.optimizer.subspaces.turbo_subspace import TuRBOSubSpace + + +class TestTurBoSubspace(unittest.TestCase): + def setUp(self) -> None: + self.cs = ConfigurationSpace() + self.cs.add_hyperparameter(UniformFloatHyperparameter("x0", 0, 1, 0.5)) + self.model_local = GaussianProcess + exp_kernel = Matern(nu=2.5) + cov_amp = ConstantKernel( + 2.0, + ) + noise_kernel = WhiteKernel(1e-8) + kernel = cov_amp * exp_kernel + noise_kernel + self.types, self.bounds = get_types(self.cs) + self.model_local_kwargs = {"kernel": kernel} + self.acq_local = TS + self.ss_kwargs = dict( + config_space=self.cs, + bounds=self.bounds, + hps_types=self.types, + model_local=self.model_local, + model_local_kwargs=self.model_local_kwargs, + ) + + def test_init(self): + ss = TuRBOSubSpace(**self.ss_kwargs) + self.assertEqual(len(ss.init_configs), ss.n_init) + ss_init_configs = copy.deepcopy(ss.init_configs) + self.assertEqual(ss.num_valid_observations, 0) + + # init configurations are poped + for i in reversed(range(len(ss_init_configs))): + eval_next = next(ss.generate_challengers()) + self.assertEqual(eval_next, ss_init_configs[i]) + + cs_mix = ConfigurationSpace() + cs_mix.add_hyperparameter(UniformFloatHyperparameter("x0", 0, 1, 0.5)) + cs_mix.add_hyperparameter(CategoricalHyperparameter("x1", [0, 1, 2])) + + self.assertRaisesRegex( + ValueError, + "Current TurBO Optimizer only supports Numerical Hyperparameters", + TuRBOSubSpace, + config_space=cs_mix, + bounds=None, + hps_types=None, + model_local=None, + ) + + x0 = UniformFloatHyperparameter("x0", 0, 1, 0.5) + x1 = UniformFloatHyperparameter("x1", 0, 1, 0.5) + + cs_condition = ConfigurationSpace() + cs_condition.add_hyperparameters([x0, x1]) + + cs_condition.add_condition(GreaterThanCondition(x0, x1, 0.5)) + self.assertRaisesRegex( + ValueError, + "Currently TurBO does not support Conditional or Forbidden Hyperparameters", + TuRBOSubSpace, + config_space=cs_condition, + bounds=None, + hps_types=None, + model_local=None, + ) + + def test_adjust_length(self): + ss = TuRBOSubSpace(**self.ss_kwargs) + ss.add_new_observations(np.array([0.5]), np.array([0.5])) + + self.assertEqual(ss.num_valid_observations, 1) + + success_tol = ss.success_tol + failure_tol = ss.failure_tol + length = ss.length + + for i in range(success_tol): + ss.adjust_length(0.3 - i * 0.01) + self.assertGreater(ss.length, length) + + # make sure that length cannot be greater than length_max + for i in range(100): + ss.adjust_length(0.3 - i * 0.01) + self.assertLessEqual(ss.length, ss.length_max) + + length = ss.length + for i in range(failure_tol): + ss.adjust_length(0.5 + i * 0.01) + self.assertLessEqual(ss.length, length / 2) + + @unittest.mock.patch.object(GaussianProcess, "predict") + def test_restart(self, rf_mock): + ss = TuRBOSubSpace(**self.ss_kwargs) + ss.add_new_observations(np.array([0.5]), np.array([0.5])) + ss.init_configs = [] + + ss.length = 0.0 + challenge = ss.generate_challengers() + + self.assertEqual(ss.length, ss.length_init) + 
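+        # Descriptive comment: forcing the trust-region length to 0.0 makes the next
+        # generate_challengers() call restart TuRBO: the length is reset to length_init,
+        # a fresh batch of initial configurations is sampled, and the (mocked) GP is
+        # never queried, as the assertions below verify.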
self.assertGreater(len(ss.init_configs), 0) + + eval_next = next(challenge) + self.assertTrue(eval_next.origin == "TuRBO") + self.assertEqual(rf_mock.call_count, 0) + + def test_perturb_samples(self): + ss = TuRBOSubSpace(**self.ss_kwargs, incumbent_array=np.array([2.0])) + + prob = 0.0 + perturb_sample = ss._perturb_samples(prob, np.random.rand(ss.n_candidates, ss.n_dims)) + # make sure that no new suggestion is replaced by the incumbent + self.assertEqual(len(np.where(perturb_sample == 2.0)[0]), 0) + + prob = 1.0 + perturb_sample = ss._perturb_samples(prob, np.random.rand(ss.n_candidates, ss.n_dims)) + self.assertEqual(len(np.where(perturb_sample == 2.0)[0]), 0) + + cs = ConfigurationSpace() + cs.add_hyperparameter(UniformFloatHyperparameter("x0", 0, 1, 0.5)) + cs.add_hyperparameter(UniformFloatHyperparameter("x1", 0, 1, 0.5)) + model_local = GaussianProcess + exp_kernel = Matern(nu=2.5) + cov_amp = ConstantKernel( + 2.0, + ) + noise_kernel = WhiteKernel(1e-8) + kernel = cov_amp * exp_kernel + noise_kernel + types, bounds = get_types(cs) + model_local_kwargs = {"kernel": kernel} + + ss = TuRBOSubSpace( + config_space=cs, + bounds=bounds, + hps_types=types, + model_local=model_local, + model_local_kwargs=model_local_kwargs, + incumbent_array=np.array([2.0, 2.0]), + ) + + prob = 0.0 + perturb_sample = ss._perturb_samples(prob, np.random.rand(ss.n_candidates, ss.n_dims)) + + idx_from_incumbent = np.transpose(perturb_sample == 2.0) + self.assertTrue(np.all(np.sum(idx_from_incumbent, axis=1)) < 2) + + prob = 1.0 + perturb_sample = ss._perturb_samples(prob, np.random.rand(ss.n_candidates, ss.n_dims)) + + idx_from_incumbent = np.transpose(perturb_sample == 2.0) + self.assertEqual(len(np.where(perturb_sample == 2.0)[0]), 0) + + def test_suggestion(self): + num_init_points = 5 + ss = TuRBOSubSpace(**self.ss_kwargs, incumbent_array=np.array([0.5])) + ss.length = 0.1 + ss.init_configs = [] + new_data_x = np.vstack([np.random.rand(num_init_points, 1), np.array([[0.5]])]) + new_data_y = np.vstack([np.random.rand(num_init_points, 1), np.array([[-0.1]])]) + ss.add_new_observations(new_data_x, new_data_y) + challengers = ss._generate_challengers() + + challenger_arrays = np.asarray([challenger[1].get_array() for challenger in challengers]) + # suggestions are constrained + self.assertTrue(np.all(0.4 < challenger_arrays) and np.all(challenger_arrays < 0.6)) + + challengers = ss._generate_challengers(_sorted=False) + challenger_acq_values = np.asarray([challenger[0] for challenger in challengers]) + np.testing.assert_equal(0.0, challenger_acq_values) diff --git a/tests/test_multi_objective/test_schaffer.py b/tests/test_multi_objective/test_schaffer.py index cffa8fddd..40facd513 100644 --- a/tests/test_multi_objective/test_schaffer.py +++ b/tests/test_multi_objective/test_schaffer.py @@ -12,7 +12,7 @@ from smac.facade.smac_ac_facade import SMAC4AC from smac.facade.smac_bb_facade import SMAC4BB from smac.facade.smac_hpo_facade import SMAC4HPO -from smac.optimizer.multi_objective.parego import ParEGO +from smac.multi_objective.parego import ParEGO from smac.scenario.scenario import Scenario MIN_V = -2 @@ -42,9 +42,13 @@ def get_optimum(): def plot(all_x): plt.figure() + for x in all_x: f1, f2 = schaffer(x) - plt.scatter(f1, f2, c="blue", alpha=0.1) + plt.scatter(f1, f2, c="blue", alpha=0.2, zorder=3000) + + plt.vlines([1], 0, 4, linestyles="dashed", colors=["red"]) + plt.hlines([1], 0, 4, linestyles="dashed", colors=["red"]) plt.show() @@ -73,7 +77,7 @@ def setUp(self): self.scenario = Scenario( { "run_obj": 
"quality", # we optimize quality (alternatively runtime) - "runcount-limit": 20, # max. number of function evaluations + "runcount-limit": 25, # max. number of function evaluations "cs": self.cs, # configuration space "deterministic": True, "multi_objectives": "metric1, metric2", @@ -104,11 +108,12 @@ def test_facades(self): f1_inc, f2_inc = schaffer(incumbent["x"]) f1_opt, f2_opt = get_optimum() + inc = f1_inc + f2_inc opt = f1_opt + f2_opt diff = abs(inc - opt) - assert diff < 0.1 + assert diff < 0.5 results.append(smac) return results diff --git a/tests/test_runhistory/test_rfr_imputor.py b/tests/test_runhistory/test_rfr_imputor.py index 5071dc57c..02429a62e 100644 --- a/tests/test_runhistory/test_rfr_imputor.py +++ b/tests/test_runhistory/test_rfr_imputor.py @@ -10,9 +10,9 @@ UniformIntegerHyperparameter, ) -from smac.epm import rfr_imputator -from smac.epm.rf_with_instances import RandomForestWithInstances -from smac.epm.util_funcs import get_types +from smac.epm.random_forest import rfr_imputator +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances +from smac.epm.utils import get_types from smac.runhistory import runhistory, runhistory2epm from smac.scenario import scenario from smac.tae import StatusType diff --git a/tests/test_runhistory/test_runhistory2epm.py b/tests/test_runhistory/test_runhistory2epm.py index b5fe00be9..c2a88aa83 100644 --- a/tests/test_runhistory/test_runhistory2epm.py +++ b/tests/test_runhistory/test_runhistory2epm.py @@ -4,9 +4,9 @@ from ConfigSpace import Configuration, ConfigurationSpace from ConfigSpace.hyperparameters import UniformIntegerHyperparameter -from smac.epm.rf_with_instances import RandomForestWithInstances -from smac.epm.rfr_imputator import RFRImputator -from smac.epm.util_funcs import get_types +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances +from smac.epm.random_forest.rfr_imputator import RFRImputator +from smac.epm.utils import get_types from smac.runhistory import runhistory, runhistory2epm from smac.scenario.scenario import Scenario from smac.tae import StatusType diff --git a/tests/test_runhistory/test_runhistory_multi_objective.py b/tests/test_runhistory/test_runhistory_multi_objective.py index e21c5f2bc..3ef3a84a7 100644 --- a/tests/test_runhistory/test_runhistory_multi_objective.py +++ b/tests/test_runhistory/test_runhistory_multi_objective.py @@ -148,8 +148,6 @@ def test_full(self): status=StatusType.SUCCESS, ) - print(rh._cost_per_config) - # Only one value: Normalization goes to 1.0 self.assertEqual(rh.get_cost(config1), 1.0) @@ -550,8 +548,8 @@ def test_instances(self): self.assertEqual(rh.objective_bounds[1], (10, 30)) # Average cost returns us the cost of the latest budget - self.assertEqual(rh.average_cost(config1), 0.375) - self.assertEqual(rh.average_cost(config2), 0.75) + self.assertEqual(rh.get_cost(config1), 0.375) + self.assertEqual(rh.get_cost(config2), 0.75) def test_budgets(self): rh = RunHistory() @@ -582,8 +580,8 @@ def test_budgets(self): # SMAC does not overwrite by default rh.add( config=config1, - cost=[50, 100], - time=10, + cost=[502342352, 23425234], + time=11, status=StatusType.SUCCESS, instance_id=1, seed=1, @@ -604,9 +602,13 @@ def test_budgets(self): self.assertEqual(rh.objective_bounds[1], (50, 150)) # Average cost returns us the cost of the latest budget - self.assertEqual(rh.average_cost(config1), 0.75) - self.assertEqual(rh.average_cost(config2), 0.5) + self.assertEqual(rh.get_cost(config1), 0.75) + self.assertEqual(rh.average_cost(config1), 
[40.0, 100.0]) + + self.assertEqual(rh.get_cost(config2), 0.5) + self.assertEqual(rh.average_cost(config2), [0, 150]) if __name__ == "__main__": t = RunhistoryMultiObjectiveTest() + t.test_budgets() diff --git a/tests/test_smbo/test_ei_optimization.py b/tests/test_smbo/test_ei_optimization.py index 3fe03077a..42467579f 100644 --- a/tests/test_smbo/test_ei_optimization.py +++ b/tests/test_smbo/test_ei_optimization.py @@ -14,7 +14,7 @@ from smac.configspace import ConfigurationSpace, pcs from smac.optimizer.acquisition import EI -from smac.optimizer.ei_optimization import ( +from smac.optimizer.acquisition.maximizer import ( LocalAndSortedPriorRandomSearch, LocalSearch, RandomSearch, diff --git a/tests/test_smbo/test_epm_configuration_chooser.py b/tests/test_smbo/test_epm_configuration_chooser.py index 9e6d70ac6..80fed8532 100644 --- a/tests/test_smbo/test_epm_configuration_chooser.py +++ b/tests/test_smbo/test_epm_configuration_chooser.py @@ -5,7 +5,7 @@ import numpy as np from ConfigSpace import Configuration -from smac.epm.rf_with_instances import RandomForestWithInstances +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances from smac.facade.smac_ac_facade import SMAC4AC from smac.runhistory.runhistory import RunHistory from smac.scenario.scenario import Scenario diff --git a/tests/test_smbo/test_random_configuration_chooser.py b/tests/test_smbo/test_random_configuration_chooser.py index b68d014ff..17d32e088 100644 --- a/tests/test_smbo/test_random_configuration_chooser.py +++ b/tests/test_smbo/test_random_configuration_chooser.py @@ -2,7 +2,7 @@ import numpy as np -from smac.optimizer.random_configuration_chooser import ( +from smac.optimizer.configuration_chooser.random_chooser import ( ChooserLinearCoolDown, ChooserNoCoolDown, ChooserProb, diff --git a/tests/test_smbo/test_smbo.py b/tests/test_smbo/test_smbo.py index 57f12fc89..f7ca0d4d9 100644 --- a/tests/test_smbo/test_smbo.py +++ b/tests/test_smbo/test_smbo.py @@ -9,7 +9,7 @@ import smac.facade.smac_ac_facade from smac.callbacks import IncorporateRunResultCallback from smac.configspace import ConfigurationSpace -from smac.epm.rf_with_instances import RandomForestWithInstances +from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances from smac.facade.smac_ac_facade import SMAC4AC from smac.facade.smac_hpo_facade import SMAC4HPO from smac.intensification.abstract_racer import RunInfoIntent diff --git a/tests/test_utils/io/test_result_merging.py b/tests/test_utils/io/test_result_merging.py new file mode 100644 index 000000000..981843c61 --- /dev/null +++ b/tests/test_utils/io/test_result_merging.py @@ -0,0 +1,48 @@ +import json +import logging +import os +import tempfile +import unittest.mock +import json +from unittest.mock import patch + +from smac.configspace import ( + CategoricalHyperparameter, + Configuration, + ConfigurationSpace, + Constant, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) +from smac.scenario.scenario import Scenario +from smac.stats.stats import Stats +from smac.utils.io.traj_logging import TrajEntry, TrajLogger + +from smac.utils.io.result_merging import ResultMerger + +__copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" +__license__ = "3-clause BSD" + + +class ResultMergerTest(unittest.TestCase): + def setUp(self) -> None: + base_directory = os.path.split(__file__)[0] + base_directory = os.path.abspath(os.path.join(base_directory, "../../tests", "..")) + os.chdir(base_directory) + + def test_init_valueerror(self): + with 
self.assertRaises(ValueError): + rm = ResultMerger() + + def test_merge(self): + print(os.getcwd()) + outdir = "test_files/example_run" + rundirs = [outdir] * 3 + rm = ResultMerger(rundirs=rundirs) + rh = rm.get_runhistory() + traj = rm.get_trajectory() + traj_fn = os.path.join(outdir, "traj.json") + with open(traj_fn, "r") as file: + lines = file.readlines() + traj_from_file = [json.loads(line) for line in lines] + self.assertEqual(len(traj_from_file), len(traj)) diff --git a/tests/test_utils/test_multi_objective.py b/tests/test_utils/test_multi_objective.py index 069901970..6edc7d4fa 100644 --- a/tests/test_utils/test_multi_objective.py +++ b/tests/test_utils/test_multi_objective.py @@ -1,8 +1,10 @@ import unittest +from multiprocessing.sharedctypes import Value import numpy as np +import pytest -from smac.utils.multi_objective import normalize_costs +from smac.multi_objective.utils import normalize_costs __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" __license__ = "3-clause BSD" @@ -10,61 +12,34 @@ class MultiObjectiveTest(unittest.TestCase): def setUp(self): - self.bounds_1d = [(0, 1)] - self.bounds_2d = [(0, 1), (50, 100)] + self.bounds = [(0, 50), (50, 100)] + self.bounds_invalid = [(0, 0), (5, 5)] def test_normalize_costs(self): - # Normalize between 0..1 given data only - v = np.array([[5, 2], [10, 0]]) + # If no bounds are passed, we get the same result back + v = [5, 2] nv = normalize_costs(v) - self.assertEqual(list(nv.flatten()), list(np.array([[0, 1], [1, 0]]).flatten())) + self.assertEqual(nv, [5, 2]) # Normalize between 0..1 given data only - v = np.array([[5, 75], [0.5, 50], [0.75, 60], [0, 100]]) - nv = normalize_costs(v, self.bounds_2d) - - self.assertEqual( - list(nv.flatten()), - list(np.array([[5, 0.5], [0.5, 0], [0.75, 0.2], [0, 1]]).flatten()), - ) - - # No normalization - v = np.array([[5, 2]]) - nv = normalize_costs(v) - self.assertEqual(list(nv.flatten()), list(np.array([[1.0, 1.0]]).flatten())) - - # Normalization with given bounds - v = np.array([[500, 150]]) - nv = normalize_costs(v, self.bounds_2d) - self.assertEqual(list(nv.flatten()), list(np.array([[500, 2.0]]).flatten())) - - # Test one-dimensional list - v = [500, 150] - nv = normalize_costs(v, self.bounds_1d) - self.assertEqual(list(nv.flatten()), list(np.array([[500], [150]]).flatten())) - - # Test one-dimensional array without bounds - v = np.array([500, 150]) - nv = normalize_costs(v) - self.assertEqual(list(nv.flatten()), list(np.array([[1.0], [0.0]]).flatten())) - - # Test one-dimensional array without bounds - v = np.array([1000, 200, 400, 800, 600, 0]) - nv = normalize_costs(v) - self.assertEqual( - list(nv.flatten()), - list(np.array([[1], [0.2], [0.4], [0.8], [0.6], [0.0]]).flatten()), - ) - - # Test one-dimensional array with one objective - v = np.array([500]) - nv = normalize_costs(v, self.bounds_1d) - self.assertEqual(list(nv.flatten()), list(np.array([[500.0]]).flatten())) - - # Test one-dimensional list with one objective - v = [500] - nv = normalize_costs(v, self.bounds_1d) - self.assertEqual(list(nv.flatten()), list(np.array([[500.0]]).flatten())) + v = [25, 50] + nv = normalize_costs(v, self.bounds) + self.assertEqual(nv, [0.5, 0]) + + # Invalid bounds + v = [25, 50] + nv = normalize_costs(v, self.bounds_invalid) + self.assertEqual(nv, [1, 1]) + + # Invalid input + v = [[25], [50]] + with pytest.raises(AssertionError): + nv = normalize_costs(v, self.bounds) + + # Wrong shape + v = [25, 50, 75] + with pytest.raises(ValueError): + nv = normalize_costs(v, self.bounds) if 
__name__ == "__main__":