Merge pull request #670 from automl/development

Release 0.12.3
automl · Jul 31, 2020 · 9890e0c · 9890e0c
2 parents d5b9381 + 87716d8
commit 9890e0c
Show file tree

Hide file tree

Showing 14 changed files with 67 additions and 33 deletions.
diff --git a/changelog.md b/changelog.md
@@ -1,3 +1,16 @@
+# 0.12.3
+
+## Minor Changes
+
+* Use SciPy's Sobol sequence for the initial design instead of a third-party package (#600)
+* Store start and end time of function evaluation (#647)
+
+## Bug Fixes
+
+* Fixes an issue in the Bayesian optimization facade that triggered an exception when tuning categorical
+  hyperparameters (#666)
+* Fixes an issue in the Gaussian process MCMC which resulted in reduced execution speed and reduced performance (#666)
+
 # 0.12.2
 
 ## Bug Fixes

diff --git a/requirements.txt b/requirements.txt
@@ -5,6 +5,5 @@ pynisher>=0.4.1
 ConfigSpace>=0.4.9,<0.5
 scikit-learn>=0.22.0
 pyrfr>=0.8.0
-sobol_seq
 joblib
 lazy_import
diff --git a/smac/__init__.py b/smac/__init__.py
@@ -5,7 +5,7 @@
 import lazy_import
 from smac.utils import dependencies
 
-__version__ = '0.12.2'
+__version__ = '0.12.3'
 __author__ = 'Marius Lindauer, Matthias Feurer, Katharina Eggensperger, Joshua Marben, André Biedenkapp, Aaron Klein,'\
     'Stefan Falkner and Frank Hutter'
 

diff --git a/smac/epm/gaussian_process_mcmc.py b/smac/epm/gaussian_process_mcmc.py
@@ -184,7 +184,7 @@ def _train(self, X: np.ndarray, y: np.ndarray, do_optimize: bool = True) -> 'Gau
                     self.p0, _, _ = sampler.run_mcmc(self.p0, self.chain_length)
 
                 # Take the last samples from each walker
-                self.hypers = sampler.get_chain()[:, -1]
+                self.hypers = sampler.get_chain()[-1]
             elif self.mcmc_sampler == 'nuts':
                 # Originally published as:
                 # http://www.stat.columbia.edu/~gelman/research/published/nuts.pdf

diff --git a/smac/facade/smac_bo_facade.py b/smac/facade/smac_bo_facade.py
@@ -104,7 +104,7 @@ def __init__(self, model_type: str = 'gp_mcmc', **kwargs: typing.Any):
                     operate_on=cat_dims,
                 )
 
-            assert len(cont_dims + len(cat_dims)) == len(scenario.cs.get_hyperparameters())
+            assert (len(cont_dims) + len(cat_dims)) == len(scenario.cs.get_hyperparameters())
 
             noise_kernel = WhiteKernel(
                 noise_level=1e-8,

diff --git a/smac/initial_design/sobol_design.py b/smac/initial_design/sobol_design.py
@@ -1,6 +1,6 @@
 import typing
 
-import sobol_seq
+from scipy.optimize._shgo_lib.sobol_seq import Sobol
 
 from ConfigSpace.configuration_space import Configuration
 from ConfigSpace.hyperparameters import Constant
@@ -39,7 +39,8 @@ def _select_configurations(self) -> typing.List[Configuration]:
             if isinstance(p, Constant):
                 constants += 1
 
-        sobol = sobol_seq.i4_sobol_generate(len(params) - constants, self.init_budget)
+        sobol_gen = Sobol()
+        sobol = sobol_gen.i4_sobol_generate(len(params) - constants, self.init_budget)
 
         return self._transform_continuous_designs(design=sobol,
                                                   origin='Sobol',

diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py
@@ -41,7 +41,7 @@ def __new__(
 
 
 RunValue = collections.namedtuple(
-    'RunValue', ['cost', 'time', 'status', 'additional_info'])
+    'RunValue', ['cost', 'time', 'status', 'starttime', 'endtime', 'additional_info'])
 
 
 class EnumEncoder(json.JSONEncoder):
@@ -169,6 +169,8 @@ def add(
         instance_id: typing.Optional[str] = None,
         seed: typing.Optional[int] = None,
         budget: float = 0.0,
+        starttime: float = 0.0,
+        endtime: float = 0.0,
         additional_info: typing.Optional[typing.Dict] = None,
         origin: DataOrigin = DataOrigin.INTERNAL,
     ) -> None:
@@ -192,6 +194,10 @@ def add(
                 Random seed used by TA (default: None)
             budget: float
                 budget (cutoff) used in intensifier to limit TA (default: 0)
+            starttime: float
+                starting timestamp of TA evaluation
+            endtime: float
+                ending timestamp of TA evaluation
             additional_info: dict
                 Additional run information (could include further returned
                 information from TA or fields such as start time and host_id)
@@ -218,7 +224,7 @@ def add(
 
         # Construct keys and values for the data dictionary
         k = RunKey(config_id, instance_id, seed, budget)
-        v = RunValue(cost, time, status, additional_info)
+        v = RunValue(cost, time, status, starttime, endtime, additional_info)
 
         # Each runkey is supposed to be used only once. Repeated tries to add
         # the same runkey will be ignored silently if not capped.
@@ -487,7 +493,9 @@ def load_json(self, fn: str, cs: ConfigurationSpace) -> None:
                      instance_id=k[1],
                      seed=int(k[2]),
                      budget=float(k[3]) if len(k) == 4 else 0,
-                     additional_info=v[3])
+                     starttime=v[3],
+                     endtime=v[4],
+                     additional_info=v[5])
 
     def update_from_json(
         self,
@@ -533,10 +541,10 @@ def update(
         #  the ID
         for key, value in runhistory.data.items():
             config_id, instance_id, seed, budget = key
-            cost, time, status, additional_info = value
+            cost, time, status, start, end, additional_info = value
             config = runhistory.ids_config[config_id]
             self.add(config=config, cost=cost, time=time,
-                     status=status, instance_id=instance_id,
+                     status=status, instance_id=instance_id, starttime=start, endtime=end,
                      seed=seed, budget=budget, additional_info=additional_info,
                      origin=origin)
 

diff --git a/smac/tae/execute_ta_run.py b/smac/tae/execute_ta_run.py
@@ -1,5 +1,6 @@
 import logging
 import math
+import time
 from enum import Enum
 import typing
 
@@ -114,7 +115,7 @@ def __init__(
             run objective of SMAC
         par_factor: int
             penalization factor
-        crash_cost : float
+        cost_for_crash : float
             cost that is used in case of crashed runs (including runs
             that returned NaN or inf)
         abort_on_first_run_crash: bool
@@ -192,12 +193,15 @@ def start(
                              "(run objective), a cutoff time is required, "
                              "but not given to this call.")
 
+        start = time.time()
         status, cost, runtime, additional_info = self.run(config=config,
                                                           instance=instance,
                                                           cutoff=cutoff,
                                                           seed=seed,
                                                           budget=budget,
                                                           instance_specific=instance_specific)
+        end = time.time()
+
         if budget == 0 and status == StatusType.DONOTADVANCE:
             raise ValueError("Cannot handle DONOTADVANCE state when using intensify or SH/HB on "
                              "instances.")
@@ -250,7 +254,7 @@ def start(
             self.runhistory.add(config=config,
                                 cost=cost, time=runtime, status=status,
                                 instance_id=instance, seed=seed,
-                                budget=budget,
+                                budget=budget, starttime=start, endtime=end,
                                 additional_info=additional_info)
             self.stats.n_configs = len(self.runhistory.config_ids)
 

diff --git a/smac/utils/validate.py b/smac/utils/validate.py
@@ -454,13 +454,14 @@ def _get_runs(self,
                     # Add runs to runhistory
                     for c in configs_evaluated[:]:
                         runkey = RunKey(runhistory.config_ids[c], i, seed)
-                        cost, time, status, additional_info = runhistory.data[runkey]
+                        cost, time, status, start, end, additional_info = runhistory.data[runkey]
                         if status in [StatusType.CRASHED, StatusType.ABORT, StatusType.CAPPED]:
                             # Not properly executed target algorithm runs should be repeated
                             configs_evaluated.remove(c)
                             continue
                         new_rh.add(c, cost, time, status, instance_id=i,
-                                   seed=seed, additional_info=additional_info)
+                                   seed=seed, starttime=start, endtime=end,
+                                   additional_info=additional_info)
                         runs_from_rh += 1
                 else:
                     # If no runhistory or no entries for instance, get new seed

diff --git a/test/test_cli/test_deterministic_smac.py b/test/test_cli/test_deterministic_smac.py
@@ -32,6 +32,11 @@ def tearDown(self):
                 shutil.rmtree(output_dir, ignore_errors=True)
         os.chdir(self.current_dir)
 
+    def ignore_timestamps(self, rh):
+        for i, (k, val) in enumerate(rh['data']):
+            rh['data'][i][1] = [v for j, v in enumerate(val) if j not in [3, 4]]  # 3, 4 are start and end timestamps
+        return rh
+
     @unittest.mock.patch("smac.optimizer.ei_optimization.get_one_exchange_neighbourhood")
     def test_deterministic(self, patch):
         """
@@ -71,8 +76,10 @@ def test_deterministic(self, patch):
         h1 = json.load(open(self.output_dir_1 + '/run_1/runhistory.json'))
         h2 = json.load(open(self.output_dir_2 + '/run_1/runhistory.json'))
         h3 = json.load(open(self.output_dir_3 + '/run_2/runhistory.json'))
-        self.assertEqual(h1, h2)
-        self.assertNotEqual(h1, h3)
+        self.assertEqual(self.ignore_timestamps(h1), self.ignore_timestamps(h2))
+        # h1 was modified in place by the line above, so we need to reload it
+        h1 = json.load(open(self.output_dir_1 + '/run_1/runhistory.json'))
+        self.assertNotEqual(self.ignore_timestamps(h1), self.ignore_timestamps(h3))
 
     def test_modes(self):
         """

diff --git a/test/test_epm/test_gp_mcmc.py b/test/test_epm/test_gp_mcmc.py
@@ -87,6 +87,7 @@ def test_gp_train(self):
         model = get_gp(10, rs)
         np.testing.assert_array_almost_equal(model.kernel.theta, fixture)
         model.train(X[:10], Y[:10])
+        self.assertEqual(len(model.models), 36)
 
         for base_model in model.models:
             theta = base_model.gp.kernel.theta
@@ -185,7 +186,7 @@ def test_gp_on_sklearn_data(self):
         model = get_gp(X.shape[1], rs, noise=1e-10, normalize_y=True)
         cv = sklearn.model_selection.KFold(shuffle=True, random_state=rs, n_splits=2)
 
-        maes = [7.8774231707672667164, 8.645038495119097796]
+        maes = [6.841565457149357281, 7.4943401900804902144]
 
         for i, (train_split, test_split) in enumerate(cv.split(X, y)):
             X_train = X[train_split]

diff --git a/test/test_initial_design/test_sobol_design.py b/test/test_initial_design/test_sobol_design.py
@@ -31,7 +31,7 @@ def test_sobol(self):
         cs.add_hyperparameter(UniformFloatHyperparameter('x41', 0, 1))
         with self.assertRaisesRegex(
                 Exception,
-                "('NoneType' object is not iterable)|(cannot unpack non-iterable NoneType object)",
+                "list index out of range",
         ):
             SobolDesign(
                 cs=cs,

diff --git a/test/test_runhistory/test_runhistory.py b/test/test_runhistory/test_runhistory.py
@@ -35,12 +35,12 @@ def test_add_and_pickle(self):
 
         rh.add(config=config, cost=10, time=20,
                status=StatusType.SUCCESS, instance_id=None,
-               seed=None,
+               seed=None, starttime=100, endtime=120,
                additional_info=None)
 
         rh.add(config=config, cost=10, time=20,
                status=StatusType.SUCCESS, instance_id=1,
-               seed=12354,
+               seed=12354, starttime=10, endtime=30,
                additional_info={"start_time": 10})
 
         self.assertFalse(rh.empty())

diff --git a/test/test_smbo/test_pSMAC.py b/test/test_smbo/test_pSMAC.py
@@ -28,17 +28,17 @@ def _remove_tmp_dir(self):
     def test_write(self):
         # The nulls make sure that we correctly emit the python None value
         fixture = '{"data": [[[1, "branin", 1, 0.0], [1, 1, {"__enum__": ' \
-                  '"StatusType.SUCCESS"}, null]], ' \
+                  '"StatusType.SUCCESS"}, 0.0, 0.0, null]], ' \
                   '[[1, "branini", 1, 0.0], [1, 1, {"__enum__": ' \
-                  '"StatusType.SUCCESS"}, null]], ' \
+                  '"StatusType.SUCCESS"}, 0.0, 0.0, null]], ' \
                   '[[2, "branini", 1, 0.0], [1, 1, {"__enum__": ' \
-                  '"StatusType.SUCCESS"}, null]], ' \
+                  '"StatusType.SUCCESS"}, 0.0, 0.0, null]], ' \
                   '[[2, null, 1, 0.0], [1, 1, {"__enum__": ' \
-                  '"StatusType.SUCCESS"}, null]], ' \
+                  '"StatusType.SUCCESS"}, 0.0, 0.0, null]], ' \
                   '[[3, "branin-hoo", 1, 0.0], [1, 1, {"__enum__": ' \
-                  '"StatusType.SUCCESS"}, null]], ' \
+                  '"StatusType.SUCCESS"}, 0.0, 0.0, null]], ' \
                   '[[4, null, 1, 0.0], [1, 1, {"__enum__": ' \
-                  '"StatusType.SUCCESS"}, null]]],' \
+                  '"StatusType.SUCCESS"}, 0.0, 0.0, null]]],' \
                   '"config_origins": {},' \
                   '"configs": {' \
                   '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
@@ -97,17 +97,17 @@ def test_load(self):
         configuration_space = test_helpers.get_branin_config_space()
 
         other_runhistory = '{"data": [[[2, "branini", 1], [1, 1,' \
-                           '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
+                           '{"__enum__": "StatusType.SUCCESS"}, 0.0, 0.0, null]], ' \
                            '[[1, "branin", 1], [1, 1,' \
-                           '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
+                           '{"__enum__": "StatusType.SUCCESS"}, 0.0, 0.0, null]], ' \
                            '[[3, "branin-hoo", 1], [1, 1,' \
-                           '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
+                           '{"__enum__": "StatusType.SUCCESS"}, 0.0, 0.0, null]], ' \
                            '[[2, null, 1], [1, 1,' \
-                           '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
+                           '{"__enum__": "StatusType.SUCCESS"}, 0.0, 0.0, null]], ' \
                            '[[1, "branini", 1], [1, 1,' \
-                           '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
+                           '{"__enum__": "StatusType.SUCCESS"}, 0.0, 0.0, null]], ' \
                            '[[4, null, 1], [1, 1,' \
-                           '{"__enum__": "StatusType.SUCCESS"}, null]]], ' \
+                           '{"__enum__": "StatusType.SUCCESS"}, 0.0, 0.0, null]]], ' \
                            '"configs": {' \
                            '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
                            '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \