Merge pull request #169 from automl/development

Development
automl · Feb 11, 2017 · 6274c3e · 6274c3e
2 parents 9e871f6 + b14c471
commit 6274c3e
Show file tree

Hide file tree

Showing 10 changed files with 157 additions and 51 deletions.
diff --git a/.landscape.yml b/.landscape.yml
@@ -0,0 +1,12 @@
+doc-warnings: true
+test-warnings: false
+strictness: veryhigh
+max-line-length: 100
+pep8:
+  full: true
+ignore-paths: 
+    - ci_scripts
+ignore-patterns:
+    - (^|/)docs(/|$)
+python-targets:
+    - 3
diff --git a/.travis.yml b/.travis.yml
@@ -7,6 +7,8 @@ matrix:
     env: PYTHON_VERSION="3.4" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh"
   - os: linux
     env: PYTHON_VERSION="3.5" COVERAGE="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh"
+  - os: linux
+    env: PYTHON_VERSION="3.6" COVERAGE="true" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh"
 
   # Set language to generic to not break travis-ci
   # https://github.com/travis-ci/travis-ci/issues/2312#issuecomment-195620855
@@ -20,6 +22,10 @@ matrix:
     sudo: required
     language: generic
     env: PYTHON_VERSION="3.5" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh"
+  - os: osx
+    sudo: required
+    language: generic
+    env: PYTHON_VERSION="3.6" MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh"
 
 cache:
   directories:

diff --git a/changelog.md b/changelog.md
@@ -1,3 +1,12 @@
+# 0.2.4
+
+* CI only check code quality for python3
+* Perform local search on configurations from previous runs as proposed in the
+  original paper from 2011 instead of random configurations as implemented
+  before
+* CI run travis-ci unit tests with python3.6
+* FIX #167, remove an endless loop which occurred when using pSMAC
+
 # 0.2.3
 
 * MAINT refactor Intensification and adding unit tests

diff --git a/smac/__version__.py b/smac/__version__.py
@@ -1,4 +1,4 @@
 """Version information."""
 
 # The following line *must* be the last in the module, exactly as formatted:
-__version__ = "0.2.3"
+__version__ = "0.2.4"
diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py
@@ -99,17 +99,21 @@ def add(self, config, cost, time,
 
         k = RunKey(config_id, instance_id, seed)
         v = RunValue(cost, time, status, additional_info)
-        self.data[k] = v
 
-        if not external_data:
-            # also add to fast data structure
-            is_k = InstSeedKey(instance_id, seed)
-            self._configid_to_inst_seed[
-                config_id] = self._configid_to_inst_seed.get(config_id, [])
-            self._configid_to_inst_seed[config_id].append(is_k)
+        # Each runkey is supposed to be used only once. Repeated tries to add
+        # the same runkey will be ignored silently.
+        if self.data.get(k) is None:
+            self.data[k] = v
 
-        # assumes an average across runs as cost function
-        self.incremental_update_cost(config, cost)
+            if not external_data:
+                # also add to fast data structure
+                is_k = InstSeedKey(instance_id, seed)
+                self._configid_to_inst_seed[
+                    config_id] = self._configid_to_inst_seed.get(config_id, [])
+                self._configid_to_inst_seed[config_id].append(is_k)
+
+            # assumes an average across runs as cost function
+            self.incremental_update_cost(config, cost)
 
     def update_cost(self, config):
         '''
@@ -191,11 +195,17 @@ def get_runs_for_config(self, config):
             list: tuples of instance, seed
         """
         config_id = self.config_ids.get(config)
-        is_list = self._configid_to_inst_seed.get(config_id)
-        if is_list is None:
-            return []
-        else:
-            return is_list
+        return self._configid_to_inst_seed.get(config_id, [])
+
+    def get_all_configs(self):
+        """ Return all configurations in this RunHistory object
+
+        Returns
+        -------
+            list: parameter configurations
+
+        """
+        return list(self.config_ids.keys())
 
     def empty(self):
         """

diff --git a/smac/smbo/smbo.py b/smac/smbo/smbo.py
@@ -204,13 +204,14 @@ def choose_next(self, X, Y,
             else:
                 num_configurations_by_local_search = 10
 
-        # initial SLS by incumbent +
-        # best configuration from next_configs_by_random_search_sorted
+        # initiate local search with best configurations from previous runs
+        configs_previous_runs = self.runhistory.get_all_configs()
+        configs_previous_runs_sorted = self._sort_configs_by_acq_value(configs_previous_runs)
+        num_configs_local_search = min(len(configs_previous_runs_sorted), num_configurations_by_local_search)
         next_configs_by_local_search = \
             self._get_next_by_local_search(
-                [self.incumbent] +
                 list(map(lambda x: x[1],
-                         next_configs_by_random_search_sorted[:num_configurations_by_local_search - 1])))
+                         configs_previous_runs_sorted[:num_configs_local_search])))
 
         next_configs_by_acq_value = next_configs_by_random_search_sorted + \
             next_configs_by_local_search
@@ -223,7 +224,7 @@ def choose_next(self, X, Y,
         # Remove dummy acquisition function value
         next_configs_by_random_search = [x[1] for x in
                                          self._get_next_by_random_search(
-                                             num_points=num_configurations_by_local_search + num_configurations_by_random_search_sorted)]
+                                             num_points=num_configs_local_search + num_configurations_by_random_search_sorted)]
 
         challengers = list(itertools.chain(*zip(next_configs_by_acq_value,
                                                 next_configs_by_random_search)))
@@ -251,26 +252,9 @@ def _get_next_by_random_search(self, num_points=1000, _sorted=False):
         else:
             rand_configs = [self.config_space.sample_configuration(size=1)]
         if _sorted:
-            imputed_rand_configs = map(ConfigSpace.util.impute_inactive_values,
-                                       rand_configs)
-            imputed_rand_configs = [x.get_array()
-                                    for x in imputed_rand_configs]
-            imputed_rand_configs = np.array(imputed_rand_configs,
-                                            dtype=np.float64)
-            acq_values = self.acquisition_func(imputed_rand_configs)
-            # From here
-            # http://stackoverflow.com/questions/20197990/how-to-make-argsort-result-to-be-random-between-equal-values
-            random = self.rng.rand(len(acq_values))
-            # Last column is primary sort key!
-            indices = np.lexsort((random.flatten(), acq_values.flatten()))
-
             for i in range(len(rand_configs)):
                 rand_configs[i].origin = 'Random Search (sorted)'
-
-            # Cannot use zip here because the indices array cannot index the
-            # rand_configs list, because the second is a pure python list
-            return [(acq_values[ind][0], rand_configs[ind])
-                    for ind in indices[::-1]]
+            return self._sort_configs_by_acq_value(rand_configs)
         else:
             for i in range(len(rand_configs)):
                 rand_configs[i].origin = 'Random Search'
@@ -308,3 +292,36 @@ def _get_next_by_local_search(self, init_points=typing.List[Configuration]):
         configs_acq.sort(reverse=True, key=lambda x: x[0])
 
         return configs_acq
+
+    def _sort_configs_by_acq_value(self, configs):
+        """ Sort the given configurations by acquisition value
+
+        Parameters
+        ----------
+        configs : list(Configuration)
+
+        Returns
+        -------
+        list: (acquisition value, Candidate solutions),
+                ordered by their acquisition function value
+
+        """
+
+        imputed_configs = map(ConfigSpace.util.impute_inactive_values,
+                              configs)
+        imputed_configs = [x.get_array()
+                           for x in imputed_configs]
+        imputed_configs = np.array(imputed_configs,
+                                   dtype=np.float64)
+        acq_values = self.acquisition_func(imputed_configs)
+
+        # From here
+        # http://stackoverflow.com/questions/20197990/how-to-make-argsort-result-to-be-random-between-equal-values
+        random = self.rng.rand(len(acq_values))
+        # Last column is primary sort key!
+        indices = np.lexsort((random.flatten(), acq_values.flatten()))
+
+        # Cannot use zip here because the indices array cannot index the
+        # configs list, because the second is a pure python list
+        return [(acq_values[ind][0], configs[ind])
+                for ind in indices[::-1]]
diff --git a/test/test_intensify/test_intensify.py b/test/test_intensify/test_intensify.py
@@ -391,6 +391,4 @@ def test_adaptive_capping(self):
                           inc_sum_cost=inc_sum_cost)
         # scenario cutoff
         self.assertEqual(cutoff, 5)
-
-
 
diff --git a/test/test_runhistory/test_runhistory.py b/test/test_runhistory/test_runhistory.py
@@ -29,8 +29,7 @@ def test_add_and_pickle(self):
         '''
         rh = RunHistory(aggregate_func=average_cost)
         cs = get_config_space()
-        config = Configuration(cs,
-                               values={'a': 1, 'b': 2})
+        config = Configuration(cs, values={'a': 1, 'b': 2})
 
         self.assertTrue(rh.empty())
 
@@ -55,6 +54,21 @@ def test_add_and_pickle(self):
             loaded_rh = pickle.load(fh)
         self.assertEqual(loaded_rh.data, rh.data)
 
+    def test_add_multiple_times(self):
+        rh = RunHistory(aggregate_func=average_cost)
+        cs = get_config_space()
+        config = Configuration(cs, values={'a': 1, 'b': 2})
+
+        for i in range(5):
+            rh.add(config=config, cost=i + 1, time=i + 1,
+                   status=StatusType.SUCCESS, instance_id=None,
+                   seed=12345, additional_info=None)
+
+        self.assertEqual(len(rh.data), 1)
+        self.assertEqual(len(rh.get_runs_for_config(config)), 1)
+        self.assertEqual(len(rh._configid_to_inst_seed[1]), 1)
+        self.assertEqual(list(rh.data.values())[0].cost, 1)
+
     def test_get_config_runs(self):
         '''
             get some config runs from runhistory

diff --git a/test/test_smbo/test_pSMAC.py b/test/test_smbo/test_pSMAC.py
@@ -115,8 +115,8 @@ def test_load(self):
                          [1, 2, 3, 4])
         self.assertEqual(len(runhistory.data), 6)
 
-        # load from non-empty runhistory, but existing run will be overridden
-        #  because it alread existed
+        # load from non-empty runhistory, in case of a duplicate the existing
+        # result will be kept and the new one silently discarded
         runhistory = RunHistory(aggregate_func=average_cost)
         configuration_space.seed(1)
         config = configuration_space.sample_configuration()
@@ -127,10 +127,10 @@ def test_load(self):
                                     configuration_space)
         id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
         self.assertEqual(len(runhistory.data), 6)
-        self.assertNotEqual(id_before, id_after)
+        self.assertEqual(id_before, id_after)
 
-        # load from non-empty runhistory, but existing run will not be
-        # overridden, but config_id will be re-used
+        # load from non-empty runhistory, in case of a duplicate the existing
+        # result will be kept and the new one silently discarded
         runhistory = RunHistory(aggregate_func=average_cost)
         configuration_space.seed(1)
         config = configuration_space.sample_configuration()

diff --git a/test/test_smbo/test_smbo.py b/test/test_smbo/test_smbo.py
@@ -107,6 +107,7 @@ def test_choose_next(self):
         smbo.runhistory = RunHistory(aggregate_func=average_cost)
         X = self.scenario.cs.sample_configuration().get_array()[None, :]
         smbo.incumbent = self.scenario.cs.sample_configuration()
+        smbo.runhistory.add(smbo.incumbent, 10, 10, 1)
 
         Y = self.branin(X)
         x = smbo.choose_next(X, Y)[0].get_array()
@@ -119,6 +120,45 @@ def side_effect(X, derivative):
         smbo = SMAC(self.scenario, rng=1).solver
         smbo.incumbent = self.scenario.cs.sample_configuration()
         smbo.runhistory = RunHistory(aggregate_func=average_cost)
+        smbo.runhistory.add(smbo.incumbent, 10, 10, 1)
+        smbo.model = mock.Mock(spec=RandomForestWithInstances)
+        smbo.acquisition_func._compute = mock.Mock(spec=RandomForestWithInstances)
+        smbo.acquisition_func._compute.side_effect = side_effect
+
+        X = smbo.rng.rand(10, 2)
+        Y = smbo.rng.rand(10, 1)
+
+        x = smbo.choose_next(X, Y)
+
+        self.assertEqual(smbo.model.train.call_count, 1)
+        self.assertEqual(len(x), 2002)
+        num_random_search = 0
+        num_local_search = 0
+        for i in range(0, 2002, 2):
+            # print(x[i].origin)
+            self.assertIsInstance(x[i], Configuration)
+            if 'Random Search (sorted)' in x[i].origin:
+                num_random_search += 1
+            elif 'Local Search' in x[i].origin:
+                num_local_search += 1
+        # number of local search configs has to be at least 1
+        # since x can have duplicates
+        # which can be associated with the local search
+        self.assertGreaterEqual(num_local_search, 1)
+        for i in range(1, 2002, 2):
+            self.assertIsInstance(x[i], Configuration)
+            self.assertEqual(x[i].origin, 'Random Search')
+
+    def test_choose_next_3(self):
+        def side_effect(X, derivative):
+            return np.mean(X, axis=1).reshape((-1, 1))
+
+        smbo = SMAC(self.scenario, rng=1).solver
+        smbo.incumbent = self.scenario.cs.sample_configuration()
+        previous_configs = [smbo.incumbent] + [self.scenario.cs.sample_configuration() for i in range(0, 20)]
+        smbo.runhistory = RunHistory(aggregate_func=average_cost)
+        for i in range(0, len(previous_configs)):
+            smbo.runhistory.add(previous_configs[i], i, 10, 1)
         smbo.model = mock.Mock(spec=RandomForestWithInstances)
         smbo.acquisition_func._compute = mock.Mock(spec=RandomForestWithInstances)
         smbo.acquisition_func._compute.side_effect = side_effect
@@ -132,16 +172,16 @@ def side_effect(X, derivative):
         self.assertEqual(len(x), 2020)
         num_random_search = 0
         num_local_search = 0
-        for i in range(0, 2020,2):
-            #print(x[i].origin)
+        for i in range(0, 2020, 2):
+            # print(x[i].origin)
             self.assertIsInstance(x[i], Configuration)
             if 'Random Search (sorted)' in x[i].origin:
                 num_random_search += 1
             elif 'Local Search' in x[i].origin:
                 num_local_search += 1
         # number of local search configs has to be at least 10
-        # since x can have duplicates 
-        # which can be associated with the local search 
+        # since x can have duplicates
+        # which can be associated with the local search
         self.assertGreaterEqual(num_local_search, 10)
         for i in range(1, 2020, 2):
             self.assertIsInstance(x[i], Configuration)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -391,6 +391,4 @@ def test_adaptive_capping(self):
		inc_sum_cost=inc_sum_cost)
		# scenario cutoff
		self.assertEqual(cutoff, 5)