Remove potential sources of nondeterminism in evaluators by not setting seeds randomly.
automl · Oct 25, 2023 · 900c5d7 · 900c5d7
1 parent e60c9b4
commit 900c5d7
Showing 6 changed files with 10 additions and 14 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 ## Bug-Fixes
 - Fix configspace version.
+- Remove potential sources of nondeterminism in evaluators by not setting seeds randomly.
 
 # Version 1.1.1
 

diff --git a/deepcave/evaluators/epm/fanova_forest.py b/deepcave/evaluators/epm/fanova_forest.py
@@ -30,7 +30,7 @@ def __init__(
         instance_features: Optional[np.ndarray] = None,
         pca_components: Optional[int] = 2,
         cutoffs: Tuple[float, float] = (-np.inf, np.inf),
-        seed: Optional[int] = None,
+        seed: int = 0,
     ):
         super().__init__(
             configspace=configspace,

diff --git a/deepcave/evaluators/epm/random_forest.py b/deepcave/evaluators/epm/random_forest.py
@@ -56,7 +56,7 @@ def __init__(
         instance_features: Optional[np.ndarray] = None,
         pca_components: Optional[int] = 2,
         log_y: bool = False,
-        seed: Optional[int] = None,
+        seed: int = 0,
     ):
         self.cs = configspace
         self.log_y = log_y
@@ -295,8 +295,6 @@ def _train(self, X: np.ndarray, Y: np.ndarray) -> None:
         # Now we can start to prepare the data for the pyrfr
         data = self._get_data_container(X, Y.flatten())
         seed = self.seed
-        if seed is None:
-            seed = int(random() * 9999)
 
         rng = regression.default_random_engine(seed)
 

diff --git a/deepcave/evaluators/fanova.py b/deepcave/evaluators/fanova.py
@@ -30,7 +30,7 @@ def calculate(
         objectives: Optional[Union[Objective, List[Objective]]] = None,
         budget: Optional[Union[int, float]] = None,
         n_trees: int = 16,
-        seed: Optional[int] = None,
+        seed: int = 0,
     ) -> None:
         """
         Get the data wrt budget and trains the forest on the encoded data.
@@ -47,8 +47,8 @@ def calculate(
             Considered budget. By default None. If None, the highest budget is chosen.
         n_trees : int, optional
             How many trees should be used. By default 16.
-        seed : Optional[int], optional
-            Random seed. By default None.
+        seed : int
+            Random seed. By default 0.
         """
         if objectives is None:
             objectives = self.run.get_objectives()

diff --git a/deepcave/evaluators/lpi.py b/deepcave/evaluators/lpi.py
@@ -30,7 +30,7 @@ def calculate(
         budget: Optional[Union[int, float]] = None,
         continous_neighbors: int = 500,
         n_trees: int = 10,
-        seed: Optional[int] = None,
+        seed: int = 0,
     ) -> None:
         """
         Prepares the data and trains a RandomForest model.
@@ -56,9 +56,6 @@ def calculate(
         self.default = self.cs.get_default_configuration()
         self.incumbent_array = self.incumbent.get_array()
 
-        # Set the seed
-        if seed is None:
-            seed = int(random() * 9999)
         self.seed = seed
         self.rs = np.random.RandomState(seed)
 

diff --git a/tests/test_evaluators/test_fanova.py b/tests/test_evaluators/test_fanova.py
@@ -21,13 +21,13 @@ def test(self):
         objective = self.run.get_objective(0)
 
         # Calculate
-        self.evaluator.calculate(objective, budget)
+        self.evaluator.calculate(objective, budget, seed=0)
         importances = self.evaluator.get_importances(self.hp_names)
 
-        self.evaluator.calculate(objective, budget)
+        self.evaluator.calculate(objective, budget, seed=42)
         importances2 = self.evaluator.get_importances(self.hp_names)
 
-        # No seed: Different results
+        # Different seed: Different results
         assert importances["n_neurons"][1] != importances2["n_neurons"][1]
 
     def test_seed(self):