From 8b6bf04fcc15dd293743eebbb4f9d813ffb2dde4 Mon Sep 17 00:00:00 2001
From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com>
Date: Thu, 12 Sep 2024 10:25:29 -0500
Subject: [PATCH 1/9] model cache for faster evaluation

---
 dev-requirements.txt              |  2 +-
 pytest.ini                        |  2 ++
 sklearn_genetic/genetic_search.py | 44 +++++++++++++++++++++++++++++--
 3 files changed, 45 insertions(+), 3 deletions(-)
 create mode 100644 pytest.ini

diff --git a/dev-requirements.txt b/dev-requirements.txt
index 53fced5..a9e964e 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,4 +1,4 @@
-scikit-learn>=1.1.0
+scikit-learn>=1.3.0
 deap>=1.3.3
 numpy>=1.19.0
 pytest==7.4.0
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..11d9f4c
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = --verbose --color=yes --assert=plain --cov-fail-under=95 --cov-config=.coveragerc --cov=./ -p no:warnings --tb=short --cov-report=term-missing:skip-covered
diff --git a/sklearn_genetic/genetic_search.py b/sklearn_genetic/genetic_search.py
index 2ea6e14..e2f8da8 100644
--- a/sklearn_genetic/genetic_search.py
+++ b/sklearn_genetic/genetic_search.py
@@ -259,6 +259,7 @@ def __init__(
         self.return_train_score = return_train_score
         self.creator = creator
         self.log_config = log_config
+        self.fitness_cache = {}
 
         # Check that the estimator is compatible with scikit-learn
         if not is_classifier(self.estimator) and not is_regressor(self.estimator):
@@ -392,6 +393,17 @@ def evaluate(self, individual):
             key: individual[n] for n, key in enumerate(self.space.parameters)
         }
 
+        # Convert hyperparameters to a tuple to use as a key in the cache
+        individual_key = tuple(sorted(current_generation_params.items()))
+
+        # Check if the individual has already been evaluated
+        if individual_key in self.fitness_cache:
+            # Retrieve cached result
+            cached_result = self.fitness_cache[individual_key]
+            # Ensure the logbook is updated even if the individual is cached
+            self.logbook.record(parameters=cached_result["current_generation_params"])
+            return cached_result["fitness"]
+
         local_estimator = clone(self.estimator)
         local_estimator.set_params(**current_generation_params)
 
@@ -437,7 +449,15 @@ def evaluate(self, individual):
         # Log the hyperparameters and the cv-score
         self.logbook.record(parameters=current_generation_params)
 
-        return [score]
+        fitness_result = [score]
+
+        # Store the fitness result and the current generation parameters in the cache
+        self.fitness_cache[individual_key] = {
+            "fitness": fitness_result,
+            "current_generation_params": current_generation_params
+        }
+
+        return fitness_result
 
     def fit(self, X, y, callbacks=None):
         """
@@ -880,6 +900,7 @@ def __init__(
         self.return_train_score = return_train_score
         self.creator = creator
         self.log_config = log_config
+        self.fitness_cache = {}
 
         # Check that the estimator is compatible with scikit-learn
         if not is_classifier(self.estimator) and not is_regressor(self.estimator):
@@ -965,6 +986,16 @@ def evaluate(self, individual):
         local_estimator = clone(self.estimator)
         n_selected_features = np.sum(individual)
 
+        # Convert the individual to a tuple to use as a key in the cache
+        individual_key = tuple(individual)
+
+        # Check if the individual has already been evaluated
+        if individual_key in self.fitness_cache:
+            cached_result = self.fitness_cache[individual_key]
+            # Ensure the logbook is updated even if the individual is cached
+            self.logbook.record(parameters=cached_result["current_generation_features"])
+            return cached_result["fitness"]
+
         # Compute the cv-metrics using only the selected features
         cv_results = cross_validate(
             local_estimator,
@@ -1014,7 +1045,16 @@ def evaluate(self, individual):
         ):
             score = -self.criteria_sign * 100000
 
-        return [score, n_selected_features]
+            # Prepare the fitness result
+        fitness_result = [score, n_selected_features]
+
+        # Store the fitness result and the current generation features in the cache
+        self.fitness_cache[individual_key] = {
+            "fitness": fitness_result,
+            "current_generation_features": current_generation_features
+        }
+
+        return fitness_result
 
     def fit(self, X, y, callbacks=None):
         """

From 741ef000a5b178d341021b7386888178329dc172 Mon Sep 17 00:00:00 2001
From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com>
Date: Thu, 12 Sep 2024 14:44:26 -0500
Subject: [PATCH 2/9] model cache for faster evaluation

---
 docs/index.rst                    |   2 +-
 docs/release_notes.rst            |  11 +++
 setup.py                          |   2 +-
 sklearn_genetic/_version.py       |   2 +-
 sklearn_genetic/genetic_search.py | 124 +++++++++++++++++-------------
 5 files changed, 83 insertions(+), 58 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index 10c0edb..b730171 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -27,7 +27,7 @@ inside the env use::
    pip install sklearn-genetic-opt
 
 .. |PythonMinVersion| replace:: 3.8
-.. |ScikitLearnMinVersion| replace:: 1.1.0
+.. |ScikitLearnMinVersion| replace:: 1.3.0
 .. |NumPyMinVersion| replace:: 1.19.0
 .. |SeabornMinVersion| replace:: 0.11.2
 .. |DEAPMinVersion| replace:: 1.3.3
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 886a097..3261d2e 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -3,6 +3,17 @@ Release Notes
 
 Some notes on new features in various releases
 
+What's new in 0.11.0dev0
+------------------------
+
+^^^^^^^^^
+Features:
+^^^^^^^^^
+
+* Added a parameter named `use_cache`, defaults to `True`, If set to true it will avoid to re-evaluating solutions that have already seen,
+  otherwise it will always evaluate the solutions to get the performance metrics
+
+
 What's new in 0.10.1
 --------------------
 
diff --git a/setup.py b/setup.py
index 9a438a2..e75729e 100644
--- a/setup.py
+++ b/setup.py
@@ -40,7 +40,7 @@
         include=["sklearn_genetic", "sklearn_genetic.*"], exclude=["*tests*"]
     ),
     install_requires=[
-        "scikit-learn>=1.1.0",
+        "scikit-learn>=1.3.0",
         "numpy>=1.19.0",
         "deap>=1.3.3",
         "tqdm>=4.61.1",
diff --git a/sklearn_genetic/_version.py b/sklearn_genetic/_version.py
index 018bfb0..fcf3da7 100644
--- a/sklearn_genetic/_version.py
+++ b/sklearn_genetic/_version.py
@@ -1 +1 @@
-__version__ = "0.10.2dev0"
+__version__ = "0.11.0dev0"
diff --git a/sklearn_genetic/genetic_search.py b/sklearn_genetic/genetic_search.py
index e2f8da8..6077430 100644
--- a/sklearn_genetic/genetic_search.py
+++ b/sklearn_genetic/genetic_search.py
@@ -172,6 +172,10 @@ class GASearchCV(BaseSearchCV):
         Configuration to log metrics and models to mlflow, of None,
         no mlflow logging will be performed
 
+    use_cache: bool, default=True
+        If ``True``, solutions that have already been evaluated are not re-evaluated and their cached fitness is reused,
+        otherwise every solution is always evaluated to get the performance metrics
+
     Attributes
     ----------
 
@@ -214,27 +218,28 @@ class GASearchCV(BaseSearchCV):
     """
 
     def __init__(
-        self,
-        estimator,
-        cv=3,
-        param_grid=None,
-        scoring=None,
-        population_size=50,
-        generations=80,
-        crossover_probability=0.2,
-        mutation_probability=0.8,
-        tournament_size=3,
-        elitism=True,
-        verbose=True,
-        keep_top_k=1,
-        criteria="max",
-        algorithm="eaMuPlusLambda",
-        refit=True,
-        n_jobs=1,
-        pre_dispatch="2*n_jobs",
-        error_score=np.nan,
-        return_train_score=False,
-        log_config=None,
+            self,
+            estimator,
+            cv=3,
+            param_grid=None,
+            scoring=None,
+            population_size=50,
+            generations=80,
+            crossover_probability=0.2,
+            mutation_probability=0.8,
+            tournament_size=3,
+            elitism=True,
+            verbose=True,
+            keep_top_k=1,
+            criteria="max",
+            algorithm="eaMuPlusLambda",
+            refit=True,
+            n_jobs=1,
+            pre_dispatch="2*n_jobs",
+            error_score=np.nan,
+            return_train_score=False,
+            log_config=None,
+            use_cache=True,
     ):
         self.estimator = estimator
         self.cv = cv
@@ -259,6 +264,7 @@ def __init__(
         self.return_train_score = return_train_score
         self.creator = creator
         self.log_config = log_config
+        self.use_cache = use_cache
         self.fitness_cache = {}
 
         # Check that the estimator is compatible with scikit-learn
@@ -397,7 +403,7 @@ def evaluate(self, individual):
         individual_key = tuple(sorted(current_generation_params.items()))
 
         # Check if the individual has already been evaluated
-        if individual_key in self.fitness_cache:
+        if individual_key in self.fitness_cache and self.use_cache:
             # Retrieve cached result
             cached_result = self.fitness_cache[individual_key]
             # Ensure the logbook is updated even if the individual is cached
@@ -451,11 +457,12 @@ def evaluate(self, individual):
 
         fitness_result = [score]
 
-        # Store the fitness result and the current generation parameters in the cache
-        self.fitness_cache[individual_key] = {
-            "fitness": fitness_result,
-            "current_generation_params": current_generation_params
-        }
+        if self.use_cache:
+            # Store the fitness result and the current generation parameters in the cache
+            self.fitness_cache[individual_key] = {
+                "fitness": fitness_result,
+                "current_generation_params": current_generation_params
+            }
 
         return fitness_result
 
@@ -814,6 +821,10 @@ class GAFeatureSelectionCV(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
         Configuration to log metrics and models to mlflow, of None,
         no mlflow logging will be performed
 
+    use_cache: bool, default=True
+        If ``True``, solutions that have already been evaluated are not re-evaluated and their cached fitness is reused,
+        otherwise every solution is always evaluated to get the performance metrics
+
     Attributes
     ----------
 
@@ -855,27 +866,28 @@ class GAFeatureSelectionCV(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
     """
 
     def __init__(
-        self,
-        estimator,
-        cv=3,
-        scoring=None,
-        population_size=50,
-        generations=80,
-        crossover_probability=0.2,
-        mutation_probability=0.8,
-        tournament_size=3,
-        elitism=True,
-        max_features=None,
-        verbose=True,
-        keep_top_k=1,
-        criteria="max",
-        algorithm="eaMuPlusLambda",
-        refit=True,
-        n_jobs=1,
-        pre_dispatch="2*n_jobs",
-        error_score=np.nan,
-        return_train_score=False,
-        log_config=None,
+            self,
+            estimator,
+            cv=3,
+            scoring=None,
+            population_size=50,
+            generations=80,
+            crossover_probability=0.2,
+            mutation_probability=0.8,
+            tournament_size=3,
+            elitism=True,
+            max_features=None,
+            verbose=True,
+            keep_top_k=1,
+            criteria="max",
+            algorithm="eaMuPlusLambda",
+            refit=True,
+            n_jobs=1,
+            pre_dispatch="2*n_jobs",
+            error_score=np.nan,
+            return_train_score=False,
+            log_config=None,
+            use_cache=True,
     ):
         self.estimator = estimator
         self.cv = cv
@@ -900,6 +912,7 @@ def __init__(
         self.return_train_score = return_train_score
         self.creator = creator
         self.log_config = log_config
+        self.use_cache = use_cache
         self.fitness_cache = {}
 
         # Check that the estimator is compatible with scikit-learn
@@ -990,7 +1003,7 @@ def evaluate(self, individual):
         individual_key = tuple(individual)
 
         # Check if the individual has already been evaluated
-        if individual_key in self.fitness_cache:
+        if individual_key in self.fitness_cache and self.use_cache:
             cached_result = self.fitness_cache[individual_key]
             # Ensure the logbook is updated even if the individual is cached
             self.logbook.record(parameters=cached_result["current_generation_features"])
@@ -1041,18 +1054,19 @@ def evaluate(self, individual):
         # Penalize individuals with more features than the max_features parameter
 
         if self.max_features and (
-            n_selected_features > self.max_features or n_selected_features == 0
+                n_selected_features > self.max_features or n_selected_features == 0
         ):
             score = -self.criteria_sign * 100000
 
             # Prepare the fitness result
         fitness_result = [score, n_selected_features]
 
-        # Store the fitness result and the current generation features in the cache
-        self.fitness_cache[individual_key] = {
-            "fitness": fitness_result,
-            "current_generation_features": current_generation_features
-        }
+        if self.use_cache:
+            # Store the fitness result and the current generation features in the cache
+            self.fitness_cache[individual_key] = {
+                "fitness": fitness_result,
+                "current_generation_features": current_generation_features
+            }
 
         return fitness_result
 

From 5bdb85d5575ca317a0e23108a16c802993a2e0c8 Mon Sep 17 00:00:00 2001
From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com>
Date: Thu, 12 Sep 2024 15:33:52 -0500
Subject: [PATCH 3/9] warm_start_configs for model initialization

---
 README.rst                        |  9 +++++++++
 dev-requirements.txt              |  2 ++
 docs/release_notes.rst            | 27 ++++++++++++++++++++++++---
 sklearn_genetic/genetic_search.py | 27 ++++++++++++++++++++++++++-
 sklearn_genetic/space/space.py    | 21 +++++++++++++++++++++
 5 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index 7654b19..d065656 100644
--- a/README.rst
+++ b/README.rst
@@ -102,12 +102,19 @@ Example: Hyperparameters Tuning
 
    clf = RandomForestClassifier()
 
+   # Defines the possible values to search
    param_grid = {'min_weight_fraction_leaf': Continuous(0.01, 0.5, distribution='log-uniform'),
                  'bootstrap': Categorical([True, False]),
                  'max_depth': Integer(2, 30),
                  'max_leaf_nodes': Integer(2, 35),
                  'n_estimators': Integer(100, 300)}
 
+   # Seed solutions
+   warm_start_configs = [
+              {"min_weight_fraction_leaf": 0.02, "bootstrap": True, "max_depth": None, "n_estimators": 100},
+              {"min_weight_fraction_leaf": 0.4, "bootstrap": True, "max_depth": 5, "n_estimators": 200},
+       ]
+
    cv = StratifiedKFold(n_splits=3, shuffle=True)
 
    evolved_estimator = GASearchCV(estimator=clf,
@@ -118,6 +125,8 @@ Example: Hyperparameters Tuning
                                   param_grid=param_grid,
                                   n_jobs=-1,
                                   verbose=True,
+                                  use_cache=True,
+                                  warm_start_configs=warm_start_configs,
                                   keep_top_k=4)
 
    # Train and optimize the estimator
diff --git a/dev-requirements.txt b/dev-requirements.txt
index a9e964e..c556fd0 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -15,6 +15,8 @@ sphinx_rtd_theme
 sphinx-copybutton
 numpydoc
 nbsphinx
+ipython>=8.27.0
+Pygments>=2.18.0
 tensorflow>=2.4.0
 tqdm>=4.61.1
 tk
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 3261d2e..4541930 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -10,9 +10,30 @@ What's new in 0.11.0dev0
 Features:
 ^^^^^^^^^
 
-* Added a parameter named `use_cache`, defaults to `True`, If set to true it will avoid to re-evaluating solutions that have already seen,
-  otherwise it will always evaluate the solutions to get the performance metrics
-
+* Added a parameter `use_cache`, which defaults to ``True``. When enabled, the algorithm will skip re-evaluating solutions that have already been evaluated, retrieving the performance metrics from the cache instead.
+  If use_cache is set to ``False``, the algorithm will always re-evaluate solutions, even if they have been seen before, to obtain fresh performance metrics.
+* Added a parameter `warm_start_configs` to `GASearchCV`, which defaults to ``None``. It is a list of predefined hyperparameter configurations used to seed the initial population.
+  Each element in the list is a dictionary where the keys are the names of the hyperparameters,
+  and the values are the corresponding hyperparameter values to be used for the individual.
+
+  Example:
+
+    .. code-block:: python
+       :linenos:
+
+       warm_start_configs = [
+              {"min_weight_fraction_leaf": 0.02, "bootstrap": True, "max_depth": None, "n_estimators": 100},
+              {"min_weight_fraction_leaf": 0.4, "bootstrap": True, "max_depth": 5, "n_estimators": 200},
+       ]
+
+  The genetic algorithm will initialize part of the population with these configurations to
+  warm-start the optimization process. The remaining individuals in the population will
+  be initialized randomly according to the defined hyperparameter space.
+
+  This parameter is useful when prior knowledge of good hyperparameter configurations exists,
+  allowing the algorithm to focus on refining known good solutions while still exploring new
+  areas of the hyperparameter space. If set to ``None``, the entire population will be initialized
+  randomly.
 
 What's new in 0.10.1
 --------------------
diff --git a/sklearn_genetic/genetic_search.py b/sklearn_genetic/genetic_search.py
index 6077430..c160879 100644
--- a/sklearn_genetic/genetic_search.py
+++ b/sklearn_genetic/genetic_search.py
@@ -240,6 +240,7 @@ def __init__(
             return_train_score=False,
             log_config=None,
             use_cache=True,
+            warm_start_configs=None,
     ):
         self.estimator = estimator
         self.cv = cv
@@ -266,6 +267,7 @@ def __init__(
         self.log_config = log_config
         self.use_cache = use_cache
         self.fitness_cache = {}
+        self.warm_start_configs = warm_start_configs or []
 
         # Check that the estimator is compatible with scikit-learn
         if not is_classifier(self.estimator) and not is_regressor(self.estimator):
@@ -346,7 +348,7 @@ def _register(self):
 
         self.toolbox.register("evaluate", self.evaluate)
 
-        self._pop = self.toolbox.population(n=self.population_size)
+        self._pop = self._initialize_population()
         self._hof = tools.HallOfFame(self.keep_top_k)
 
         self._stats = tools.Statistics(lambda ind: ind.fitness.values)
@@ -357,6 +359,29 @@ def _register(self):
 
         self.logbook = tools.Logbook()
 
+    def _initialize_population(self):
+        """
+        Return the initial population: warm-start individuals first, then random ones up to ``population_size``.
+        """
+        population = []
+        # Use at most ``population_size`` warm-start configurations; any extra ones are ignored
+        num_warm_start = min(len(self.warm_start_configs), self.population_size)
+
+        for config in self.warm_start_configs[:num_warm_start]:
+            # Complete the configuration through the space's ``sample_warm_start``
+            individual_values = self.space.sample_warm_start(config)
+            individual_values_list = list(individual_values.values())
+
+            # Wrap the values in an ``Individual``; no fitness value is computed at this point
+            individual = creator.Individual(individual_values_list)
+            population.append(individual)
+
+        # Fill the remaining slots with individuals produced by ``toolbox.population``
+        num_random = self.population_size - num_warm_start
+        population.extend(self.toolbox.population(n=num_random))
+
+        return population
+
     def mutate(self, individual):
         """
         This function is responsible for change a randomly selected parameter from an individual
diff --git a/sklearn_genetic/space/space.py b/sklearn_genetic/space/space.py
index cb913db..c0a8754 100644
--- a/sklearn_genetic/space/space.py
+++ b/sklearn_genetic/space/space.py
@@ -222,6 +222,27 @@ def __init__(self, param_grid: dict = None):
 
         self.param_grid = param_grid
 
+    def sample_warm_start(self, warm_start_values: dict):
+        """
+        Build a complete configuration from a warm-start dictionary, sampling any missing parameter.
+
+        Parameters
+        ----------
+        warm_start_values: dict
+            Predefined hyperparameter values, used as given; keys that are not in ``param_grid`` are ignored.
+
+        Returns
+        -------
+        A dictionary with one value per hyperparameter of ``param_grid``, in ``param_grid`` order.
+        """
+        sampled_params = {}
+        for param, dimension in self.param_grid.items():
+            if param in warm_start_values:
+                sampled_params[param] = warm_start_values[param]
+            else:
+                sampled_params[param] = dimension.sample()  # Random sample if no warm-start value
+        return sampled_params
+
     @property
     def dimensions(self):
         """

From 977aea3dd25ef363575060feebc6138b9b3fbd20 Mon Sep 17 00:00:00 2001
From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com>
Date: Thu, 12 Sep 2024 15:38:20 -0500
Subject: [PATCH 4/9] Dropped support for python 3.8

---
 .github/workflows/ci-tests.yml | 2 +-
 README.rst                     | 2 +-
 docs/index.rst                 | 2 +-
 docs/release_notes.rst         | 6 ++++++
 setup.py                       | 2 +-
 5 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index 3944f1e..239ed93 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [ '3.8', '3.9', '3.10', '3.11']
+        python-version: [ '3.9', '3.10', '3.11']
         os: [ubuntu-latest, windows-latest, macOS-latest]
         include:
           - os: ubuntu-latest
diff --git a/README.rst b/README.rst
index d065656..9e31d68 100644
--- a/README.rst
+++ b/README.rst
@@ -8,7 +8,7 @@
 .. |Codecov| image:: https://codecov.io/gh/rodrigo-arenas/Sklearn-genetic-opt/branch/master/graphs/badge.svg?branch=master&service=github
 .. _Codecov: https://codecov.io/github/rodrigo-arenas/Sklearn-genetic-opt?branch=master
 
-.. |PythonVersion| image:: https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue
+.. |PythonVersion| image:: https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11-blue
 .. _PythonVersion : https://www.python.org/downloads/
 .. |PyPi| image:: https://badge.fury.io/py/sklearn-genetic-opt.svg
 .. _PyPi: https://badge.fury.io/py/sklearn-genetic-opt
diff --git a/docs/index.rst b/docs/index.rst
index b730171..0c821b1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -26,7 +26,7 @@ inside the env use::
 
    pip install sklearn-genetic-opt
 
-.. |PythonMinVersion| replace:: 3.8
+.. |PythonMinVersion| replace:: 3.9
 .. |ScikitLearnMinVersion| replace:: 1.3.0
 .. |NumPyMinVersion| replace:: 1.19.0
 .. |SeabornMinVersion| replace:: 0.11.2
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 4541930..66dfa8a 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -35,6 +35,12 @@ Features:
   areas of the hyperparameter space. If set to ``None``, the entire population will be initialized
   randomly.
 
+^^^^^^^^^^^^
+API Changes:
+^^^^^^^^^^^^
+
+* Dropped support for Python 3.8
+
 What's new in 0.10.1
 --------------------
 
diff --git a/setup.py b/setup.py
index e75729e..586c89f 100644
--- a/setup.py
+++ b/setup.py
@@ -51,6 +51,6 @@
         "tensorflow": ["tensorflow>=2.0.0"],
         "all": ["mlflow>=1.30.0", "seaborn>=0.11.2", "tensorflow>=2.0.0"],
     },
-    python_requires=">=3.8",
+    python_requires=">=3.9",
     include_package_data=True,
 )

From a84e7ba13ae20b9b5a40fa79d60e4d39952a74b3 Mon Sep 17 00:00:00 2001
From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com>
Date: Thu, 12 Sep 2024 15:40:03 -0500
Subject: [PATCH 5/9] Dropped support for python 3.8

---
 dev-requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index c556fd0..8ac28d2 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -15,8 +15,8 @@ sphinx_rtd_theme
 sphinx-copybutton
 numpydoc
 nbsphinx
-ipython>=8.27.0
-Pygments>=2.18.0
+ipython
+Pygments
 tensorflow>=2.4.0
 tqdm>=4.61.1
 tk

From 265cda82f7bad9c1d4ea85e5a526636efb84f7db Mon Sep 17 00:00:00 2001
From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com>
Date: Thu, 12 Sep 2024 15:58:19 -0500
Subject: [PATCH 6/9] test warm_start_configs

---
 sklearn_genetic/space/space.py               | 2 +-
 sklearn_genetic/tests/test_genetic_search.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/sklearn_genetic/space/space.py b/sklearn_genetic/space/space.py
index c0a8754..c28306e 100644
--- a/sklearn_genetic/space/space.py
+++ b/sklearn_genetic/space/space.py
@@ -240,7 +240,7 @@ def sample_warm_start(self, warm_start_values: dict):
             if param in warm_start_values:
                 sampled_params[param] = warm_start_values[param]
             else:
-                sampled_params[param] = dimension.sample()  # Random sample if no warm-start value
+                sampled_params[param] = dimension.sample()
         return sampled_params
 
     @property
diff --git a/sklearn_genetic/tests/test_genetic_search.py b/sklearn_genetic/tests/test_genetic_search.py
index af82c0e..4f2bf04 100644
--- a/sklearn_genetic/tests/test_genetic_search.py
+++ b/sklearn_genetic/tests/test_genetic_search.py
@@ -605,6 +605,8 @@ def test_expected_ga_schedulers():
             "average": Categorical([True, False]),
             "max_iter": Integer(700, 1000),
         },
+        warm_start_configs=[{"l1_ratio": 0.5, "alpha": 0.5, "average": False, "max_iter": 400},
+                            {"l1_ratio": 0.2, "alpha": 0.8, "average": True, "max_iter": 400}],
         verbose=False,
         algorithm="eaSimple",
         n_jobs=-1,

From a5006fa9f81fcd9f3280f8ff1f512da97e9fd05e Mon Sep 17 00:00:00 2001
From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com>
Date: Thu, 12 Sep 2024 16:08:39 -0500
Subject: [PATCH 7/9] upgrade actions versions

---
 .github/workflows/ci-tests.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index 239ed93..1c320ad 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -19,12 +19,12 @@ jobs:
           - os: windows-latest
             path: ~\AppData\Local\pip\Cache
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-      - uses: actions/cache@v3
+      - uses: actions/cache@v4
         with:
           path: ${{ matrix.path }}
           key: ${{ runner.os }}-pip-${{ hashFiles('dev-requirements.txt') }}
@@ -38,7 +38,7 @@ jobs:
         run: |
           pytest sklearn_genetic/ --verbose --color=yes --assert=plain --cov-fail-under=95 --cov-config=.coveragerc --cov=./ -p no:warnings
       - name: "Upload coverage to Codecov"
-        uses: codecov/codecov-action@v3
+        uses: codecov/codecov-action@v4
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           fail_ci_if_error: true

From d8ae6a937b139541ec2db95bc0061db0ef055619 Mon Sep 17 00:00:00 2001
From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com>
Date: Thu, 12 Sep 2024 17:47:35 -0500
Subject: [PATCH 8/9] novelty score for GASearchCV

---
 docs/release_notes.rst            |  9 +++++++
 sklearn_genetic/genetic_search.py | 16 +++++++------
 sklearn_genetic/utils/tools.py    | 40 +++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 66dfa8a..e0ba510 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -34,6 +34,15 @@ Features:
   allowing the algorithm to focus on refining known good solutions while still exploring new
   areas of the hyperparameter space. If set to ``None``, the entire population will be initialized
   randomly.
+* Introduced a **novelty search strategy** to the `GASearchCV` class. This strategy rewards solutions that are more distinct from others
+  in the population by incorporating a **novelty score** into the fitness evaluation. The novelty score encourages exploration and promotes diversity,
+  reducing the risk of premature convergence to local optima.
+
+       - **Novelty Score**: Calculated based on the distance between an individual and its nearest neighbors in the population.
+         Individuals with higher novelty scores are more distinct from the rest of the population.
+       - **Fitness Evaluation**: The overall fitness is now a combination of the traditional performance score and the novelty score,
+         allowing the algorithm to balance between exploiting known good solutions and exploring new, diverse ones.
+       - **Improved Exploration**: This strategy helps explore new areas of the hyperparameter space, increasing the likelihood of discovering better solutions and avoiding local optima.
 
 ^^^^^^^^^^^^
 API Changes:
diff --git a/sklearn_genetic/genetic_search.py b/sklearn_genetic/genetic_search.py
index c160879..eee15c7 100644
--- a/sklearn_genetic/genetic_search.py
+++ b/sklearn_genetic/genetic_search.py
@@ -28,7 +28,7 @@
     create_feature_selection_cv_results_,
 )
 from .utils.random import weighted_bool_individual
-from .utils.tools import cxUniform, mutFlipBit
+from .utils.tools import cxUniform, mutFlipBit, novelty_scorer
 
 
 class GASearchCV(BaseSearchCV):
@@ -308,7 +308,7 @@ def _register(self):
         """
         self.toolbox = base.Toolbox()
 
-        self.creator.create("FitnessMax", base.Fitness, weights=[self.criteria_sign])
+        self.creator.create("FitnessMax", base.Fitness, weights=[self.criteria_sign, 1.0])
         self.creator.create("Individual", list, fitness=creator.FitnessMax)
 
         attributes = []
@@ -352,10 +352,10 @@ def _register(self):
         self._hof = tools.HallOfFame(self.keep_top_k)
 
         self._stats = tools.Statistics(lambda ind: ind.fitness.values)
-        self._stats.register("fitness", np.mean)
-        self._stats.register("fitness_std", np.std)
-        self._stats.register("fitness_max", np.max)
-        self._stats.register("fitness_min", np.min)
+        self._stats.register("fitness", np.mean, axis=0)
+        self._stats.register("fitness_std", np.std, axis=0)
+        self._stats.register("fitness_max", np.max, axis=0)
+        self._stats.register("fitness_min", np.min, axis=0)
 
         self.logbook = tools.Logbook()
 
@@ -454,6 +454,8 @@ def evaluate(self, individual):
         cv_scores = cv_results[f"test_{self.refit_metric}"]
         score = np.mean(cv_scores)
 
+        novelty_score = novelty_scorer(individual, self._pop)
+
         # Uses the log config to save in remote log server (e.g MLflow)
         if self.log_config is not None:
             self.log_config.create_run(
@@ -480,7 +482,7 @@ def evaluate(self, individual):
         # Log the hyperparameters and the cv-score
         self.logbook.record(parameters=current_generation_params)
 
-        fitness_result = [score]
+        fitness_result = [score, novelty_score]
 
         if self.use_cache:
             # Store the fitness result and the current generation parameters in the cache
diff --git a/sklearn_genetic/utils/tools.py b/sklearn_genetic/utils/tools.py
index 3fe9005..d1847a5 100644
--- a/sklearn_genetic/utils/tools.py
+++ b/sklearn_genetic/utils/tools.py
@@ -1,4 +1,5 @@
 import random
+import numpy as np
 
 
 def mutFlipBit(individual, indpb):
@@ -67,3 +68,42 @@ def check_bool_individual(individual):
         individual[index] = 1
 
     return individual
+
+
+def novelty_scorer(individual, population, k=15):
+    """
+    Score how different an individual is from the population: the mean Hamming distance to its nearest members.
+
+    Parameters
+    ----------
+    individual: Individual object
+        The individual (sequence of hyperparameter values) that is being evaluated.
+
+    population: List[Individual]
+        The individuals to compare against.
+
+    k: int, default=15
+        The maximum number of nearest neighbors (smallest distances) averaged into the score.
+
+    Returns
+    -------
+    novelty_score: float
+        Mean of the (at most) k smallest distances, or 0 when no member differs from the individual.
+    """
+    distances = []
+
+    # NOTE(review): ``!=`` compares by value for list-like individuals, so exact duplicates are skipped too
+    for other in population:
+        if other != individual:
+            # Hamming distance: number of positions whose values differ (zip stops at the shorter one)
+            distance = sum(i != o for i, o in zip(individual, other))
+            distances.append(distance)
+
+    # Sort ascending so the slice below keeps the smallest (nearest) distances
+    distances = sorted(distances)
+    k_min = min(k, len(population))
+    nearest_distances = distances[:k_min]
+
+    # Novelty score is the average distance to the nearest neighbors; 0 if there are none
+    novelty_score = np.mean(nearest_distances) if nearest_distances else 0
+    return novelty_score

From fba486c5ede488cc0873d6c3bfc7ee48ae0b8865 Mon Sep 17 00:00:00 2001
From: "rodrigo.arenas" <31422766+rodrigo-arenas@users.noreply.github.com>
Date: Thu, 12 Sep 2024 18:16:45 -0500
Subject: [PATCH 9/9] 0.11.0 release

---
 docs/release_notes.rst      | 4 ++--
 sklearn_genetic/_version.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index e0ba510..2db6a18 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -3,8 +3,8 @@ Release Notes
 
 Some notes on new features in various releases
 
-What's new in 0.11.0dev0
-------------------------
+What's new in 0.11.0
+--------------------
 
 ^^^^^^^^^
 Features:
diff --git a/sklearn_genetic/_version.py b/sklearn_genetic/_version.py
index fcf3da7..ae6db5f 100644
--- a/sklearn_genetic/_version.py
+++ b/sklearn_genetic/_version.py
@@ -1 +1 @@
-__version__ = "0.11.0dev0"
+__version__ = "0.11.0"