diff --git a/alpha_automl/automl_manager.py b/alpha_automl/automl_manager.py
index 4bb7086a..f9832713 100644
--- a/alpha_automl/automl_manager.py
+++ b/alpha_automl/automl_manager.py
@@ -67,10 +67,11 @@ def _search_pipelines(self, automl_hyperparams):
         found_pipelines = 0
         pipeline_threshold = 20
+        X, y, _ = sample_dataset(self.X, self.y, SAMPLE_SIZE, self.task)
         while pipelines and found_pipelines < pipeline_threshold:
             pipeline = pipelines.pop()
             try:
-                alphaautoml_pipeline = score_pipeline(pipeline, self.X, self.y, self.scoring,
+                alphaautoml_pipeline = score_pipeline(pipeline, X, y, self.scoring,
                                                       self.splitting_strategy, self.task, self.verbose)
diff --git a/alpha_automl/utils.py b/alpha_automl/utils.py
index 73313108..0303373b 100644
--- a/alpha_automl/utils.py
+++ b/alpha_automl/utils.py
@@ -8,6 +8,7 @@
 import numpy as np
 import pandas as pd
 import torch
+from datetime import datetime
 from enum import Enum
 from sklearn.compose import ColumnTransformer
 from sklearn.preprocessing import LabelEncoder
@@ -66,10 +67,12 @@ def sample_dataset(X, y, sample_size, task):
     if original_size > sample_size:
         ratio = sample_size / original_size
+        # Time-based seed so the drawn sample varies between runs. scikit-learn only accepts an int,
+        # a RandomState instance or None as random_state, so the float timestamp is truncated to int.
         try:
-            _, X_test, _, y_test = train_test_split(X, y, random_state=RANDOM_SEED, test_size=ratio, stratify=y, shuffle=shuffle)
+            _, X_test, _, y_test = train_test_split(X, y, random_state=int(datetime.now().timestamp()), test_size=ratio, stratify=y, shuffle=shuffle)
         except Exception:
             # Not using stratified sampling when the minority class has few instances, not enough for all the folds
-            _, X_test, _, y_test = train_test_split(X, y, random_state=RANDOM_SEED, test_size=ratio, shuffle=shuffle)
+            _, X_test, _, y_test = train_test_split(X, y, random_state=int(datetime.now().timestamp()), test_size=ratio, shuffle=shuffle)
         logger.debug(f'Sampling down data from {original_size} to {len(X_test)}')
         if isinstance(X_test, pd.DataFrame):
             X_test = X_test.reset_index(drop=True)