From 242aa1b23e495d0640b199226bd1fd48cc329853 Mon Sep 17 00:00:00 2001 From: Roque Lopez Date: Wed, 17 Jan 2024 15:46:45 -0500 Subject: [PATCH 1/7] Add files for integration --- scripts/amlb/user_config/config.yaml | 13 ++++ .../extensions/Alpha-AutoML/__init__.py | 22 ++++++ .../extensions/Alpha-AutoML/exec.py | 77 +++++++++++++++++++ .../extensions/Alpha-AutoML/requirements.txt | 1 + .../extensions/Alpha-AutoML/setup.sh | 9 +++ scripts/amlb/user_config/frameworks.yaml | 5 ++ 6 files changed, 127 insertions(+) create mode 100644 scripts/amlb/user_config/config.yaml create mode 100644 scripts/amlb/user_config/extensions/Alpha-AutoML/__init__.py create mode 100644 scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py create mode 100644 scripts/amlb/user_config/extensions/Alpha-AutoML/requirements.txt create mode 100755 scripts/amlb/user_config/extensions/Alpha-AutoML/setup.sh create mode 100644 scripts/amlb/user_config/frameworks.yaml diff --git a/scripts/amlb/user_config/config.yaml b/scripts/amlb/user_config/config.yaml new file mode 100644 index 00000000..05ade0be --- /dev/null +++ b/scripts/amlb/user_config/config.yaml @@ -0,0 +1,13 @@ +--- +seed: 0 # any int32 to pass a fixed seed to the jobs. +benchmarks: # configuration namespace for the benchmarks definitions. + metrics: # default metrics by dataset type (as listed by amlb.data.DatasetType), + # only the first metric is optimized by the frameworks, + # the others are computed only for information purposes. + binary: ['acc'] # available metrics: auc (AUC), acc (Accuracy), balacc (Balanced Accuracy), pr_auc (Precision Recall AUC), logloss (Log Loss), f1, f2, f05 (F-beta scores with beta=1, 2, or 0.5), max_pce, mean_pce (Max/Mean Per-Class Error). + multiclass: ['acc'] # available metrics: same as for binary, except auc, replaced by auc_ovo (AUC One-vs-One), auc_ovr (AUC One-vs-Rest). AUC metrics and F-beta metrics are computed with weighted average. + +frameworks: + definition_file: # this allows adding custom framework definitions (in {user}/frameworks.yaml) on top of the default ones. 
+ - '{root}/resources/frameworks.yaml' + - '{user}/frameworks.yaml' diff --git a/scripts/amlb/user_config/extensions/Alpha-AutoML/__init__.py b/scripts/amlb/user_config/extensions/Alpha-AutoML/__init__.py new file mode 100644 index 00000000..1bbff0e1 --- /dev/null +++ b/scripts/amlb/user_config/extensions/Alpha-AutoML/__init__.py @@ -0,0 +1,22 @@ +from amlb.benchmark import TaskConfig +from amlb.data import Dataset +from amlb.utils import call_script_in_same_dir + + +def setup(*args, **kwargs): + call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs) + + +def run(dataset: Dataset, config: TaskConfig): + from frameworks.shared.caller import run_in_venv + + data = dict( + train=dict(path=dataset.train.data_path('csv')), + test=dict(path=dataset.test.data_path('csv')), + target=dict( + name=dataset.target.name, + classes=dataset.target.values + ) + ) + + return run_in_venv(__file__, "exec.py", input_data=data, dataset=dataset, config=config) diff --git a/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py b/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py new file mode 100644 index 00000000..ad6d8d08 --- /dev/null +++ b/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py @@ -0,0 +1,77 @@ +import os +import logging +import numpy as np +import pandas as pd +import tempfile as tmp +from alpha_automl import AutoMLClassifier +from frameworks.shared.callee import call_run, result +from frameworks.shared.utils import Timer + + +os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir() +os.environ['OMP_NUM_THREADS'] = '1' +os.environ['OPENBLAS_NUM_THREADS'] = '1' +os.environ['MKL_NUM_THREADS'] = '1' +log = logging.getLogger(__name__) + + +def run(dataset, config): + log.info(f'\n**** Running Alpha-AutoML ****\n') + + metrics_mapping = { + 'acc': 'accuracy_score', + #'auc': metrics.roc_auc, + 'f1': 'f1_score', + #'logloss': metrics.log_loss, + 'mae': 'mean_absolute_error', + 'mse': 'mean_squared_error', + 'r2': 'r2_score' + } + + metric = config.metric + + if metric not in metrics_mapping: + log.warning(f'Performance metric {metric} not supported, defaulting to accuracy') + metric = 'acc' + + train_dataset_path = dataset.train.path + test_dataset_path = dataset.test.path + target_name = dataset.target.name + output_path = config.output_dir + time_bound = int(config.max_runtime_seconds/60) + + log.info(f'Received parameters:\n' + f'train_dataset: {train_dataset_path}\n' + f'test_dataset: {test_dataset_path}\n' + f'target_name: {target_name}\n' + f'time_bound: {time_bound}\n' + f'metric: {metric}\n' + ) + + automl = AutoMLClassifier(time_bound=time_bound, metric=metrics_mapping[metric], time_bound_run=15, + output_folder=output_path, start_mode='spawn', verbose=logging.DEBUG) + + train_dataset = pd.read_csv(train_dataset_path) + test_dataset = pd.read_csv(test_dataset_path) + X_train = train_dataset.drop(columns=[target_name]) + y_train = train_dataset[[target_name]] + X_test = test_dataset.drop(columns=[target_name]) + y_test = test_dataset[[target_name]] + + with Timer() as training: + automl.fit(X_train, y_train) + automl.plot_leaderboard(use_print=True) + predictions = automl.predict(X_test) + + classes = pd.read_csv(train_dataset)[target_name].unique() + probabilities = pd.DataFrame(0, index=np.arange(len(predictions)), columns=classes) + + return result(dataset=dataset, + output_file=config.output_predictions_file, + probabilities=probabilities, + predictions=predictions, + training_duration=training.duration) + + +if __name__ == '__main__': + call_run(run) diff --git 
a/scripts/amlb/user_config/extensions/Alpha-AutoML/requirements.txt b/scripts/amlb/user_config/extensions/Alpha-AutoML/requirements.txt new file mode 100644 index 00000000..2fb52117 --- /dev/null +++ b/scripts/amlb/user_config/extensions/Alpha-AutoML/requirements.txt @@ -0,0 +1 @@ +alpha-automl \ No newline at end of file diff --git a/scripts/amlb/user_config/extensions/Alpha-AutoML/setup.sh b/scripts/amlb/user_config/extensions/Alpha-AutoML/setup.sh new file mode 100755 index 00000000..c1fd182a --- /dev/null +++ b/scripts/amlb/user_config/extensions/Alpha-AutoML/setup.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +shopt -s expand_aliases +HERE=$(dirname "$0") + +. "automlbenchmark/frameworks/shared/setup.sh" "$HERE" true +export AR=/usr/bin/ar +PIP install -r "$HERE/requirements.txt" + +PY -c "from alpha_automl import __version__; print(__version__)" >> "${HERE}/.installed" diff --git a/scripts/amlb/user_config/frameworks.yaml b/scripts/amlb/user_config/frameworks.yaml new file mode 100644 index 00000000..e7965f87 --- /dev/null +++ b/scripts/amlb/user_config/frameworks.yaml @@ -0,0 +1,5 @@ +Alpha-AutoML: + module: extensions.Alpha-AutoML + version: 'stable' + project: https://github.com/VIDA-NYU/alpha-automl + description: 'Alpha-AutoML is an AutoML library that automatically searches for models and derives end-to-end pipelines that read, pre-process the data, and train the model.' From 7b1451c41e918cb159daa131cea78bbd11ebbf9c Mon Sep 17 00:00:00 2001 From: Roque Lopez Date: Tue, 23 Jan 2024 21:44:43 -0500 Subject: [PATCH 2/7] Add optional params (predict_duration, training_duration and probabilities) --- .../extensions/Alpha-AutoML/exec.py | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py b/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py index ad6d8d08..e84f30b5 100644 --- a/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py +++ b/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py @@ -1,6 +1,5 @@ import os import logging -import numpy as np import pandas as pd import tempfile as tmp from alpha_automl import AutoMLClassifier @@ -49,7 +48,7 @@ def run(dataset, config): ) automl = AutoMLClassifier(time_bound=time_bound, metric=metrics_mapping[metric], time_bound_run=15, - output_folder=output_path, start_mode='spawn', verbose=logging.DEBUG) + output_folder=output_path, verbose=logging.DEBUG) train_dataset = pd.read_csv(train_dataset_path) test_dataset = pd.read_csv(test_dataset_path) @@ -58,19 +57,28 @@ def run(dataset, config): X_test = test_dataset.drop(columns=[target_name]) y_test = test_dataset[[target_name]] - with Timer() as training: + with Timer() as train_time: automl.fit(X_train, y_train) - automl.plot_leaderboard(use_print=True) - predictions = automl.predict(X_test) - classes = pd.read_csv(train_dataset)[target_name].unique() - probabilities = pd.DataFrame(0, index=np.arange(len(predictions)), columns=classes) + automl.plot_leaderboard(use_print=True) + best_pipeline = automl.get_pipeline() - return result(dataset=dataset, + with Timer() as test_time: + predictions = best_pipeline.predict(X_test) + predictions = automl.label_encoder.inverse_transform(predictions) + + probabilities = pd.DataFrame(automl.get_pipeline().predict_proba(X_test), + columns=automl.label_encoder.inverse_transform(automl.get_pipeline().classes_)) + + return result( output_file=config.output_predictions_file, - probabilities=probabilities, predictions=predictions, - 
training_duration=training.duration) + truth=y_test, + probabilities=probabilities, + probabilities_labels=probabilities.columns.values.astype(str).tolist(), + training_duration=train_time.duration, + predict_duration=test_time.duration, + target_is_encoded=False) if __name__ == '__main__': From 11e6698c1aa2ffb2b7fd55221b7020ac2b5fa774 Mon Sep 17 00:00:00 2001 From: Roque Lopez Date: Thu, 25 Jan 2024 13:48:28 -0500 Subject: [PATCH 3/7] Add instructions --- scripts/amlb/README.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 scripts/amlb/README.md diff --git a/scripts/amlb/README.md b/scripts/amlb/README.md new file mode 100644 index 00000000..d1b17aed --- /dev/null +++ b/scripts/amlb/README.md @@ -0,0 +1,37 @@ +# AMLB Experiments + + + +We used the AutoML Benchmark (AMLB) to run these experiments locally using Python 3.9. To reproduce these experiments, +follow the steps below. + + +1. Clone and install AMLB. See the [AMLB repository](https://github.com/openml/automlbenchmark/tree/v2.0.6/) +for additional details about the installation. + +``` +git clone https://github.com/openml/automlbenchmark.git --branch stable --depth 1 +cd automlbenchmark +pip install -r requirements.txt +cd .. +``` + +2. Create the *openml_datasets* and *results* folders (AMLB will use these folders). +``` +mkdir openml_datasets +mkdir results +``` + + +3. To test the installation, run the following command. You should get valid ML pipelines after running it. +``` + python automlbenchmark/runbenchmark.py Alpha-AutoML openml/t/12 -f 0 -u user_config/ -i openml_datasets/ -o results/ +``` + +4. We ran all the systems (AutoWEKA, TPOT, H2O, AutoGluon, Auto-Sklearn, and AlphaD3M) using Singularity containers in +SLURM batch jobs in the [NYU Greene Cluster](https://sites.google.com/nyu.edu/nyu-hpc/hpc-systems/greene). To run the +experiments in this cluster, run `bash ./run_all_automlbenchmark.sh`. +All the results will be stored in the `./results/results.csv` file. + + + From b339b7760302298a000b010ed0a9cda2155df6a9 Mon Sep 17 00:00:00 2001 From: Roque Lopez Date: Fri, 26 Jan 2024 21:34:33 -0500 Subject: [PATCH 4/7] Run jobs with slurm --- scripts/amlb/README.md | 2 +- scripts/amlb/automl_job.SBATCH | 8 ++++++++ scripts/amlb/run_all_automlbenchmark.sh | 13 +++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 scripts/amlb/automl_job.SBATCH create mode 100644 scripts/amlb/run_all_automlbenchmark.sh diff --git a/scripts/amlb/README.md b/scripts/amlb/README.md index d1b17aed..7f3e49ab 100644 --- a/scripts/amlb/README.md +++ b/scripts/amlb/README.md @@ -25,7 +25,7 @@ mkdir results 3. To test the installation, run the following command. You should get valid ML pipelines after running it. ``` - python automlbenchmark/runbenchmark.py Alpha-AutoML openml/t/12 -f 0 -u user_config/ -i openml_datasets/ -o results/ + python automlbenchmark/runbenchmark.py Alpha-AutoML openml/t/12 test -f 0 -u user_config/ -i openml_datasets/ -o results/ ``` 4. 
We ran all the systems (AutoWEKA, TPOT, H2O, AutoGluon, Auto-Sklearn, and AlphaD3M) using Singularity containers in diff --git a/scripts/amlb/automl_job.SBATCH b/scripts/amlb/automl_job.SBATCH new file mode 100644 index 00000000..6b8792ba --- /dev/null +++ b/scripts/amlb/automl_job.SBATCH @@ -0,0 +1,8 @@ +#!/bin/bash +#SBATCH -c 4 +#SBATCH --mem 32GB +#SBATCH --time 01:15:00 +#SBATCH --output logs/automl_job_%J.out +#SBATCH --mail-user=rl3725@nyu.edu + +singularity exec --bind /scratch/rl3725/alphaautoml_experiments/experiments:/scratch/rl3725/alphaautoml_experiments/experiments --overlay overlay-15GB-500K.ext3:rw /scratch/work/public/singularity/ubuntu-20.04.4.sif /bin/bash -c "source /ext3/env.sh; python automlbenchmark/runbenchmark.py ${1} ${2} 1h4c -f 0 -u user_config/ -i openml_datasets/ -o results/" diff --git a/scripts/amlb/run_all_automlbenchmark.sh b/scripts/amlb/run_all_automlbenchmark.sh new file mode 100644 index 00000000..8744ac2a --- /dev/null +++ b/scripts/amlb/run_all_automlbenchmark.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +datasets="openml/t/10101 openml/t/12 openml/t/146195 openml/t/146212 openml/t/146606 openml/t/146818 openml/t/146821 openml/t/146822 openml/t/146825 openml/t/14965 openml/t/167119 openml/t/167120 openml/t/168329 openml/t/168330 openml/t/168331 openml/t/168332 openml/t/168335 openml/t/168337 openml/t/168338 openml/t/168868 openml/t/168908 openml/t/168909 openml/t/168910 openml/t/168911 openml/t/168912 openml/t/189354 openml/t/189355 openml/t/189356 openml/t/3 openml/t/31 openml/t/34539 openml/t/3917 openml/t/3945 openml/t/53 openml/t/7592 openml/t/7593 openml/t/9952 openml/t/9977 openml/t/9981" +systems="autosklearn AutoGluon TPOT H2OAutoML AutoWEKA Alpha-AutoML" + +for system in $systems +do + for dataset in $datasets + do + echo "Running ${system} system in ${dataset} dataset" + sbatch automl_job.SBATCH $system $dataset + done +done \ No newline at end of file From 53638ef79f1f81e8942a35cb9145304760e63441 Mon Sep 17 00:00:00 2001 From: Roque Lopez Date: Mon, 29 Jan 2024 15:45:18 -0500 Subject: [PATCH 5/7] Make read-only --- scripts/amlb/automl_job.SBATCH | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/amlb/automl_job.SBATCH b/scripts/amlb/automl_job.SBATCH index 6b8792ba..8547b7ab 100644 --- a/scripts/amlb/automl_job.SBATCH +++ b/scripts/amlb/automl_job.SBATCH @@ -5,4 +5,4 @@ #SBATCH --output logs/automl_job_%J.out #SBATCH --mail-user=rl3725@nyu.edu -singularity exec --bind /scratch/rl3725/alphaautoml_experiments/experiments:/scratch/rl3725/alphaautoml_experiments/experiments --overlay overlay-15GB-500K.ext3:rw /scratch/work/public/singularity/ubuntu-20.04.4.sif /bin/bash -c "source /ext3/env.sh; python automlbenchmark/runbenchmark.py ${1} ${2} 1h4c -f 0 -u user_config/ -i openml_datasets/ -o results/" +singularity exec --bind /scratch/rl3725/alphaautoml_experiments/experiments:/scratch/rl3725/alphaautoml_experiments/experiments --overlay overlay-15GB-500K.ext3:ro /scratch/work/public/singularity/ubuntu-20.04.4.sif /bin/bash -c "source /ext3/env.sh; python automlbenchmark/runbenchmark.py ${1} ${2} 1h4c -f 0 -u user_config/ -i openml_datasets/ -o results/" From e8d8bfc2fcf3918ef53771baa151d3e8c9beaba2 Mon Sep 17 00:00:00 2001 From: Roque Lopez Date: Mon, 29 Jan 2024 16:17:08 -0500 Subject: [PATCH 6/7] Calculate class probabilities --- .../amlb/user_config/extensions/Alpha-AutoML/exec.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py 
b/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py index e84f30b5..5fb34cec 100644 --- a/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py +++ b/scripts/amlb/user_config/extensions/Alpha-AutoML/exec.py @@ -62,13 +62,20 @@ def run(dataset, config): automl.plot_leaderboard(use_print=True) best_pipeline = automl.get_pipeline() + classes = automl.label_encoder.inverse_transform(best_pipeline.classes_) with Timer() as test_time: + log.info('Testing pipeline') predictions = best_pipeline.predict(X_test) predictions = automl.label_encoder.inverse_transform(predictions) - probabilities = pd.DataFrame(automl.get_pipeline().predict_proba(X_test), - columns=automl.label_encoder.inverse_transform(automl.get_pipeline().classes_)) + try: + probabilities = pd.DataFrame(best_pipeline.predict_proba(X_test), columns=classes) + except: # Some primitives don't implement predict_proba method + log.warning(f'The method predict_proba is not supported, using fallback') + probabilities = pd.DataFrame(0, index=range(len(predictions)), columns=classes) # Dataframe of zeros + for index, prediction in enumerate(predictions): + probabilities.at[index, prediction] = 1.0 return result( output_file=config.output_predictions_file, From bef8df1c403cf7728d41bf8390087792f17c0cfe Mon Sep 17 00:00:00 2001 From: Roque Lopez Date: Tue, 30 Jan 2024 10:17:30 -0500 Subject: [PATCH 7/7] Analysis of initial results --- scripts/amlb/results_analysis.ipynb | 1816 +++++++++++++++++++++++++++ 1 file changed, 1816 insertions(+) create mode 100644 scripts/amlb/results_analysis.ipynb diff --git a/scripts/amlb/results_analysis.ipynb b/scripts/amlb/results_analysis.ipynb new file mode 100644 index 00000000..a6d59b24 --- /dev/null +++ b/scripts/amlb/results_analysis.ipynb @@ -0,0 +1,1816 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis of OpenML Experiments" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import altair as alt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatasetTypeAutoGluonAutoWEKAAuto-SklearnH2OTPOTAlphaD3MAlpha-AutoML
0task_10101binary0.7600000.7600000.7600000.7600000.7600000.7866670.720000
1task_12multiclass0.9750000.9850000.9800000.975000NaN0.9650000.980000
2task_146195multiclass0.8755180.7106280.8556840.8767020.8516870.8058020.833333
3task_146212multiclass0.9996550.9982761.0000000.9998280.9998280.9998280.999828
4task_146606binary0.7363590.6037740.7329930.719327NaN0.7262620.730546
\n", + "
" + ], + "text/plain": [ + " Dataset Type AutoGluon AutoWEKA Auto-Sklearn H2O \\\n", + "0 task_10101 binary 0.760000 0.760000 0.760000 0.760000 \n", + "1 task_12 multiclass 0.975000 0.985000 0.980000 0.975000 \n", + "2 task_146195 multiclass 0.875518 0.710628 0.855684 0.876702 \n", + "3 task_146212 multiclass 0.999655 0.998276 1.000000 0.999828 \n", + "4 task_146606 binary 0.736359 0.603774 0.732993 0.719327 \n", + "\n", + " TPOT AlphaD3M Alpha-AutoML \n", + "0 0.760000 0.786667 0.720000 \n", + "1 NaN 0.965000 0.980000 \n", + "2 0.851687 0.805802 0.833333 \n", + "3 0.999828 0.999828 0.999828 \n", + "4 NaN 0.726262 0.730546 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "performances = pd.read_csv('results/results.csv')\n", + "performances = performances[['id', 'framework', 'type', 'result']]\n", + "performances = pd.pivot_table(performances, index=['id', 'type'], columns='framework', values='result')\n", + "performances = performances.reset_index()\n", + "performances.columns.name = None\n", + "performances.rename(columns={'id': 'Dataset', 'type': 'Type', 'H2OAutoML': 'H2O', 'autosklearn': 'Auto-Sklearn'}, inplace=True)\n", + "performances = performances[['Dataset', 'Type', 'AutoGluon', 'AutoWEKA', 'Auto-Sklearn', 'H2O', 'TPOT', 'AlphaD3M', 'Alpha-AutoML']]\n", + "performances = performances.replace('openml.org/t/','task_', regex=True)\n", + "performances.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Utils\n", + "\n", + "def calculate_rank(performances):\n", + " performances_t = performances.T\n", + " performances_t.columns = performances_t.loc['Dataset'].tolist() # Set the datasets as column names\n", + " all_ranks = []\n", + " \n", + " for dataset in performances_t.columns:\n", + " ranks_by_dataset = pd.DataFrame({dataset: performances_t[dataset]})\n", + " ranks_by_dataset.drop(['Dataset', 'Type'], inplace=True) # Remove 'Dataset', and 'Type'\n", + " ranks_by_dataset = ranks_by_dataset.rank(ascending=False, method='min')\n", + " worst_rank = float(ranks_by_dataset.shape[0]) # Number of AutoML Systems\n", + " ranks_by_dataset.fillna(worst_rank, inplace=True) # Add the worst rank to the systems that didn't produce pipelines\n", + " all_ranks.append(ranks_by_dataset)\n", + " \n", + " all_ranks = pd.concat(all_ranks, axis=1, join='inner')\n", + " \n", + " return all_ranks\n", + "\n", + "def generate_latex(all_performances, file_name):\n", + " performances = all_performances.copy(deep=True)\n", + " try:\n", + " performances.drop(columns=['Type'], inplace=True)\n", + " except:\n", + " pass\n", + " performances.to_latex(f'{file_name}.tex', float_format='%.2f', index=False, na_rep='-')\n", + " print(f'Latex generated at {file_name}.tex file.')\n", + "\n", + "def calculate_gain(all_performances):\n", + " systems = ['AutoGluon', 'AutoWEKA', 'H2O', 'TPOT', 'Auto-Sklearn', 'Alpha-AutoML']\n", + " performances = all_performances.copy(deep=True) \n", + " performances['Others_Avg'] = performances[systems].mean(axis=1)\n", + " performances['Gain'] = performances['Alpha-AutoML'] - performances['Others_Avg']\n", + " performances.drop(columns=['Others_Avg'], inplace=True)\n", + " \n", + " return performances.round(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calculating Gains" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatasetTypeAutoGluonAutoWEKAAuto-SklearnH2OTPOTAlphaD3MAlpha-AutoMLGain
0task_10101binary0.760.760.760.760.760.790.72-0.03
1task_12multiclass0.980.980.980.98NaN0.960.980.00
2task_146195multiclass0.880.710.860.880.850.810.83-0.00
3task_146212multiclass1.001.001.001.001.001.001.000.00
4task_146606binary0.740.600.730.72NaN0.730.730.03
\n", + "
" + ], + "text/plain": [ + " Dataset Type AutoGluon AutoWEKA Auto-Sklearn H2O TPOT \\\n", + "0 task_10101 binary 0.76 0.76 0.76 0.76 0.76 \n", + "1 task_12 multiclass 0.98 0.98 0.98 0.98 NaN \n", + "2 task_146195 multiclass 0.88 0.71 0.86 0.88 0.85 \n", + "3 task_146212 multiclass 1.00 1.00 1.00 1.00 1.00 \n", + "4 task_146606 binary 0.74 0.60 0.73 0.72 NaN \n", + "\n", + " AlphaD3M Alpha-AutoML Gain \n", + "0 0.79 0.72 -0.03 \n", + "1 0.96 0.98 0.00 \n", + "2 0.81 0.83 -0.00 \n", + "3 1.00 1.00 0.00 \n", + "4 0.73 0.73 0.03 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gains = calculate_gain(performances)\n", + "gains.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.003" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "round(gains['Gain'].mean(), 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Latex generated at gains.tex file.\n" + ] + } + ], + "source": [ + "generate_latex(gains, 'gains')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calculating Average Rank" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
task_10101task_12task_146195task_146212task_146606task_146818task_146821task_146822task_146825task_14965...task_31task_34539task_3917task_3945task_53task_7592task_7593task_9952task_9977task_9981
AutoGluon2.04.02.06.01.01.06.02.01.04.0...4.01.02.04.03.01.01.07.01.03.0
AutoWEKA2.01.07.07.06.05.01.07.07.07.0...6.07.07.07.06.06.05.03.07.07.0
Auto-Sklearn2.02.03.01.02.07.01.02.02.02.0...1.03.03.01.04.02.02.05.03.01.0
H2O2.04.01.02.05.02.01.02.03.01.0...7.02.07.05.01.07.04.05.01.03.0
TPOT2.07.04.02.07.03.01.01.07.05.0...1.04.01.01.07.04.07.01.03.01.0
AlphaD3M1.06.06.02.04.03.07.02.04.05.0...4.06.03.01.05.05.03.04.06.03.0
Alpha-AutoML7.02.05.02.03.05.01.02.07.03.0...3.05.05.07.02.03.07.02.05.06.0
\n", + "

7 rows × 39 columns

\n", + "
" + ], + "text/plain": [ + " task_10101 task_12 task_146195 task_146212 task_146606 \\\n", + "AutoGluon 2.0 4.0 2.0 6.0 1.0 \n", + "AutoWEKA 2.0 1.0 7.0 7.0 6.0 \n", + "Auto-Sklearn 2.0 2.0 3.0 1.0 2.0 \n", + "H2O 2.0 4.0 1.0 2.0 5.0 \n", + "TPOT 2.0 7.0 4.0 2.0 7.0 \n", + "AlphaD3M 1.0 6.0 6.0 2.0 4.0 \n", + "Alpha-AutoML 7.0 2.0 5.0 2.0 3.0 \n", + "\n", + " task_146818 task_146821 task_146822 task_146825 task_14965 \\\n", + "AutoGluon 1.0 6.0 2.0 1.0 4.0 \n", + "AutoWEKA 5.0 1.0 7.0 7.0 7.0 \n", + "Auto-Sklearn 7.0 1.0 2.0 2.0 2.0 \n", + "H2O 2.0 1.0 2.0 3.0 1.0 \n", + "TPOT 3.0 1.0 1.0 7.0 5.0 \n", + "AlphaD3M 3.0 7.0 2.0 4.0 5.0 \n", + "Alpha-AutoML 5.0 1.0 2.0 7.0 3.0 \n", + "\n", + " ... task_31 task_34539 task_3917 task_3945 task_53 \\\n", + "AutoGluon ... 4.0 1.0 2.0 4.0 3.0 \n", + "AutoWEKA ... 6.0 7.0 7.0 7.0 6.0 \n", + "Auto-Sklearn ... 1.0 3.0 3.0 1.0 4.0 \n", + "H2O ... 7.0 2.0 7.0 5.0 1.0 \n", + "TPOT ... 1.0 4.0 1.0 1.0 7.0 \n", + "AlphaD3M ... 4.0 6.0 3.0 1.0 5.0 \n", + "Alpha-AutoML ... 3.0 5.0 5.0 7.0 2.0 \n", + "\n", + " task_7592 task_7593 task_9952 task_9977 task_9981 \n", + "AutoGluon 1.0 1.0 7.0 1.0 3.0 \n", + "AutoWEKA 6.0 5.0 3.0 7.0 7.0 \n", + "Auto-Sklearn 2.0 2.0 5.0 3.0 1.0 \n", + "H2O 7.0 4.0 5.0 1.0 3.0 \n", + "TPOT 4.0 7.0 1.0 3.0 1.0 \n", + "AlphaD3M 5.0 3.0 4.0 6.0 3.0 \n", + "Alpha-AutoML 3.0 7.0 2.0 5.0 6.0 \n", + "\n", + "[7 rows x 39 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ranks = calculate_rank(performances)\n", + "ranks" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
task_10101task_12task_146195task_146212task_146606task_146818task_146821task_146822task_146825task_14965...task_34539task_3917task_3945task_53task_7592task_7593task_9952task_9977task_9981average_rank
AutoGluon2.04.02.06.01.01.06.02.01.04.0...1.02.04.03.01.01.07.01.03.02.69
AutoWEKA2.01.07.07.06.05.01.07.07.07.0...7.07.07.06.06.05.03.07.07.05.90
Auto-Sklearn2.02.03.01.02.07.01.02.02.02.0...3.03.01.04.02.02.05.03.01.02.26
H2O2.04.01.02.05.02.01.02.03.01.0...2.07.05.01.07.04.05.01.03.03.49
TPOT2.07.04.02.07.03.01.01.07.05.0...4.01.01.07.04.07.01.03.01.04.21
AlphaD3M1.06.06.02.04.03.07.02.04.05.0...6.03.01.05.05.03.04.06.03.04.21
Alpha-AutoML7.02.05.02.03.05.01.02.07.03.0...5.05.07.02.03.07.02.05.06.04.03
\n", + "

7 rows × 40 columns

\n", + "
" + ], + "text/plain": [ + " task_10101 task_12 task_146195 task_146212 task_146606 \\\n", + "AutoGluon 2.0 4.0 2.0 6.0 1.0 \n", + "AutoWEKA 2.0 1.0 7.0 7.0 6.0 \n", + "Auto-Sklearn 2.0 2.0 3.0 1.0 2.0 \n", + "H2O 2.0 4.0 1.0 2.0 5.0 \n", + "TPOT 2.0 7.0 4.0 2.0 7.0 \n", + "AlphaD3M 1.0 6.0 6.0 2.0 4.0 \n", + "Alpha-AutoML 7.0 2.0 5.0 2.0 3.0 \n", + "\n", + " task_146818 task_146821 task_146822 task_146825 task_14965 \\\n", + "AutoGluon 1.0 6.0 2.0 1.0 4.0 \n", + "AutoWEKA 5.0 1.0 7.0 7.0 7.0 \n", + "Auto-Sklearn 7.0 1.0 2.0 2.0 2.0 \n", + "H2O 2.0 1.0 2.0 3.0 1.0 \n", + "TPOT 3.0 1.0 1.0 7.0 5.0 \n", + "AlphaD3M 3.0 7.0 2.0 4.0 5.0 \n", + "Alpha-AutoML 5.0 1.0 2.0 7.0 3.0 \n", + "\n", + " ... task_34539 task_3917 task_3945 task_53 task_7592 \\\n", + "AutoGluon ... 1.0 2.0 4.0 3.0 1.0 \n", + "AutoWEKA ... 7.0 7.0 7.0 6.0 6.0 \n", + "Auto-Sklearn ... 3.0 3.0 1.0 4.0 2.0 \n", + "H2O ... 2.0 7.0 5.0 1.0 7.0 \n", + "TPOT ... 4.0 1.0 1.0 7.0 4.0 \n", + "AlphaD3M ... 6.0 3.0 1.0 5.0 5.0 \n", + "Alpha-AutoML ... 5.0 5.0 7.0 2.0 3.0 \n", + "\n", + " task_7593 task_9952 task_9977 task_9981 average_rank \n", + "AutoGluon 1.0 7.0 1.0 3.0 2.69 \n", + "AutoWEKA 5.0 3.0 7.0 7.0 5.90 \n", + "Auto-Sklearn 2.0 5.0 3.0 1.0 2.26 \n", + "H2O 4.0 5.0 1.0 3.0 3.49 \n", + "TPOT 7.0 1.0 3.0 1.0 4.21 \n", + "AlphaD3M 3.0 4.0 6.0 3.0 4.21 \n", + "Alpha-AutoML 7.0 2.0 5.0 6.0 4.03 \n", + "\n", + "[7 rows x 40 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ranks['average_rank'] = ranks.mean(axis=1) # Add a column with average rank\n", + "ranks = ranks.round({'average_rank': 2})\n", + "ranks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Normalizing Scores" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatasetTypeAutoGluonAutoWEKAAuto-SklearnH2OTPOTAlphaD3MAlpha-AutoML
0task_10101binary0.970.970.970.970.971.000.92
1task_12multiclass0.991.000.990.99NaN0.980.99
2task_146195multiclass1.000.810.981.000.970.920.95
3task_146212multiclass1.001.001.001.001.001.001.00
4task_146606binary1.000.821.000.98NaN0.990.99
5task_146818binary1.000.940.920.980.950.950.94
6task_146821multiclass0.991.001.001.001.000.971.00
7task_146822multiclass1.000.991.001.001.001.001.00
8task_146825multiclass1.00NaN0.990.99NaN0.94NaN
9task_14965binary1.000.961.001.001.001.001.00
10task_167119multiclass0.960.830.981.000.940.860.90
11task_167120binary1.001.001.000.99NaN0.991.00
12task_168329multiclass1.000.690.960.880.890.94NaN
13task_168330multiclass1.000.891.001.000.970.980.98
14task_168331multiclass1.000.841.000.950.900.910.95
15task_168332multiclass1.00NaN0.980.930.800.75NaN
16task_168335binary1.00NaN1.00NaN0.990.991.00
17task_168337binary0.98NaN1.000.960.890.711.00
18task_168338binary1.00NaN1.001.000.990.971.00
19task_168868binary1.000.991.001.001.001.001.00
20task_168908binary0.970.960.990.94NaN1.000.99
21task_168909multiclass1.000.971.000.99NaN1.000.99
22task_168910multiclass0.990.820.990.990.970.891.00
23task_168911binary0.991.001.001.000.990.980.99
24task_168912binary0.980.970.991.001.000.980.99
25task_189354binary1.00NaN1.000.911.000.960.94
26task_189355multiclass1.00NaN0.00NaNNaN0.94NaN
27task_189356binary1.00NaN0.97NaNNaNNaNNaN
28task_3binary1.000.941.001.001.001.001.00
29task_31binary0.940.801.00NaN1.000.940.99
30task_34539binary1.00NaN1.001.000.990.990.99
31task_3917binary0.99NaN0.98NaN1.000.980.97
32task_3945binary1.00NaN1.000.991.001.00NaN
33task_53multiclass0.970.760.961.00NaN0.930.99
34task_7592binary1.000.991.000.991.001.001.00
35task_7593multiclass1.000.680.990.82NaN0.97NaN
36task_9952binary0.960.990.980.981.000.990.99
37task_9977binary1.000.971.001.001.000.991.00
38task_9981multiclass0.980.891.000.981.000.980.97
\n", + "
" + ], + "text/plain": [ + " Dataset Type AutoGluon AutoWEKA Auto-Sklearn H2O TPOT \\\n", + "0 task_10101 binary 0.97 0.97 0.97 0.97 0.97 \n", + "1 task_12 multiclass 0.99 1.00 0.99 0.99 NaN \n", + "2 task_146195 multiclass 1.00 0.81 0.98 1.00 0.97 \n", + "3 task_146212 multiclass 1.00 1.00 1.00 1.00 1.00 \n", + "4 task_146606 binary 1.00 0.82 1.00 0.98 NaN \n", + "5 task_146818 binary 1.00 0.94 0.92 0.98 0.95 \n", + "6 task_146821 multiclass 0.99 1.00 1.00 1.00 1.00 \n", + "7 task_146822 multiclass 1.00 0.99 1.00 1.00 1.00 \n", + "8 task_146825 multiclass 1.00 NaN 0.99 0.99 NaN \n", + "9 task_14965 binary 1.00 0.96 1.00 1.00 1.00 \n", + "10 task_167119 multiclass 0.96 0.83 0.98 1.00 0.94 \n", + "11 task_167120 binary 1.00 1.00 1.00 0.99 NaN \n", + "12 task_168329 multiclass 1.00 0.69 0.96 0.88 0.89 \n", + "13 task_168330 multiclass 1.00 0.89 1.00 1.00 0.97 \n", + "14 task_168331 multiclass 1.00 0.84 1.00 0.95 0.90 \n", + "15 task_168332 multiclass 1.00 NaN 0.98 0.93 0.80 \n", + "16 task_168335 binary 1.00 NaN 1.00 NaN 0.99 \n", + "17 task_168337 binary 0.98 NaN 1.00 0.96 0.89 \n", + "18 task_168338 binary 1.00 NaN 1.00 1.00 0.99 \n", + "19 task_168868 binary 1.00 0.99 1.00 1.00 1.00 \n", + "20 task_168908 binary 0.97 0.96 0.99 0.94 NaN \n", + "21 task_168909 multiclass 1.00 0.97 1.00 0.99 NaN \n", + "22 task_168910 multiclass 0.99 0.82 0.99 0.99 0.97 \n", + "23 task_168911 binary 0.99 1.00 1.00 1.00 0.99 \n", + "24 task_168912 binary 0.98 0.97 0.99 1.00 1.00 \n", + "25 task_189354 binary 1.00 NaN 1.00 0.91 1.00 \n", + "26 task_189355 multiclass 1.00 NaN 0.00 NaN NaN \n", + "27 task_189356 binary 1.00 NaN 0.97 NaN NaN \n", + "28 task_3 binary 1.00 0.94 1.00 1.00 1.00 \n", + "29 task_31 binary 0.94 0.80 1.00 NaN 1.00 \n", + "30 task_34539 binary 1.00 NaN 1.00 1.00 0.99 \n", + "31 task_3917 binary 0.99 NaN 0.98 NaN 1.00 \n", + "32 task_3945 binary 1.00 NaN 1.00 0.99 1.00 \n", + "33 task_53 multiclass 0.97 0.76 0.96 1.00 NaN \n", + "34 task_7592 binary 1.00 0.99 1.00 0.99 1.00 \n", + "35 task_7593 multiclass 1.00 0.68 0.99 0.82 NaN \n", + "36 task_9952 binary 0.96 0.99 0.98 0.98 1.00 \n", + "37 task_9977 binary 1.00 0.97 1.00 1.00 1.00 \n", + "38 task_9981 multiclass 0.98 0.89 1.00 0.98 1.00 \n", + "\n", + " AlphaD3M Alpha-AutoML \n", + "0 1.00 0.92 \n", + "1 0.98 0.99 \n", + "2 0.92 0.95 \n", + "3 1.00 1.00 \n", + "4 0.99 0.99 \n", + "5 0.95 0.94 \n", + "6 0.97 1.00 \n", + "7 1.00 1.00 \n", + "8 0.94 NaN \n", + "9 1.00 1.00 \n", + "10 0.86 0.90 \n", + "11 0.99 1.00 \n", + "12 0.94 NaN \n", + "13 0.98 0.98 \n", + "14 0.91 0.95 \n", + "15 0.75 NaN \n", + "16 0.99 1.00 \n", + "17 0.71 1.00 \n", + "18 0.97 1.00 \n", + "19 1.00 1.00 \n", + "20 1.00 0.99 \n", + "21 1.00 0.99 \n", + "22 0.89 1.00 \n", + "23 0.98 0.99 \n", + "24 0.98 0.99 \n", + "25 0.96 0.94 \n", + "26 0.94 NaN \n", + "27 NaN NaN \n", + "28 1.00 1.00 \n", + "29 0.94 0.99 \n", + "30 0.99 0.99 \n", + "31 0.98 0.97 \n", + "32 1.00 NaN \n", + "33 0.93 0.99 \n", + "34 1.00 1.00 \n", + "35 0.97 NaN \n", + "36 0.99 0.99 \n", + "37 0.99 1.00 \n", + "38 0.98 0.97 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "systems = ['AutoGluon', 'AutoWEKA', 'H2O', 'TPOT', 'AlphaD3M', 'Auto-Sklearn', 'Alpha-AutoML']\n", + "performances[systems] = performances[systems].apply(lambda x: x/x.max(), axis=1)\n", + "performances = performances.round(2)\n", + " \n", + "performances#.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Latex generated at normalized_performances.tex file.\n" + ] + } + ], + "source": [ + "generate_latex(performances, 'normalized_performances')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DatasetTypeAutoMLPerformance
0task_10101binaryAutoGluon0.97
1task_12multiclassAutoGluon0.99
2task_146195multiclassAutoGluon1.00
3task_146212multiclassAutoGluon1.00
4task_146606binaryAutoGluon1.00
\n", + "
" + ], + "text/plain": [ + " Dataset Type AutoML Performance\n", + "0 task_10101 binary AutoGluon 0.97\n", + "1 task_12 multiclass AutoGluon 0.99\n", + "2 task_146195 multiclass AutoGluon 1.00\n", + "3 task_146212 multiclass AutoGluon 1.00\n", + "4 task_146606 binary AutoGluon 1.00" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Change the dataframe to the format of altair lib\n", + "performances = pd.melt(performances, id_vars=['Dataset', 'Type'], var_name='AutoML', value_name='Performance')\n", + "performances.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_performances(source):\n", + " domain = ['AutoGluon', 'AutoWEKA', 'Auto-Sklearn', 'H2O', 'TPOT', 'AlphaD3M', 'Alpha-AutoML']\n", + " color_range = ['#f7b97c', '#f58517', '#e7ba52', '#e45857', '#d67196', '#ccf77c', '#396cb0']\n", + " \n", + " return alt.Chart(source, title=\"\").mark_point(filled=True, size=32).encode(\n", + " alt.X(\n", + " 'Performance:Q',\n", + " title=\"Accuracy\",\n", + " scale=alt.Scale(zero=False),\n", + " axis=alt.Axis(grid=False)\n", + " ),\n", + " alt.Y(\n", + " 'Dataset:N',\n", + " title=\"\",\n", + " sort='-x',\n", + " axis=alt.Axis(grid=True)\n", + " ),\n", + " #color=alt.Color('AutoML:N', legend=alt.Legend(title=\"AutoML\")),\n", + " color=alt.Color('AutoML:N', scale=alt.Scale(domain=domain, range=color_range), legend=alt.Legend(title=\"AutoML\")),\n", + " row=alt.Row(\n", + " 'Type:N',\n", + " title=\"\",\n", + " sort=alt.EncodingSortField(field='yield', op='sum', order='descending'),\n", + " )\n", + " ).properties(\n", + " height=alt.Step(12),\n", + " width=250\n", + " )\n", + "# .configure_view(stroke=\"transparent\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "df_binary = performances[(performances['Type']=='binary')]\n", + "chart1 = plot_performances(df_binary)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "df_multiclass = performances[(performances['Type']=='multiclass')]\n", + "chart2 = plot_performances(df_multiclass)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.HConcatChart(...)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "alt.hconcat(chart1, chart2).configure_view(stroke='transparent')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}
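
Note on checking the merged results outside the notebook: the snippet below is a minimal, illustrative sketch, not part of the patches above. It assumes the AMLB runs have produced `./results/results.csv` with the `id`, `framework`, `type`, and `result` columns used in `results_analysis.ipynb`; adjust the path to your setup.

```
# Minimal sketch: aggregate AMLB results outside the notebook.
# Assumes results/results.csv exists with columns: id, framework, type, result.
import pandas as pd

results = pd.read_csv('results/results.csv')
scores = results.pivot_table(index=['id', 'type'], columns='framework', values='result')
print(scores.round(3))                                       # score per task and framework
print(scores.mean().sort_values(ascending=False).round(3))   # mean score per framework
```

This reproduces the per-task pivot from the first notebook cell and adds a per-framework mean as a quick sanity check before running the full analysis.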