From 7942cd09f66762d2c40c7a7fbf6db3e7ea091e2a Mon Sep 17 00:00:00 2001
From: Eden Wu <yfw215@cl001.hpc.nyu.edu>
Date: Sat, 31 Aug 2024 20:28:15 -0400
Subject: [PATCH] Add regressor hyperparameter spaces to SMAC tuning

---
 alpha_automl/hyperparameter_tuning/smac.py    |   11 +-
 .../smac_parameters.json                      |  229 ++-
 examples/s4e9.ipynb                           | 1297 +++++++----------
 3 files changed, 786 insertions(+), 751 deletions(-)

diff --git a/alpha_automl/hyperparameter_tuning/smac.py b/alpha_automl/hyperparameter_tuning/smac.py
index 715781f..b9f667a 100644
--- a/alpha_automl/hyperparameter_tuning/smac.py
+++ b/alpha_automl/hyperparameter_tuning/smac.py
@@ -41,11 +41,14 @@ def gen_pipeline(config, pipeline):
 
         if step_type == 'COLUMN_TRANSFORMER':
             transformers = []
-            for trans_name, _, trans_index in step_obj.__dict__['transformers']:
+            for trans_name, trans_obj_ori, trans_index in step_obj.__dict__['transformers']:
                 trans_prim_name = trans_name.split('-')[0]
-                trans_obj = create_object(trans_prim_name, get_primitive_params(config, trans_prim_name))
-                transformers.append((trans_name, trans_obj, trans_index))
-                step_obj.__dict__['transformers'] = transformers
+                if "alpha_automl.builtin_primitives.math_features" in trans_prim_name:
+                    transformers.append((trans_name, trans_obj_ori, trans_index))
+                else:
+                    trans_obj = create_object(trans_prim_name, get_primitive_params(config, trans_prim_name))
+                    transformers.append((trans_name, trans_obj, trans_index))
+            step_obj.__dict__['transformers'] = transformers
             new_pipeline.steps.append([step_name, create_object(step_name, step_obj.__dict__)])
         else:
             new_pipeline.steps.append([step_name, create_object(step_name, get_primitive_params(config, step_name))])
diff --git a/alpha_automl/hyperparameter_tuning/smac_parameters.json b/alpha_automl/hyperparameter_tuning/smac_parameters.json
index 82da4b3..d994052 100644
--- a/alpha_automl/hyperparameter_tuning/smac_parameters.json
+++ b/alpha_automl/hyperparameter_tuning/smac_parameters.json
@@ -274,8 +274,229 @@
     },
     "sklearn.preprocessing.PolynomialFeatures": {},
     "alpha_automl.wrapper_primitives.llm_feature_engine.LLMFeatureGenerator": {},
-    "feature_engine.creation.math_features.MathFeatures-sum": {},
-    "feature_engine.creation.math_features.MathFeatures-mean": {},
-    "feature_engine.creation.math_features.MathFeatures-prod": {},
-    "feature_engine.creation.math_features.MathFeatures-std": {}
+    "alpha_automl.builtin_primitives.math_features.MathFeaturesProd": {},
+    "alpha_automl.builtin_primitives.math_features.MathFeaturesMean": {},
+    "alpha_automl.builtin_primitives.math_features.MathFeaturesSum": {},
+    "alpha_automl.builtin_primitives.math_features.MathFeaturesStd": {},
+    "sklearn.ensemble.ExtraTreesRegressor": {
+        "n_estimators": {
+            "type": "Integer",
+            "value": [
+                200,
+                2000
+            ],
+            "default": 400
+        },
+        "max_features": {
+            "type": "Float",
+            "value": [
+                0.1,
+                1.0
+            ],
+            "default": 1.0
+        },
+        "max_leaf_nodes": {
+            "type": "Integer",
+            "value": [
+                80,
+                10000
+            ],
+            "default": 80
+        },
+        "criterion": {
+            "type": "Categorical",
+            "value": [
+                "squared_error",
+                "friedman_mse"
+            ],
+            "default": "squared_error"
+        }
+    },
+    "sklearn.ensemble.RandomForestRegressor": {
+        "n_estimators": {
+            "type": "Integer",
+            "value": [
+                200,
+                2000
+            ],
+            "default": 400
+        },
+        "max_features": {
+            "type": "Float",
+            "value": [
+                0.1,
+                1.0
+            ],
+            "default": 1.0
+        },
+        "criterion": {
+            "type": "Categorical",
+            "value": [
+                "squared_error",
+                "friedman_mse"
+            ],
+            "default": "squared_error"
+        }
+    },
+    "xgboost.XGBRegressor": {
+        "n_estimators": {
+            "type": "Integer",
+            "value": [
+                200,
+                2000
+            ],
+            "default": 400
+        },
+        "max_leaves": {
+            "type": "Integer",
+            "value": [
+                80,
+                10000
+            ],
+            "default": 80
+        },
+        "min_child_weight": {
+            "type": "Float",
+            "value": [
+                0.001,
+                0.1
+            ],
+            "default": 0.1
+        },
+        "learning_rate": {
+            "type": "Float",
+            "value": [
+                0.01,
+                0.3
+            ],
+            "default": 0.1
+        },
+        "subsample": {
+            "type": "Float",
+            "value": [
+                0.5,
+                1.0
+            ],
+            "default": 1.0
+        },
+        "colsample_bylevel": {
+            "type": "Float",
+            "value": [
+                0.5,
+                1.0
+            ],
+            "default": 1.0
+        },
+        "colsample_bytree": {
+            "type": "Float",
+            "value": [
+                0.5,
+                1.0
+            ],
+            "default": 1.0
+        }
+    },
+    "lightgbm.LGBMRegressor": {
+        "n_estimators": {
+            "type": "Integer",
+            "value": [
+                200,
+                2000
+            ],
+            "default": 400
+        },
+        "num_leaves": {
+            "type": "Integer",
+            "value": [
+                80,
+                10000
+            ],
+            "default": 80
+        },
+        "min_child_samples": {
+            "type": "Integer",
+            "value": [
+                20,
+                100
+            ],
+            "default": 20
+        },
+        "learning_rate": {
+            "type": "Float",
+            "value": [
+                0.001,
+                0.3
+            ],
+            "default": 0.1
+        },
+        "log_max_bin": {
+            "type": "Integer",
+            "value": [
+                6,
+                10
+            ],
+            "default": 8
+        },
+        "colsample_bytree": {
+            "type": "Float",
+            "value": [
+                0.3,
+                1.0
+            ],
+            "default": 1.0
+        },
+        "verbose": {
+            "type": "Constant",
+            "value": -1,
+            "default": -1
+        }
+    },
+    "catboost.CatBoostRegressor": {
+        "logging_level": {
+            "type": "Constant",
+            "value": "Silent",
+            "default": "Silent"
+        },
+        "learning_rate": {
+            "type": "Float",
+            "value": [
+                0.01,
+                0.3
+            ],
+            "default": 0.1
+        },
+        "depth": {
+            "type": "Integer",
+            "value": [
+                1,
+                6
+            ],
+            "default": 6
+        },
+        "l2_leaf_reg": {
+            "type": "Float",
+            "value": [
+                1,
+                10
+            ],
+            "default": 3
+        },
+        "border_count": {
+            "type": "Integer",
+            "value": [
+                32,
+                255
+            ],
+            "default": 254
+        },
+        "auto_class_weights": {
+            "type": "Categorical",
+            "value": [
+                "None",
+                "Balanced",
+                "SqrtBalanced"
+            ],
+            "default": "None"
+        }
+    }
 }
diff --git a/examples/s4e9.ipynb b/examples/s4e9.ipynb
index 94f70b4..529615a 100644
--- a/examples/s4e9.ipynb
+++ b/examples/s4e9.ipynb
@@ -12,8 +12,8 @@
      "text": [
       "/ext3/miniconda3/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
       "  from .autonotebook import tqdm as notebook_tqdm\n",
-      "2024-08-31 18:44:55,024\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n",
-      "2024-08-31 18:44:55,574\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n"
+      "2024-08-31 20:27:25,109\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n",
+      "2024-08-31 20:27:25,695\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n"
      ]
     },
     {
@@ -25,14 +25,14 @@
     }
    ],
    "source": [
-    "from alpha_automl import AutoMLClassifier\n",
+    "from alpha_automl import AutoMLRegressor\n",
     "import pandas as pd\n",
     "import numpy as np\n",
     "\n",
     "%env OPENAI_API_KEY=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\n",
     "\n",
-    "train_dataset = pd.read_csv('datasets/s4e8/train.csv').sample(200000)\n",
-    "test_dataset = pd.read_csv('datasets/s4e8/test.csv')"
+    "train_dataset = pd.read_csv('datasets/s4e9/train.csv').sample(20000)\n",
+    "test_dataset = pd.read_csv('datasets/s4e9/test.csv')"
    ]
   },
   {
@@ -62,143 +62,89 @@
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
-       "      <th>cap-diameter</th>\n",
-       "      <th>cap-shape</th>\n",
-       "      <th>cap-surface</th>\n",
-       "      <th>cap-color</th>\n",
-       "      <th>does-bruise-or-bleed</th>\n",
-       "      <th>gill-attachment</th>\n",
-       "      <th>gill-spacing</th>\n",
-       "      <th>gill-color</th>\n",
-       "      <th>stem-height</th>\n",
-       "      <th>stem-width</th>\n",
-       "      <th>stem-root</th>\n",
-       "      <th>stem-surface</th>\n",
-       "      <th>stem-color</th>\n",
-       "      <th>veil-type</th>\n",
-       "      <th>veil-color</th>\n",
-       "      <th>has-ring</th>\n",
-       "      <th>ring-type</th>\n",
-       "      <th>spore-print-color</th>\n",
-       "      <th>habitat</th>\n",
-       "      <th>season</th>\n",
+       "      <th>brand</th>\n",
+       "      <th>model</th>\n",
+       "      <th>model_year</th>\n",
+       "      <th>milage</th>\n",
+       "      <th>fuel_type</th>\n",
+       "      <th>engine</th>\n",
+       "      <th>transmission</th>\n",
+       "      <th>ext_col</th>\n",
+       "      <th>int_col</th>\n",
+       "      <th>accident</th>\n",
+       "      <th>clean_title</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>2006170</th>\n",
-       "      <td>0.76</td>\n",
-       "      <td>x</td>\n",
-       "      <td>g</td>\n",
-       "      <td>o</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>y</td>\n",
-       "      <td>3.59</td>\n",
-       "      <td>0.82</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>w</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>d</td>\n",
-       "      <td>a</td>\n",
+       "      <th>71793</th>\n",
+       "      <td>Toyota</td>\n",
+       "      <td>Tacoma TRD Sport</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>135606</td>\n",
+       "      <td>Gasoline</td>\n",
+       "      <td>159.0HP 2.7L 4 Cylinder Engine Gasoline Fuel</td>\n",
+       "      <td>6-Speed M/T</td>\n",
+       "      <td>Red</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>None reported</td>\n",
+       "      <td>Yes</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>207690</th>\n",
-       "      <td>5.72</td>\n",
-       "      <td>p</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>g</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>5.37</td>\n",
-       "      <td>18.09</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>n</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>l</td>\n",
-       "      <td>s</td>\n",
+       "      <th>33214</th>\n",
+       "      <td>Ford</td>\n",
+       "      <td>Mustang GT Premium</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>90000</td>\n",
+       "      <td>Gasoline</td>\n",
+       "      <td>412.0HP 5.0L 8 Cylinder Engine Gasoline Fuel</td>\n",
+       "      <td>6-Speed M/T</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>At least 1 accident or damage reported</td>\n",
+       "      <td>Yes</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2396937</th>\n",
-       "      <td>2.24</td>\n",
-       "      <td>x</td>\n",
-       "      <td>i</td>\n",
-       "      <td>l</td>\n",
-       "      <td>f</td>\n",
-       "      <td>a</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>p</td>\n",
-       "      <td>4.11</td>\n",
-       "      <td>2.57</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>w</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>p</td>\n",
-       "      <td>g</td>\n",
-       "      <td>u</td>\n",
+       "      <th>4936</th>\n",
+       "      <td>INFINITI</td>\n",
+       "      <td>G35 Base</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>96000</td>\n",
+       "      <td>Gasoline</td>\n",
+       "      <td>298.0HP 3.5L V6 Cylinder Engine Gasoline Fuel</td>\n",
+       "      <td>6-Speed M/T</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>None reported</td>\n",
+       "      <td>Yes</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>890751</th>\n",
-       "      <td>1.52</td>\n",
-       "      <td>b</td>\n",
-       "      <td>h</td>\n",
-       "      <td>n</td>\n",
-       "      <td>f</td>\n",
-       "      <td>a</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>n</td>\n",
-       "      <td>5.01</td>\n",
-       "      <td>1.66</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>t</td>\n",
-       "      <td>n</td>\n",
-       "      <td>NaN</td>\n",
+       "      <th>121116</th>\n",
+       "      <td>Acura</td>\n",
+       "      <td>TLX A-Spec</td>\n",
+       "      <td>2023</td>\n",
+       "      <td>14381</td>\n",
+       "      <td>Gasoline</td>\n",
+       "      <td>2.0L I4 16V GDI DOHC Turbo</td>\n",
+       "      <td>9-Speed Automatic</td>\n",
+       "      <td>Gray</td>\n",
+       "      <td>Black/Gun Metal</td>\n",
+       "      <td>None reported</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>g</td>\n",
-       "      <td>a</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>530679</th>\n",
-       "      <td>7.67</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>y</td>\n",
-       "      <td>f</td>\n",
-       "      <td>a</td>\n",
-       "      <td>c</td>\n",
-       "      <td>w</td>\n",
-       "      <td>6.54</td>\n",
-       "      <td>17.52</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>w</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>d</td>\n",
-       "      <td>a</td>\n",
+       "      <th>36972</th>\n",
+       "      <td>Chrysler</td>\n",
+       "      <td>300 Touring</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>185000</td>\n",
+       "      <td>Gasoline</td>\n",
+       "      <td>250.0HP 3.5L V6 Cylinder Engine Gasoline Fuel</td>\n",
+       "      <td>A/T</td>\n",
+       "      <td>Gray</td>\n",
+       "      <td>Gray</td>\n",
+       "      <td>At least 1 accident or damage reported</td>\n",
+       "      <td>Yes</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -213,190 +159,136 @@
        "      <td>...</td>\n",
        "      <td>...</td>\n",
        "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2537501</th>\n",
-       "      <td>5.67</td>\n",
-       "      <td>x</td>\n",
-       "      <td>h</td>\n",
-       "      <td>y</td>\n",
-       "      <td>t</td>\n",
-       "      <td>x</td>\n",
-       "      <td>c</td>\n",
-       "      <td>y</td>\n",
-       "      <td>6.71</td>\n",
-       "      <td>14.65</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>w</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>d</td>\n",
-       "      <td>u</td>\n",
+       "      <th>3948</th>\n",
+       "      <td>RAM</td>\n",
+       "      <td>1500 SLT</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>97700</td>\n",
+       "      <td>Gasoline</td>\n",
+       "      <td>395.0HP 5.7L 8 Cylinder Engine Gasoline Fuel</td>\n",
+       "      <td>A/T</td>\n",
+       "      <td>Gray</td>\n",
+       "      <td>Gray</td>\n",
+       "      <td>At least 1 accident or damage reported</td>\n",
+       "      <td>Yes</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>1498074</th>\n",
-       "      <td>5.85</td>\n",
-       "      <td>x</td>\n",
-       "      <td>s</td>\n",
-       "      <td>b</td>\n",
-       "      <td>f</td>\n",
-       "      <td>x</td>\n",
-       "      <td>c</td>\n",
-       "      <td>w</td>\n",
-       "      <td>6.47</td>\n",
-       "      <td>9.63</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>s</td>\n",
-       "      <td>n</td>\n",
+       "      <th>84168</th>\n",
+       "      <td>Tesla</td>\n",
+       "      <td>Model X Long Range Plus</td>\n",
+       "      <td>2020</td>\n",
+       "      <td>46000</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>d</td>\n",
-       "      <td>w</td>\n",
+       "      <td>557.0HP Electric Motor Electric Fuel System</td>\n",
+       "      <td>A/T</td>\n",
+       "      <td>Gray</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>None reported</td>\n",
+       "      <td>Yes</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>1399239</th>\n",
-       "      <td>3.59</td>\n",
-       "      <td>f</td>\n",
-       "      <td>l</td>\n",
-       "      <td>y</td>\n",
-       "      <td>f</td>\n",
-       "      <td>x</td>\n",
-       "      <td>d</td>\n",
-       "      <td>y</td>\n",
-       "      <td>5.16</td>\n",
-       "      <td>4.47</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>n</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>l</td>\n",
-       "      <td>u</td>\n",
+       "      <th>186205</th>\n",
+       "      <td>Mercedes-Benz</td>\n",
+       "      <td>GLS 450 Base 4MATIC</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>7100</td>\n",
+       "      <td>Gasoline</td>\n",
+       "      <td>362.0HP 3.0L V6 Cylinder Engine Gasoline Fuel</td>\n",
+       "      <td>7-Speed A/T</td>\n",
+       "      <td>White</td>\n",
+       "      <td>Beige</td>\n",
+       "      <td>None reported</td>\n",
+       "      <td>Yes</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>275415</th>\n",
-       "      <td>9.80</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>n</td>\n",
-       "      <td>f</td>\n",
-       "      <td>s</td>\n",
-       "      <td>c</td>\n",
-       "      <td>g</td>\n",
-       "      <td>7.09</td>\n",
-       "      <td>21.54</td>\n",
-       "      <td>b</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>u</td>\n",
-       "      <td>NaN</td>\n",
+       "      <th>38335</th>\n",
+       "      <td>Mercedes-Benz</td>\n",
+       "      <td>GLC 300 GLC 300</td>\n",
+       "      <td>2022</td>\n",
+       "      <td>2900</td>\n",
+       "      <td>Gasoline</td>\n",
+       "      <td>2.0 Liter Turbo</td>\n",
+       "      <td>Automatic</td>\n",
+       "      <td>Graphite Grey Metallic</td>\n",
+       "      <td>–</td>\n",
+       "      <td>None reported</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>g</td>\n",
-       "      <td>a</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>252426</th>\n",
-       "      <td>4.61</td>\n",
-       "      <td>o</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>n</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>5.27</td>\n",
-       "      <td>19.09</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>n</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>d</td>\n",
-       "      <td>s</td>\n",
+       "      <th>93996</th>\n",
+       "      <td>Porsche</td>\n",
+       "      <td>911 Carrera</td>\n",
+       "      <td>2020</td>\n",
+       "      <td>8755</td>\n",
+       "      <td>Gasoline</td>\n",
+       "      <td>379.0HP 3.0L Flat 6 Cylinder Engine Gasoline Fuel</td>\n",
+       "      <td>8-Speed A/T</td>\n",
+       "      <td>Silver</td>\n",
+       "      <td>Black</td>\n",
+       "      <td>None reported</td>\n",
+       "      <td>Yes</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>100000 rows × 20 columns</p>\n",
+       "<p>20000 rows × 11 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "         cap-diameter cap-shape cap-surface cap-color does-bruise-or-bleed  \\\n",
-       "2006170          0.76         x           g         o                    f   \n",
-       "207690           5.72         p         NaN         g                    f   \n",
-       "2396937          2.24         x           i         l                    f   \n",
-       "890751           1.52         b           h         n                    f   \n",
-       "530679           7.67         f         NaN         y                    f   \n",
-       "...               ...       ...         ...       ...                  ...   \n",
-       "2537501          5.67         x           h         y                    t   \n",
-       "1498074          5.85         x           s         b                    f   \n",
-       "1399239          3.59         f           l         y                    f   \n",
-       "275415           9.80         f         NaN         n                    f   \n",
-       "252426           4.61         o         NaN         n                    f   \n",
+       "                brand                    model  model_year  milage fuel_type  \\\n",
+       "71793          Toyota         Tacoma TRD Sport        2013  135606  Gasoline   \n",
+       "33214            Ford       Mustang GT Premium        2012   90000  Gasoline   \n",
+       "4936         INFINITI                 G35 Base        2000   96000  Gasoline   \n",
+       "121116          Acura               TLX A-Spec        2023   14381  Gasoline   \n",
+       "36972        Chrysler              300 Touring        2009  185000  Gasoline   \n",
+       "...               ...                      ...         ...     ...       ...   \n",
+       "3948              RAM                 1500 SLT        2017   97700  Gasoline   \n",
+       "84168           Tesla  Model X Long Range Plus        2020   46000       NaN   \n",
+       "186205  Mercedes-Benz      GLS 450 Base 4MATIC        2018    7100  Gasoline   \n",
+       "38335   Mercedes-Benz          GLC 300 GLC 300        2022    2900  Gasoline   \n",
+       "93996         Porsche              911 Carrera        2020    8755  Gasoline   \n",
        "\n",
-       "        gill-attachment gill-spacing gill-color  stem-height  stem-width  \\\n",
-       "2006170             NaN          NaN          y         3.59        0.82   \n",
-       "207690                f            f          f         5.37       18.09   \n",
-       "2396937               a          NaN          p         4.11        2.57   \n",
-       "890751                a          NaN          n         5.01        1.66   \n",
-       "530679                a            c          w         6.54       17.52   \n",
-       "...                 ...          ...        ...          ...         ...   \n",
-       "2537501               x            c          y         6.71       14.65   \n",
-       "1498074               x            c          w         6.47        9.63   \n",
-       "1399239               x            d          y         5.16        4.47   \n",
-       "275415                s            c          g         7.09       21.54   \n",
-       "252426                f            f          f         5.27       19.09   \n",
+       "                                                   engine       transmission  \\\n",
+       "71793        159.0HP 2.7L 4 Cylinder Engine Gasoline Fuel        6-Speed M/T   \n",
+       "33214        412.0HP 5.0L 8 Cylinder Engine Gasoline Fuel        6-Speed M/T   \n",
+       "4936        298.0HP 3.5L V6 Cylinder Engine Gasoline Fuel        6-Speed M/T   \n",
+       "121116                         2.0L I4 16V GDI DOHC Turbo  9-Speed Automatic   \n",
+       "36972       250.0HP 3.5L V6 Cylinder Engine Gasoline Fuel                A/T   \n",
+       "...                                                   ...                ...   \n",
+       "3948         395.0HP 5.7L 8 Cylinder Engine Gasoline Fuel                A/T   \n",
+       "84168         557.0HP Electric Motor Electric Fuel System                A/T   \n",
+       "186205      362.0HP 3.0L V6 Cylinder Engine Gasoline Fuel        7-Speed A/T   \n",
+       "38335                                     2.0 Liter Turbo          Automatic   \n",
+       "93996   379.0HP 3.0L Flat 6 Cylinder Engine Gasoline Fuel        8-Speed A/T   \n",
        "\n",
-       "        stem-root stem-surface stem-color veil-type veil-color has-ring  \\\n",
-       "2006170       NaN          NaN          w       NaN        NaN        f   \n",
-       "207690        NaN          NaN          n       NaN        NaN        f   \n",
-       "2396937       NaN          NaN          w       NaN        NaN        f   \n",
-       "890751        NaN            t          n       NaN        NaN        f   \n",
-       "530679        NaN          NaN          w       NaN        NaN        f   \n",
-       "...           ...          ...        ...       ...        ...      ...   \n",
-       "2537501       NaN          NaN          w       NaN        NaN        f   \n",
-       "1498074       NaN            s          n       NaN        NaN        f   \n",
-       "1399239       NaN          NaN          n       NaN        NaN        f   \n",
-       "275415          b          NaN          u       NaN        NaN        f   \n",
-       "252426        NaN          NaN          n       NaN        NaN        f   \n",
+       "                       ext_col          int_col  \\\n",
+       "71793                      Red            Black   \n",
+       "33214                    Black            Black   \n",
+       "4936                     Black            Black   \n",
+       "121116                    Gray  Black/Gun Metal   \n",
+       "36972                     Gray             Gray   \n",
+       "...                        ...              ...   \n",
+       "3948                      Gray             Gray   \n",
+       "84168                     Gray            Black   \n",
+       "186205                   White            Beige   \n",
+       "38335   Graphite Grey Metallic                –   \n",
+       "93996                   Silver            Black   \n",
        "\n",
-       "        ring-type spore-print-color habitat season  \n",
-       "2006170         f               NaN       d      a  \n",
-       "207690          f               NaN       l      s  \n",
-       "2396937         f                 p       g      u  \n",
-       "890751          f               NaN       g      a  \n",
-       "530679          f               NaN       d      a  \n",
-       "...           ...               ...     ...    ...  \n",
-       "2537501         f               NaN       d      u  \n",
-       "1498074         f               NaN       d      w  \n",
-       "1399239         f               NaN       l      u  \n",
-       "275415          f               NaN       g      a  \n",
-       "252426          f               NaN       d      s  \n",
+       "                                      accident clean_title  \n",
+       "71793                            None reported         Yes  \n",
+       "33214   At least 1 accident or damage reported         Yes  \n",
+       "4936                             None reported         Yes  \n",
+       "121116                           None reported         NaN  \n",
+       "36972   At least 1 accident or damage reported         Yes  \n",
+       "...                                        ...         ...  \n",
+       "3948    At least 1 accident or damage reported         Yes  \n",
+       "84168                            None reported         Yes  \n",
+       "186205                           None reported         Yes  \n",
+       "38335                            None reported         NaN  \n",
+       "93996                            None reported         Yes  \n",
        "\n",
-       "[100000 rows x 20 columns]"
+       "[20000 rows x 11 columns]"
       ]
      },
      "execution_count": 2,
@@ -405,7 +297,7 @@
     }
    ],
    "source": [
-    "target_column = 'class'\n",
+    "target_column = 'price'\n",
     "X_train = train_dataset.drop(columns=[target_column, 'id'])\n",
     "y_train = train_dataset[[target_column]]\n",
     "X_train"
@@ -413,7 +305,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "2e13738e-0446-4557-9a94-f26ff18c3d10",
    "metadata": {},
    "outputs": [],
@@ -424,141 +316,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "036e9c02-0cd8-4ab3-884f-93a2782e0c42",
    "metadata": {
     "scrolled": true
    },
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_label.py:114: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
-      "  y = column_or_1d(y, warn=True)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLMFeatureGenerator generate data with bad quality: 0 / 100000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_label.py:114: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
-      "  y = column_or_1d(y, warn=True)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLMFeatureGenerator generate data with bad quality: 2024 / 100000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_label.py:114: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
-      "  y = column_or_1d(y, warn=True)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLMFeatureGenerator generate data with bad quality: 37581 / 100000\n"
-     ]
-    },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
       "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_label.py:114: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
       "  y = column_or_1d(y, warn=True)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/datamart_profiler/core.py:199: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n",
-      "  data = data.astype(object).fillna('').astype(str)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "training, score: 0.9919818399939008...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_label.py:114: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
-      "  y = column_or_1d(y, warn=True)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLMFeatureGenerator generate data with bad quality: 1490 / 100000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_label.py:114: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
-      "  y = column_or_1d(y, warn=True)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLMFeatureGenerator generate data with bad quality: 5572 / 100000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_label.py:114: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
-      "  y = column_or_1d(y, warn=True)\n",
-      "<ast>:6: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "<ast>:5: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
       "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
       "\n",
       "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
       "\n",
       "\n",
-      "<ast>:7: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
-      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
-      "\n",
-      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
-      "\n",
-      "\n",
-      "<ast>:8: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "<ast>:6: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
       "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
       "\n",
       "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
       "\n",
       "\n",
-      "<ast>:14: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "<ast>:7: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
       "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
       "\n",
       "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
@@ -567,78 +349,16 @@
       "/ext3/miniconda3/lib/python3.9/site-packages/datamart_profiler/core.py:199: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n",
       "  data = data.astype(object).fillna('').astype(str)\n",
       "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
-      "  return fit_method(estimator, *args, **kwargs)\n",
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/base.py:1474: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
       "  return fit_method(estimator, *args, **kwargs)\n"
      ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "training, score: 0.9903666194552061...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_label.py:114: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
-      "  y = column_or_1d(y, warn=True)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLMFeatureGenerator generate data with bad quality: 3513 / 100000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_label.py:114: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
-      "  y = column_or_1d(y, warn=True)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLMFeatureGenerator generate data with bad quality: 2263 / 100000\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/ext3/miniconda3/lib/python3.9/site-packages/sklearn/preprocessing/_label.py:114: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
-      "  y = column_or_1d(y, warn=True)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLMFeatureGenerator generate data with bad quality: 2024 / 100000\n"
-     ]
     }
    ],
    "source": [
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn.impute import SimpleImputer\n",
-    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.ensemble import RandomForestRegressor\n",
     "from sklearn.preprocessing import LabelEncoder\n",
-    "from sklearn.metrics import f1_score, roc_auc_score\n",
+    "from sklearn.metrics import f1_score, roc_auc_score, mean_squared_error\n",
     "from sklearn.metrics import make_scorer\n",
     "from sklearn.model_selection import cross_val_score\n",
     "\n",
@@ -688,7 +408,19 @@
     "            y_gen = label_encoder.fit_transform(y_train)\n",
     "            \n",
     "            generator = LLMFeatureGenerator(\n",
-    "                description=\"Here's the information of the target dataset: This data set includes descriptions of hypothetical samples corresponding to 23 species of gilled mushrooms in the Agaricus and Lepiota Family (pp. 500-525).  Each species is identified as definitely edible, definitely poisonous, or of unknown edibility and not recommended.  This latter class was combined with the poisonous one.  The Guide clearly states that there is no simple rule for determining the edibility of a mushroom; no rule like ``leaflets three, let it be'' for Poisonous Oak and Ivy.\"\n",
+    "                description=\"\"\"\n",
+    "Used Car Price Prediction Dataset is a comprehensive collection of automotive information extracted from the popular automotive marketplace website, https://www.cars.com. This dataset comprises 4,009 data points, each representing a unique vehicle listing, and includes nine distinct features providing valuable insights into the world of automobiles.\n",
+    "    Brand & Model: Identify the brand or company name along with the specific model of each vehicle.\n",
+    "    Model Year: Discover the manufacturing year of the vehicles, crucial for assessing depreciation and technology advancements.\n",
+    "    Mileage: Obtain the mileage of each vehicle, a key indicator of wear and tear and potential maintenance requirements.\n",
+    "    Fuel Type: Learn about the type of fuel the vehicles run on, whether it's gasoline, diesel, electric, or hybrid.\n",
+    "    Engine Type: Understand the engine specifications, shedding light on performance and efficiency.\n",
+    "    Transmission: Determine the transmission type, whether automatic, manual, or another variant.\n",
+    "    Exterior & Interior Colors: Explore the aesthetic aspects of the vehicles, including exterior and interior color options.\n",
+    "    Accident History: Discover whether a vehicle has a prior history of accidents or damage, crucial for informed decision-making.\n",
+    "    Clean Title: Evaluate the availability of a clean title, which can impact the vehicle's resale value and legal status.\n",
+    "    Price: Access the listed prices for each vehicle, aiding in price comparison and budgeting.\n",
+    "This dataset is a valuable resource for automotive enthusiasts, buyers, and researchers interested in analyzing trends, making informed purchasing decisions or conducting studies related to the automotive industry and consumer preferences. Whether you are a data analyst, car buyer, or researcher, this dataset offers a wealth of information to explore and analyze.\"\"\"\n",
     "            )\n",
     "            X_gen = generator.fit_transform(X_train, y_train)\n",
     "            X_gen[target_column] = y_gen\n",
@@ -705,13 +437,13 @@
     "            pipeline = Pipeline(steps=[\n",
     "                ('sklearn.impute.SimpleImputer', SimpleImputer(strategy='most_frequent', keep_empty_features=True)),\n",
     "                ('sklearn.compose.ColumnTransformer', generate_column_transformer(metadata)),\n",
-    "                ('sklearn.ensemble.RandomForestClassifier', RandomForestClassifier())\n",
+    "                ('sklearn.ensemble.RandomForestRegressor', RandomForestRegressor())\n",
     "            ])\n",
     "            \n",
     "            \n",
     "            pipeline.fit(X_gen, y_gen)\n",
     "    \n",
-    "            scores = cross_val_score(pipeline, X_gen, y_gen, scoring=make_scorer(f1_score), cv=5)\n",
+    "            scores = cross_val_score(pipeline, X_gen, y_gen, scoring=make_scorer(mean_squared_error), cv=5)\n",
     "            score = sum(scores) / len(scores)\n",
     "            if score > best_score:\n",
     "                best_score = score\n",
@@ -729,10 +461,45 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
+   "id": "cfde5200-86d2-44e9-9b0e-6cc8a302ebfe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "code = llm_featgen.code\n",
+    "code"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
    "id": "a294fef2-465b-4610-b099-5edf96a1fa4c",
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<ast>:6: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+      "\n",
+      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+      "\n",
+      "\n",
+      "<ast>:7: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+      "\n",
+      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+      "\n",
+      "\n",
+      "<ast>:8: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+      "\n",
+      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+      "\n",
+      "\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -773,130 +540,130 @@
        "      <th>veil_type_missing</th>\n",
        "      <th>veil_color_missing</th>\n",
        "      <th>spore_print_color_missing</th>\n",
-       "      <th>cap_diameter_to_stem_height_ratio</th>\n",
-       "      <th>stem_width_to_stem_height_ratio</th>\n",
+       "      <th>cap_shape_surface_interaction</th>\n",
+       "      <th>stem_dimensions_ratio</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>2006170</th>\n",
-       "      <td>0.76</td>\n",
-       "      <td>x</td>\n",
-       "      <td>g</td>\n",
-       "      <td>o</td>\n",
+       "      <th>857763</th>\n",
+       "      <td>5.99</td>\n",
+       "      <td>p</td>\n",
+       "      <td>s</td>\n",
+       "      <td>w</td>\n",
        "      <td>f</td>\n",
-       "      <td>missing</td>\n",
-       "      <td>missing</td>\n",
-       "      <td>y</td>\n",
-       "      <td>3.59</td>\n",
-       "      <td>0.82</td>\n",
+       "      <td>e</td>\n",
+       "      <td>c</td>\n",
+       "      <td>p</td>\n",
+       "      <td>18.48</td>\n",
+       "      <td>12.66</td>\n",
        "      <td>...</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>0.211699</td>\n",
-       "      <td>0.228412</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>p_s</td>\n",
+       "      <td>1.459716</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>207690</th>\n",
-       "      <td>5.72</td>\n",
-       "      <td>p</td>\n",
-       "      <td>missing</td>\n",
-       "      <td>g</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
+       "      <th>360364</th>\n",
+       "      <td>8.58</td>\n",
+       "      <td>x</td>\n",
+       "      <td>s</td>\n",
+       "      <td>w</td>\n",
        "      <td>f</td>\n",
-       "      <td>5.37</td>\n",
-       "      <td>18.09</td>\n",
+       "      <td>s</td>\n",
+       "      <td>c</td>\n",
+       "      <td>w</td>\n",
+       "      <td>7.22</td>\n",
+       "      <td>31.27</td>\n",
        "      <td>...</td>\n",
-       "      <td>True</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>1.065177</td>\n",
-       "      <td>3.368715</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>x_s</td>\n",
+       "      <td>0.230892</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2396937</th>\n",
-       "      <td>2.24</td>\n",
-       "      <td>x</td>\n",
-       "      <td>i</td>\n",
-       "      <td>l</td>\n",
+       "      <th>839097</th>\n",
+       "      <td>8.25</td>\n",
        "      <td>f</td>\n",
-       "      <td>a</td>\n",
-       "      <td>missing</td>\n",
-       "      <td>p</td>\n",
-       "      <td>4.11</td>\n",
-       "      <td>2.57</td>\n",
+       "      <td>t</td>\n",
+       "      <td>e</td>\n",
+       "      <td>f</td>\n",
+       "      <td>x</td>\n",
+       "      <td>d</td>\n",
+       "      <td>w</td>\n",
+       "      <td>6.46</td>\n",
+       "      <td>17.42</td>\n",
        "      <td>...</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>False</td>\n",
-       "      <td>0.545012</td>\n",
-       "      <td>0.625304</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>f_t</td>\n",
+       "      <td>0.370838</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>890751</th>\n",
-       "      <td>1.52</td>\n",
-       "      <td>b</td>\n",
-       "      <td>h</td>\n",
-       "      <td>n</td>\n",
-       "      <td>f</td>\n",
-       "      <td>a</td>\n",
-       "      <td>missing</td>\n",
+       "      <th>3013331</th>\n",
+       "      <td>10.58</td>\n",
+       "      <td>x</td>\n",
+       "      <td>e</td>\n",
        "      <td>n</td>\n",
-       "      <td>5.01</td>\n",
-       "      <td>1.66</td>\n",
+       "      <td>t</td>\n",
+       "      <td>p</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>o</td>\n",
+       "      <td>6.63</td>\n",
+       "      <td>23.48</td>\n",
        "      <td>...</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>0.303393</td>\n",
-       "      <td>0.331337</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>x_e</td>\n",
+       "      <td>0.282368</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>530679</th>\n",
-       "      <td>7.67</td>\n",
-       "      <td>f</td>\n",
-       "      <td>missing</td>\n",
-       "      <td>y</td>\n",
+       "      <th>2834831</th>\n",
+       "      <td>15.45</td>\n",
+       "      <td>x</td>\n",
+       "      <td>s</td>\n",
+       "      <td>n</td>\n",
        "      <td>f</td>\n",
-       "      <td>a</td>\n",
+       "      <td>s</td>\n",
        "      <td>c</td>\n",
-       "      <td>w</td>\n",
-       "      <td>6.54</td>\n",
-       "      <td>17.52</td>\n",
+       "      <td>g</td>\n",
+       "      <td>9.50</td>\n",
+       "      <td>19.29</td>\n",
        "      <td>...</td>\n",
-       "      <td>True</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>1.172783</td>\n",
-       "      <td>2.678899</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>x_s</td>\n",
+       "      <td>0.492483</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -923,213 +690,213 @@
        "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2537501</th>\n",
-       "      <td>5.67</td>\n",
-       "      <td>x</td>\n",
-       "      <td>h</td>\n",
-       "      <td>y</td>\n",
-       "      <td>t</td>\n",
-       "      <td>x</td>\n",
-       "      <td>c</td>\n",
-       "      <td>y</td>\n",
-       "      <td>6.71</td>\n",
-       "      <td>14.65</td>\n",
-       "      <td>...</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>0.845007</td>\n",
-       "      <td>2.183308</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1498074</th>\n",
-       "      <td>5.85</td>\n",
+       "      <th>2247836</th>\n",
+       "      <td>4.34</td>\n",
        "      <td>x</td>\n",
-       "      <td>s</td>\n",
-       "      <td>b</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>w</td>\n",
        "      <td>f</td>\n",
        "      <td>x</td>\n",
        "      <td>c</td>\n",
        "      <td>w</td>\n",
-       "      <td>6.47</td>\n",
-       "      <td>9.63</td>\n",
+       "      <td>7.93</td>\n",
+       "      <td>8.71</td>\n",
        "      <td>...</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>0.904173</td>\n",
-       "      <td>1.488408</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>x_unknown</td>\n",
+       "      <td>0.910448</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>1399239</th>\n",
-       "      <td>3.59</td>\n",
+       "      <th>703166</th>\n",
+       "      <td>3.09</td>\n",
        "      <td>f</td>\n",
-       "      <td>l</td>\n",
        "      <td>y</td>\n",
+       "      <td>n</td>\n",
        "      <td>f</td>\n",
-       "      <td>x</td>\n",
-       "      <td>d</td>\n",
-       "      <td>y</td>\n",
-       "      <td>5.16</td>\n",
-       "      <td>4.47</td>\n",
+       "      <td>e</td>\n",
+       "      <td>c</td>\n",
+       "      <td>w</td>\n",
+       "      <td>5.21</td>\n",
+       "      <td>3.50</td>\n",
        "      <td>...</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>0.695736</td>\n",
-       "      <td>0.866279</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>f_y</td>\n",
+       "      <td>1.488571</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>275415</th>\n",
-       "      <td>9.80</td>\n",
+       "      <th>2159902</th>\n",
+       "      <td>2.27</td>\n",
        "      <td>f</td>\n",
-       "      <td>missing</td>\n",
-       "      <td>n</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>u</td>\n",
        "      <td>f</td>\n",
-       "      <td>s</td>\n",
+       "      <td>a</td>\n",
        "      <td>c</td>\n",
-       "      <td>g</td>\n",
-       "      <td>7.09</td>\n",
-       "      <td>21.54</td>\n",
+       "      <td>n</td>\n",
+       "      <td>3.42</td>\n",
+       "      <td>3.37</td>\n",
        "      <td>...</td>\n",
-       "      <td>True</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>1.382228</td>\n",
-       "      <td>3.038082</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>f_unknown</td>\n",
+       "      <td>1.014837</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>252426</th>\n",
-       "      <td>4.61</td>\n",
-       "      <td>o</td>\n",
-       "      <td>missing</td>\n",
+       "      <th>273217</th>\n",
+       "      <td>26.70</td>\n",
+       "      <td>x</td>\n",
+       "      <td>s</td>\n",
        "      <td>n</td>\n",
        "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>f</td>\n",
-       "      <td>5.27</td>\n",
-       "      <td>19.09</td>\n",
+       "      <td>p</td>\n",
+       "      <td>unknown</td>\n",
+       "      <td>y</td>\n",
+       "      <td>5.91</td>\n",
+       "      <td>40.87</td>\n",
        "      <td>...</td>\n",
-       "      <td>True</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>True</td>\n",
-       "      <td>0.874763</td>\n",
-       "      <td>3.622391</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>x_s</td>\n",
+       "      <td>0.144605</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>323296</th>\n",
+       "      <td>8.04</td>\n",
+       "      <td>s</td>\n",
+       "      <td>t</td>\n",
+       "      <td>p</td>\n",
+       "      <td>t</td>\n",
+       "      <td>d</td>\n",
+       "      <td>c</td>\n",
+       "      <td>p</td>\n",
+       "      <td>7.16</td>\n",
+       "      <td>14.78</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>s_t</td>\n",
+       "      <td>0.484438</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>99982 rows × 26 columns</p>\n",
+       "<p>66988 rows × 27 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
        "         cap-diameter cap-shape cap-surface cap-color does-bruise-or-bleed  \\\n",
-       "2006170          0.76         x           g         o                    f   \n",
-       "207690           5.72         p     missing         g                    f   \n",
-       "2396937          2.24         x           i         l                    f   \n",
-       "890751           1.52         b           h         n                    f   \n",
-       "530679           7.67         f     missing         y                    f   \n",
+       "857763           5.99         p           s         w                    f   \n",
+       "360364           8.58         x           s         w                    f   \n",
+       "839097           8.25         f           t         e                    f   \n",
+       "3013331         10.58         x           e         n                    t   \n",
+       "2834831         15.45         x           s         n                    f   \n",
        "...               ...       ...         ...       ...                  ...   \n",
-       "2537501          5.67         x           h         y                    t   \n",
-       "1498074          5.85         x           s         b                    f   \n",
-       "1399239          3.59         f           l         y                    f   \n",
-       "275415           9.80         f     missing         n                    f   \n",
-       "252426           4.61         o     missing         n                    f   \n",
+       "2247836          4.34         x     unknown         w                    f   \n",
+       "703166           3.09         f           y         n                    f   \n",
+       "2159902          2.27         f     unknown         u                    f   \n",
+       "273217          26.70         x           s         n                    f   \n",
+       "323296           8.04         s           t         p                    t   \n",
        "\n",
        "        gill-attachment gill-spacing gill-color  stem-height  stem-width  ...  \\\n",
-       "2006170         missing      missing          y         3.59        0.82  ...   \n",
-       "207690                f            f          f         5.37       18.09  ...   \n",
-       "2396937               a      missing          p         4.11        2.57  ...   \n",
-       "890751                a      missing          n         5.01        1.66  ...   \n",
-       "530679                a            c          w         6.54       17.52  ...   \n",
+       "857763                e            c          p        18.48       12.66  ...   \n",
+       "360364                s            c          w         7.22       31.27  ...   \n",
+       "839097                x            d          w         6.46       17.42  ...   \n",
+       "3013331               p      unknown          o         6.63       23.48  ...   \n",
+       "2834831               s            c          g         9.50       19.29  ...   \n",
        "...                 ...          ...        ...          ...         ...  ...   \n",
-       "2537501               x            c          y         6.71       14.65  ...   \n",
-       "1498074               x            c          w         6.47        9.63  ...   \n",
-       "1399239               x            d          y         5.16        4.47  ...   \n",
-       "275415                s            c          g         7.09       21.54  ...   \n",
-       "252426                f            f          f         5.27       19.09  ...   \n",
+       "2247836               x            c          w         7.93        8.71  ...   \n",
+       "703166                e            c          w         5.21        3.50  ...   \n",
+       "2159902               a            c          n         3.42        3.37  ...   \n",
+       "273217                p      unknown          y         5.91       40.87  ...   \n",
+       "323296                d            c          p         7.16       14.78  ...   \n",
        "\n",
        "        cap_surface_missing gill_attachment_missing gill_spacing_missing  \\\n",
-       "2006170               False                    True                 True   \n",
-       "207690                 True                   False                False   \n",
-       "2396937               False                   False                 True   \n",
-       "890751                False                   False                 True   \n",
-       "530679                 True                   False                False   \n",
+       "857763                    0                       0                    0   \n",
+       "360364                    0                       0                    0   \n",
+       "839097                    0                       0                    0   \n",
+       "3013331                   0                       0                    1   \n",
+       "2834831                   0                       0                    0   \n",
        "...                     ...                     ...                  ...   \n",
-       "2537501               False                   False                False   \n",
-       "1498074               False                   False                False   \n",
-       "1399239               False                   False                False   \n",
-       "275415                 True                   False                False   \n",
-       "252426                 True                   False                False   \n",
+       "2247836                   1                       0                    0   \n",
+       "703166                    0                       0                    0   \n",
+       "2159902                   1                       0                    0   \n",
+       "273217                    0                       0                    1   \n",
+       "323296                    0                       0                    0   \n",
        "\n",
        "        stem_root_missing stem_surface_missing veil_type_missing  \\\n",
-       "2006170              True                 True              True   \n",
-       "207690               True                 True              True   \n",
-       "2396937              True                 True              True   \n",
-       "890751               True                False              True   \n",
-       "530679               True                 True              True   \n",
+       "857763                  1                    0                 1   \n",
+       "360364                  1                    1                 1   \n",
+       "839097                  0                    1                 1   \n",
+       "3013331                 1                    1                 1   \n",
+       "2834831                 1                    1                 1   \n",
        "...                   ...                  ...               ...   \n",
-       "2537501              True                 True              True   \n",
-       "1498074              True                False              True   \n",
-       "1399239              True                 True              True   \n",
-       "275415              False                 True              True   \n",
-       "252426               True                 True              True   \n",
+       "2247836                 0                    1                 1   \n",
+       "703166                  1                    0                 1   \n",
+       "2159902                 1                    0                 1   \n",
+       "273217                  1                    1                 1   \n",
+       "323296                  1                    1                 1   \n",
        "\n",
-       "         veil_color_missing  spore_print_color_missing  \\\n",
-       "2006170                True                       True   \n",
-       "207690                 True                       True   \n",
-       "2396937                True                      False   \n",
-       "890751                 True                       True   \n",
-       "530679                 True                       True   \n",
-       "...                     ...                        ...   \n",
-       "2537501                True                       True   \n",
-       "1498074                True                       True   \n",
-       "1399239                True                       True   \n",
-       "275415                 True                       True   \n",
-       "252426                 True                       True   \n",
+       "        veil_color_missing  spore_print_color_missing  \\\n",
+       "857763                   1                          1   \n",
+       "360364                   1                          1   \n",
+       "839097                   1                          1   \n",
+       "3013331                  1                          1   \n",
+       "2834831                  1                          1   \n",
+       "...                    ...                        ...   \n",
+       "2247836                  0                          1   \n",
+       "703166                   1                          1   \n",
+       "2159902                  1                          1   \n",
+       "273217                   1                          1   \n",
+       "323296                   1                          1   \n",
        "\n",
-       "         cap_diameter_to_stem_height_ratio  stem_width_to_stem_height_ratio  \n",
-       "2006170                           0.211699                         0.228412  \n",
-       "207690                            1.065177                         3.368715  \n",
-       "2396937                           0.545012                         0.625304  \n",
-       "890751                            0.303393                         0.331337  \n",
-       "530679                            1.172783                         2.678899  \n",
-       "...                                    ...                              ...  \n",
-       "2537501                           0.845007                         2.183308  \n",
-       "1498074                           0.904173                         1.488408  \n",
-       "1399239                           0.695736                         0.866279  \n",
-       "275415                            1.382228                         3.038082  \n",
-       "252426                            0.874763                         3.622391  \n",
+       "         cap_shape_surface_interaction  stem_dimensions_ratio  \n",
+       "857763                             p_s               1.459716  \n",
+       "360364                             x_s               0.230892  \n",
+       "839097                             f_t               0.370838  \n",
+       "3013331                            x_e               0.282368  \n",
+       "2834831                            x_s               0.492483  \n",
+       "...                                ...                    ...  \n",
+       "2247836                      x_unknown               0.910448  \n",
+       "703166                             f_y               1.488571  \n",
+       "2159902                      f_unknown               1.014837  \n",
+       "273217                             x_s               0.144605  \n",
+       "323296                             s_t               0.484438  \n",
        "\n",
-       "[99982 rows x 26 columns]"
+       "[66988 rows x 27 columns]"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1145,13 +912,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "e6e2832f-fb1d-408d-8dac-080738643b3d",
    "metadata": {},
    "outputs": [],
    "source": [
     "import logging\n",
-    "automl = AutoMLClassifier(time_bound=20,\n",
+    "automl = AutoMLRegressor(time_bound=1,\n",
     "                          output_folder=\"tmp/\",\n",
     "                          checkpoints_folder=\"tmp/\",\n",
     "                          verbose=logging.INFO,\n",
@@ -1161,7 +928,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "2f91a087-849c-4bc5-97a7-30fa9927511b",
    "metadata": {},
    "outputs": [],
@@ -1197,7 +964,7 @@
      "text": [
       ":job_id:01000000\n",
       ":actor_name:RolloutWorker\n",
-      "2024-08-31 19:06:56,196\tWARNING env.py:162 -- Your env doesn't have a .spec.max_episode_steps attribute. Your horizon will default to infinity, and your environment will not be reset.\n"
+      "2024-08-31 20:00:18,501\tWARNING env.py:162 -- Your env doesn't have a .spec.max_episode_steps attribute. Your horizon will default to infinity, and your environment will not be reset.\n"
      ]
     },
     {
@@ -1212,10 +979,34 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2024-08-31 19:07:13,298\tWARNING deprecation.py:50 -- DeprecationWarning: `_get_slice_indices` has been deprecated. This will raise an error in the future!\n",
+      "2024-08-31 20:00:35,317\tWARNING deprecation.py:50 -- DeprecationWarning: `_get_slice_indices` has been deprecated. This will raise an error in the future!\n",
       "TBB Warning: The number of workers is currently limited to 3. The request for 95 workers is ignored. Further requests for more workers will be silently ignored until the limit changes.\n",
       "\n"
      ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO|2024-08-31 20:01:33|Scored pipeline, score=0.984\n",
+      "INFO|2024-08-31 20:01:34|Scored pipeline, score=0.98\n",
+      "INFO|2024-08-31 20:01:34|Scored pipeline, score=0.986\n",
+      "INFO|2024-08-31 20:01:35|Scored pipeline, score=0.986\n",
+      "INFO|2024-08-31 20:01:35|Scored pipeline, score=0.988\n",
+      "INFO|2024-08-31 20:01:35|Scored pipeline, score=0.98\n",
+      "INFO|2024-08-31 20:01:38|Scored pipeline, score=0.986\n",
+      "INFO|2024-08-31 20:01:40|Scored pipeline, score=0.986\n",
+      "INFO|2024-08-31 20:01:40|Scored pipeline, score=0.976\n",
+      "INFO|2024-08-31 20:01:41|Scored pipeline, score=0.978\n",
+      "INFO|2024-08-31 20:01:41|Scored pipeline, score=0.976\n",
+      "INFO|2024-08-31 20:01:41|Scored pipeline, score=0.98\n",
+      "INFO|2024-08-31 20:01:41|Scored pipeline, score=0.98\n",
+      "INFO|2024-08-31 20:01:41|Found 13 pipelines\n",
+      "[INFO][abstract_initial_design.py:147] Using 40 initial design configurations and 0 additional configurations.\n",
+      "[INFO][smbo.py:497] Continuing from previous run.\n",
+      "[INFO][abstract_intensifier.py:287] Added existing seed 209652396 from runhistory to the intensifier.\n",
+      "[INFO][abstract_intensifier.py:305] Using only one seed for deterministic scenario.\n"
+     ]
     }
    ],
    "source": [
@@ -1242,6 +1033,26 @@
     "automl.score(X_val, y_val)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2224ca93-6dc0-4d8a-9b31-307e4fcd9f18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import ast\n",
+    "\n",
+    "def parse_by_llm(x):\n",
+    "    loc = {}\n",
+    "    access_scope = {\"df\": x, \"pd\": pd, \"np\": np}\n",
+    "    parsed = ast.parse(code)\n",
+    "    exec(compile(parsed, filename=\"<ast>\", mode=\"exec\"), access_scope, loc)\n",
+    "\n",
+    "X_test = test_dataset.drop(columns=['id'])\n",
+    "parse_by_llm(X_test)\n",
+    "X_test"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,