From 2c4b26c15042efaa3b1221e05c6217faa7ddd5bf Mon Sep 17 00:00:00 2001
From: Kajanan Selvanesan <kajananselvanesan@gmail.com>
Date: Sun, 12 May 2024 00:04:34 +0530
Subject: [PATCH] feat: Introduce use case for dwell time

---
 .gitignore                                    |   1 +
 README.md                                     |  25 +-
 examples/pipeline.py                          |   4 +-
 src/_pipeline.py                              | 458 ++++++++++++++++++
 src/models/use_cases/__init__.py              |   1 +
 src/models/use_cases/dwell_time/__init__.py   |   3 +
 .../use_cases/dwell_time/bus/__init__.py      |   1 +
 .../use_cases/dwell_time/bus/mme4bdt.py       |  73 +++
 src/pipeline.py                               |  96 +---
 9 files changed, 568 insertions(+), 94 deletions(-)
 create mode 100644 src/_pipeline.py
 create mode 100644 src/models/use_cases/dwell_time/__init__.py
 create mode 100644 src/models/use_cases/dwell_time/bus/__init__.py
 create mode 100644 src/models/use_cases/dwell_time/bus/mme4bdt.py

diff --git a/.gitignore b/.gitignore
index 1e9f2f7..9f0247a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,6 +51,7 @@ Temporary Items
 ### PyCharm ###
 # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
 # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+.idea/
 
 # User-specific stuff
 .idea/**/workspace.xml
diff --git a/README.md b/README.md
index 0039c6d..f81c7f3 100644
--- a/README.md
+++ b/README.md
@@ -76,8 +76,10 @@ Output:
 3. `pipeline` can be used to run experiments with the all the steps by simply running one function with some parameters.
 ```py
 from datetime import datetime
-from ibat.pipeline import run_exp
+
 from ibat.concept_drift_detector.strategies import DDM
+from ibat.datasets import BUS_654_FEATURES_ENCODED_DWELL_TIMES
+from ibat.pipeline import run_dt_exp
 
 
 def datetime_from_string(datetime_string: str) -> datetime:
@@ -86,19 +88,24 @@ def datetime_from_string(datetime_string: str) -> datetime:
 
 if __name__ == "__main__":
     cdd_strategy = DDM(
-        drift_level=2,
-        min_num_instances=2,
+        warning_level=0.1,
+        drift_level=1.5,
+        min_num_instances=1,
     )
-    run_exp(
+    run_dt_exp(
+        dt_df=BUS_654_FEATURES_ENCODED_DWELL_TIMES.dataframe,
         hist_start=datetime_from_string("2021-10-01"),
-        hist_end=datetime_from_string("2022-01-01"),
-        stream_start=datetime_from_string("2022-01-01"),
+        hist_end=datetime_from_string("2022-02-01"),
+        stream_start=datetime_from_string("2022-02-01"),
         stream_end=datetime_from_string("2022-11-01"),
-        interval_min=60,
+        interval_min=60 * 2,
+        chunk_size=100,
         active_strategy=True,
+        is_buffer_enabled=False,
         cdd_strategy=cdd_strategy,
-        output_parent_dir="../experiments",
-        label="m-xgb-s-xgb_model",
+        incremental_learning=True,
+        output_parent_dir="./demo",
+        label="demo-dt-exp-for-hbp",
     )
 ```
 
diff --git a/examples/pipeline.py b/examples/pipeline.py
index 8760d8a..80c7ad4 100644
--- a/examples/pipeline.py
+++ b/examples/pipeline.py
@@ -2,7 +2,7 @@
 
 from src.concept_drift_detector.strategies import DDM
 from src.datasets import BUS_654_FEATURES_ENCODED_DWELL_TIMES
-from src.pipeline import run_exp
+from src.pipeline import run_dt_exp
 
 
 def datetime_from_string(datetime_string: str) -> datetime:
@@ -28,7 +28,7 @@ def datetime_from_string(datetime_string: str) -> datetime:
     folder_path_to_save_result = "../experiments"
     experiment_label = "m-xgb-s-xgb_model-d-batch_r2"
 
-    run_exp(
+    run_dt_exp(
         dt_df=dwell_time_df,
         hist_start=historical_data_starting_from,
         hist_end=historical_data_ending_at,
diff --git a/src/_pipeline.py b/src/_pipeline.py
new file mode 100644
index 0000000..4c32864
--- /dev/null
+++ b/src/_pipeline.py
@@ -0,0 +1,458 @@
+import os
+import warnings
+from datetime import datetime, timedelta
+from time import time
+from typing import Optional
+
+import matplotlib.pyplot as plt
+from numpy import mean
+from pandas import concat, DataFrame, to_datetime
+from pandas.errors import SettingWithCopyWarning
+from sklearn.metrics import (
+    mean_absolute_error,
+    mean_absolute_percentage_error,
+    root_mean_squared_error,
+)
+from src.concept_drift_detector.strategies import IStrategy
+from src.datasets import (
+    BUS_654_FEATURES_ADDED_RUNNING_TIMES,
+    BUS_654_FEATURES_ENCODED_DWELL_TIMES,
+)
+from src.models.use_cases.arrival_time.bus import MME4BAT
+
+
+def run_exp(
+    dt_df: DataFrame,
+    hist_start: datetime,
+    hist_end: datetime,
+    stream_start: datetime,
+    stream_end: datetime,
+    interval_min: float,
+    chunk_size: int,
+    active_strategy: Optional[bool] = False,
+    is_buffer_enabled: Optional[bool] = False,
+    cdd_strategy: Optional[IStrategy] = None,
+    incremental_learning: Optional[bool] = True,
+    output_parent_dir: Optional[str] = "./",
+    label: Optional[str] = "",
+) -> None:
+    """
+    Run the experiment.
+
+    Args:
+        dt_df: Dwell time dataframe to contact the experiment.
+        hist_start: Start timestamp (inclusive) for historical data.
+        hist_end: End timestamp (exclusive) for historical data.
+        stream_start: Start timestamp (inclusive) for streaming data.
+        stream_end: End timestamp (exclusive) for streaming data.
+        interval_min: Minimum waiting time (in minutes) for data processing.
+        chunk_size: Minimum required amount of data for data processing.
+        active_strategy: Flag indicating whether to use active strategy (default is False).
+        is_buffer_enabled: To do (default is False).
+        cdd_strategy: Strategy to be used for detecting concept drift. Required if active_strategy is True.
+        incremental_learning: To do (default is True).
+        output_parent_dir: Parent directory's path to save experiment results.
+        label: Experiment label (default is an empty string).
+
+    Returns:
+        None
+    """
+    # To do: warnings.filterwarnings("ignore", category=SettingWithCopyWarning)
+    warnings.filterwarnings("ignore")
+
+    if interval_min == 0 and chunk_size == 0:
+        raise ValueError("Both interval_min & chunk_size cannot be set to zero.")
+    else:
+        count_not_enough = False
+        is_end_reached = False
+
+        hybrid_bp = interval_min != 0 and chunk_size != 0
+        scheduled_bp = interval_min != 0 and chunk_size == 0
+
+        if hybrid_bp:
+            bp_technique = "HYBRID"
+        elif scheduled_bp:
+            bp_technique = "SCHEDULED"
+        else:
+            bp_technique = "PERIODIC"
+
+    if active_strategy and cdd_strategy is None:
+        raise ValueError("cdd_strategy must be provided when active_strategy is True.")
+    else:
+        strategy = "ACTIVE" if active_strategy else "PASSIVE"
+
+    print(f"BATCH PROCESSING TECHNIQUE: {bp_technique} | CONCEPT DRIFT HANDLING STRATEGY: {strategy}")
+
+    rt_df: DataFrame = BUS_654_FEATURES_ADDED_RUNNING_TIMES.dataframe
+    # dt_df: DataFrame = BUS_654_FEATURES_ENCODED_DWELL_TIMES.dataframe
+
+    dt_df["arrival_datetime"] = to_datetime(
+        dt_df["date"] + " " + dt_df["arrival_time"], format="%Y-%m-%d %H:%M:%S"
+    )
+    dt_df.sort_values(by="arrival_datetime", inplace=True)
+
+    base_model: Optional[MME4BAT] = None
+    model: Optional[MME4BAT] = None
+
+    result_dt_df = DataFrame(
+        columns=dt_df.columns.tolist()
+        + ["true_prediction", "base_model_prediction", "model_prediction"]
+    )
+
+    true_predictions = []
+    base_model_predictions = []
+    model_predictions = []
+
+    processing_times = []
+
+    dt_x_buffer: Optional[DataFrame] = None
+    dt_y_buffer: Optional[DataFrame] = None
+
+    from_date_time = hist_start
+    to_date_time = hist_end
+
+    while from_date_time < stream_end:
+        if hybrid_bp and count_not_enough:
+            temp_df = dt_df.loc[
+                (dt_df["arrival_datetime"] >= from_date_time),
+                :,
+            ].reset_index(drop=True)
+            if temp_df.shape[0] < chunk_size:
+                is_end_reached = True
+            else:
+                to_date_time = temp_df["arrival_datetime"].iloc[chunk_size - 1] + timedelta(minutes=1)
+
+        print(
+            f"DATA STREAM: [{from_date_time.strftime('%Y-%m-%d %H:%M:%S')} - {to_date_time.strftime('%Y-%m-%d %H:%M:%S')})",
+            end="",
+            flush=True,
+        )
+
+        dt_chunk: DataFrame = dt_df.loc[
+            (from_date_time <= dt_df["arrival_datetime"])
+            & (dt_df["arrival_datetime"] < to_date_time),
+            :,
+        ].reset_index(drop=True)
+        count_not_enough = dt_chunk.shape[0] < chunk_size
+
+        if (
+            scheduled_bp
+            or (hybrid_bp and not count_not_enough)
+            or is_end_reached
+        ):
+            from_date_time = (
+                stream_start if from_date_time == hist_start else to_date_time
+            )
+            to_date_time = from_date_time + timedelta(minutes=interval_min)
+
+            if len(dt_chunk) == 0:
+                print(" | NO INSTANCES")
+                continue
+            else:
+                print(f" | NUMBER OF INSTANCES: {len(dt_chunk):04d}", end="")
+
+            numeric_dt_chunk = dt_chunk.select_dtypes(include="number")
+
+            dt_x: DataFrame = numeric_dt_chunk.drop(columns=["dwell_time_in_seconds"])
+            dt_y: DataFrame = numeric_dt_chunk[["dwell_time_in_seconds"]]
+
+            if not model:
+                base_model = MME4BAT()
+                model = MME4BAT(cdd_strategy=cdd_strategy)
+
+                base_model.fit(rt_x=None, rt_y=None, dt_x=dt_x, dt_y=dt_y)
+                model.fit(rt_x=None, rt_y=None, dt_x=dt_x, dt_y=dt_y)
+
+                print(" | MODEL INITIATED")
+            else:
+                true_prediction = dt_y["dwell_time_in_seconds"].tolist()
+                base_model_prediction = base_model.predict(rt_x=None, dt_x=dt_x)[
+                    "prediction"
+                ].tolist()
+                model_prediction = model.predict(rt_x=None, dt_x=dt_x)[
+                    "prediction"
+                ].tolist()
+
+                true_predictions.extend(true_prediction)
+                base_model_predictions.extend(base_model_prediction)
+                model_predictions.extend(model_prediction)
+
+                dt_chunk["true_prediction"] = true_prediction
+                dt_chunk["base_model_prediction"] = base_model_prediction
+                dt_chunk["model_prediction"] = model_prediction
+
+                result_dt_df = concat([result_dt_df, dt_chunk], ignore_index=True)
+
+                start_time = time()
+
+                if active_strategy:
+                    if is_buffer_enabled:
+                        if (dt_x_buffer is None) or (dt_y_buffer is None):
+                            dt_x_buffer = dt_x
+                            dt_y_buffer = dt_y
+                        else:
+                            dt_x_buffer = concat([dt_x_buffer, dt_x], ignore_index=True)
+                            dt_y_buffer = concat([dt_y_buffer, dt_y], ignore_index=True)
+                        ni_dt_x = dt_x_buffer
+                        ni_dt_y = dt_y_buffer
+                    else:
+                        ni_dt_x = dt_x
+                        ni_dt_y = dt_y
+
+                    is_detected = model.is_concept_drift_detected(
+                        ni_rt_x=None, ni_rt_y=None, ni_dt_x=dt_x, ni_dt_y=dt_y
+                    )
+                    if is_detected:
+                        model.incremental_fit(
+                            ni_rt_x=None,
+                            ni_rt_y=None,
+                            ni_dt_x=ni_dt_x,
+                            ni_dt_y=ni_dt_y,
+                        )
+                        dt_x_buffer = None
+                        dt_y_buffer = None
+                else:
+                    model.incremental_fit(
+                        ni_rt_x=None, ni_rt_y=None, ni_dt_x=dt_x, ni_dt_y=dt_y
+                    )
+                    print()
+
+                end_time = time()
+                processing_time = end_time - start_time
+                processing_times.append(processing_time)
+        else:
+            print(f" | NUMBER OF INSTANCES: {len(dt_chunk):04d} | COUNT IS NOT ENOUGH. WAITING FOR MORE DATA POINTS.")
+
+    print("\rDATA STREAMING ENDED.", flush=True)
+    print(
+        "\rGENERATING & EXPORTING THE RESULTS...",
+        end="",
+        flush=True,
+    )
+
+    cdd_strategy_content = ""
+    if active_strategy:
+        cdd_strategy_content = f"- {cdd_strategy.__class__.__name__}:\n\n"
+        cdd_strategy_content += f"| Attribute | Value |\n|---|---|\n"
+
+        for attr, value in cdd_strategy.get_attributes().items():
+            cdd_strategy_content += f"| {attr} | {value} |\n"
+
+    base_model_mae = mean_absolute_error(true_predictions, base_model_predictions)
+    model_mae = mean_absolute_error(true_predictions, model_predictions)
+
+    base_model_rmse = root_mean_squared_error(true_predictions, base_model_predictions)
+    model_rmse = root_mean_squared_error(true_predictions, model_predictions)
+
+    md_file_content = f"""
+# Experiment: {label}
+
+## Parameters
+- Historical data starting from: {hist_start}
+- Historical data ending at: {hist_end}
+- Streaming data starting from: {stream_start}
+- Streaming data ending at: {stream_end}
+- Minimum waiting time: {interval_min} minutes
+- Minimum required amount of data: {chunk_size}
+- Batch processing technique: {bp_technique}
+- Concept drift handling strategy: {strategy}
+- Is buffer enabled: {is_buffer_enabled}
+- Concept drift detection algorithm: {cdd_strategy.__class__.__name__ if active_strategy else None}
+{cdd_strategy_content}
+
+## Results
+- Model performance metrics:
+
+| Model                                | MAE (s)              | RMSE (s)              |
+|--------------------------------------|----------------------|-----------------------|
+| Base model (XGBoost)                 | {base_model_mae:.3f} | {base_model_rmse:.3f} |
+| Base model with incremental learning | {model_mae:.3f}      | {model_rmse:.3f}      |
+
+- Error reduction percentage in terms of MAE: {(base_model_mae - model_mae) * 100 / base_model_mae:.3f} %
+- Average processing time after the batch preparation: {mean(processing_times) * 1000:.3f} ms
+    """
+
+    output_dir = os.path.join(
+        output_parent_dir,
+        f"ex-{label}-{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}",
+    )
+    os.makedirs(output_dir, exist_ok=True)
+
+    with open(f"{output_dir}/README.md", "w") as f:
+        f.write(md_file_content)
+
+    print("\rGENERATING & EXPORTING THE RESULTS ENDED.", flush=True)
+
+    # print(
+    #     f"MAE for base model: {mean_absolute_error(true_prediction, base_model_prediction)}"
+    # )
+    # print(
+    #     f"MAPE for base model: {mean_absolute_percentage_error(true_prediction, base_model_prediction) * 100}"
+    # )
+    # print(
+    #     f"RMSE for base model: {mean_squared_error(true_prediction, base_model_prediction, squared=False)}"
+    # )
+    # print("-----------------------------------------------------------------")
+    # print(
+    #     f"MAE for incremental online learning model: {mean_absolute_error(true_prediction, model_prediction)}"
+    # )
+    # print(
+    #     f"""MAPE for incremental online learning model: {
+    #     mean_absolute_percentage_error(true_prediction, model_prediction) * 100
+    # }"""
+    # )
+    # print(
+    #     f"""RMSE for incremental online learning model: {
+    #     mean_squared_error(true_prediction, model_prediction, squared=False)
+    # }"""
+    # )
+
+    # dt_pred_df = dt_df[1000:-100]
+    # # dt_pred_df = dt_df[1000:-83]
+    # if (
+    #     len(dt_pred_df)
+    #     == len(true_prediction)
+    #     == len(model_prediction)
+    #     == len(base_model_prediction)
+    # ):
+    #     dt_pred_df["true_prediction"] = true_prediction
+    #     dt_pred_df["model_prediction"] = model_prediction
+    #     dt_pred_df["base_model_prediction"] = base_model_prediction
+    # else:
+    #     print(
+    #         len(true_prediction),
+    #         len(model_prediction),
+    #         len(base_model_prediction),
+    #         dt_pred_df.shape,
+    #     )
+    #     print("Error: The lengths of the arrays and the DataFrame did not match.")
+
+    # plt.figure(figsize=(100, 20))
+    #
+    # x = list(range(len(true_prediction)))
+    # plt.plot(x, true_prediction, label="True dwell time")
+    # plt.plot(x, base_model_prediction, label="Base model prediction")
+    # plt.plot(x, model_prediction, label="Prediction with incremental online learning")
+    #
+    # plt.xlabel("X")
+    # plt.ylabel("Dwell time (s)")
+    # plt.title("Multiple Lines on a Common Graph")
+    #
+    # plt.legend()
+    # plt.savefig("dt.png", dpi=150)
+
+    df = result_dt_df
+    df["date"] = to_datetime(df["date"])
+    df["arrival_time"] = to_datetime(df["arrival_time"], format="%H:%M:%S")
+
+    directions = df["direction"].unique()
+    bus_stops = df["bus_stop"].unique()
+
+    starting_time = datetime.strptime("06:00:00", "%H:%M:%S")
+    ending_time = datetime.strptime("18:00:00", "%H:%M:%S")
+
+    from_time = starting_time
+
+    while from_time < ending_time:
+        to_time = from_time + timedelta(minutes=60)
+        print(
+            f"\rGENERATING & EXPORTING PLOTS FOR EACH BUS STOP: [{from_time.strftime('%H:%M:%S')} - {to_time.strftime('%H:%M:%S')})",
+            end="",
+            flush=True,
+        )
+
+        for direction in directions:
+            for bus_stop in bus_stops:
+                filtered_df = df[
+                    (df["direction"] == direction)
+                    & (df["bus_stop"] == bus_stop)
+                    & (df["arrival_time"].dt.time >= from_time.time())
+                    & (df["arrival_time"].dt.time < to_time.time())
+                ]
+
+                export_at = os.path.join(
+                    output_dir,
+                    f"dt-d-{direction}-ti-{from_time.strftime('%H-%M-%S')}_{to_time.strftime('%H-%M-%S')}",
+                )
+                os.makedirs(export_at, exist_ok=True)
+
+                if len(filtered_df) > 0:
+                    export_mean_dt_plot_as_image(
+                        df=filtered_df,
+                        bus_stop=bus_stop,
+                        starting_time=from_time,
+                        ending_time=to_time,
+                        export_at=export_at,
+                    )
+
+        from_time = to_time
+
+    print("\rGENERATING & EXPORTING PLOTS FOR EACH BUS STOP ENDED.", flush=True)
+
+
+def export_mean_dt_plot_as_image(
+    df: DataFrame,
+    bus_stop: int,
+    starting_time,
+    ending_time,
+    export_at: str,
+) -> None:
+    dt_in_seconds_df = (
+        df.groupby("date")["dwell_time_in_seconds"]
+        .mean()
+        .reset_index()
+        .sort_values(by="date")
+    )
+
+    base_model_prediction_df = (
+        df.groupby("date")["base_model_prediction"]
+        .mean()
+        .reset_index()
+        .sort_values(by="date")
+    )
+
+    model_prediction_df = (
+        df.groupby("date")["model_prediction"]
+        .mean()
+        .reset_index()
+        .sort_values(by="date")
+    )
+
+    x = model_prediction_df["date"]
+
+    plt.figure(figsize=(13, 5))
+    plt.plot(
+        x,
+        dt_in_seconds_df["dwell_time_in_seconds"],
+        label="True dwell time",
+        marker="o",
+        linestyle="-",
+        color="blue",
+    )
+    plt.plot(
+        x,
+        base_model_prediction_df["base_model_prediction"],
+        label="Base model prediction",
+        marker="o",
+        linestyle="--",
+        color="green",
+    )
+    plt.plot(
+        x,
+        model_prediction_df["model_prediction"],
+        label="Prediction with incremental online learning",
+        marker="o",
+        linestyle="--",
+        color="orange",
+    )
+    plt.xlabel("Date")
+    plt.ylabel("Mean dwell time (s)")
+    plt.title(
+        f"The mean dwell time at bus stop {bus_stop} for each day between {starting_time.time()} and {ending_time.time()}."
+    )
+    plt.xticks(rotation=45)
+    plt.grid(True)
+    plt.legend()
+    plt.tight_layout()
+    plt.savefig(f"{export_at}/{bus_stop}.png", dpi=200)
+    plt.close()
diff --git a/src/models/use_cases/__init__.py b/src/models/use_cases/__init__.py
index b0869e2..a86ab28 100644
--- a/src/models/use_cases/__init__.py
+++ b/src/models/use_cases/__init__.py
@@ -1,3 +1,4 @@
 from . import (
     arrival_time,
+    dwell_time,
 )
diff --git a/src/models/use_cases/dwell_time/__init__.py b/src/models/use_cases/dwell_time/__init__.py
new file mode 100644
index 0000000..aa24235
--- /dev/null
+++ b/src/models/use_cases/dwell_time/__init__.py
@@ -0,0 +1,3 @@
+from . import (
+    bus,
+)
diff --git a/src/models/use_cases/dwell_time/bus/__init__.py b/src/models/use_cases/dwell_time/bus/__init__.py
new file mode 100644
index 0000000..8ac1cd3
--- /dev/null
+++ b/src/models/use_cases/dwell_time/bus/__init__.py
@@ -0,0 +1 @@
+from .mme4bdt import MME4BDT
diff --git a/src/models/use_cases/dwell_time/bus/mme4bdt.py b/src/models/use_cases/dwell_time/bus/mme4bdt.py
new file mode 100644
index 0000000..c089636
--- /dev/null
+++ b/src/models/use_cases/dwell_time/bus/mme4bdt.py
@@ -0,0 +1,73 @@
+from copy import deepcopy
+from typing import Optional
+
+from src.concept_drift_detector import CDD
+from src.concept_drift_detector.strategies import IStrategy
+from src.models.base_models.ensemble.xgboost import XGBClassifier, XGBRegressor
+
+
+class MME4BDT:
+    def __init__(self, cdd_strategy: Optional[IStrategy] = None) -> None:
+        self.xgb_dt_classifier = XGBClassifier()
+        self.xgb_dt_regressor = XGBRegressor()
+
+        if cdd_strategy:
+            self._cdd_of_xgb_dt_classifier = CDD(
+                strategy=deepcopy(cdd_strategy),
+            )
+            self._cdd_of_xgb_dt_regressor = CDD(
+                strategy=deepcopy(cdd_strategy),
+            )
+
+    def fit(self, dt_x, dt_y) -> None:
+        is_dt_gt_0 = dt_y.iloc[:, 0].apply(lambda dt: 1 if dt > 0 else 0)
+        self.xgb_dt_classifier.fit(dt_x, is_dt_gt_0)
+
+        dt_x_gt_0 = dt_x[dt_y.iloc[:, 0] > 0]
+        dt_y_gt_0 = dt_y[dt_y.iloc[:, 0] > 0]
+        self.xgb_dt_regressor.fit(dt_x_gt_0, dt_y_gt_0)
+
+    def incremental_fit(self, ni_dt_x, ni_dt_y) -> None:
+        is_ni_dt_gt_0 = ni_dt_y.iloc[:, 0].apply(lambda dt: 1 if dt > 0 else 0)
+        self.xgb_dt_classifier.incremental_fit(ni_dt_x, is_ni_dt_gt_0)
+
+        ni_dt_x_gt_0 = ni_dt_x[ni_dt_y.iloc[:, 0] > 0]
+        ni_dt_y_gt_0 = ni_dt_y[ni_dt_y.iloc[:, 0] > 0]
+        self.xgb_dt_regressor.incremental_fit(ni_dt_x_gt_0, ni_dt_y_gt_0)
+
+    def predict(self, dt_x):
+        is_dt_gt_0 = self.xgb_dt_classifier.predict(dt_x)
+
+        dt_x_gt_0 = dt_x[is_dt_gt_0.iloc[:, 0] == 1]
+        dt_y_gt_0 = self.xgb_dt_regressor.predict(dt_x_gt_0)
+
+        dt_y = is_dt_gt_0.copy()
+        dt_y.loc[dt_y["prediction"] > 0, "prediction"] = dt_y_gt_0[
+            "prediction"
+        ].to_numpy()
+        dt_y.loc[dt_y["prediction"] < 0, "prediction"] = 0
+
+        return dt_y
+
+    def is_concept_drift_detected(self, ni_dt_x, ni_dt_y) -> bool:
+        try:
+            is_detected_1 = self._cdd_of_xgb_dt_classifier.is_concept_drift_detected(
+                model=self.xgb_dt_classifier,
+                ni_x=ni_dt_x,
+                ni_y=ni_dt_y,
+            )
+
+            ni_dt_x_gt_0 = ni_dt_x[ni_dt_y.iloc[:, 0] > 0]
+            ni_dt_y_gt_0 = ni_dt_y[ni_dt_y.iloc[:, 0] > 0]
+
+            is_detected_2 = self._cdd_of_xgb_dt_regressor.is_concept_drift_detected(
+                model=self.xgb_dt_regressor,
+                ni_x=ni_dt_x_gt_0,
+                ni_y=ni_dt_y_gt_0,
+            )
+            print(
+                f" | CDD at xgb_dt_classifier: {is_detected_1} | CDD at xgb_dt_regressor: {is_detected_2}"
+            )
+            return is_detected_1 or is_detected_2
+        except NameError as e:
+            raise e
diff --git a/src/pipeline.py b/src/pipeline.py
index 4c32864..5175bce 100644
--- a/src/pipeline.py
+++ b/src/pipeline.py
@@ -7,21 +7,15 @@
 import matplotlib.pyplot as plt
 from numpy import mean
 from pandas import concat, DataFrame, to_datetime
-from pandas.errors import SettingWithCopyWarning
 from sklearn.metrics import (
     mean_absolute_error,
-    mean_absolute_percentage_error,
     root_mean_squared_error,
 )
 from src.concept_drift_detector.strategies import IStrategy
-from src.datasets import (
-    BUS_654_FEATURES_ADDED_RUNNING_TIMES,
-    BUS_654_FEATURES_ENCODED_DWELL_TIMES,
-)
-from src.models.use_cases.arrival_time.bus import MME4BAT
+from src.models.use_cases.dwell_time.bus import MME4BDT
 
 
-def run_exp(
+def run_dt_exp(
     dt_df: DataFrame,
     hist_start: datetime,
     hist_end: datetime,
@@ -37,7 +31,7 @@ def run_exp(
     label: Optional[str] = "",
 ) -> None:
     """
-    Run the experiment.
+    Run a dwell time experiment.
 
     Args:
         dt_df: Dwell time dataframe to contact the experiment.
@@ -57,7 +51,6 @@ def run_exp(
     Returns:
         None
     """
-    # To do: warnings.filterwarnings("ignore", category=SettingWithCopyWarning)
     warnings.filterwarnings("ignore")
 
     if interval_min == 0 and chunk_size == 0:
@@ -83,16 +76,13 @@ def run_exp(
 
     print(f"BATCH PROCESSING TECHNIQUE: {bp_technique} | CONCEPT DRIFT HANDLING STRATEGY: {strategy}")
 
-    rt_df: DataFrame = BUS_654_FEATURES_ADDED_RUNNING_TIMES.dataframe
-    # dt_df: DataFrame = BUS_654_FEATURES_ENCODED_DWELL_TIMES.dataframe
-
     dt_df["arrival_datetime"] = to_datetime(
         dt_df["date"] + " " + dt_df["arrival_time"], format="%Y-%m-%d %H:%M:%S"
     )
     dt_df.sort_values(by="arrival_datetime", inplace=True)
 
-    base_model: Optional[MME4BAT] = None
-    model: Optional[MME4BAT] = None
+    base_model: Optional[MME4BDT] = None
+    model: Optional[MME4BDT] = None
 
     result_dt_df = DataFrame(
         columns=dt_df.columns.tolist()
@@ -157,19 +147,19 @@ def run_exp(
             dt_y: DataFrame = numeric_dt_chunk[["dwell_time_in_seconds"]]
 
             if not model:
-                base_model = MME4BAT()
-                model = MME4BAT(cdd_strategy=cdd_strategy)
+                base_model = MME4BDT()
+                model = MME4BDT(cdd_strategy=cdd_strategy)
 
-                base_model.fit(rt_x=None, rt_y=None, dt_x=dt_x, dt_y=dt_y)
-                model.fit(rt_x=None, rt_y=None, dt_x=dt_x, dt_y=dt_y)
+                base_model.fit(dt_x=dt_x, dt_y=dt_y)
+                model.fit(dt_x=dt_x, dt_y=dt_y)
 
                 print(" | MODEL INITIATED")
             else:
                 true_prediction = dt_y["dwell_time_in_seconds"].tolist()
-                base_model_prediction = base_model.predict(rt_x=None, dt_x=dt_x)[
+                base_model_prediction = base_model.predict(dt_x=dt_x)[
                     "prediction"
                 ].tolist()
-                model_prediction = model.predict(rt_x=None, dt_x=dt_x)[
+                model_prediction = model.predict(dt_x=dt_x)[
                     "prediction"
                 ].tolist()
 
@@ -200,12 +190,10 @@ def run_exp(
                         ni_dt_y = dt_y
 
                     is_detected = model.is_concept_drift_detected(
-                        ni_rt_x=None, ni_rt_y=None, ni_dt_x=dt_x, ni_dt_y=dt_y
+                        ni_dt_x=dt_x, ni_dt_y=dt_y
                     )
                     if is_detected:
                         model.incremental_fit(
-                            ni_rt_x=None,
-                            ni_rt_y=None,
                             ni_dt_x=ni_dt_x,
                             ni_dt_y=ni_dt_y,
                         )
@@ -213,7 +201,7 @@ def run_exp(
                         dt_y_buffer = None
                 else:
                     model.incremental_fit(
-                        ni_rt_x=None, ni_rt_y=None, ni_dt_x=dt_x, ni_dt_y=dt_y
+                        ni_dt_x=dt_x, ni_dt_y=dt_y
                     )
                     print()
 
@@ -283,64 +271,6 @@ def run_exp(
 
     print("\rGENERATING & EXPORTING THE RESULTS ENDED.", flush=True)
 
-    # print(
-    #     f"MAE for base model: {mean_absolute_error(true_prediction, base_model_prediction)}"
-    # )
-    # print(
-    #     f"MAPE for base model: {mean_absolute_percentage_error(true_prediction, base_model_prediction) * 100}"
-    # )
-    # print(
-    #     f"RMSE for base model: {mean_squared_error(true_prediction, base_model_prediction, squared=False)}"
-    # )
-    # print("-----------------------------------------------------------------")
-    # print(
-    #     f"MAE for incremental online learning model: {mean_absolute_error(true_prediction, model_prediction)}"
-    # )
-    # print(
-    #     f"""MAPE for incremental online learning model: {
-    #     mean_absolute_percentage_error(true_prediction, model_prediction) * 100
-    # }"""
-    # )
-    # print(
-    #     f"""RMSE for incremental online learning model: {
-    #     mean_squared_error(true_prediction, model_prediction, squared=False)
-    # }"""
-    # )
-
-    # dt_pred_df = dt_df[1000:-100]
-    # # dt_pred_df = dt_df[1000:-83]
-    # if (
-    #     len(dt_pred_df)
-    #     == len(true_prediction)
-    #     == len(model_prediction)
-    #     == len(base_model_prediction)
-    # ):
-    #     dt_pred_df["true_prediction"] = true_prediction
-    #     dt_pred_df["model_prediction"] = model_prediction
-    #     dt_pred_df["base_model_prediction"] = base_model_prediction
-    # else:
-    #     print(
-    #         len(true_prediction),
-    #         len(model_prediction),
-    #         len(base_model_prediction),
-    #         dt_pred_df.shape,
-    #     )
-    #     print("Error: The lengths of the arrays and the DataFrame did not match.")
-
-    # plt.figure(figsize=(100, 20))
-    #
-    # x = list(range(len(true_prediction)))
-    # plt.plot(x, true_prediction, label="True dwell time")
-    # plt.plot(x, base_model_prediction, label="Base model prediction")
-    # plt.plot(x, model_prediction, label="Prediction with incremental online learning")
-    #
-    # plt.xlabel("X")
-    # plt.ylabel("Dwell time (s)")
-    # plt.title("Multiple Lines on a Common Graph")
-    #
-    # plt.legend()
-    # plt.savefig("dt.png", dpi=150)
-
     df = result_dt_df
     df["date"] = to_datetime(df["date"])
     df["arrival_time"] = to_datetime(df["arrival_time"], format="%H:%M:%S")