From a7992adb3c62dffbd6a5b07db2c99117f3a7283f Mon Sep 17 00:00:00 2001
From: Kelly Castro <kelly@decastro.com>
Date: Thu, 17 Oct 2024 01:27:50 -0300
Subject: [PATCH 1/2] Added functions for detection and treatment of outliers

---
 bibmon/_outlier_handling.py   |  60 ++++++++++++
 bibmon/_preprocess.py         | 174 ++++++++--------------------------
 test/test_outlier_handling.py |  52 ++++++++++
 3 files changed, 150 insertions(+), 136 deletions(-)
 create mode 100644 bibmon/_outlier_handling.py
 create mode 100644 test/test_outlier_handling.py

diff --git a/bibmon/_outlier_handling.py b/bibmon/_outlier_handling.py
new file mode 100644
index 0000000..7007a2a
--- /dev/null
+++ b/bibmon/_outlier_handling.py
@@ -0,0 +1,60 @@
+import pandas as pd
+import numpy as np
+from scipy.stats.mstats import winsorize
+
+def detect_outliers_iqr(df: pd.DataFrame, cols: list) -> pd.DataFrame:
+    """
+    Detects outliers in a DataFrame using the IQR (Interquartile Range) method.
+
+    Args:
+        df (pd.DataFrame): DataFrame with the data.
+        cols (list): List of columns for which outliers will be detected.
+
+    Returns:
+        pd.DataFrame: DataFrame with outliers flagged as 1 and other points as 0.
+    """
+
+    df_outliers = df.copy()
+    for col in cols:
+        Q1 = df_outliers[col].quantile(0.25)
+        Q3 = df_outliers[col].quantile(0.75)
+        IQR = Q3 - Q1
+        lower_bound = Q1 - 1.5 * IQR
+        upper_bound = Q3 + 1.5 * IQR
+        df_outliers[col] = ((df_outliers[col] < lower_bound) | (df_outliers[col] > upper_bound)).astype(int)
+    return df_outliers
+
+def remove_outliers(df: pd.DataFrame, cols: list, method: str = 'remove') -> pd.DataFrame:
+    """
+    Removes or handles outliers in a DataFrame using the IQR (Interquartile Range) method.
+
+    Args:
+        df (pd.DataFrame): DataFrame with the data.
+        cols (list): List of columns for which outliers will be removed or handled.
+        method (str): Method for handling outliers. Can be 'remove' (removes outliers),
+                      'median' (replaces outliers with the median), or 'winsorize' (applies winsorization).
+                      Default: 'remove'.
+
+    Returns:
+        pd.DataFrame: DataFrame with outliers removed or handled.
+    """
+
+    df_outliers = df.copy()
+    for col in cols:
+        Q1 = df_outliers[col].quantile(0.25)
+        Q3 = df_outliers[col].quantile(0.75)
+        IQR = Q3 - Q1
+        lower_bound = Q1 - 1.5 * IQR
+        upper_bound = Q3 + 1.5 * IQR
+
+        if method == 'remove':
+            df_outliers = df_outliers[(df_outliers[col] >= lower_bound) & (df_outliers[col] <= upper_bound)]
+        elif method == 'median':
+            median = df_outliers[col].median()
+            df_outliers.loc[(df_outliers[col] < lower_bound) | (df_outliers[col] > upper_bound), col] = median
+        elif method == 'winsorize':
+            df_outliers[col] = winsorize(df_outliers[col], limits=[0.05, 0.05])
+        else:
+            raise ValueError("Invalid method. Choose between 'remove', 'median', or 'winsorize'.")
+
+    return df_outliers
\ No newline at end of file
diff --git a/bibmon/_preprocess.py b/bibmon/_preprocess.py
index 0a85adb..7755d22 100644
--- a/bibmon/_preprocess.py
+++ b/bibmon/_preprocess.py
@@ -1,6 +1,7 @@
 import copy
 import pandas as pd
 import statsmodels.tsa.tsatools
+from ._outlier_handling import detect_outliers_iqr, remove_outliers
 
 ###############################################################################
 
@@ -10,7 +11,7 @@ class PreProcess ():
     
     Parameters
     ----------
-            
+        
         f_pp: list, optional
             List containing strings with names of methods to be used 
             in the preprocessing of the train data. The list of methods 
@@ -42,19 +43,23 @@ class PreProcess ():
         add_moving_average()
         
     * Noise treatment:
-        moving_average_filter()       
+        moving_average_filter()      
+    
+    * Outlier handling:
+        detect_outliers_iqr();
+        remove_outliers()
     
     """
-            
+        
     ###########################################################################
     
     def __init__(self, f_pp = None, a_pp = None, is_Y = False):
- 
+
         self.is_Y = is_Y
-        self.f_pp = f_pp        
-        self._a_pp = a_pp     
+        self.f_pp = f_pp         
+        self._a_pp = a_pp      
         if self.f_pp is not None:
-            self.params_per_func = {f: {} for f in f_pp}        
+            self.params_per_func = {f: {} for f in f_pp}         
 
     ###########################################################################
 
@@ -74,7 +79,7 @@ def a_pp(self, a_pp):
             self.params_per_func = {f: {} for f in self.f_pp}
             
             if a_pp is not None:
-                                
+                                  
                 for pname, pval in a_pp.items():
                     func, param = pname.split('__',1)
                     self.params_per_func[func][param] = pval
@@ -93,10 +98,10 @@ def apply(self, df, train_or_test = 'train'):
         train_or_test: string, optional
             Indicates which step the data corresponds to.
         Returns
-        ----------                
+        ----------                 
         : pandas.DataFrame
-           Processed data.
-        """         
+            Processed data.
+        """        
         
         df_processed = df
         
@@ -105,7 +110,7 @@ def apply(self, df, train_or_test = 'train'):
             df_processed = getattr(self, f)(df_processed,
                                             train_or_test, 
                                             **self.params_per_func[f])
-                
+            
         return df_processed
     
     ######################### 
@@ -125,10 +130,10 @@ def remove_empty_variables (self, df, train_or_test = 'train'):
         train_or_test: string, optional
             Indicates which step the data corresponds to.
         Returns
-        ----------                
+        ----------                 
         : pandas.DataFrame
-           Processed data.
-        """           
+            Processed data.
+        """        
         if train_or_test == 'train':
             return df.dropna(axis=1, how='all')
         elif train_or_test == 'test':
@@ -137,7 +142,7 @@ def remove_empty_variables (self, df, train_or_test = 'train'):
     ###########################################################################
 
     def remove_frozen_variables (self, df, train_or_test = 'train',
-                                 threshold = 1e-6): 
+                                    threshold = 1e-6): 
         """
         Removes variables whose variation falls below a given limit.
     
@@ -150,10 +155,10 @@ def remove_frozen_variables (self, df, train_or_test = 'train',
         threshold: float, optional
             Variance limit to consider a variable as frozen.
         Returns
-        ----------                
+        ----------                 
         : pandas.DataFrame
-           Processed data.
-        """                                              
+            Processed data.
+        """                                                                  
         if not self.is_Y:
             if train_or_test == 'train':
                 return df.loc[:, df.var(ddof=1) > threshold]
@@ -180,9 +185,9 @@ def ffill_nan (self, df, train_or_test = 'train'):
         train_or_test: string, optional
             Indicates which step the data corresponds to.
         Returns
-        ----------                
+        ----------                 
         : pandas.DataFrame
-           Processed data.
+            Processed data.
         """    
         return df.ffill().bfill()
 
@@ -199,9 +204,9 @@ def remove_observations_with_nan (self, df, train_or_test = 'train'):
         train_or_test: string, optional
             Indicates which step the data corresponds to.
         Returns
-        ----------                
+        ----------                 
         : pandas.DataFrame
-           Processed data.
+            Processed data.
         """    
         return df.dropna(axis=0, how='any')
     
@@ -220,11 +225,10 @@ def replace_nan_with_values (self, df, train_or_test = 'train', val = 0):
         val: int or float
             Value to be used in the replacement.
         Returns
-        ----------                
+        ----------                 
         : pandas.DataFrame
-        Processed data.
-        """    
-                                    
+            Processed data.
+        """                                          
         return df.fillna(val)
 
     ###############
@@ -243,9 +247,9 @@ def back_to_units (self, df):
         df: pandas.DataFrame
             Data to be processed.
         Returns
-        ----------                
+        ----------                 
         : pandas.DataFrame
-        Processed data.
+            Processed data.
         """    
         return df*self.SD + self.Mu
     
@@ -264,9 +268,9 @@ def normalize (self, df, train_or_test = 'train', mode = 'standard'):
         mode: string, optional
             Type of normalization (standard, robust, m-robust or s-robust).
         Returns
-        ----------                
+        ----------                 
         : pandas.DataFrame
-        Processed data.
+            Processed data.
         """    
         if train_or_test == 'train':
             
@@ -275,13 +279,13 @@ def normalize (self, df, train_or_test = 'train', mode = 'standard'):
                 self.SD = df.std(ddof=1)
             elif mode == 'robust':
                 self.Mu = df.median()
-                self.SD = df.mad()               
+                self.SD = df.mad()                   
             elif mode == 'm-robust':
                 self.Mu = df.median()
                 self.SD = df.std(ddof=1)
             elif mode == 's-robust':
                 self.Mu = df.mean()
-                self.SD = df.mad()               
+                self.SD = df.mad()                   
             
             return (df - self.Mu)/self.SD
         
@@ -289,106 +293,4 @@ def normalize (self, df, train_or_test = 'train', mode = 'standard'):
         
             return (df - self.Mu)/self.SD
 
-    ##############################
-    # ADDING DYNAMICS
-    ##############################
-        
-    ###########################################################################
-
-    def apply_lag (self, df, train_or_test = 'train', lag = 1):
-        """
-        Generation of time-delayed variables.
-
-        Parameters
-        ----------
-        df: pandas.DataFrame
-            Data to be processed.
-        train_or_test: string, optional
-            Indicates which step the data corresponds to.
-        lag: int, optional
-            Number of delays to be considered.
-        Returns
-        ----------                
-        : pandas.DataFrame
-        Processed data.
-        """    
-                        
-        if self.is_Y:
-            return df.iloc[lag:,:]
-        else:    
-            array_lagged = statsmodels.tsa.tsatools.lagmat(df, maxlag = lag, 
-                                                           trim = "forward", 
-                                                       original = 'in')[lag:,:]   
-            new_columns = []
-            for l in range(lag):
-                new_columns.append(df.columns+' - lag '+str(l+1))
-            columns_lagged = df.columns.append(new_columns)
-            index_lagged = df.index[lag:]
-            df_lagged = pd.DataFrame(array_lagged, index = index_lagged,
-                                     columns = columns_lagged)
-            
-            return df_lagged  
-        
-    ###########################################################################
-
-    def add_moving_average (self, df, train_or_test = 'train', WS = 10):
-        """
-        Adding variables filtered by moving average.
-        Attention! Do not confuse with moving_average_filter, in which
-        the original variables are not kept in the dataset.
-
-        Parameters
-        ----------
-        df: pandas.DataFrame
-            Data to be processed.
-        train_or_test: string, optional
-            Indicates which step the data corresponds to.
-        WS: int, optional
-            Window size of the filter.
-        Returns
-        ----------                
-        : pandas.DataFrame
-        Processed data.
-        """    
-        if self.is_Y:
-            return df
-                
-        new_df = copy.deepcopy(df)
-                
-        for column in df:
-            new_df[column+' MA'] = new_df[column].rolling(WS).mean()
-        
-        return new_df.drop(df.index[:WS])
-
-    ##############################
-    # NOISE TREATMENT
-    ##############################
-
-    ###########################################################################
-
-    def moving_average_filter (self, df,  train_or_test = 'train', WS = 10):
-        """
-        Moving average noise filter.
-
-        Parameters
-        ----------
-        df: pandas.DataFrame
-            Data to be processed.
-        train_or_test: string, optional
-            Indicates which step the data corresponds to.
-        WS: int, optional
-            Window size of the filter.
-        Returns
-        ----------                
-        : pandas.DataFrame
-        Processed data.
-        """    
-        new_df = copy.deepcopy(df)
-                
-        for column in df:
-            new_df[column] = new_df[column].rolling(WS).mean()
-            
-        if hasattr(df,'name'):
-            new_df.name = df.name
-                        
-        return new_df.drop(df.index[:WS])
\ No newline at end of file
+    ##############################
\ No newline at end of file
diff --git a/test/test_outlier_handling.py b/test/test_outlier_handling.py
new file mode 100644
index 0000000..e862140
--- /dev/null
+++ b/test/test_outlier_handling.py
@@ -0,0 +1,52 @@
+import unittest
+import pandas as pd
+from bibmon._outlier_handling import detect_outliers_iqr, remove_outliers
+
+class TestOutlierHandling(unittest.TestCase):
+
+    def test_detect_outliers_iqr(self):
+        # Create a sample DataFrame with outliers
+        data = {'col1': [1, 2, 3, 4, 5, 100]}
+        df = pd.DataFrame(data)
+
+        # Run the detect_outliers_iqr function
+        df_outliers = detect_outliers_iqr(df, ['col1'])
+
+        # Check if the outlier was detected correctly
+        self.assertEqual(df_outliers['col1'].tolist(), [0, 0, 0, 0, 0, 1])
+
+    def test_remove_outliers_remove(self):
+        # Create a sample DataFrame with outliers
+        data = {'col1': [1, 2, 3, 4, 5, 100]}
+        df = pd.DataFrame(data)
+
+        # Run the remove_outliers function with method='remove'
+        df_outliers = remove_outliers(df, ['col1'], method='remove')
+
+        # Check if the outlier was removed correctly
+        self.assertEqual(df_outliers['col1'].tolist(), [1, 2, 3, 4, 5])
+
+    def test_remove_outliers_median(self):
+        # Create a sample DataFrame with outliers
+        data = {'col1': [1, 2, 3, 4, 5, 100]}
+        df = pd.DataFrame(data)
+
+        # Run the remove_outliers function with method='median'
+        df_outliers = remove_outliers(df, ['col1'], method='median')
+
+        # Check if the outlier was replaced by the median
+        self.assertEqual(df_outliers['col1'].tolist(), [1, 2, 3, 4, 5, 3])
+
+    def test_remove_outliers_winsorize(self):
+        # Create a sample DataFrame with outliers
+        data = {'col1': [1, 2, 3, 4, 5, 100]}
+        df = pd.DataFrame(data)
+
+        # Run the remove_outliers function with method='winsorize'
+        df_outliers = remove_outliers(df, ['col1'], method='winsorize')
+
+        # Check if the outlier was winsorized
+        self.assertTrue(df_outliers['col1'].tolist()[-1] < 100)  # Check if the value was limited
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file

From 8b78bdd72d97e37adb2a651d38a376fc8e721b15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Afr=C3=A2nio=20Melo?=
 <40374017+afraniomelo@users.noreply.github.com>
Date: Fri, 10 Jan 2025 12:59:23 -0300
Subject: [PATCH 2/2] adjustments to incorporate pr53

---
 bibmon/_bibmon_tools.py       |  37 ++++++
 bibmon/_outlier_handling.py   |  60 ---------
 bibmon/_preprocess.py         | 243 ++++++++++++++++++++++++++++------
 test/test_outlier_handling.py |  52 --------
 4 files changed, 243 insertions(+), 149 deletions(-)
 delete mode 100644 bibmon/_outlier_handling.py
 delete mode 100644 test/test_outlier_handling.py

diff --git a/bibmon/_bibmon_tools.py b/bibmon/_bibmon_tools.py
index fd598d9..02aee50 100644
--- a/bibmon/_bibmon_tools.py
+++ b/bibmon/_bibmon_tools.py
@@ -66,6 +66,43 @@ def create_df_with_noise (array,
     return df
     
 ###############################################################################
+def detect_outliers_iqr(df: pd.DataFrame,
+                        cols: list = None) -> pd.DataFrame:
+    """
+    Flags univariate outliers in a DataFrame according to the
+    IQR (Interquartile Range) rule.
+
+    A value is flagged when it lies more than 1.5 * IQR below the first
+    quartile or above the third quartile of its column.
+
+    Parameters
+    ----------
+    df: pandas.DataFrame
+        Data to be processed (it is not modified).
+    cols: list, optional
+        Columns in which outliers will be searched.
+        Default: None (all columns are considered).
+    Returns
+    ----------
+    : pandas.DataFrame
+        Copy of df in which the analyzed columns hold 1 for
+        outliers and 0 for the remaining points.
+    """
+    flagged = df.copy()
+    targets = list(df.columns) if cols is None else cols
+
+    for name in targets:
+        series = flagged[name]
+        q1, q3 = series.quantile(0.25), series.quantile(0.75)
+        whisker = 1.5 * (q3 - q1)
+        below = series < q1 - whisker
+        above = series > q3 + whisker
+        flagged[name] = (below | above).astype(int)
+
+    return flagged
+
+###############################################################################
+
 
 def align_dfs_by_rows (df1, df2):
     """
diff --git a/bibmon/_outlier_handling.py b/bibmon/_outlier_handling.py
deleted file mode 100644
index 7007a2a..0000000
--- a/bibmon/_outlier_handling.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import pandas as pd
-import numpy as np
-from scipy.stats.mstats import winsorize
-
-def detect_outliers_iqr(df: pd.DataFrame, cols: list) -> pd.DataFrame:
-    """
-    Detects outliers in a DataFrame using the IQR (Interquartile Range) method.
-
-    Args:
-        df (pd.DataFrame): DataFrame with the data.
-        cols (list): List of columns for which outliers will be detected.
-
-    Returns:
-        pd.DataFrame: DataFrame with outliers flagged as 1 and other points as 0.
-    """
-
-    df_outliers = df.copy()
-    for col in cols:
-        Q1 = df_outliers[col].quantile(0.25)
-        Q3 = df_outliers[col].quantile(0.75)
-        IQR = Q3 - Q1
-        lower_bound = Q1 - 1.5 * IQR
-        upper_bound = Q3 + 1.5 * IQR
-        df_outliers[col] = ((df_outliers[col] < lower_bound) | (df_outliers[col] > upper_bound)).astype(int)
-    return df_outliers
-
-def remove_outliers(df: pd.DataFrame, cols: list, method: str = 'remove') -> pd.DataFrame:
-    """
-    Removes or handles outliers in a DataFrame using the IQR (Interquartile Range) method.
-
-    Args:
-        df (pd.DataFrame): DataFrame with the data.
-        cols (list): List of columns for which outliers will be removed or handled.
-        method (str): Method for handling outliers. Can be 'remove' (removes outliers),
-                      'median' (replaces outliers with the median), or 'winsorize' (applies winsorization).
-                      Default: 'remove'.
-
-    Returns:
-        pd.DataFrame: DataFrame with outliers removed or handled.
-    """
-
-    df_outliers = df.copy()
-    for col in cols:
-        Q1 = df_outliers[col].quantile(0.25)
-        Q3 = df_outliers[col].quantile(0.75)
-        IQR = Q3 - Q1
-        lower_bound = Q1 - 1.5 * IQR
-        upper_bound = Q3 + 1.5 * IQR
-
-        if method == 'remove':
-            df_outliers = df_outliers[(df_outliers[col] >= lower_bound) & (df_outliers[col] <= upper_bound)]
-        elif method == 'median':
-            median = df_outliers[col].median()
-            df_outliers.loc[(df_outliers[col] < lower_bound) | (df_outliers[col] > upper_bound), col] = median
-        elif method == 'winsorize':
-            df_outliers[col] = winsorize(df_outliers[col], limits=[0.05, 0.05])
-        else:
-            raise ValueError("Invalid method. Choose between 'remove', 'median', or 'winsorize'.")
-
-    return df_outliers
\ No newline at end of file
diff --git a/bibmon/_preprocess.py b/bibmon/_preprocess.py
index 7755d22..8a40de4 100644
--- a/bibmon/_preprocess.py
+++ b/bibmon/_preprocess.py
@@ -1,7 +1,7 @@
 import copy
 import pandas as pd
 import statsmodels.tsa.tsatools
-from ._outlier_handling import detect_outliers_iqr, remove_outliers
+import scipy.stats.mstats 
 
 ###############################################################################
 
@@ -11,7 +11,7 @@ class PreProcess ():
     
     Parameters
     ----------
-        
+            
         f_pp: list, optional
             List containing strings with names of methods to be used 
             in the preprocessing of the train data. The list of methods 
@@ -43,23 +43,22 @@ class PreProcess ():
         add_moving_average()
         
     * Noise treatment:
-        moving_average_filter()      
-    
+        moving_average_filter()   
+
     * Outlier handling:
-        detect_outliers_iqr();
-        remove_outliers()
+        process_outliers_iqr()
     
     """
-        
+            
     ###########################################################################
     
     def __init__(self, f_pp = None, a_pp = None, is_Y = False):
-
+ 
         self.is_Y = is_Y
-        self.f_pp = f_pp         
-        self._a_pp = a_pp      
+        self.f_pp = f_pp        
+        self._a_pp = a_pp     
         if self.f_pp is not None:
-            self.params_per_func = {f: {} for f in f_pp}         
+            self.params_per_func = {f: {} for f in f_pp}        
 
     ###########################################################################
 
@@ -79,7 +78,7 @@ def a_pp(self, a_pp):
             self.params_per_func = {f: {} for f in self.f_pp}
             
             if a_pp is not None:
-                                  
+                                
                 for pname, pval in a_pp.items():
                     func, param = pname.split('__',1)
                     self.params_per_func[func][param] = pval
@@ -98,10 +97,10 @@ def apply(self, df, train_or_test = 'train'):
         train_or_test: string, optional
             Indicates which step the data corresponds to.
         Returns
-        ----------                 
+        ----------                
         : pandas.DataFrame
-            Processed data.
-        """        
+           Processed data.
+        """         
         
         df_processed = df
         
@@ -110,7 +109,7 @@ def apply(self, df, train_or_test = 'train'):
             df_processed = getattr(self, f)(df_processed,
                                             train_or_test, 
                                             **self.params_per_func[f])
-            
+                
         return df_processed
     
     ######################### 
@@ -130,10 +129,10 @@ def remove_empty_variables (self, df, train_or_test = 'train'):
         train_or_test: string, optional
             Indicates which step the data corresponds to.
         Returns
-        ----------                 
+        ----------                
         : pandas.DataFrame
-            Processed data.
-        """        
+           Processed data.
+        """           
         if train_or_test == 'train':
             return df.dropna(axis=1, how='all')
         elif train_or_test == 'test':
@@ -142,7 +141,7 @@ def remove_empty_variables (self, df, train_or_test = 'train'):
     ###########################################################################
 
     def remove_frozen_variables (self, df, train_or_test = 'train',
-                                    threshold = 1e-6): 
+                                 threshold = 1e-6): 
         """
         Removes variables whose variation falls below a given limit.
     
@@ -155,10 +154,10 @@ def remove_frozen_variables (self, df, train_or_test = 'train',
         threshold: float, optional
             Variance limit to consider a variable as frozen.
         Returns
-        ----------                 
+        ----------                
         : pandas.DataFrame
-            Processed data.
-        """                                                                  
+           Processed data.
+        """                                              
         if not self.is_Y:
             if train_or_test == 'train':
                 return df.loc[:, df.var(ddof=1) > threshold]
@@ -185,9 +184,9 @@ def ffill_nan (self, df, train_or_test = 'train'):
         train_or_test: string, optional
             Indicates which step the data corresponds to.
         Returns
-        ----------                 
+        ----------                
         : pandas.DataFrame
-            Processed data.
+           Processed data.
         """    
         return df.ffill().bfill()
 
@@ -204,9 +203,9 @@ def remove_observations_with_nan (self, df, train_or_test = 'train'):
         train_or_test: string, optional
             Indicates which step the data corresponds to.
         Returns
-        ----------                 
+        ----------                
         : pandas.DataFrame
-            Processed data.
+           Processed data.
         """    
         return df.dropna(axis=0, how='any')
     
@@ -225,10 +224,11 @@ def replace_nan_with_values (self, df, train_or_test = 'train', val = 0):
         val: int or float
             Value to be used in the replacement.
         Returns
-        ----------                 
+        ----------                
         : pandas.DataFrame
-            Processed data.
-        """                                          
+        Processed data.
+        """    
+                                    
         return df.fillna(val)
 
     ###############
@@ -247,9 +247,9 @@ def back_to_units (self, df):
         df: pandas.DataFrame
             Data to be processed.
         Returns
-        ----------                 
+        ----------                
         : pandas.DataFrame
-            Processed data.
+        Processed data.
         """    
         return df*self.SD + self.Mu
     
@@ -268,9 +268,9 @@ def normalize (self, df, train_or_test = 'train', mode = 'standard'):
         mode: string, optional
             Type of normalization (standard, robust, m-robust or s-robust).
         Returns
-        ----------                 
+        ----------                
         : pandas.DataFrame
-            Processed data.
+        Processed data.
         """    
         if train_or_test == 'train':
             
@@ -279,13 +279,13 @@ def normalize (self, df, train_or_test = 'train', mode = 'standard'):
                 self.SD = df.std(ddof=1)
             elif mode == 'robust':
                 self.Mu = df.median()
-                self.SD = df.mad()                   
+                self.SD = df.mad()
             elif mode == 'm-robust':
                 self.Mu = df.median()
                 self.SD = df.std(ddof=1)
             elif mode == 's-robust':
                 self.Mu = df.mean()
-                self.SD = df.mad()                   
+                self.SD = df.mad()
             
             return (df - self.Mu)/self.SD
         
@@ -293,4 +293,173 @@ def normalize (self, df, train_or_test = 'train', mode = 'standard'):
         
             return (df - self.Mu)/self.SD
 
-    ##############################
\ No newline at end of file
+    ##############################
+    # ADDING DYNAMICS
+    ##############################
+        
+    ###########################################################################
+
+    def apply_lag (self, df, train_or_test = 'train', lag = 1):
+        """
+        Builds time-delayed copies of the variables.
+
+        Parameters
+        ----------
+        df: pandas.DataFrame
+            Data to be processed.
+        train_or_test: string, optional
+            Indicates which step the data corresponds to.
+        lag: int, optional
+            Number of delays to be considered.
+        Returns
+        ----------
+        : pandas.DataFrame
+            For Y data, df without its first lag rows; otherwise a frame
+            labelled with the original columns plus ' - lag k' columns.
+        """
+        # Y data only loses its first lag rows (same trimming as X below)
+        if self.is_Y:
+            return df.iloc[lag:,:]
+
+        lagged_values = statsmodels.tsa.tsatools.lagmat(df,
+                                                        maxlag = lag,
+                                                        trim = "forward",
+                                                        original = 'in')
+        lag_labels = [df.columns+' - lag '+str(k)
+                      for k in range(1, lag+1)]
+
+        # values and index are both cut at position lag to stay aligned
+        return pd.DataFrame(lagged_values[lag:,:],
+                            index = df.index[lag:],
+                            columns = df.columns.append(lag_labels))
+        
+    ###########################################################################
+
+    def add_moving_average (self, df, train_or_test = 'train', WS = 10):
+        """
+        Appends moving-average versions of the variables to the data.
+        Attention! Unlike moving_average_filter, the original
+        variables are kept in the dataset.
+
+        Parameters
+        ----------
+        df: pandas.DataFrame
+            Data to be processed.
+        train_or_test: string, optional
+            Indicates which step the data corresponds to.
+        WS: int, optional
+            Window size of the filter.
+        Returns
+        ----------
+        : pandas.DataFrame
+            Processed data (Y data is returned unchanged).
+        """
+        if self.is_Y:
+            return df
+
+        augmented = copy.deepcopy(df)
+        for name in df:
+            averaged = augmented[name].rolling(WS).mean()
+            augmented[name+' MA'] = averaged
+        # rows labelled by the first WS index entries are discarded
+        return augmented.drop(df.index[:WS])
+
+    ##############################
+    # NOISE TREATMENT
+    ##############################
+
+    ###########################################################################
+
+    def moving_average_filter (self, df,  train_or_test = 'train', WS = 10):
+        """
+        Noise filter that replaces each variable by its moving average.
+
+        Parameters
+        ----------
+        df: pandas.DataFrame
+            Data to be processed.
+        train_or_test: string, optional
+            Indicates which step the data corresponds to.
+        WS: int, optional
+            Window size of the filter.
+        Returns
+        ----------
+        : pandas.DataFrame
+            Processed data.
+        """
+        filtered = copy.deepcopy(df)
+        for name in df:
+            filtered[name] = filtered[name].rolling(WS).mean()
+
+        # propagate the 'name' attribute when df has one
+        if hasattr(df,'name'):
+            filtered.name = df.name
+        # rows labelled by the first WS index entries are discarded
+        return filtered.drop(df.index[:WS])
+    
+    ####################
+    # OUTLIER HANDLING
+    ####################
+
+    ###########################################################################
+
+    def process_outliers_iqr(self, df: pd.DataFrame,
+                            train_or_test: str = 'train',
+                            cols: list = None,
+                            method: str = 'remove') -> pd.DataFrame:
+        """
+        Removes or handles univariate outliers in a DataFrame using
+        the IQR (Interquartile Range) method.
+
+        Parameters
+        ----------
+        df: pandas.DataFrame
+            Data to be processed.
+        train_or_test: string, optional
+            Indicates which step the data corresponds to.
+            Test data ('test') is returned unchanged.
+        cols: list, optional
+            List of columns for which outliers will be removed or handled.
+            Default: None (which results in considering all cols)
+        method: str
+            Method for handling outliers. Can be 'remove' (removes outliers),
+            'median' (replaces outliers with the median),
+            or 'winsorize' (applies winsorization).
+            Default: 'remove'. Any other value raises ValueError.
+        Returns
+        ----------
+        : pandas.DataFrame:
+            DataFrame with outliers removed or handled.
+        """
+
+        if train_or_test == 'test':
+            # it doesn't make sense to process outliers in the test data
+            return df
+
+        df_outliers = df.copy()
+
+        if cols is None:
+            cols = list(df.columns)
+
+        for col in cols:
+            Q1 = df_outliers[col].quantile(0.25)
+            Q3 = df_outliers[col].quantile(0.75)
+            IQR = Q3 - Q1
+            lower_bound = Q1 - 1.5 * IQR
+            upper_bound = Q3 + 1.5 * IQR
+
+            if method == 'remove':
+                df_outliers = df_outliers[(df_outliers[col] >= lower_bound) &
+                                           (df_outliers[col] <= upper_bound)]
+            elif method == 'median':
+                median = df_outliers[col].median()
+                df_outliers.loc[(df_outliers[col] < lower_bound) |
+                                (df_outliers[col] > upper_bound), col] = median
+            elif method == 'winsorize':
+                df_outliers[col]=scipy.stats.mstats.winsorize(df_outliers[col],
+                                                           limits=[0.05, 0.05])
+            else:
+                raise ValueError("Invalid method. Choose between 'remove', "
+                                 "'median', or 'winsorize'.")
+
+        return df_outliers
\ No newline at end of file
diff --git a/test/test_outlier_handling.py b/test/test_outlier_handling.py
deleted file mode 100644
index e862140..0000000
--- a/test/test_outlier_handling.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import unittest
-import pandas as pd
-from bibmon._outlier_handling import detect_outliers_iqr, remove_outliers
-
-class TestOutlierHandling(unittest.TestCase):
-
-    def test_detect_outliers_iqr(self):
-        # Create a sample DataFrame with outliers
-        data = {'col1': [1, 2, 3, 4, 5, 100]}
-        df = pd.DataFrame(data)
-
-        # Run the detect_outliers_iqr function
-        df_outliers = detect_outliers_iqr(df, ['col1'])
-
-        # Check if the outlier was detected correctly
-        self.assertEqual(df_outliers['col1'].tolist(), [0, 0, 0, 0, 0, 1])
-
-    def test_remove_outliers_remove(self):
-        # Create a sample DataFrame with outliers
-        data = {'col1': [1, 2, 3, 4, 5, 100]}
-        df = pd.DataFrame(data)
-
-        # Run the remove_outliers function with method='remove'
-        df_outliers = remove_outliers(df, ['col1'], method='remove')
-
-        # Check if the outlier was removed correctly
-        self.assertEqual(df_outliers['col1'].tolist(), [1, 2, 3, 4, 5])
-
-    def test_remove_outliers_median(self):
-        # Create a sample DataFrame with outliers
-        data = {'col1': [1, 2, 3, 4, 5, 100]}
-        df = pd.DataFrame(data)
-
-        # Run the remove_outliers function with method='median'
-        df_outliers = remove_outliers(df, ['col1'], method='median')
-
-        # Check if the outlier was replaced by the median
-        self.assertEqual(df_outliers['col1'].tolist(), [1, 2, 3, 4, 5, 3])
-
-    def test_remove_outliers_winsorize(self):
-        # Create a sample DataFrame with outliers
-        data = {'col1': [1, 2, 3, 4, 5, 100]}
-        df = pd.DataFrame(data)
-
-        # Run the remove_outliers function with method='winsorize'
-        df_outliers = remove_outliers(df, ['col1'], method='winsorize')
-
-        # Check if the outlier was winsorized
-        self.assertTrue(df_outliers['col1'].tolist()[-1] < 100)  # Check if the value was limited
-
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file