Skip to content

Commit

Permalink
Merge pull request #71 from afraniomelo/main
Browse files Browse the repository at this point in the history
PR-67 Adaptation
  • Loading branch information
afraniomelo authored Jan 8, 2025
2 parents a139a97 + 7391c5c commit 2f6ce06
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 5 deletions.
10 changes: 6 additions & 4 deletions bibmon/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
from ._esn import ESN
from ._sbm import SBM
from ._sklearn_regressor import sklearnRegressor
from ._sklearn_manifold import sklearnManifold
from ._preprocess import PreProcess
from ._load_data import load_tennessee_eastman, load_real_data
from ._bibmon_tools import train_val_test_split, complete_analysis, comparative_table, targets_comparative_table, spearmanr_dendrogram, create_df_with_dates, create_df_with_noise, align_dfs_by_rows

# Public API of the package: the names listed here are the ones exported by
# `from bibmon import *`.
# NOTE(review): several entries below appear twice ('sklearnRegressor',
# 'PreProcess', 'load_tennessee_eastman', 'load_real_data',
# 'train_val_test_split', 'complete_analysis', 'comparative_table',
# 'spearmanr_dendrogram', 'create_df_with_dates'). This looks like removed and
# added diff lines rendered together -- confirm against the real file before
# deduplicating. Code that iterates over `bibmon.__all__` would visit the
# duplicated names twice.
__all__ = ['Autoencoder','PCA','ESN','SBM',
'sklearnRegressor', 'PreProcess',
'load_tennessee_eastman', 'load_real_data',
'train_val_test_split', 'complete_analysis', 'comparative_table',
'sklearnRegressor','sklearnManifold' ,
'PreProcess','load_tennessee_eastman',
'load_real_data', 'train_val_test_split',
'complete_analysis', 'comparative_table',
'targets_comparative_table',
'spearmanr_dendrogram', 'create_df_with_dates',
'spearmanr_dendrogram', 'create_df_with_dates',
'create_df_with_noise', 'align_dfs_by_rows']
156 changes: 156 additions & 0 deletions bibmon/_sklearn_manifold.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# -*- coding: utf-8 -*-
"""
Created on Tue 8 08:14:01 2024
@author: leovo
"""


import matplotlib.pyplot as plt
import pandas as pd

from ._generic_model import GenericModel

###############################################################################

class sklearnManifold(GenericModel):
    """
    Interface for sklearn manifold learning models.

    Parameters
    ----------
    manifold_model: any manifold model that uses the sklearn interface.
        For example:
            * sklearn.manifold.MDS,
            * sklearn.manifold.Isomap,
            * sklearn.manifold.TSNE,
            * sklearn.manifold.LocallyLinearEmbedding,
            * etc....

    Attributes
    ----------
    has_Y: bool
        Always False, because manifold algorithms don't require a
        target variable.
    manifold_model: object
        The wrapped sklearn-style estimator.
    name: str
        Class name of the wrapped estimator (used in plot titles).
    X_train: array-like or DataFrame
        Data given to the last fit (set by fit_transform).
    transformed_data: array-like
        Embedding returned by the last fit (set by train_core).
    """

    ###########################################################################

    def __init__(self, manifold_model):
        # Manifold algorithms are unsupervised: no target variable is needed
        self.has_Y = False
        self.manifold_model = manifold_model

        self.name = self.manifold_model.__class__.__name__

    ###########################################################################

    @staticmethod
    def _as_array(X):
        """
        Returns the underlying numpy array when X is a pandas DataFrame;
        any other input is returned untouched.
        """
        return X.values if isinstance(X, pd.DataFrame) else X

    ###########################################################################

    def train_core(self):
        """
        Fits the manifold model using the training data (self.X_train)
        and stores the resulting embedding in self.transformed_data.
        """
        self.transformed_data = self.manifold_model.fit_transform(
            self._as_array(self.X_train))

    ###########################################################################

    def fit_transform(self, X):
        """
        Fits the manifold model and returns the transformed data.

        Parameters
        ----------
        X: array-like or DataFrame
            The data to fit and transform.

        Returns
        -------
        transformed_data: array-like
            The embedding of X, also kept in self.transformed_data.
        """
        # The data passed to this method becomes the training data
        self.X_train = X
        self.train_core()

        # Returning the transformed data for visualization
        return self.transformed_data

    ###########################################################################

    def map_from_X(self, X_test):
        """
        Applies the transformation to a new dataset. Note that some manifold
        models, like TSNE, may not have a direct `transform` method.

        Parameters
        ----------
        X_test: array-like or DataFrame
            The new data to transform.

        Returns
        -------
        transformed_data: array-like
            The transformed data.

        Raises
        ------
        NotImplementedError
            If the wrapped model has no `transform` method.
        """
        if not hasattr(self.manifold_model, 'transform'):
            raise NotImplementedError("This manifold model does not support transformation on new data.")

        # Training always feeds a bare array to the model, so DataFrames are
        # unwrapped here as well to keep both code paths consistent
        return self.manifold_model.transform(self._as_array(X_test))

    ###########################################################################

    def set_hyperparameters(self, params_dict):
        """
        Sets the hyperparameters for the manifold model.

        Parameters
        ----------
        params_dict: dict
            Maps attribute names of the wrapped model to their new values.
        """
        for key, value in params_dict.items():
            setattr(self.manifold_model, key, value)

    ###########################################################################

    def transform(self, X_test):
        """
        Transforms the input data using the trained manifold model
        by calling map_from_X.

        Parameters
        ----------
        X_test: array-like or DataFrame
            The new data to transform.

        Returns
        -------
        transformed_data: array-like
            The transformed data.
        """
        return self.map_from_X(X_test)

    ###########################################################################

    def plot_embedding(self):
        """
        Plots the 2D or 3D embedding resulting from the manifold model.
        For any other dimensionality a message is printed instead.

        Raises
        ------
        AttributeError
            If no embedding has been computed yet.
        """
        embedding = getattr(self, 'transformed_data', None)
        if embedding is None:
            # Same exception type as the bare attribute access would give,
            # but with a message that tells the user what to do
            raise AttributeError("No embedding available: call "
                                 "fit_transform before plot_embedding.")

        n_dims = embedding.shape[1]

        # No `cmap` is passed to scatter: without per-point colors (`c`)
        # matplotlib ignores it and emits a warning
        if n_dims == 2:
            plt.scatter(embedding[:, 0], embedding[:, 1], s=50)
            plt.title(f"{self.name} 2D Embedding")
            plt.xlabel("Component 1")
            plt.ylabel("Component 2")
        elif n_dims == 3:
            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            ax.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2],
                       s=50)
            ax.set_title(f"{self.name} 3D Embedding")
            ax.set_xlabel("Component 1")
            ax.set_ylabel("Component 2")
            ax.set_zlabel("Component 3")
        else:
            print("Embedding dimensionality is not 2D or 3D; custom plotting is required.")

    ###########################################################################

    def clusters_visualization(self, X):
        """
        Fits the manifold model, transforms the data, and plots
        the resulting 2D or 3D embedding.

        Parameters
        ----------
        X: array-like or DataFrame
            The data to fit and transform.
        """
        # fit_transform stores the embedding that plot_embedding draws
        self.fit_transform(X)
        self.plot_embedding()

        plt.show()
78 changes: 78 additions & 0 deletions test/test_manifold_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 8 09:34:01 2024
@author: Leonardo Voltolini
"""

import bibmon
from sklearn.preprocessing import StandardScaler
import numpy as np



# Scaler shared by the training and testing folds
SC = StandardScaler()

# Tennessee Eastman Process data: one training set and one testing set
df_train, df_test = bibmon.load_tennessee_eastman(train_id=0, test_id=1)

# The scaler is fitted on the training fold and reused on the testing fold
X_train = SC.fit_transform(df_train)
X_test = SC.transform(df_test)

# Manifold models normally don't require separate training and testing
# folds, so both are stacked row-wise into a single matrix
X = np.concatenate((X_train, X_test), axis=0)


for attr in bibmon.__all__:
    a = getattr(bibmon, attr)

    # Only classes deriving directly from GenericModel are candidates and,
    # among those, only the sklearn manifold interface is exercised here
    if not isinstance(a, type):
        continue
    if a.__base__ != bibmon._generic_model.GenericModel:
        continue
    if a != bibmon.sklearnManifold:
        continue

    from sklearn.manifold import TSNE
    model = a(TSNE(n_components=2))  # Creating the model

    # Computing the embedded data with fit_transform and then plotting
    # it in the appropriate dimension
    embedded_data = model.fit_transform(X)
    model.plot_embedding()

#%%

# This cell does the same as the previous one, but it applies a distinct
# model and lets clusters_visualization run fit_transform and the plotting
# in a single call

for attr in bibmon.__all__:
    a = getattr(bibmon, attr)

    if not isinstance(a, type):
        continue
    if a.__base__ != bibmon._generic_model.GenericModel:
        continue
    if a != bibmon.sklearnManifold:
        continue

    from sklearn.manifold import MDS
    model = a(MDS(n_components=3))

    # Transforms the data and presents the graph for cluster visualization
    model.clusters_visualization(X)









6 changes: 5 additions & 1 deletion test/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def test_models_with_df_inputs():
for attr in bibmon.__all__:
a = getattr(bibmon,attr)
if isinstance(a, type):
if a.__base__ == bibmon._generic_model.GenericModel:
if a.__base__ == bibmon._generic_model.GenericModel:
if a == bibmon.sklearnManifold:
continue
if a == bibmon.sklearnRegressor:
from sklearn.linear_model import LinearRegression
m = a(LinearRegression())
Expand Down Expand Up @@ -148,6 +150,8 @@ def test_models_with_np_array_inputs():
a = getattr(bibmon,attr)
if isinstance(a, type):
if a.__base__ == bibmon._generic_model.GenericModel:
if a == bibmon.sklearnManifold:
continue
if a == bibmon.sklearnRegressor:
from sklearn.linear_model import LinearRegression
m = a(LinearRegression())
Expand Down

0 comments on commit 2f6ce06

Please sign in to comment.