-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Create `autoencoders.py` * Add `AutoClassifier` class * Update `autoencoders.py` * Add `concatenate` to `classifier` output * [FIX] `AutoClassifier` architecture * Update `tools.py` * Add normalization to `DataFrameEncoder` * minor changes * Update `numeric_tools.py` * Update `xi_corr` function * Update `autoencoders.py` * Add `call_existing_code` * Add `build_model` * Add `setup_model` * [FIX] `autoencoders.py` * minor changes * Update `VERSION` and `setup.py` * Add docstrings * Update `generate-docs.yml`
- Loading branch information
Showing
7 changed files
with
294 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
1.2.12 | ||
1.2.13 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .autoencoders import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,248 @@ | ||
import os | ||
from functools import partial | ||
|
||
import keras_tuner | ||
import numpy as np | ||
import pandas as pd | ||
import tensorflow as tf | ||
from pandas.core.frame import DataFrame | ||
from tensorflow.keras.models import Model | ||
|
||
from likelihood.tools import OneHotEncoder | ||
|
||
|
||
@tf.keras.utils.register_keras_serializable(package="likelihood", name="AutoClassifier")
class AutoClassifier(Model):
    """
    Autoencoder-based classifier.

    The input is passed through an encoder and a decoder; the reconstruction and
    the encoded representation are concatenated and fed to a softmax layer.

    Attributes:
    - shape: The number of input features (the `input_shape` constructor argument).
    - num_classes: The number of classes in the dataset.
    - units: The number of neurons in the first hidden layer.
    - activation: The activation function used by the encoder and decoder layers.

    Methods:
    __init__(self, input_shape, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
    call(self, x): Runs the forward pass and returns the class probabilities.
    get_config(self): Returns the constructor arguments so the model can be serialized.
    from_config(cls, config): Rebuilds an instance from the output of `get_config`.
    """

    def __init__(self, input_shape, num_classes, units, activation):
        """
        Initializes an AutoClassifier instance with the given parameters.

        Parameters
        ----------
        input_shape : `int`
            The number of input features.
        num_classes : `int`
            The number of classes in the dataset.
        units : `int`
            The number of neurons in the first encoder layer (the bottleneck uses half).
        activation : `str`
            The type of activation function to use for the encoder and decoder layers.

        Returns
        -------
        `None`
        """
        super().__init__()
        self.units = units
        self.shape = input_shape
        # Kept so that `get_config` can reproduce every constructor argument.
        self.num_classes = num_classes
        self.activation = activation

        self.encoder = tf.keras.Sequential(
            [
                tf.keras.layers.Dense(units=units, activation=activation),
                tf.keras.layers.Dense(units=int(units / 2), activation=activation),
            ]
        )

        self.decoder = tf.keras.Sequential(
            [
                tf.keras.layers.Dense(units=units, activation=activation),
                tf.keras.layers.Dense(units=input_shape, activation=activation),
            ]
        )

        self.classifier = tf.keras.Sequential(
            [tf.keras.layers.Dense(num_classes, activation="softmax")]
        )

    def call(self, x):
        """
        Runs the forward pass.

        Parameters
        ----------
        x : tensor
            Batch of inputs; concatenation happens along axis 1, so a 2-D
            (batch, features) tensor is assumed.

        Returns
        -------
        tensor
            Output of the softmax classifier applied to the concatenation of
            the reconstruction and the encoded representation.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        combined = tf.concat([decoded, encoded], axis=1)
        return self.classifier(combined)

    def get_config(self):
        """
        Returns the configuration needed to re-create this model.

        Returns
        -------
        `dict`
            The base Keras configuration extended with the constructor arguments.
        """
        config = dict(super().get_config())
        config.update(
            {
                "input_shape": self.shape,
                "num_classes": self.num_classes,
                "units": self.units,
                "activation": self.activation,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """
        Re-creates a model from the dictionary produced by `get_config`.

        Only the constructor arguments are read; any extra keys contributed by
        the Keras base classes are ignored because `__init__` does not accept them.
        """
        return cls(
            input_shape=config["input_shape"],
            num_classes=config["num_classes"],
            units=config["units"],
            activation=config["activation"],
        )
|
||
|
||
def call_existing_code(
    units: int,
    activation: str,
    threshold: float,
    optimizer: str,
    input_shape: None | int = None,
    num_classes: None | int = None,
) -> AutoClassifier:
    """
    Builds a new `AutoClassifier` and compiles it.

    The model is compiled with categorical cross-entropy as the loss and an
    F1 score (at the given threshold) as the only metric.

    Parameters
    ----------
    units : `int`
        The number of neurons in each hidden layer.
    activation : `str`
        The type of activation function to use for the neural network layers.
    threshold : `float`
        The decision threshold passed to the F1 score metric.
    optimizer : `str`
        The optimizer used to compile the model.
    input_shape : `None` | `int`
        The shape of the input data.
    num_classes : `None` | `int`
        The number of classes in the dataset.

    Returns
    -------
    `AutoClassifier`
        The compiled AutoClassifier instance.
    """
    f1_metric = tf.keras.metrics.F1Score(threshold=threshold)
    classifier = AutoClassifier(input_shape, num_classes, units, activation)
    classifier.compile(
        loss="categorical_crossentropy",
        optimizer=optimizer,
        metrics=[f1_metric],
    )
    return classifier
|
||
|
||
def build_model(hp, input_shape: None | int, num_classes: None | int) -> AutoClassifier:
    """Builds a neural network model using Keras Tuner's search algorithm.

    Samples `units`, `activation`, `optimizer` and `threshold` from `hp` and
    delegates the construction and compilation to `call_existing_code`.

    Parameters
    ----------
    hp : `keras_tuner.HyperParameters`
        The hyperparameters to tune.
    input_shape : `None` | `int`
        The shape of the input data. An integer is required here because the
        search range for `units` is derived from it.
    num_classes : `None` | `int`
        The number of classes in the dataset.

    Returns
    -------
    `AutoClassifier`
        The compiled neural network model.
    """
    # The encoder bottleneck uses int(units / 2) neurons, so `units` must be at
    # least 2 to avoid a zero-width layer. Without the clamp, any input_shape
    # below 10 yields a lower bound of 0 or 1.
    min_units = max(2, int(input_shape * 0.2))
    # Keep the range valid (min <= max) for very narrow inputs.
    max_units = max(min_units, input_shape)

    units = hp.Int("units", min_value=min_units, max_value=max_units, step=2)
    activation = hp.Choice("activation", ["sigmoid", "relu", "tanh", "selu", "softplus"])
    optimizer = hp.Choice("optimizer", ["sgd", "adam", "adadelta"])
    threshold = hp.Float("threshold", min_value=0.1, max_value=0.9, sampling="log")

    return call_existing_code(
        units=units,
        activation=activation,
        threshold=threshold,
        optimizer=optimizer,
        input_shape=input_shape,
        num_classes=num_classes,
    )
|
||
|
||
def setup_model(
    data: DataFrame, target: str, epochs: int, train_size: float = 0.7, seed=None, **kwargs
) -> AutoClassifier:
    """Setup model for training and tuning.

    Splits `data` into features and target, one-hot encodes the target, runs a
    Keras Tuner random search over `build_model`, saves the best model to
    `<directory>/best_model.keras` and returns it.

    Parameters
    ----------
    data : `DataFrame`
        The dataset to train the model on. All feature columns must already be
        numeric.
    target : `str`
        The name of the target column.
    epochs : `int`
        The number of epochs to train the model for.
    train_size : `float`
        The proportion of the dataset to use for training; the remainder is
        used as the validation split.
    seed : `Any` | `int`
        The random seed to use for reproducibility.

    Keyword Arguments:
    ----------
    Additional keyword arguments to pass to the model.
    max_trials : `int`
        The maximum number of trials to perform. Defaults to 10.
    directory : `str`
        The directory to save the tuner results and the best model to.
        Defaults to "./my_dir".
    project_name : `str`
        The name of the project. Defaults to "get_best".
    objective : `str`
        The objective to optimize. Defaults to "val_loss".
    verbose : `bool`
        Whether to print the tuner results summary. Defaults to True.

    Returns
    -------
    model : `AutoClassifier`
        The best model found by the search.

    Raises
    ------
    `AssertionError`
        If the feature columns contain `object` (categorical) dtypes.
    """
    max_trials = kwargs.get("max_trials", 10)
    directory = kwargs.get("directory", "./my_dir")
    project_name = kwargs.get("project_name", "get_best")
    objective = kwargs.get("objective", "val_loss")
    verbose = kwargs.get("verbose", True)

    X = data.drop(columns=target)
    y = data[target]
    # Verify if there are categorical columns in the dataframe
    assert X.select_dtypes(
        include=["object"]
    ).empty, "Categorical variables within the DataFrame must be encoded, this is done by using the DataFrameEncoder from likelihood."
    validation_split = 1.0 - train_size
    # Create the output directory if it does not exist
    os.makedirs(directory, exist_ok=True)

    # One-hot encode the target. The encoder output is used as-is so that any
    # number of classes is supported (previously it was forced into exactly
    # two columns). NOTE(review): assumes `encode` returns a 2-D array-like of
    # shape (samples, classes) - confirm against likelihood.tools.OneHotEncoder.
    y_encoder = OneHotEncoder()
    y = np.asarray(y_encoder.encode(y.to_list()), dtype=np.float32)
    X = np.asarray(X.to_numpy(), dtype=np.float32)

    input_shape = X.shape[1]
    num_classes = y.shape[1]
    # Bind the data-dependent arguments locally instead of rebinding the
    # module-level `build_model`, which would leak state between calls.
    hypermodel = partial(build_model, input_shape=input_shape, num_classes=num_classes)

    # Create the tuner
    tuner = keras_tuner.RandomSearch(
        hypermodel=hypermodel,
        objective=objective,
        max_trials=max_trials,
        directory=directory,
        project_name=project_name,
        seed=seed,
    )

    tuner.search(X, y, epochs=epochs, validation_split=validation_split)
    best_model = tuner.get_best_models(num_models=1)[0]

    # Save the model next to the tuner results, honouring `directory`.
    best_model.save(os.path.join(directory, "best_model.keras"))

    if verbose:
        tuner.results_summary()

    # The in-memory model is returned directly; reloading the file that was
    # just written adds nothing and fails when the custom class is not
    # registered for deserialization.
    return best_model
|
||
|
||
######################################################################################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.