Skip to content

Commit

Permalink
Add getBestModel function
Browse files Browse the repository at this point in the history
  • Loading branch information
jzsmoreno committed Nov 12, 2024
1 parent 5a7b3b9 commit e045dcd
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 5 deletions.
135 changes: 130 additions & 5 deletions fraud_detection/Soms_FraudDetection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@
@author: J. Ivan Avalos
"""

import os
import pickle
import sys
from functools import partial
from typing import Callable

import numpy as np
import pandas as pd
from likelihood import walkers
from minisom import MiniSom
from sklearn.preprocessing import MinMaxScaler

Expand Down Expand Up @@ -119,12 +124,12 @@ def getMetrics(dataset, fraud_id):
f1_score = 0 # Avoid division by zero

# Output the metrics
print("MinSom accuracy : ", accuracy)
print("MinSom precision : ", precision)
print("MinSom recall : ", recall)
print("MinSom F1-score : ", f1_score)
# print("MinSom accuracy : ", accuracy)
# print("MinSom precision : ", precision)
# print("MinSom recall : ", recall)
# print("MinSom F1-score : ", f1_score)

return accuracy
return [accuracy, precision, recall, f1_score]


def load_model(filepath):
Expand All @@ -133,6 +138,121 @@ def load_model(filepath):
return model


def model(x, theta, sc=None, dataset=None):
    """Train a SOM from a parameter vector and score its fraud detection.

    Parameters
    ----------
    x
        Feature matrix handed to ``somTrained`` and ``getFrauds``.
    theta
        Parameter vector, read positionally as
        ``[nx, ny, sigma, learning_rate, num_iterations, dist_int]``.
        ``nx``, ``ny`` and ``num_iterations`` are rounded to the nearest
        integer; the absolute value of ``learning_rate`` is used.
    sc
        Forwarded unchanged to ``getFrauds`` (presumably the fitted scaler
        -- confirm against ``getFrauds``).
    dataset
        Forwarded unchanged to ``getMetrics``.

    Returns
    -------
    numpy.ndarray
        The metrics returned by ``getMetrics`` as an array, or four zeros
        when fraud extraction or scoring fails.
    """
    # The optimiser explores a continuous space, so the integer-valued
    # hyper-parameters have to be rounded before they reach the SOM.
    nx = int(round(theta[0], 0))
    ny = int(round(theta[1], 0))
    sigma = theta[2]
    learning_rate = abs(theta[3])
    num_iterations = int(round(theta[4], 0))
    dist_int = theta[5]

    som = somTrained(x, nx, ny, sigma, learning_rate, num_iterations)
    try:
        fraud_id = getFrauds(som, x, dist_int, sc)
        metrics = getMetrics(dataset, fraud_id)
    except Exception:
        # Best effort: a parameter set that cannot be scored gets the worst
        # possible metrics instead of aborting the whole search.  Catching
        # ``Exception`` (not a bare ``except``) keeps Ctrl-C / SystemExit
        # working during long optimisation runs.
        return np.array([0.0, 0.0, 0.0, 0.0])
    return np.array(metrics)


def getBestModel(
    x,
    model: Callable,
    iterations: int = 100,
    num_models: int = 10,
    sc=None,
    dataset=None,
    patience: int = 5,
) -> tuple:
    """Search SOM hyper-parameters with ``walkers`` and keep the best model.

    For each of ``num_models`` rounds the optimiser proposes parameter
    vectors, the one with the smallest reported error is used to train a
    SOM, and that SOM is scored with ``getFrauds`` / ``getMetrics``.  A
    candidate replaces the current best when its F1-score *or* its accuracy
    exceeds the best value seen so far.

    Parameters
    ----------
    x
        Feature matrix used both for the optimisation and for training.
    model
        Callable ``model(x, theta, sc=..., dataset=...)`` evaluated by
        ``walkers``; ``sc`` and ``dataset`` are bound with ``partial``.
    iterations
        Forwarded positionally to ``walkers``.
    num_models
        Maximum number of optimisation rounds.
    sc
        Forwarded to ``model`` and ``getFrauds``.
    dataset
        Forwarded to ``model`` and ``getMetrics``.
    patience
        Number of consecutive rounds without improvement after which the
        search stops early.

    Returns
    -------
    tuple
        ``(best_model, mean_performance, best_parameters)``.
        ``best_model`` and ``best_parameters`` are ``None`` and
        ``mean_performance`` is empty when no candidate ever improved.
    """
    best_model = None
    best_parameters = None  # defined up front so the final return never fails
    mean_performance = []
    best_metric_f1 = 0
    best_metric_acc = 0
    min_error_so_far = np.inf

    # Target metric vector and starting point handed to the optimiser.
    y = np.array([100.0, 100.0, 100.0, 100.0])
    theta = np.array([5.0, 5.0, 0.5, 0.01, 50, 0.75])
    # Twelve values for six parameters -- presumably (lower, upper) bounds
    # per entry of ``theta``; confirm against the ``walkers`` documentation.
    conditions = [
        2.0,
        10.0,
        2.0,
        10.0,
        0.01,
        0.95,
        0.001,
        0.95,
        10.0,
        100.0,
        0.1,
        0.95,
    ]
    partial_model = partial(model, sc=sc, dataset=dataset)

    # Number of consecutive rounds that did not improve the best model.
    no_improvement_counter = 0

    for i in range(num_models):
        print("model ", i)
        par, error = walkers(
            20,
            x,
            y,
            partial_model,
            theta,
            conditions,
            0.05,
            iterations,
            0.25,
            1.0 * 10**-3,
            False,
            None,
        )
        try:
            n = int(np.argmin(error))
        except (ValueError, TypeError):
            # Without a valid index there is no candidate to train; the
            # original fell through and used an unbound or stale ``n``.
            print(error)
            continue

        _parameters = par[n]
        print("min_error_so_far : ", min_error_so_far)
        _model = somTrained(
            x,
            int(round(_parameters[0], 0)),
            int(round(_parameters[1], 0)),
            _parameters[2],
            abs(_parameters[3]),
            int(round(_parameters[4], 0)),
        )
        try:
            # Score the model that was just trained (the original passed an
            # unrelated ``som`` name here).
            fraud_id = getFrauds(_model, x, _parameters[5], sc)
            metrics = getMetrics(dataset, fraud_id)
        except Exception as exc:
            print("error in getFrauds")
            print(repr(exc))
            break

        # NOTE(review): with ``or`` a candidate that only improves accuracy
        # can lower the stored best F1-score -- confirm this is intended.
        if (best_metric_f1 < metrics[-1]) or (best_metric_acc < metrics[0]):
            best_metric_f1 = metrics[-1]
            best_metric_acc = metrics[0]
            min_error_so_far = error[n]
            best_model = _model
            best_parameters = _parameters
            mean_performance.append(metrics)
            print("MinSom accuracy : ", mean_performance[-1][-4])
            print("MinSom precision : ", mean_performance[-1][-3])
            print("MinSom recall : ", mean_performance[-1][-2])
            print("MinSom F1-score : ", mean_performance[-1][-1])
            no_improvement_counter = 0
        else:
            no_improvement_counter += 1

        # Early stopping after ``patience`` consecutive rounds without gain.
        if no_improvement_counter >= patience:
            print(
                f"Early stopping after {no_improvement_counter} iterations without improvement."
            )
            break

    return best_model, mean_performance, best_parameters


if __name__ == "__main__":
# Cargar datos
dataset, features, isFraud = getData()
Expand All @@ -148,3 +268,8 @@ def load_model(filepath):
pickle.dump(som, outfile)

som = load_model(filepath)
print("\nSearching for the best model...")
best_model, mean_performance, best_parameters = getBestModel(
features_transformed, model, num_models=30, sc=sc, dataset=dataset
)
print("Best model MinSom F1-score : ", mean_performance[-1][-1])
Binary file modified fraud_detection/som.p
Binary file not shown.

0 comments on commit e045dcd

Please sign in to comment.