From e045dcd81832576255b4aab9ddda930893a4f047 Mon Sep 17 00:00:00 2001 From: jzsmoreno <42299052+jzsmoreno@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:53:42 -0600 Subject: [PATCH] Add getBestModel function --- fraud_detection/Soms_FraudDetection.py | 135 ++++++++++++++++++++++++- fraud_detection/som.p | Bin 4793 -> 4793 bytes 2 files changed, 130 insertions(+), 5 deletions(-) diff --git a/fraud_detection/Soms_FraudDetection.py b/fraud_detection/Soms_FraudDetection.py index c7acfcd..3fbabb7 100644 --- a/fraud_detection/Soms_FraudDetection.py +++ b/fraud_detection/Soms_FraudDetection.py @@ -4,10 +4,15 @@ @author: J. Ivan Avalos """ +import os import pickle +import sys +from functools import partial +from typing import Callable import numpy as np import pandas as pd +from likelihood import walkers from minisom import MiniSom from sklearn.preprocessing import MinMaxScaler @@ -119,12 +124,12 @@ def getMetrics(dataset, fraud_id): f1_score = 0 # Avoid division by zero # Output the metrics - print("MinSom accuracy : ", accuracy) - print("MinSom precision : ", precision) - print("MinSom recall : ", recall) - print("MinSom F1-score : ", f1_score) + # print("MinSom accuracy : ", accuracy) + # print("MinSom precision : ", precision) + # print("MinSom recall : ", recall) + # print("MinSom F1-score : ", f1_score) - return accuracy + return [accuracy, precision, recall, f1_score] def load_model(filepath): @@ -133,6 +138,121 @@ def load_model(filepath): return model +def model(x, theta, sc=None, dataset=None): + # Apply the MinSom model to the input data + nx = int(round(theta[0], 0)) + ny = int(round(theta[1], 0)) + sigma = theta[2] + learning_rate = abs(theta[3]) + num_iterations = int(round(theta[4], 0)) + dist_int = theta[5] + som = somTrained(x, nx, ny, sigma, learning_rate, num_iterations) + try: + fraud_id = getFrauds(som, x, dist_int, sc) + metrics = getMetrics(dataset, fraud_id) + return np.array(metrics) + except: + return np.array([0.0, 0.0, 0.0, 0.0]) + + +def getBestModel( + x, model, iterations: int = 100, num_models: int = 10, sc=None, dataset=None, patience: int = 5 +) -> MiniSom: + # Initialize the best model and its performance + best_model = None + mean_performance = [] + best_metric_f1 = 0 + best_metric_acc = 0 + min_error_so_far = np.inf + y = np.array([100.0, 100.0, 100.0, 100.0]) + theta = np.array([5.0, 5.0, 0.5, 0.01, 50, 0.75]) + conditions = [ + 2.0, + 10.0, + 2.0, + 10.0, + 0.01, + 0.95, + 0.001, + 0.95, + 10.0, + 100.0, + 0.1, + 0.95, + ] + partial_model = partial(model, sc=sc, dataset=dataset) + + # Variable to track the number of consecutive iterations without improvement + no_improvement_counter = 0 + + for i in range(num_models): + print("model ", i) + # Initialize the model with random parameters + par, error = walkers( + 20, + x, + y, + partial_model, + theta, + conditions, + 0.05, + iterations, + 0.25, + 1.0 * 10**-3, + False, + None, + ) + try: + n = np.where(error == min(error))[0][0] + except: + print(error) + _parameters = par[n] + print("min_error_so_far : ", min_error_so_far) + _model = somTrained( + x, + int(round(_parameters[0], 0)), + int(round(_parameters[1], 0)), + _parameters[2], + abs(_parameters[3]), + int(round(_parameters[4], 0)), + ) + try: + fraud_id = getFrauds(som, x, _parameters[5], sc) + metrics = getMetrics(dataset, fraud_id) + + # Check if the model's performance improves + if (best_metric_f1 < metrics[-1]) or (best_metric_acc < metrics[0]): + best_metric_f1 = metrics[-1] + best_metric_acc = metrics[0] + min_error_so_far = error[n] + best_model = _model + best_parameters = _parameters + mean_performance.append(metrics) + print("MinSom accuracy : ", mean_performance[-1][-4]) + print("MinSom precision : ", mean_performance[-1][-3]) + print("MinSom recall : ", mean_performance[-1][-2]) + print("MinSom F1-score : ", mean_performance[-1][-1]) + + # Reset the no-improvement counter since we found a better model + no_improvement_counter = 0 + else: + # Increment the no-improvement counter + no_improvement_counter += 1 + + # Early stopping: If no improvement for `patience` consecutive iterations, stop + if no_improvement_counter >= patience: + print( + f"Early stopping after {no_improvement_counter} iterations without improvement." + ) + break + + except: + print("error in getFrauds") + break + + return best_model, mean_performance, best_parameters + + if __name__ == "__main__": # Cargar datos dataset, features, isFraud = getData() @@ -148,3 +268,8 @@ def load_model(filepath): pickle.dump(som, outfile) som = load_model(filepath) + print("\nSearching for the best model...") + best_model, mean_performance, best_parameters = getBestModel( + features_transformed, model, num_models=30, sc=sc, dataset=dataset + ) + print("Best model MinSom F1-score : ", mean_performance[-1][-1]) diff --git a/fraud_detection/som.p b/fraud_detection/som.p index 1361af90044d508e7bd9e41c5c737c4b0cb09444..037322b974407a05bd6cdb399615d5b5a60f88ae 100644 GIT binary patch delta 3708 zcmV-?4ukQzCAlTAM*)9MvAw@={=46Rihh(mc>)<*NV0YM5;2m7%EI`^wgckamr1et ztmzncu29u z^o-&WO_VQ_uIR3}rFb#jpY@Q9d%Pvy-`-l=)qYgRl}rJNii~{4Pf1P0K+R09#k#@B zb!rA058$DCn4RnY`X^;jRJ=&;8|q-hGN2d6E8;V=apsus+}ghJ|Am+hqPo_J1jF!> z*3T~$l@;&o?xugV(=bgo@Su~c$4S8eB!LV5nKwzR6j@u+(8p1#cH;RBE&5*?rwSBi z(xKH`1t3|2RH+^cBTxW49#ioo*NXW$k3g;3P;H=A5E*_Ab7g9#2NR{Q-FaEp4;!XO z*Fgrb-8j~GZnU;&KfWbEOwVeJk}%y-Snfns+IizxT|a-#MYz)6!IOAI%^#7xgB&%r zeNco%ja_tbU)=G>=hQVxtuAT){<7G5A>tg#-!y=@PR^wJaly+q_l5sU3)JidXly_s}iNl`y)hYpXb= z1c0#At_FXY!Q7fL{_lxp9ep=Jn$s7u3f6N@Fi08O&B%=5|M+7K72gyJ~Pk4$mr-0hfX3BVmAake(Si*`Y&8?=FUUF8-C=XwbU<)e6N^+ zoVW*y4@xO71gZYFqAje7K&MyikkR4R2Vxy0ZXQL(khU9WA;|P0<0o$ZcXw~N)E|_? z7bt&AkKnK5fzEK_e8NKF7MVwmVT5q}^d?2&ETjUqajPt4$}a^zg5k%*N%K?6(RFEE zC|$`8$pWJ|k5?W{eGYcYy`s}(Ik+)39|)&9by7bXapr{L^f}J8Tp5g#h#7g$4%5^i zIW6O{%|O`e{#`1hBHQb>01taUM3}4Zz;J&s3onB9rtRcWDP;^c%J_IFEs-2~lS6`B zMe)p9zqt^^CEF@K6GX@`qdRUkfu2`?Aj1IttE3(bl2HW0A&3tiNJaZ7mUc@|>-BkG z6=6;Fsbm;o!gp5VT;+=3A0=1{_ zny`#DcNL&TqjXxumQ~UV55qC4)f0vy-_>-22rRusjJy{Yk)iF|Q=t(lx_8-7Ir)F1 z0_F_XID=YTi*l^ND@o^`)NjCH1B;4OF?KI=8N4qAU!nuYPm zgAtgc`L-lg-sEwZp|s6%QQ1jABG_e}O(=o|J|1%0;7J=L^h}`?%h4%)^1=dr8Ipu- zX|_+u7F<6y3C?W*i<*_wP(!WtME-_w*Ud}MX)INd-rRqiW-HHs z!ZR&NN_-^de=2-$mX57;*#5!;s=mM0QUd} zo&UW?xU?gB*0PB9pn8<3DWLufdNWLR>~hid4Piuz4<7$#@qi3D9t-4Z%WK~>ovC!- z4W8%jTV*i8Nq$x0S;(x|>Yvp9B=%A5kXN|8y3~QQ@yZDw&TDM2EJ}YXAU~Ija-T)U z=})BVG84li0y{67%1LHn)h5(b&KOX>1+yVBorD8f9fe?4L@PGYo&GY*^OnF!cq1ls zwX_G5=Sa7{+zawQO$45(0XZkEP8B=I2V+FXqnqG1Q8?C|uIZc>3-=Er5?F5>8pz`O z=`xT|vT#YFmBl`#C!2o^-&SYw3;l0Im^f1c`ix#_abGOz(8v+$6WX-x&tBhM)INeH zbKDkELwdGfNXmYsfQ|ze_ke_qQY2xiIh`|=9-7k|L z)tQ2?{%Z#^dJ4TP4vZb|ghYWqw&2UrYqkjn@k?k9t~+=2hA@A|r4-z0FEYrjp7v#w z>C8e1RD<>*(+zBI z3?$Hh*RdO-#JUt9@$8Om%6^`+& zO)$Yy??XiM;~K}fL5k2)X+oedl-@>;raCd8pK1W`5P5%X2a&O#EF9xIns1|ZA6|~r z!8PGQYzKa_)<5xsQ;jQ_-XZ7k3M0ilq5$z^aVFcy@ik-(H;E&bxWZ>%qN`*f7yqqA zCW%ip>on?1;W;Eg$hvmV7Tw#^;{*tu(O(k?db*cYsFh0-Li=8GT0tASOa+05sp6Z7 zP68%!#EXA1rEIxl<_w-?{0l9BGz$M28qq%eHSk@l*{6?774?o7|3`$%sw~&XygH@u zsN9Mf={vLKm3Y9<^!LrW@L0?J+SB1fE!{ZjS!v69RGLX zP)}U)t?&KXu=3;dX3Mny2opDVI#Od%qWU0`uIYd7zwCz^hh*!Fyug8B0A-E2v#jzO ztdo?I^gGf-RW`10rN&YM^RueCrMJ2k`wSz3gi# zYDJ8N1b$R9mrlo1nOKN1XyEfZyvRed0lTw$H-dKnK?^@hrTO9u;QbJnlaP!OL^-yX zPtjku#7F~jzELDYDR?LHKf3gBJx?5xl(7;^s;@Rb8SrBQU)Q8)6167=##B>xUo{kq zozR(b@pWM@5?8h;)ke|+^>PBQ-^Nu{Cwg`X5IWOsYFLzXlwynnaBp*zP2Q6e3#EU; z!F?)o1LQx*v=d19ce_7^6SfqTPsTsIr1?F$e8N9D%VXY`1=~MxnCerS7u`R}=W`DV zNZdceiY(uI2B1I1M$wJ(2j)Mgn>1xWT*5!yAo72!U4}oq%HATz1GGQ%{Q*#$8s9%- zz-c%E5b!FoSbKr-r>)ZKqSZB{?#OUdUyH;`Sdyp_#ATVA;@(P+Lu(+__+ zS;Go(+c47h)VLE^AKRJPs z4v;R?Kj&ljTizVVKTrQMhW`%SKV{Y!sISD`KTbiAas2$+Kaf_xrAk1(Kk3IfN#4ul zKi`3|r`EXSKcdDc;+-n6Ka;s>j7TECKbd`Z_Q74p%VnW4D*&=|3)TbH<0ur$0WSapwBj;_B^u-#>R! z+ec@RvOhg8-J@alfI(-9NNz-_1)j)<2@XWXf{a(Lbf`6>EQ*`NcmIVwyZ- z;@Uq;Hc7SuQQ|+m?7So-ncP1FYg}mfrma6=yWl+@ed9ljU4zkD#?n7M($}2?#g#uG zgcp3u=lVD2%s1Xa-o-*TiZ+d%H{$ z)yUvK6pnln*b(kOXCQidc)`j)>C0iqynVwzSHW= zUjh)gKPQ|Rt;|n`KclfGk~u-;KX**JN`o5fKkrS1Kj}i-KOgg-bDN9OKePl21$?60 zKj|sYj(;TCKhA$>lToo%p#S8Gl(Z_$v^s;#tfH}lRuPV zDOD<)>_656SGUtv-aoy^KMLI_u|F((j!L6Lls|FLztac9;Xf@QUhV+I)IU0i^tKHi z-ando^BJ`Zy+2UWhXvUS+&_rN-NL#Q zFHw|bO!_}COOuG+Q1U`>G3F;`={^1Ov-p5B0NY5KsbcZ#jnm delta 3708 zcmV-?4ukQzCAlTAM*)ARa3lahZF}F4+k5Ytu*&N_Kc~=SlLhM4;Zg=RK;Gx?Ylj7s zSH#!29&NciM?+uF`&s+0pd89k5=vFJZSkCRi^rf)wU5c6+f>7Iny*FB8_;|Uca2r^ z7c@)ZD7h+a7Zfr$gFQs)=3IFvDV+vFY=t;jM$uAdQi74{B)xyi9#M(l9I&J~ICP}# z19(l^dw%Vrp1gG4m}H8aX6;7}3+VW`c@?{`4$iy1Iao77t3m_}4H5{o0PAwy(W||@ zIv~5W(tBi1?)Q*iVHMVJO|;+sy_h-4rP=i$i%yzX z|F~Eb%m5L8r4n17B5OB-R_pqvzSq9ob#ADao$pgMP~o+D5p`V3P^MS2?0EuiFnn2| zIL(ou+q3&n?&E4HqDSBDaF)6|%b>26u(z4N+f%)bpMawk1Fx$LRZrRQ{<>to9OKK- zK=SMO;;et8*_#=%1Sh2Ns@d@Igzw$GZ>I=jwo~Ko=(-or)-e&ud;6vbW)tz{)9GB4 zLgO5Y>*%VeXVc=_OTHgImnjTFLrcA4F7nZ}Y*D#IlG!c{ zxU$GWAV!NQ5x>jB&mHN=UCk?)fOcsERk;B1OssWAEsoP2I9KHh+X6I% zLzd@wX;=wNz#Zx*mR{u)CDyjbSrZNo`S%XX+;^)$wXrs#SZyzDj{JJ?8Y}&sK zN#=}0OaxBLm0BcjStR|OTK}nupH}%~jmER7)_L`(#u20oK#`~<`uv_TJEi@q4X)XZ zng6H623GHWH>*oP1>Y3F4@=h)@)1tT8*W$6-Je8L+&wi#NSPJkrnLGLGn17V{e*ut z8FHumQ`2)u;}^3vfbz!1#jyRkI=4XysD#%rBU_$`<9I>}YCCO((pk4CW;{)@U^9-H z2JRnk1RLaev;&Q>|Fv~Rz1eK14><}Jw!OWSd_W{Gu}mGK9Tvbz&S#cNMkb?bEOUs1 z$yv7G4rWX%oZmkD__mZ*P5q(fD^Gv-67WFOo0bAr7BasE^igPVV80GzHn+B@0$gwR z%B0qx@0fjtT4vuTA$4E?gz|dDt^BkXPV7Q+5ef{g=Z+^j!WB8g=2^~d`4{m6>(GEx zgSAnAj*>D=1Lsx$yLox687L|9F3YkB`*@ z1kz|lOEiDg$)}7dtV)0UQhV53Cn~*bi#x3vZE--??i{-OtLX8fQ8*>Eo-wb+kxY3@sYU-GAd6e(bvU6dr{> z#0+q8*jL&eUk{|QF&Lk)lAi%o`ZYx%s{}RdnSv4Zk`&=YnKH><0wsTFw;kft&#nY* zXf@%0INFq(BFZW9AseV_*>X3PDXjn2y#>5*RqqdgNS4FwH zWu3>0c_NvBzCF+yK&pR&F>TS`fpk0LR(@bQpG!(7WdiF?Zg)gET)wV)rvI7kN0TtH zAP{C=XYR-axepBi&}~;{yXXxQQS8ct>ZQ9u!#j4c5bgF;cg_2FAa}nH=fk`hFNGUx zH2Rmtu<)RvgTQ?ZwmiDIHv0vh*5ZkL#=xv^d+1pPV;IuCrO|f%~!o);O|IQRQN#j|Gps21rCh-4_ zz{1Mw53KkY(Plpqj*hii<8_Tg znr0L0satx=_5#Px*E3OQjc&E0KD}4_%rs<{6rdO;3p@au*DGky7kJ_ear_|4>_C`s ztmUf0ZP99~yEgUWzOBrF1JKtR{P{;U%(145FzOLm*iQ148SDxrV@%Ty=`NkoAEEn= zdk6CJQAU3R#7tlHE;G>fzVowo8h#VlT=7orJQ{7)a?)jGpbnS5|@9-vC^F=3|P?80yI4lezY^)N%|#7+<$ez5jr{g85_2iO5L#()K1IYdTwBiH6U!HHxj>kaR512lEUJx+@zk(jh7>^L;#Ic`pp?xF`xNyy zGqPV?Lr+;Ib*3t#u0(kBp zMS0vmQNA@Lh&AFrl29R4wkq8}+@d||myWwXNt}rM)KV8{SiowOi zKdbiYu3*`}KUtxnoih4{KW`!WvVkAQKi#D(K(saAKYfWR?geP6KUDL?IkX^eKjTVA z`qshXKX|a7fI67zKmM(?4rhNs)IX@F`zY+lzCY@&r596R(LeqC`IFRh<3GNnIpKd` z;y(o-s|>B&vp*e^zNfWSW?NfwGXWu_jK2z+6h_ydoTOpu6>5)H1dRjg?<>fyrV_ki8#Lqt?0ia4C z?$AF%H(4TJZN@+HN63G{RMFf&CR3}Lt})_2A?;JF@PFJtypl;{((bW8-x5YFSoShEzOL3kW@&##9_gCVKW^_}nVyT; zKM}TQreCR|Kb2<(Ce`qX_*d07tvp*6BpbGi%=0C??!H+@&%s;lxIPBNFlRvEZ zNU%cB(?5mDEuAnL-9Jqq)%Qjfz(2t5hEhzCc|Te0wM%`-&_BAvWd;!x>OVFmsOEaC z(m&vny02a6zd!hh2Cq9&+&|Ihj9o8Y+&@(DkR&Rp+dn_NZdl9r@WnqjNl-@MXzM@w zq4wx!4!%Ddt9p680ERz6$ex$Gmg_&IjrH;SnB6}tZv}OmNX9=Q8x7PL%6yaW4>L!C z*$P}3+VVeYB?>JvU-~~cV~uNoH1j`>$ZX7ickw@t(EP6k*!Dl|AE_^stN1@_3pXmV adh|bsf_Yv!!1X^6(BCBpAo{au5Ksblrbrh6