
Commit

Merge pull request #8 from jakekrell/main
v3.0.0
dmebane authored Nov 2, 2023
2 parents 1443aec + ab40e76 commit 7013a0d
Showing 5 changed files with 1,095 additions and 612 deletions.
149 changes: 80 additions & 69 deletions Examples/GP Integrate/GP_integrate_example.py
@@ -1,69 +1,80 @@
import numpy as np
import FoKL
from FoKL import FoKLRoutines
from FoKL import getKernels
import matplotlib.pyplot as plt


# inputs
traininputs = np.loadtxt('traininputs.csv.txt',dtype=float,delimiter=',')
traindata1 = np.loadtxt('traindata1.txt',dtype=float,delimiter=',')
traindata2 = np.loadtxt('traindata2.txt',dtype=float,delimiter=',')
y = np.loadtxt('y.txt',dtype=float,delimiter=',')
utest = np.loadtxt('utest.csv',dtype=float,delimiter=',')
# relats_in
relats_in = [1,1,1,1,1,1]

# generating phis from the spline coefficients text file in emulator_python
# combined with the splineconvert script

phis = getKernels.sp500()

# a
a = 1000

# b
b = 1

# atau
atau = 4

# btau
btau = 0.6091

# tolerance
tolerance = 3

# draws
draws = 2000

gimmie = False
way3 = True
threshav = 0
threshstda = 0
threshstdb = 100
aic = False
n,m = np.shape(y)
norms1 = [np.min(y[0,0:int(m/2)]),np.max(y[0,0:int(m/2)])]
norms2 = [np.min(y[1,0:int(m/2)]),np.max(y[1,0:int(m/2)])]
norms = np.transpose([norms1,norms2])
# Running emulator_Xin routine and visualization
model1 = FoKLRoutines.FoKL(phis, relats_in, a, b, atau, btau, tolerance, draws, gimmie, way3, threshav, threshstda, threshstdb, aic)
model2 = FoKLRoutines.FoKL(phis, relats_in, a, b, atau, 1, tolerance, draws, gimmie, way3, threshav, threshstda, threshstdb, aic)

betas1, mtx1, evs1 = model1.fit(traininputs, traindata1)
betas2, mtx2, evs2 = model2.fit(traininputs, traindata2)

betas1 = betas1[1000:]
betas2 = betas2[1000:]

start = 4
stop = 3750*4
stepsize = 4
used_inputs = [[1,1,1],[1,1,1]]
ic = y[:,int(m/2)-1]

T,Y = FoKLRoutines.FoKL.GP_Integrate([np.mean(betas1,axis=0),np.mean(betas2,axis=0)], [mtx1,mtx2], utest, norms, phis, start, stop, ic, stepsize, used_inputs)

plt.plot(T,Y[0],T,y[0][3750:7500])
plt.plot(T,Y[1],T,y[1][3750:7500])
from FoKL import FoKLRoutines
from FoKL.GP_Integrate import GP_Integrate
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


# ======================================================================================================================

# Inputs:
traininputs = np.loadtxt('traininputs.txt',dtype=float,delimiter=',')
traindata1 = np.loadtxt('traindata1.txt',dtype=float,delimiter=',')
traindata2 = np.loadtxt('traindata2.txt',dtype=float,delimiter=',')
traindata = [traindata1, traindata2]
y = np.loadtxt('y.txt',dtype=float,delimiter=',')
utest = np.loadtxt('utest.csv',dtype=float,delimiter=',')

# User-defined constant hyperparameters (to override default values):
relats_in = [1,1,1,1,1,1]
a = 1000
b = 1
draws = 2000
way3 = True
threshav = 0
threshstda = 0
threshstdb = 100

# Initializing FoKL model with constant hypers:
model = FoKLRoutines.FoKL(relats_in=relats_in, a=a, b=b, draws=draws, way3=way3, threshav=threshav, threshstda=threshstda, threshstdb=threshstdb)

# User-defined variable hyperparameters (to iterate through either for different data or for sweeping the same data):
btau = [0.6091, 1]

# Iterating through datasets:
betas = []
mtx = []
for ii in range(2):

    print("\nCurrently fitting model to dataset", int(ii+1), ". . .")

    # Updating model with current iteration of variable hyperparameters:
    model.btau = btau[ii]

    # Running emulator routine for current model/data:
    betas_i, mtx_i, _ = model.fit(traininputs, traindata[ii])

    print("Done!")

    # Store values for post-processing (i.e., GP integration):
    betas.append(betas_i[1000:])
    mtx.append(mtx_i)

    # Clear all attributes (except for hypers) so previous results do not influence the next iteration:
    model.clear()

# ======================================================================================================================

# Integrating with FoKL.GP_Integrate():

phis = model.phis # same for all models iterated through, so just grab value from most recent model

# Min/max of the training half of each output (the rows of 'norms'), evidently used by GP_Integrate to map
# the normalized GP outputs back to the scale of 'y':
n, m = np.shape(y)
norms1 = [np.min(y[0, 0:int(m/2)]), np.max(y[0, 0:int(m/2)])]
norms2 = [np.min(y[1, 0:int(m/2)]), np.max(y[1, 0:int(m/2)])]
norms = np.transpose([norms1, norms2])

start = 4
stop = 3750*4
stepsize = 4
used_inputs = [[1,1,1],[1,1,1]]
ic = y[:,int(m/2)-1]

T, Y = GP_Integrate([np.mean(betas[0], axis=0), np.mean(betas[1], axis=0)],
                    [mtx[0], mtx[1]], utest, norms, phis,
                    start, stop, ic, stepsize, used_inputs)

plt.figure()
plt.plot(T,Y[0],T,y[0][3750:7500])
plt.plot(T,Y[1],T,y[1][3750:7500])
plt.show()
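
# A hedged follow-up sketch (not part of the original example): the plots above compare the integrated
# trajectories in Y against the held-out half of 'y', so an RMSE per output quantifies the same comparison,
# assuming T aligns sample-for-sample with y[:, 3750:7500]:
rmse0 = np.sqrt(np.mean((Y[0] - y[0][3750:7500])**2))
rmse1 = np.sqrt(np.mean((Y[1] - y[1][3750:7500])**2))
print("RMSE of integrated trajectories:", rmse0, rmse1)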

212 changes: 63 additions & 149 deletions Examples/Sigmoid/SigmoidTest.py
@@ -1,150 +1,64 @@
import numpy as np
import FoKL
from FoKL import FoKLRoutines
from FoKL import getKernels

# Setting input parameters

# inputs
X = np.loadtxt('X.csv',dtype=float,delimiter=',')
Y = np.loadtxt('Y.csv',dtype=float,delimiter=',')

m, n = np.shape(X) # reading dimensions of input variables
X_reshape = np.reshape(X, (m*n,1), order='F') # reshaping, fortran index order

m, n = np.shape(Y)
Y_reshape = np.reshape(Y, (m*n,1), order='F')

inputs = []

for i in range(len(X_reshape)):
    inputs.append([float(X_reshape[i]), float(Y_reshape[i])])

# data
data_load = np.loadtxt('DATA_nois.csv',dtype=float,delimiter=',')

data = np.reshape(data_load, (m*n,1), order='F')

phis = getKernels.sp500()

# a
a = 9

# b
b = 0.01

# atau
atau = 3

# btau
btau = 4000

# tolerance
tolerance = 3
relats_in = []
# draws
draws = 1000

gimmie = False
way3 = False
threshav = 0.05
threshstda = 0.5
threshstdb = 2
aic = True

# define model
model = FoKLRoutines.FoKL(phis, relats_in, a, b, atau, btau, tolerance, draws, gimmie, way3, threshav, threshstda, threshstdb, aic)
"""
initialization inputs:
'relats' is a boolean matrix indicating which terms should be excluded
from the model building; for instance if a certain main effect should be
excluded relats will include a row with a 1 in the column for that input
and zeros elsewhere; if a certain two way interaction should be excluded
there should be a row with ones in those columns and zeros elsewhere
to exclude no terms 'relats = np.array([[0]])'. An example of excluding
the first input main effect and its interaction with the third input for
a case with three total inputs is:'relats = np.array([[1,0,0],[1,0,1]])'
'phis' are a data structure with the spline coefficients for the basis
functions, built with 'spline_coefficient.txt' and 'splineconvert' or
'spline_coefficient_500.txt' and 'splineconvert500' (the former provides
25 basis functions: enough for most things -- while the latter provides
500: definitely enough for anything)
'a' and 'b' are the shape and scale parameters of the ig distribution for
the observation error variance of the data. the observation error model is
white noise choose the mode of the ig distribution to match the noise in
the output dataset and the mean to broaden it some
'atau' and 'btau' are the parameters of the ig distribution for the 'tau
squared' parameter: the variance of the beta priors is iid normal mean
zero with variance equal to sigma squared times tau squared. tau squared
must be scaled in the prior such that the product of tau squared and sigma
squared scales with the output dataset
'tolerance' controls how hard the function builder tries to find a better
model once adding terms starts to show diminishing returns. a good
default is 3 -- large datasets could benefit from higher values
'draws' is the total number of draws from the posterior for each tested
model
'draws' is the total number of draws from the posterior for each tested
'threshav' is a threshold for proposing terms for elimination based on
their mean values (larger thresholds lead to more elimination)
'threshstda' is a threshold standard deviation -- expressed as a fraction
relative to the mean -- that pairs with 'threshav'.
terms with coefficients that are lower than 'threshav' and higher than
'threshstda' will be proposed for elimination (elimination will happen or not
based on relative BIC values)
'threshstdb' is a threshold standard deviation that is independent of the
mean value of the coefficient -- all with a standard deviation (fraction
relative to mean) exceeding
this value will be proposed for elimination
'gimmie' is a boolean causing the routine to return the most complex
model tried instead of the model with the optimum bic
'aic' is a boolean specifying the use of the aikaike information
criterion
"""
# Running emulator routine and visualization, fit model to data
betas, mtx, evs = model.fit(inputs, data)
"""
inputs:
'inputs' - normalzied inputs
'data' - results
outputs:
'betas' are a draw from the posterior distribution of coefficients: matrix, with
rows corresponding to draws and columns corresponding to terms in the GP
'mtx' is the basis function interaction matrix from the
best model: matrix, with rows corresponding to terms in the GP (and thus to the
columns of 'betas' and columns corresponding to inputs. a given entry in the
matrix gives the order of the basis function appearing in a given term in the GP.
all basis functions indicated on a given row are multiplied together.
a zero indicates no basis function from a given input is present in a given term
'ev' is a vector of BIC values from all of the models
evaluated
"""

meen, bounds, rmse = model.coverage3(inputs, data, draws, 1)
"""
inputs:
'inputs' - normalized test inputs
'data' - test set results
'draws' - number of beta models used from model.fit()
plotting binary - 1 = show plot, 0 = do not show plot
outputs:
'meen' - indexed predicted value
'bounds' - 95% confidence interval for each point
'rmse' - root mean square error
"""
from FoKL import FoKLRoutines
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


# ======================================================================================================================

# Inputs:
X_grid = np.loadtxt('X.csv',dtype=float,delimiter=',')
Y_grid = np.loadtxt('Y.csv',dtype=float,delimiter=',')

# Data:
Z_grid = np.loadtxt('DATA_nois.csv',dtype=float,delimiter=',')

# Reshaping grid matrices into vectors via fortran index order:
m, n = np.shape(X_grid) # = np.shape(Y_grid) = np.shape(Z_grid) = dimensions of grid
X = np.reshape(X_grid, (m*n,1), order='F')
Y = np.reshape(Y_grid, (m*n,1), order='F')
Z = np.reshape(Z_grid, (m*n,1), order='F')

# Initializing FoKL model with some user-defined hyperparameters (leaving others blank for default values):
model = FoKLRoutines.FoKL(a=9, b=0.01, atau=3, btau=4000, aic=True)

# Training FoKL model on a random selection of 100% (or 100%, 80%, 60%, etc.) of the dataset:
train_all = [1] # = [1, 0.8, 0.6] etc. if sweeping through the percentage of data to train on
betas_all = []
mtx_all = []
evs_all = []
meen_all = []
bounds_all = []
rmse_all = []
for train in train_all:

    print("\nCurrently fitting model to", train * 100, "% of data . . .")

    # Running emulator routine to fit model to training data as a function of the corresponding training inputs:
    betas, mtx, evs = model.fit([X, Y], Z, train=train)

    # Provide feedback to user before the figure from coverage3() pops up and pauses the code:
    print("\nDone! Please close the figure to continue.\n")

    # Evaluating and visualizing predicted values of data as a function of all inputs (train set plus test set):
    title = 'FoKL Model Trained on ' + str(train * 100) + '% of Data'
    meen, bounds, rmse = model.coverage3(plot='bounds', title=title, legend=1)

    # Store any values from iteration if performing additional post-processing or analysis:
    betas_all.append(betas)
    mtx_all.append(mtx)
    evs_all.append(evs)
    meen_all.append(meen)
    bounds_all.append(bounds)
    rmse_all.append(rmse)

    # Reset the model so that all attributes of the FoKL class are removed except for the hyperparameters:
    model.clear()

# ======================================================================================================================

# Post-processing:
print("\nThe results are as follows:")
for ii in range(len(train_all)):
    print("\n ", train_all[ii]*100, "% of Data:\n --> RMSE =", rmse_all[ii])
