
Commit

Merge pull request #8 from jakekrell/main
v3.0.0
dmebane authored Nov 2, 2023
2 parents 1443aec + ab40e76 commit 7013a0d
Showing 5 changed files with 1,095 additions and 612 deletions.
149 changes: 80 additions & 69 deletions Examples/GP Integrate/GP_integrate_example.py
@@ -1,69 +1,80 @@
import numpy as np
import FoKL
from FoKL import FoKLRoutines
from FoKL import getKernels
import matplotlib.pyplot as plt


# inputs
traininputs = np.loadtxt('traininputs.csv.txt',dtype=float,delimiter=',')
traindata1 = np.loadtxt('traindata1.txt',dtype=float,delimiter=',')
traindata2 = np.loadtxt('traindata2.txt',dtype=float,delimiter=',')
y = np.loadtxt('y.txt',dtype=float,delimiter=',')
utest = np.loadtxt('utest.csv',dtype=float,delimiter=',')
# relats_in
relats_in = [1,1,1,1,1,1]

# generating phis from the spline coefficients text file in emulator_python
# combined with the splineconvert script

phis = getKernels.sp500()

# a
a = 1000

# b
b = 1

# atau
atau = 4

# btau
btau = 0.6091

# tolerance
tolerance = 3

# draws
draws = 2000

gimmie = False
way3 = True
threshav = 0
threshstda = 0
threshstdb = 100
aic = False
n,m = np.shape(y)
norms1 = [np.min(y[0,0:int(m/2)]),np.max(y[0,0:int(m/2)])]
norms2 = [np.min(y[1,0:int(m/2)]),np.max(y[1,0:int(m/2)])]
norms = np.transpose([norms1,norms2])
# Running emulator_Xin routine and visualization
model1 = FoKLRoutines.FoKL(phis, relats_in, a, b, atau, btau, tolerance, draws, gimmie, way3, threshav, threshstda, threshstdb, aic)
model2 = FoKLRoutines.FoKL(phis, relats_in, a, b, atau, 1, tolerance, draws, gimmie, way3, threshav, threshstda, threshstdb, aic)

betas1, mtx1, evs1 = model1.fit(traininputs, traindata1)
betas2, mtx2, evs2 = model2.fit(traininputs, traindata2)

betas1 = betas1[1000:]
betas2 = betas2[1000:]

start = 4
stop = 3750*4
stepsize = 4
used_inputs = [[1,1,1],[1,1,1]]
ic = y[:,int(m/2)-1]

T,Y = FoKLRoutines.FoKL.GP_Integrate([np.mean(betas1,axis=0),np.mean(betas2,axis=0)], [mtx1,mtx2], utest, norms, phis, start, stop, ic, stepsize, used_inputs)

plt.plot(T,Y[0],T,y[0][3750:7500])
plt.plot(T,Y[1],T,y[1][3750:7500])
from FoKL import FoKLRoutines
from FoKL.GP_Integrate import GP_Integrate
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


# ======================================================================================================================

# Inputs:
traininputs = np.loadtxt('traininputs.txt',dtype=float,delimiter=',')
traindata1 = np.loadtxt('traindata1.txt',dtype=float,delimiter=',')
traindata2 = np.loadtxt('traindata2.txt',dtype=float,delimiter=',')
traindata = [traindata1, traindata2]
y = np.loadtxt('y.txt',dtype=float,delimiter=',')
utest = np.loadtxt('utest.csv',dtype=float,delimiter=',')

# User-defined constant hyperparameters (to override default values):
relats_in = [1,1,1,1,1,1]
a = 1000
b = 1
draws = 2000
way3 = True
threshav = 0
threshstda = 0
threshstdb = 100

# Initializing FoKL model with constant hypers:
model = FoKLRoutines.FoKL(relats_in=relats_in, a=a, b=b, draws=draws, way3=way3, threshav=threshav, threshstda=threshstda, threshstdb=threshstdb)

# User-defined variable hyperparameters (to iterate through either for different data or for sweeping the same data):
btau = [0.6091, 1]

# Iterating through datasets:
betas = []
mtx = []
for ii in range(2):

    print("\nCurrently fitting model to dataset", int(ii+1), ". . .")

    # Updating model with current iteration of variable hyperparameters:
    model.btau = btau[ii]

    # Running emulator routine for current model/data:
    betas_i, mtx_i, _ = model.fit(traininputs, traindata[ii])

    print("Done!")

    # Store values for post-processing (i.e., GP integration):
    betas.append(betas_i[1000:])
    mtx.append(mtx_i)

    # Clear all attributes (except for hypers) so previous results do not influence the next iteration:
    model.clear()

# ======================================================================================================================

# Integrating with FoKL.GP_Integrate():

phis = model.phis # same for all models iterated through, so just grab value from most recent model

# Min/max of the training half of each output (the rows of 'norms'), evidently used by GP_Integrate to map
# the normalized GP outputs back to the scale of 'y':
n, m = np.shape(y)
norms1 = [np.min(y[0, 0:int(m/2)]), np.max(y[0, 0:int(m/2)])]
norms2 = [np.min(y[1, 0:int(m/2)]), np.max(y[1, 0:int(m/2)])]
norms = np.transpose([norms1, norms2])

start = 4
stop = 3750*4
stepsize = 4
used_inputs = [[1,1,1],[1,1,1]]
ic = y[:,int(m/2)-1]

T, Y = GP_Integrate([np.mean(betas[0], axis=0), np.mean(betas[1], axis=0)],
                    [mtx[0], mtx[1]], utest, norms, phis,
                    start, stop, ic, stepsize, used_inputs)

plt.figure()
plt.plot(T,Y[0],T,y[0][3750:7500])
plt.plot(T,Y[1],T,y[1][3750:7500])
plt.show()
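
# A hedged follow-up sketch (not part of the original example): the plots above compare the integrated
# trajectories in Y against the held-out half of 'y', so an RMSE per output quantifies the same comparison,
# assuming T aligns sample-for-sample with y[:, 3750:7500]:
rmse0 = np.sqrt(np.mean((Y[0] - y[0][3750:7500])**2))
rmse1 = np.sqrt(np.mean((Y[1] - y[1][3750:7500])**2))
print("RMSE of integrated trajectories:", rmse0, rmse1)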

212 changes: 63 additions & 149 deletions Examples/Sigmoid/SigmoidTest.py
@@ -1,150 +1,64 @@
import numpy as np
import FoKL
from FoKL import FoKLRoutines
from FoKL import getKernels

# Setting input parameters

# inputs
X = np.loadtxt('X.csv',dtype=float,delimiter=',')
Y = np.loadtxt('Y.csv',dtype=float,delimiter=',')

m, n = np.shape(X) # reading dimensions of input variables
X_reshape = np.reshape(X, (m*n,1), order='F') # reshaping, fortran index order

m, n = np.shape(Y)
Y_reshape = np.reshape(Y, (m*n,1), order='F')

inputs = []

for i in range(len(X_reshape)):
    inputs.append([float(X_reshape[i]), float(Y_reshape[i])])

# data
data_load = np.loadtxt('DATA_nois.csv',dtype=float,delimiter=',')

data = np.reshape(data_load, (m*n,1), order='F')

phis = getKernels.sp500()

# a
a = 9

# b
b = 0.01

# atau
atau = 3

# btau
btau = 4000

# tolerance
tolerance = 3
relats_in = []
# draws
draws = 1000

gimmie = False
way3 = False
threshav = 0.05
threshstda = 0.5
threshstdb = 2
aic = True

# define model
model = FoKLRoutines.FoKL(phis, relats_in, a, b, atau, btau, tolerance, draws, gimmie, way3, threshav, threshstda, threshstdb, aic)
"""
initialization inputs:
'relats' is a boolean matrix indicating which terms should be excluded
from the model building; for instance if a certain main effect should be
excluded relats will include a row with a 1 in the column for that input
and zeros elsewhere; if a certain two way interaction should be excluded
there should be a row with ones in those columns and zeros elsewhere
to exclude no terms 'relats = np.array([[0]])'. An example of excluding
the first input main effect and its interaction with the third input for
a case with three total inputs is:'relats = np.array([[1,0,0],[1,0,1]])'
'phis' are a data structure with the spline coefficients for the basis
functions, built with 'spline_coefficient.txt' and 'splineconvert' or
'spline_coefficient_500.txt' and 'splineconvert500' (the former provides
25 basis functions: enough for most things -- while the latter provides
500: definitely enough for anything)
'a' and 'b' are the shape and scale parameters of the ig distribution for
the observation error variance of the data. the observation error model is
white noise choose the mode of the ig distribution to match the noise in
the output dataset and the mean to broaden it some
'atau' and 'btau' are the parameters of the ig distribution for the 'tau
squared' parameter: the variance of the beta priors is iid normal mean
zero with variance equal to sigma squared times tau squared. tau squared
must be scaled in the prior such that the product of tau squared and sigma
squared scales with the output dataset
'tolerance' controls how hard the function builder tries to find a better
model once adding terms starts to show diminishing returns. a good
default is 3 -- large datasets could benefit from higher values
'draws' is the total number of draws from the posterior for each tested
model
'draws' is the total number of draws from the posterior for each tested
'threshav' is a threshold for proposing terms for elimination based on
their mean values (larger thresholds lead to more elimination)
'threshstda' is a threshold standard deviation -- expressed as a fraction
relative to the mean -- that pairs with 'threshav'.
terms with coefficients that are lower than 'threshav' and higher than
'threshstda' will be proposed for elimination (elimination will happen or not
based on relative BIC values)
'threshstdb' is a threshold standard deviation that is independent of the
mean value of the coefficient -- all with a standard deviation (fraction
relative to mean) exceeding
this value will be proposed for elimination
'gimmie' is a boolean causing the routine to return the most complex
model tried instead of the model with the optimum bic
'aic' is a boolean specifying the use of the aikaike information
criterion
"""
# Running emulator routine and visualization, fit model to data
betas, mtx, evs = model.fit(inputs, data)
"""
inputs:
'inputs' - normalzied inputs
'data' - results
outputs:
'betas' are a draw from the posterior distribution of coefficients: matrix, with
rows corresponding to draws and columns corresponding to terms in the GP
'mtx' is the basis function interaction matrix from the
best model: matrix, with rows corresponding to terms in the GP (and thus to the
columns of 'betas' and columns corresponding to inputs. a given entry in the
matrix gives the order of the basis function appearing in a given term in the GP.
all basis functions indicated on a given row are multiplied together.
a zero indicates no basis function from a given input is present in a given term
'ev' is a vector of BIC values from all of the models
evaluated
"""

meen, bounds, rmse = model.coverage3(inputs, data, draws, 1)
"""
inputs:
'inputs' - normalized test inputs
'data' - test set results
'draws' - number of beta models used from model.fit()
plotting binary - 1 = show plot, 0 = do not show plot
outputs:
'meen' - indexed predicted value
'bounds' - 95% confidence interval for each point
'rmse' - root mean square error
"""
from FoKL import FoKLRoutines
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


# ======================================================================================================================

# Inputs:
X_grid = np.loadtxt('X.csv',dtype=float,delimiter=',')
Y_grid = np.loadtxt('Y.csv',dtype=float,delimiter=',')

# Data:
Z_grid = np.loadtxt('DATA_nois.csv',dtype=float,delimiter=',')

# Reshaping grid matrices into vectors via fortran index order:
m, n = np.shape(X_grid) # = np.shape(Y_grid) = np.shape(Z_grid) = dimensions of grid
X = np.reshape(X_grid, (m*n,1), order='F')
Y = np.reshape(Y_grid, (m*n,1), order='F')
Z = np.reshape(Z_grid, (m*n,1), order='F')

# Initializing FoKL model with some user-defined hyperparameters (leaving others blank for default values):
model = FoKLRoutines.FoKL(a=9, b=0.01, atau=3, btau=4000, aic=True)

# Training FoKL model on a random selection of 100% (or 100%, 80%, 60%, etc.) of the dataset:
train_all = [1] # = [1, 0.8, 0.6] etc. if sweeping through the percentage of data to train on
betas_all = []
mtx_all = []
evs_all = []
meen_all = []
bounds_all = []
rmse_all = []
for train in train_all:

    print("\nCurrently fitting model to", train * 100, "% of data . . .")

    # Running emulator routine to fit model to training data as a function of the corresponding training inputs:
    betas, mtx, evs = model.fit([X, Y], Z, train=train)

    # Provide feedback to user before the figure from coverage3() pops up and pauses the code:
    print("\nDone! Please close the figure to continue.\n")

    # Evaluating and visualizing predicted values of data as a function of all inputs (train set plus test set):
    title = 'FoKL Model Trained on ' + str(train * 100) + '% of Data'
    meen, bounds, rmse = model.coverage3(plot='bounds', title=title, legend=1)

    # Store any values from iteration if performing additional post-processing or analysis:
    betas_all.append(betas)
    mtx_all.append(mtx)
    evs_all.append(evs)
    meen_all.append(meen)
    bounds_all.append(bounds)
    rmse_all.append(rmse)

    # Reset the model so that all attributes of the FoKL class are removed except for the hyperparameters:
    model.clear()

# ======================================================================================================================

# Post-processing:
print("\nThe results are as follows:")
for ii in range(len(train_all)):
    print("\n ", train_all[ii]*100, "% of Data:\n --> RMSE =", rmse_all[ii])
