Merge pull request #8 from jakekrell/main
v3.0.0
Showing 5 changed files with 1,095 additions and 612 deletions.
@@ -1,69 +1,80 @@

Before:

    import numpy as np
    import FoKL
    from FoKL import FoKLRoutines
    from FoKL import getKernels
    import matplotlib.pyplot as plt

    # Inputs:
    traininputs = np.loadtxt('traininputs.csv.txt', dtype=float, delimiter=',')
    traindata1 = np.loadtxt('traindata1.txt', dtype=float, delimiter=',')
    traindata2 = np.loadtxt('traindata2.txt', dtype=float, delimiter=',')
    y = np.loadtxt('y.txt', dtype=float, delimiter=',')
    utest = np.loadtxt('utest.csv', dtype=float, delimiter=',')

    relats_in = [1, 1, 1, 1, 1, 1]

    # Generating phis from the spline coefficients text file in emulator_python
    # combined with the splineconvert script:
    phis = getKernels.sp500()

    # Hyperparameters:
    a = 1000
    b = 1
    atau = 4
    btau = 0.6091
    tolerance = 3
    draws = 2000
    gimmie = False
    way3 = True
    threshav = 0
    threshstda = 0
    threshstdb = 100
    aic = False

    # Normalization constants from the training half of y:
    n, m = np.shape(y)
    norms1 = [np.min(y[0, 0:int(m/2)]), np.max(y[0, 0:int(m/2)])]
    norms2 = [np.min(y[1, 0:int(m/2)]), np.max(y[1, 0:int(m/2)])]
    norms = np.transpose([norms1, norms2])

    # Running emulator_Xin routine and visualization:
    model1 = FoKLRoutines.FoKL(phis, relats_in, a, b, atau, btau, tolerance, draws, gimmie, way3, threshav, threshstda, threshstdb, aic)
    model2 = FoKLRoutines.FoKL(phis, relats_in, a, b, atau, 1, tolerance, draws, gimmie, way3, threshav, threshstda, threshstdb, aic)

    betas1, mtx1, evs1 = model1.fit(traininputs, traindata1)
    betas2, mtx2, evs2 = model2.fit(traininputs, traindata2)

    # Keep only the last 1000 of the 2000 posterior draws:
    betas1 = betas1[1000:]
    betas2 = betas2[1000:]

    start = 4
    stop = 3750*4
    stepsize = 4
    used_inputs = [[1, 1, 1], [1, 1, 1]]
    ic = y[:, int(m/2)-1]  # initial conditions from the end of the training half

    T, Y = FoKLRoutines.FoKL.GP_Integrate([np.mean(betas1, axis=0), np.mean(betas2, axis=0)], [mtx1, mtx2], utest, norms, phis, start, stop, ic, stepsize, used_inputs)

    plt.plot(T, Y[0], T, y[0][3750:7500])
    plt.plot(T, Y[1], T, y[1][3750:7500])
After:

    from FoKL import FoKLRoutines
    from FoKL.GP_Integrate import GP_Integrate
    import numpy as np
    import matplotlib.pyplot as plt
    import warnings
    warnings.filterwarnings("ignore", category=UserWarning)


    # ==================================================================================================================

    # Inputs:
    traininputs = np.loadtxt('traininputs.txt', dtype=float, delimiter=',')
    traindata1 = np.loadtxt('traindata1.txt', dtype=float, delimiter=',')
    traindata2 = np.loadtxt('traindata2.txt', dtype=float, delimiter=',')
    traindata = [traindata1, traindata2]
    y = np.loadtxt('y.txt', dtype=float, delimiter=',')
    utest = np.loadtxt('utest.csv', dtype=float, delimiter=',')

    # User-defined constant hyperparameters (to override default values):
    relats_in = [1, 1, 1, 1, 1, 1]
    a = 1000
    b = 1
    draws = 2000
    way3 = True
    threshav = 0
    threshstda = 0
    threshstdb = 100

    # Initializing FoKL model with constant hypers:
    model = FoKLRoutines.FoKL(relats_in=relats_in, a=a, b=b, draws=draws, way3=way3, threshav=threshav, threshstda=threshstda, threshstdb=threshstdb)

    # User-defined variable hyperparameters (to iterate through either for different data or for sweeping the same data):
    btau = [0.6091, 1]

    # Iterating through datasets:
    betas = []
    mtx = []
    for ii in range(2):

        print("\nCurrently fitting model to dataset", int(ii+1), ". . .")

        # Updating model with current iteration of variable hyperparameters:
        model.btau = btau[ii]

        # Running emulator routine for current model/data:
        betas_i, mtx_i, _ = model.fit(traininputs, traindata[ii])

        print("Done!")

        # Store values for post-processing (i.e., GP integration):
        betas.append(betas_i[1000:])
        mtx.append(mtx_i)

        # Clear all attributes (except for hypers) so previous results do not influence the next iteration:
        model.clear()

    # ==================================================================================================================

    # Integrating with FoKL.GP_Integrate():

    phis = model.phis  # same for all models iterated through, so just grab value from most recent model

    n, m = np.shape(y)
    norms1 = [np.min(y[0, 0:int(m/2)]), np.max(y[0, 0:int(m/2)])]
    norms2 = [np.min(y[1, 0:int(m/2)]), np.max(y[1, 0:int(m/2)])]
    norms = np.transpose([norms1, norms2])

    start = 4
    stop = 3750*4
    stepsize = 4
    used_inputs = [[1, 1, 1], [1, 1, 1]]
    ic = y[:, int(m/2)-1]

    T, Y = GP_Integrate([np.mean(betas[0], axis=0), np.mean(betas[1], axis=0)], [mtx[0], mtx[1]], utest, norms, phis, start, stop, ic, stepsize, used_inputs)

    plt.figure()
    plt.plot(T, Y[0], T, y[0][3750:7500])
    plt.plot(T, Y[1], T, y[1][3750:7500])
    plt.show()
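The updated example checks the integrated trajectories only by eye. As a quick quantitative follow-up, the trajectories returned by GP_Integrate can be scored against the held-out half of y. Below is a minimal sketch, assuming the variables T, Y, and y produced by the script above, with slice bounds copied from its plotting calls:

```python
import numpy as np

# RMSE of each integrated state against the test half of y (columns 3750:7500),
# i.e., the same series the script plots. Variable names reuse the script's.
for i in range(2):
    pred = np.asarray(Y[i]).flatten()
    true = np.asarray(y[i][3750:7500]).flatten()
    print(f"state {i + 1}: RMSE = {np.sqrt(np.mean((pred - true) ** 2)):.4g}")
```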
@@ -1,150 +1,64 @@

Before:

    import numpy as np
    import FoKL
    from FoKL import FoKLRoutines
    from FoKL import getKernels

    # Setting input parameters

    # Inputs:
    X = np.loadtxt('X.csv', dtype=float, delimiter=',')
    Y = np.loadtxt('Y.csv', dtype=float, delimiter=',')

    m, n = np.shape(X)  # reading dimensions of input variables
    X_reshape = np.reshape(X, (m*n, 1), order='F')  # reshaping, fortran index order

    m, n = np.shape(Y)
    Y_reshape = np.reshape(Y, (m*n, 1), order='F')

    inputs = []
    for i in range(len(X_reshape)):
        inputs.append([float(X_reshape[i]), float(Y_reshape[i])])

    # Data:
    data_load = np.loadtxt('DATA_nois.csv', dtype=float, delimiter=',')
    data = np.reshape(data_load, (m*n, 1), order='F')

    phis = getKernels.sp500()

    # Hyperparameters:
    a = 9
    b = 0.01
    atau = 3
    btau = 4000
    tolerance = 3
    relats_in = []
    draws = 1000
    gimmie = False
    way3 = False
    threshav = 0.05
    threshstda = 0.5
    threshstdb = 2
    aic = True

    # Define model:
    model = FoKLRoutines.FoKL(phis, relats_in, a, b, atau, btau, tolerance, draws, gimmie, way3, threshav, threshstda, threshstdb, aic)
    """
    initialization inputs:

    'relats' is a boolean matrix indicating which terms should be excluded
    from model building; for instance, if a certain main effect should be
    excluded, 'relats' will include a row with a 1 in the column for that input
    and zeros elsewhere; if a certain two-way interaction should be excluded,
    there should be a row with ones in those columns and zeros elsewhere.
    To exclude no terms, 'relats = np.array([[0]])'. An example of excluding
    the first input's main effect and its interaction with the third input for
    a case with three total inputs is: 'relats = np.array([[1,0,0],[1,0,1]])'

    'phis' is a data structure with the spline coefficients for the basis
    functions, built with 'spline_coefficient.txt' and 'splineconvert' or
    'spline_coefficient_500.txt' and 'splineconvert500' (the former provides
    25 basis functions: enough for most things -- while the latter provides
    500: definitely enough for anything)

    'a' and 'b' are the shape and scale parameters of the IG distribution for
    the observation error variance of the data. The observation error model is
    white noise. Choose the mode of the IG distribution to match the noise in
    the output dataset and the mean to broaden it some.

    'atau' and 'btau' are the parameters of the IG distribution for the 'tau
    squared' parameter: the variance of the beta priors is iid normal mean
    zero with variance equal to sigma squared times tau squared. Tau squared
    must be scaled in the prior such that the product of tau squared and sigma
    squared scales with the output dataset.

    'tolerance' controls how hard the function builder tries to find a better
    model once adding terms starts to show diminishing returns. A good
    default is 3 -- large datasets could benefit from higher values.

    'draws' is the total number of draws from the posterior for each tested
    model.

    'threshav' is a threshold for proposing terms for elimination based on
    their mean values (larger thresholds lead to more elimination).

    'threshstda' is a threshold standard deviation -- expressed as a fraction
    relative to the mean -- that pairs with 'threshav'; terms with mean
    coefficients lower than 'threshav' and relative standard deviations higher
    than 'threshstda' will be proposed for elimination (elimination will
    happen or not based on relative BIC values).

    'threshstdb' is a threshold standard deviation that is independent of the
    mean value of the coefficient -- all terms with a standard deviation
    (fraction relative to mean) exceeding this value will be proposed for
    elimination.

    'gimmie' is a boolean causing the routine to return the most complex
    model tried instead of the model with the optimum BIC.

    'aic' is a boolean specifying the use of the Akaike information
    criterion.
    """

    # Running emulator routine and visualization; fit model to data:
    betas, mtx, evs = model.fit(inputs, data)
    """
    inputs:
        'inputs' - normalized inputs
        'data' - results

    outputs:

    'betas' are draws from the posterior distribution of coefficients: matrix,
    with rows corresponding to draws and columns corresponding to terms in
    the GP.

    'mtx' is the basis function interaction matrix from the best model:
    matrix, with rows corresponding to terms in the GP (and thus to the
    columns of 'betas') and columns corresponding to inputs. A given entry in
    the matrix gives the order of the basis function appearing in a given term
    in the GP. All basis functions indicated on a given row are multiplied
    together. A zero indicates no basis function from a given input is present
    in a given term.

    'evs' is a vector of BIC values from all of the models evaluated.
    """

    meen, bounds, rmse = model.coverage3(inputs, data, draws, 1)
    """
    inputs:
        'inputs' - normalized test inputs
        'data' - test set results
        'draws' - number of beta models used from model.fit()
        plotting binary - 1 = show plot, 0 = do not show plot

    outputs:
        'meen' - indexed predicted value
        'bounds' - 95% confidence interval for each point
        'rmse' - root mean square error
    """
After:

    from FoKL import FoKLRoutines  # needed for FoKLRoutines.FoKL below; missing from the original listing
    import numpy as np
    import warnings
    warnings.filterwarnings("ignore", category=UserWarning)


    # ==================================================================================================================

    # Inputs:
    X_grid = np.loadtxt('X.csv', dtype=float, delimiter=',')
    Y_grid = np.loadtxt('Y.csv', dtype=float, delimiter=',')

    # Data:
    Z_grid = np.loadtxt('DATA_nois.csv', dtype=float, delimiter=',')

    # Reshaping grid matrices into vectors via fortran index order:
    m, n = np.shape(X_grid)  # = np.shape(Y_grid) = np.shape(Z_grid) = dimensions of grid
    X = np.reshape(X_grid, (m*n, 1), order='F')
    Y = np.reshape(Y_grid, (m*n, 1), order='F')
    Z = np.reshape(Z_grid, (m*n, 1), order='F')

    # Initializing FoKL model with some user-defined hyperparameters (leaving others blank for default values):
    model = FoKLRoutines.FoKL(a=9, b=0.01, atau=3, btau=4000, aic=True)

    # Training FoKL model on a random selection of 100% (or 100%, 80%, 60%, etc.) of the dataset:
    train_all = [1]  # = [1, 0.8, 0.6] etc. if sweeping through the percentage of data to train on
    betas_all = []
    mtx_all = []
    evs_all = []
    meen_all = []
    bounds_all = []
    rmse_all = []
    for train in train_all:

        print("\nCurrently fitting model to", train * 100, "% of data . . .")

        # Running emulator routine to fit model to training data as a function of the corresponding training inputs:
        betas, mtx, evs = model.fit([X, Y], Z, train=train)

        # Provide feedback to user before the figure from coverage3() pops up and pauses the code:
        print("\nDone! Please close the figure to continue.\n")

        # Evaluating and visualizing predicted values of data as a function of all inputs (train set plus test set):
        title = 'FoKL Model Trained on ' + str(train * 100) + '% of Data'
        meen, bounds, rmse = model.coverage3(plot='bounds', title=title, legend=1)

        # Store any values from iteration if performing additional post-processing or analysis:
        betas_all.append(betas)
        mtx_all.append(mtx)
        evs_all.append(evs)
        meen_all.append(meen)
        bounds_all.append(bounds)
        rmse_all.append(rmse)

        # Reset the model so that all attributes of the FoKL class are removed except for the hyperparameters:
        model.clear()

    # ==================================================================================================================

    # Post-processing:
    print("\nThe results are as follows:")
    for ii in range(len(train_all)):
        print("\n ", train_all[ii] * 100, "% of Data:\n --> RMSE =", rmse_all[ii])
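Because the 'mtx' description in the docstring above is dense, a small worked example may help. The matrix below is hypothetical, chosen only to illustrate the row/column convention; b1, b2, ... denote the spline basis functions of an input:

```python
import numpy as np

# Hypothetical interaction matrix for a 2-input model (values are illustrative):
# each row is one GP term; each column is one input; an entry k means the k-th
# basis function of that input appears in the term (0 = input not present).
mtx = np.array([[1, 0],   # term 1: b1(input 1) only
                [0, 2],   # term 2: b2(input 2) only
                [1, 1]])  # term 3: b1(input 1) * b1(input 2), a two-way interaction

for i, row in enumerate(mtx):
    factors = [f"b{int(k)}(input {j + 1})" for j, k in enumerate(row) if k > 0]
    print(f"term {i + 1} =", " * ".join(factors))
```

Each row's basis functions multiply together to form one term of the GP, and the posterior draws in the corresponding column of 'betas' weight that term.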