#14: AEP GPR/C done, see updated examples

thangbui · Mar 28, 2017 · 6fee08c · 6fee08c
1 parent 0a515d7
commit 6fee08c
Show file tree

Hide file tree

Showing 5 changed files with 459 additions and 37 deletions.
diff --git a/examples/gpr_aep_examples.py b/examples/gpr_aep_examples.py
@@ -1,18 +1,20 @@
 print "importing stuff..."
 import numpy as np
 import pdb
+import matplotlib
+matplotlib.use('Agg')
 import matplotlib.pylab as plt
 from scipy import special
 
-# from .datautils import step, spiral
-# from .context import aep
+from .datautils import step, spiral
+from .context import aep
 
-import sys
-import os
-sys.path.insert(0, os.path.abspath(
-    os.path.join(os.path.dirname(__file__), '..')))
-import geepee.aep_models as aep
-from datautils import step, spiral
+# import sys
+# import os
+# sys.path.insert(0, os.path.abspath(
+#     os.path.join(os.path.dirname(__file__), '..')))
+# import geepee.aep_models as aep
+# from datautils import step, spiral
 
 
 def run_regression_1D():
@@ -48,6 +50,7 @@ def plot(m):
 	plot(model)
 	plt.show()
 
+
 def run_banana():
 
 	def gridParams():
@@ -98,6 +101,86 @@ def plot(m):
 	plt.show()
 
 
+def run_regression_1D_stoc():
+	np.random.seed(42)
+
+	print "create dataset ..."
+	N = 500
+	X = np.random.rand(N,1)
+	Y = np.sin(12*X) + 0.5*np.cos(25*X) + np.random.randn(N,1)*0.2
+	# plt.plot(X, Y, 'kx', mew=2)
+
+	def plot(m):
+		xx = np.linspace(-0.5, 1.5, 100)[:,None]
+		mean, var = m.predict_f(xx)
+		zu = m.sgp_layer.zu
+		mean_u, var_u = m.predict_f(zu)
+		plt.figure()
+		plt.plot(X, Y, 'kx', mew=2)
+		plt.plot(xx, mean, 'b', lw=2)
+		plt.fill_between(
+			xx[:, 0], 
+			mean[:, 0] - 2*np.sqrt(var[:, 0]), 
+			mean[:, 0] + 2*np.sqrt(var[:, 0]), 
+			color='blue', alpha=0.2)
+		plt.errorbar(zu, mean_u, yerr=2*np.sqrt(var_u), fmt='ro')
+		plt.xlim(-0.1, 1.1)
+
+	# inference
+	print "create model and optimize ..."
+	M = 20
+	model = aep.SGPR(X, Y, M, lik='Gaussian')
+	model.optimise(method='adam', alpha=0.2, maxiter=100000, mb_size=3*M, adam_lr=0.001)
+	plot(model)
+	plt.show()
+	plt.savefig('/tmp/reg_1D_stoc.pdf')
+
+
+def run_banana_stoc():
+
+	def gridParams():
+		mins = [-3.25,-2.85 ]
+		maxs = [ 3.65, 3.4 ]
+		nGrid = 50
+		xspaced = np.linspace( mins[0], maxs[0], nGrid )
+		yspaced = np.linspace( mins[1], maxs[1], nGrid )
+		xx, yy = np.meshgrid( xspaced, yspaced )
+		Xplot = np.vstack((xx.flatten(),yy.flatten())).T
+		return mins, maxs, xx, yy, Xplot
+
+	def plot(m):
+		col1 = '#0172B2'
+		col2 = '#CC6600'
+		mins, maxs, xx, yy, Xplot = gridParams()
+		mf, vf = m.predict_f(Xplot)
+		plt.figure()
+		plt.plot(
+			Xtrain[:,0][Ytrain[:,0]==1], 
+			Xtrain[:,1][Ytrain[:,0]==1], 
+			'o', color=col1, mew=0, alpha=0.5)
+		plt.plot(
+			Xtrain[:,0][Ytrain[:,0]==-1], 
+			Xtrain[:,1][Ytrain[:,0]==-1], 
+			'o', color=col2, mew=0, alpha=0.5)
+		zu = m.sgp_layer.zu
+		plt.plot(zu[:,0], zu[:,1], 'ro', mew=0, ms=4)
+		plt.contour(
+			xx, yy, mf.reshape(*xx.shape), [0], 
+			colors='k', linewidths=1.8, zorder=100)
+
+	Xtrain = np.loadtxt(
+		'./examples/data/banana_X_train.txt', delimiter=',')
+	Ytrain = np.loadtxt(
+		'./examples/data/banana_Y_train.txt', delimiter=',').reshape(-1,1)
+	Ytrain[np.where(Ytrain==0)[0]] = -1
+	M = 30
+	model = aep.SGPR(Xtrain, Ytrain, M, lik='Probit')
+	model.optimise(method='adam', alpha=0.5, maxiter=100000, mb_size=3*M, adam_lr=0.005)
+	plot(model)
+	plt.show()
+	plt.savefig('/tmp/cla_probit_stoc.pdf')
+
+
 def run_step_1D():
 	np.random.seed(42)
 
@@ -233,4 +316,7 @@ def run_boston():
 	# run_banana()
 	# run_step_1D()
 	# run_spiral()
-	run_boston()
+	# run_boston()
+
+	run_regression_1D_stoc()
+	run_banana_stoc()
diff --git a/geepee/aep_models.py b/geepee/aep_models.py
@@ -9,6 +9,7 @@
 import pdb
 from scipy.cluster.vq import kmeans2
 import pprint
+from utils import profile
 
 pp = pprint.PrettyPrinter(indent=4)
 
@@ -85,7 +86,6 @@ def compute_phi_cavity(self):
             self.Suhat, self.muhat))
         return phi_cavity
 
-    # @profile
     def compute_phi(self, alpha=1.0):
         N = self.N
         scale_post = N * 1.0 / alpha - 1.0
@@ -228,7 +228,7 @@ def backprop_grads_lvm(self, m, v, dm, dv, psi1, psi2, mx, vx, alpha=1.0):
 
         return grad_hyper, grad_input
 
-    # @profile
+    @profile
     def backprop_grads_reg(self, m, v, dm, dv, kfu, x, alpha=1.0):
         N = self.N
         M = self.M
@@ -637,7 +637,7 @@ def get_hypers(self):
 
     def optimise(
         self, method='L-BFGS-B', tol=None, reinit_hypers=True, 
-        callback=None, maxiter=1000, alpha=0.5, adam_lr=0.05, **kargs):
+        callback=None, maxiter=1000, alpha=0.5, mb_size=None, adam_lr=0.001, **kargs):
         self.updated = False
 
         if reinit_hypers:
@@ -648,15 +648,18 @@ def optimise(
         init_params_vec, params_args = flatten_dict(init_params_dict)
 
         N = self.N
-        idxs = np.arange(N)
-
         try:
             if method.lower() == 'adam':
+                # TODO: do replacement sampling for now since this is faster
+                # alternative would be
+                # idxs = np.random.permutation(N)[:mb_size]
+                idxs = np.random.randint(N, size=mb_size)
                 results = adam(objective_wrapper, init_params_vec,
                                step_size=adam_lr,
                                maxiter=maxiter,
                                args=(params_args, self, idxs, alpha))
             else:
+                idxs = np.arange(N)
                 options = {'maxiter': maxiter, 'disp': True, 'gtol': 1e-8}
                 results = minimize(
                     fun=objective_wrapper,
@@ -667,7 +670,6 @@ def optimise(
                     tol=tol,
                     callback=callback,
                     options=options)
-
         except KeyboardInterrupt:
             print 'Caught KeyboardInterrupt ...'
             results = []
@@ -951,7 +953,7 @@ def __init__(self, x_train, y_train, no_pseudo, lik='Gaussian'):
         else:
             raise NotImplementedError('likelihood not implemented')
 
-    # @profile
+    @profile
     def objective_function(self, params, idxs, alpha=1.0):
         N = self.N
         xb = self.x_train[idxs, :]

diff --git a/geepee/tools.py b/geepee/tools.py
@@ -13,22 +13,6 @@ def inverse_sigmoid(x):
     return np.log(x / (1 - x))
 
 
-def adam(func, init_params, callback=None, maxiter=1000,
-         step_size=0.001, b1=0.9, b2=0.999, eps=1e-8, args=None):
-    """Adam as described in http://arxiv.org/pdf/1412.6980.pdf."""
-    x = init_params
-    m = np.zeros_like(x)
-    v = np.zeros_like(x)
-    for i in range(maxiter):
-        f, g = func(x, *args)
-        m = (1 - b1) * g + b1 * m  # First  moment estimate.
-        v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
-        mhat = m / (1 - b1**(i + 1))    # Bias correction.
-        vhat = v / (1 - b2**(i + 1))
-        x = x - step_size * mhat / (np.sqrt(vhat) + eps)
-    return x
-
-
 def make_batches(N_data, batch_size):
     return [slice(i, min(i + batch_size, N_data)) for i in range(0, N_data, batch_size)]
 

diff --git a/geepee/utils.py b/geepee/utils.py
@@ -1,7 +1,14 @@
 import numpy as np
 import scipy.linalg as spla
+import __builtin__
 
 
+try:
+    profile = __builtin__.profile
+except AttributeError:
+    # No line profiler, provide a pass-through version
+    def profile(func): return func
+
 def chol2inv(chol):
     return spla.cho_solve((chol, False), np.eye(chol.shape[0]))
 
@@ -61,4 +68,24 @@ def unflatten_dict(params, params_args):
     params_dict = {}
     for i, key in enumerate(sorted(keys)):
         params_dict[key] = np.reshape(vals[i], shapes[key])
-    return params_dict
+    return params_dict
+
+
+def adam(func, init_params, callback=None, maxiter=1000,
+         step_size=0.001, b1=0.9, b2=0.999, eps=1e-8, args=None):
+    """Adam as described in http://arxiv.org/pdf/1412.6980.pdf."""
+    x = init_params
+    m = np.zeros_like(x)
+    v = np.zeros_like(x)
+    for i in range(maxiter):
+        f, g = func(x, *args)
+        if i % 100 == 0:
+            print 'iter %d \t obj %.3f' % (i, f)
+        if callback: callback(x, i, g)
+        m = (1 - b1) * g + b1 * m  # First  moment estimate.
+        v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
+        mhat = m / (1 - b1**(i + 1))    # Bias correction.
+        vhat = v / (1 - b2**(i + 1))
+        x = x - step_size * mhat / (np.sqrt(vhat) + eps)
+    return x
+