From d050f26888dd5ef8cbf0bd96c9fc86e269aa9ae0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicholas=20Kr=C3=A4mer?= Date: Wed, 5 May 2021 15:43:55 +0200 Subject: [PATCH] Nail down stability of IBM with very high orders. (#404) * made car tracking problem higher-order and sqrt ready * wrote tests for very high order IBM filters * keyword arguments in discrete transition * fixed square-root detail in IBM and added tiny damping * really high order filtering test * simplified equivalent discretistaion code for IBM * removed damping --- .../zoo/filtsmooth/_filtsmooth_problems.py | 6 +- src/probnum/statespace/discrete_transition.py | 8 +-- src/probnum/statespace/integrator.py | 60 ++++++++++--------- .../test_gaussfiltsmooth/test_kalman.py | 34 +++++++++++ 4 files changed, 76 insertions(+), 32 deletions(-) diff --git a/src/probnum/problems/zoo/filtsmooth/_filtsmooth_problems.py b/src/probnum/problems/zoo/filtsmooth/_filtsmooth_problems.py index 2cb7c56c6..cc130e3bf 100644 --- a/src/probnum/problems/zoo/filtsmooth/_filtsmooth_problems.py +++ b/src/probnum/problems/zoo/filtsmooth/_filtsmooth_problems.py @@ -101,17 +101,21 @@ def car_tracking( measurement_matrix = np.eye(measurement_dim, model_dim) measurement_cov = measurement_variance * np.eye(measurement_dim) + measurement_cov_cholesky = np.sqrt(measurement_variance) * np.eye(measurement_dim) measurement_model = statespace.DiscreteLTIGaussian( state_trans_mat=measurement_matrix, shift_vec=np.zeros(measurement_dim), proc_noise_cov_mat=measurement_cov, + proc_noise_cov_cholesky=measurement_cov_cholesky, forward_implementation=forward_implementation, backward_implementation=backward_implementation, ) if initrv is None: initrv = randvars.Normal( - np.zeros(model_dim), measurement_variance * np.eye(model_dim) + np.zeros(model_dim), + measurement_variance * np.eye(model_dim), + cov_cholesky=np.sqrt(measurement_variance) * np.eye(model_dim), ) # Set up regression problem diff --git a/src/probnum/statespace/discrete_transition.py b/src/probnum/statespace/discrete_transition.py index de26305b5..9361d6218 100644 --- a/src/probnum/statespace/discrete_transition.py +++ b/src/probnum/statespace/discrete_transition.py @@ -501,10 +501,10 @@ def __init__( super().__init__( input_dim, output_dim, - lambda t: state_trans_mat, - lambda t: shift_vec, - lambda t: proc_noise_cov_mat, - lambda t: proc_noise_cov_cholesky, + state_trans_mat_fun=lambda t: state_trans_mat, + shift_vec_fun=lambda t: shift_vec, + proc_noise_cov_mat_fun=lambda t: proc_noise_cov_mat, + proc_noise_cov_cholesky_fun=lambda t: proc_noise_cov_cholesky, forward_implementation=forward_implementation, backward_implementation=backward_implementation, ) diff --git a/src/probnum/statespace/integrator.py b/src/probnum/statespace/integrator.py index 369c28f16..278012a65 100644 --- a/src/probnum/statespace/integrator.py +++ b/src/probnum/statespace/integrator.py @@ -199,39 +199,37 @@ def _dispmat(self): @cached_property def equivalent_discretisation_preconditioned(self): - """Discretised IN THE PRECONDITIONED SPACE.""" + """Discretised IN THE PRECONDITIONED SPACE. + + The preconditioned state transition is the flipped Pascal matrix. + The preconditioned process noise covariance is the flipped Hilbert matrix. + The shift is always zero. + + Reference: https://arxiv.org/abs/2012.10106 + """ + + state_transition_1d = np.flip( + scipy.linalg.pascal(self.ordint + 1, kind="lower", exact=False) + ) + state_transition = np.kron(np.eye(self.spatialdim), state_transition_1d) + process_noise_1d = np.flip(scipy.linalg.hilbert(self.ordint + 1)) + process_noise = np.kron(np.eye(self.spatialdim), process_noise_1d) empty_shift = np.zeros(self.spatialdim * (self.ordint + 1)) + + process_noise_cholesky_1d = np.linalg.cholesky(process_noise_1d) + process_noise_cholesky = np.kron( + np.eye(self.spatialdim), process_noise_cholesky_1d + ) + return discrete_transition.DiscreteLTIGaussian( - state_trans_mat=self._state_trans_mat, + state_trans_mat=state_transition, shift_vec=empty_shift, - proc_noise_cov_mat=self._proc_noise_cov_mat, - proc_noise_cov_cholesky=np.linalg.cholesky(self._proc_noise_cov_mat), + proc_noise_cov_mat=process_noise, + proc_noise_cov_cholesky=process_noise_cholesky, forward_implementation=self.forward_implementation, backward_implementation=self.backward_implementation, ) - @cached_property - def _state_trans_mat(self): - # Loop, but cached anyway - driftmat_1d = np.array( - [ - [ - scipy.special.binom(self.ordint - i, self.ordint - j) - for j in range(0, self.ordint + 1) - ] - for i in range(0, self.ordint + 1) - ] - ) - return np.kron(np.eye(self.spatialdim), driftmat_1d) - - @cached_property - def _proc_noise_cov_mat(self): - # Optimised with broadcasting - range = np.arange(0, self.ordint + 1) - denominators = 2.0 * self.ordint + 1.0 - range[:, None] - range[None, :] - proc_noise_cov_mat_1d = 1.0 / denominators - return np.kron(np.eye(self.spatialdim), proc_noise_cov_mat_1d) - def forward_rv( self, rv, @@ -307,7 +305,6 @@ def discretise(self, dt): Only present for user's convenience and to maintain a clean interface. Not used for forward_rv, etc.. """ - state_trans_mat = ( self.precon(dt) @ self.equivalent_discretisation_preconditioned.state_trans_mat @@ -319,10 +316,19 @@ def discretise(self, dt): @ self.precon(dt).T ) zero_shift = np.zeros(len(state_trans_mat)) + + # The Cholesky factor of the process noise covariance matrix of the IBM + # always exists, even for non-square root implementations. + proc_noise_cov_cholesky = ( + self.precon(dt) + @ self.equivalent_discretisation_preconditioned.proc_noise_cov_cholesky + ) + return discrete_transition.DiscreteLTIGaussian( state_trans_mat=state_trans_mat, shift_vec=zero_shift, proc_noise_cov_mat=proc_noise_cov_mat, + proc_noise_cov_cholesky=proc_noise_cov_cholesky, forward_implementation=self.forward_implementation, backward_implementation=self.forward_implementation, ) diff --git a/tests/test_filtsmooth/test_gaussfiltsmooth/test_kalman.py b/tests/test_filtsmooth/test_gaussfiltsmooth/test_kalman.py index 72a57162b..7cea68ab7 100644 --- a/tests/test_filtsmooth/test_gaussfiltsmooth/test_kalman.py +++ b/tests/test_filtsmooth/test_gaussfiltsmooth/test_kalman.py @@ -50,3 +50,37 @@ def test_info_dicts(setup): assert isinstance(info_dicts, list) assert len(posterior) == len(info_dicts) + + +def test_kalman_smoother_high_order_ibm(): + """The highest feasible order (without damping, which we dont use) is 11. + + If this test breaks, someone played with the stable square-root + implementations in discrete_transition: for instance, + solve_triangular() and cho_solve() must not be changed to inv()! + """ + regression_problem, statespace_components = filtsmooth_zoo.car_tracking( + model_ordint=11, + timespan=(0.0, 1e-3), + step=1e-5, + forward_implementation="sqrt", + backward_implementation="sqrt", + ) + truth = regression_problem.solution + + kalman = filtsmooth.Kalman( + statespace_components["dynamics_model"], + statespace_components["measurement_model"], + statespace_components["initrv"], + ) + + posterior, _ = kalman.filtsmooth(regression_problem) + + filtms = posterior.filtering_posterior.states.mean + smooms = posterior.states.mean + + filtms_rmse = np.mean(np.abs(filtms[:, :2] - truth[:, :2])) + smooms_rmse = np.mean(np.abs(smooms[:, :2] - truth[:, :2])) + obs_rmse = np.mean(np.abs(regression_problem.observations - truth[:, :2])) + + assert smooms_rmse < filtms_rmse < obs_rmse