LLNL · tupek2 · Oct 17, 2024 · Oct 24, 2024 · Oct 24, 2024 · Oct 25, 2024
diff --git a/src/serac/numerics/equation_solver.cpp b/src/serac/numerics/equation_solver.cpp
@@ -19,6 +19,20 @@
 
 namespace serac {
 
+/// @brief A simple exception class for recording Newton and TrustRegion failures
+class SolveException : public std::exception {
+public:
+  /// constructor
+  SolveException(const std::string& message) : msg(message) {}
+
+  /// what is message
+  const char* what() const noexcept override { return msg.c_str(); }
+
+private:
+  /// message string
+  std::string msg;
+};
+
 /// Newton solver with a 2-way line-search.  Reverts to regular Newton if max_line_search_iterations is set to 0.
 class NewtonSolver : public mfem::NewtonSolver {
 protected:
@@ -108,8 +122,11 @@ class NewtonSolver : public mfem::NewtonSolver {
       }
 
       if (norm != norm) {
-        mfem::out << "Initial residual for Newton iteration is undefined/nan." << std::endl;
-        mfem::out << "Newton: No convergence!\n";
+        if (print_options.first_and_last) {
+          mfem::out << "Initial residual for Newton iteration is undefined/nan." << std::endl;
+          mfem::out << "Newton: No convergence!\n";
+        }
+        throw SolveException("Initial residual for Newton iteration is undefined/nan.");
         return;
       }
 
@@ -196,9 +213,14 @@ class NewtonSolver : public mfem::NewtonSolver {
     if (print_options.summary || (!converged && print_options.warnings) || print_options.first_and_last) {
       mfem::out << "Newton: Number of iterations: " << final_iter << '\n' << "   ||r|| = " << final_norm << '\n';
     }
+
     if (!converged && (print_options.summary || print_options.warnings)) {
       mfem::out << "Newton: No convergence!\n";
     }
+
+    if (!converged) {
+      throw SolveException("Newton algorithm failed to converge.");
+    }
   }
 };
 
@@ -644,14 +666,17 @@ class TrustRegion : public mfem::NewtonSolver {
           mfem::out << ", ||r||/||r_0|| = " << std::setw(13) << (initial_norm != 0.0 ? norm / initial_norm : norm);
           mfem::out << ", x_incr = " << std::setw(13) << trResults.d.Norml2();
         } else {
-          mfem::out << ", norm goal = " << std::setw(13) << norm_goal << "\n";
+          mfem::out << ", norm goal = " << std::setw(13) << norm_goal;
         }
-        mfem::out << '\n';
+        mfem::out << "\n";
       }
 
       if (norm != norm) {
-        mfem::out << "Initial residual for trust-region iteration is undefined/nan." << std::endl;
-        mfem::out << "Newton: No convergence!\n";
+        if (print_options.first_and_last) {
+          mfem::out << "Initial residual for trust-region iteration is undefined/nan." << std::endl;
+          mfem::out << "Newton: No convergence!\n";
+        }
+        throw SolveException("Initial residual for trust-region iteration is undefined/nan.");
         return;
       }
 
@@ -744,7 +769,7 @@ class TrustRegion : public mfem::NewtonSolver {
           solveTheSubspaceProblem(trResults.d, hess_vec_func, ds, H_ds, r, tr_size, num_leftmost);
         }
 
-        static constexpr double roundOffTol = 0.0;  // 1e-14;
+        static constexpr double roundOffTol = 1e-32;
 
         hess_vec_func(trResults.d, trResults.H_d);
         double dHd            = Dot(trResults.d, trResults.H_d);
@@ -755,9 +780,8 @@ class TrustRegion : public mfem::NewtonSolver {
         double realObjective = std::numeric_limits<double>::max();
         double normPred      = std::numeric_limits<double>::max();
         try {
-          normPred    = computeResidual(x_pred, r_pred);
-          double obj1 = 0.5 * (Dot(r, trResults.d) + Dot(r_pred, trResults.d)) - roundOffTol;
-
+          normPred      = computeResidual(x_pred, r_pred);
+          double obj1   = 0.5 * (Dot(r, trResults.d) + Dot(r_pred, trResults.d)) - roundOffTol;
           realObjective = obj1;
         } catch (const std::exception&) {
           realObjective = std::numeric_limits<double>::max();
@@ -772,8 +796,8 @@ class TrustRegion : public mfem::NewtonSolver {
           happyAboutTrSize = true;
           if (print_options.iterations) {
             printTrustRegionInfo(realObjective, modelObjective, trResults.cg_iterations_count, tr_size, true);
-            trResults.cg_iterations_count =
-                0;  // zero this output so it doesn't look like the linesearch is doing cg iterations
+            // zero this output so it doesn't look like the linesearch is doing cg iterations
+            trResults.cg_iterations_count = 0;
           }
           break;
         }
@@ -786,7 +810,7 @@ class TrustRegion : public mfem::NewtonSolver {
           if (print_options.iterations || print_options.warnings) {
             mfem::out << "Found a positive model objective increase.  Debug if you see this.\n";
           }
-          rho = realImprove / -modelImprove;
+          rho = 0.0;  // realImprove / -modelImprove;
         }
 
         // std::cout << "rho , stuff = " << rho << " " << settings.eta3 << std::endl;
@@ -830,9 +854,14 @@ class TrustRegion : public mfem::NewtonSolver {
     final_iter = it;
     final_norm = norm;
 
+    if (!converged) {
+      throw SolveException("Trust-region algorithm failed to converge.");
+    }
+
     if (print_options.summary || (!converged && print_options.warnings) || print_options.first_and_last) {
       mfem::out << "Newton: Number of iterations: " << final_iter << '\n' << "   ||r|| = " << final_norm << '\n';
     }
+
     if (!converged && (print_options.summary || print_options.warnings)) {
       mfem::out << "Newton: No convergence!\n";
     }

diff --git a/src/serac/physics/solid_mechanics.hpp b/src/serac/physics/solid_mechanics.hpp
@@ -27,6 +27,16 @@
 
 namespace serac {
 
+/// @brief simple utility for computing matrix norm
+inline double matrixNorm(std::unique_ptr<mfem::HypreParMatrix>& K)
+{
+  mfem::HypreParMatrix* H      = K.get();
+  hypre_ParCSRMatrix*   Hhypre = static_cast<hypre_ParCSRMatrix*>(*H);
+  double                Hfronorm;
+  hypre_ParCSRMatrixNormFro(Hhypre, &Hfronorm);
+  return Hfronorm;
+}
+
 namespace solid_mechanics {
 
 namespace detail {
@@ -130,17 +140,20 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
    * @param checkpoint_to_disk Flag to save the transient states on disk instead of memory for transient adjoint solver
    * @param use_warm_start Flag to turn on or off the displacement warm start predictor which helps robustness for
    * large deformation problems
+   * @param max_timestep_cutbacks The maximum number of times the supplied timestep can be cutback before the solver
+   * gives up
    *
    * @note On parallel file systems (e.g. lustre), significant slowdowns and occasional errors were observed when
    *       writing and reading the needed trainsient states to disk for adjoint solves
    */
   SolidMechanics(const NonlinearSolverOptions nonlinear_opts, const LinearSolverOptions lin_opts,
                  const serac::TimesteppingOptions timestepping_opts, const std::string& physics_name,
                  std::string mesh_tag, std::vector<std::string> parameter_names = {}, int cycle = 0, double time = 0.0,
-                 bool checkpoint_to_disk = false, bool use_warm_start = true)
+                 bool checkpoint_to_disk = false, bool use_warm_start = true, int max_timestep_cutbacks = 5)
       : SolidMechanics(
             std::make_unique<EquationSolver>(nonlinear_opts, lin_opts, StateManager::mesh(mesh_tag).GetComm()),
-            timestepping_opts, physics_name, mesh_tag, parameter_names, cycle, time, checkpoint_to_disk, use_warm_start)
+            timestepping_opts, physics_name, mesh_tag, parameter_names, cycle, time, checkpoint_to_disk, use_warm_start,
+            max_timestep_cutbacks)
   {
   }
 
@@ -157,13 +170,16 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
    * @param checkpoint_to_disk Flag to save the transient states on disk instead of memory for transient adjoint solves
    * @param use_warm_start A flag to turn on or off the displacement warm start predictor which helps robustness for
    * large deformation problems
+   * @param max_timestep_cutbacks The maximum number of times the supplied timestep can be cutback before the solver
+   * gives up
    *
    * @note On parallel file systems (e.g. lustre), significant slowdowns and occasional errors were observed when
    *       writing and reading the needed trainsient states to disk for adjoint solves
    */
   SolidMechanics(std::unique_ptr<serac::EquationSolver> solver, const serac::TimesteppingOptions timestepping_opts,
                  const std::string& physics_name, std::string mesh_tag, std::vector<std::string> parameter_names = {},
-                 int cycle = 0, double time = 0.0, bool checkpoint_to_disk = false, bool use_warm_start = true)
+                 int cycle = 0, double time = 0.0, bool checkpoint_to_disk = false, bool use_warm_start = true,
+                 int max_timestep_cutbacks = 5)
       : BasePhysics(physics_name, mesh_tag, cycle, time, checkpoint_to_disk),
         displacement_(
             StateManager::newState(H1<order, dim>{}, detail::addPrefix(physics_name, "displacement"), mesh_tag_)),
@@ -183,7 +199,8 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
         ode2_(displacement_.space().TrueVSize(),
               {.time = time_, .c0 = c0_, .c1 = c1_, .u = u_, .du_dt = v_, .d2u_dt2 = acceleration_}, *nonlin_solver_,
               bcs_),
-        use_warm_start_(use_warm_start)
+        use_warm_start_(use_warm_start),
+        max_timestep_cutbacks_(max_timestep_cutbacks)
   {
     SERAC_MARK_FUNCTION;
     SLIC_ERROR_ROOT_IF(mesh_.Dimension() != dim,
@@ -1232,7 +1249,6 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
             J_.reset(mfem::Add(1.0, *m_mat, c0_, *k_mat));
             J_e_.reset();
             J_e_ = bcs_.eliminateAllEssentialDofsFromMatrix(*J_);
-
             return *J_;
           });
     }
@@ -1282,7 +1298,7 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
     }
 
     if (is_quasistatic_) {
-      quasiStaticSolve(dt);
+      quasiStaticSolve(dt, 1.0, 0);
     } else {
       // The current ode interface tracks 2 times, one internally which we have a handle to via time_,
       // and one here via the step interface.
@@ -1606,6 +1622,9 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
   /// @brief A flag denoting whether to compute the warm start for improved robustness
   bool use_warm_start_;
 
+  /// @brief The maximum number of cut-back in the supplied time-step increment before the solver will give up
+  int max_timestep_cutbacks_;
+
   /// @brief Coefficient containing the essential boundary values
   std::shared_ptr<mfem::VectorCoefficient> disp_bdr_coef_;
 
@@ -1624,16 +1643,40 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
       }...};
 
   /// @brief Solve the Quasi-static Newton system
-  virtual void quasiStaticSolve(double dt)
+  virtual void quasiStaticSolve(double dt, double step_fraction_of_dt_remaining, int level)
   {
-    // warm start must be called prior to the time update so that the previous Jacobians can be used consistently
-    // throughout.
-    warmStartDisplacement(dt);
-    time_ += dt;
-
-    // this method is essentially equivalent to the 1-liner
-    // u += dot(inv(J), dot(J_elim[:, dofs], (U(t + dt) - u)[dofs]));
-    nonlin_solver_->solve(displacement_);
+    if (level > max_timestep_cutbacks_) {
+      if (mpi_rank_ == 0)
+        std::cout << "Too many boundary condition cutbacks, accepting solution even though there may be issues. Try "
+                     "increasing the number of load steps "
+                  << std::endl;
+      return;
+    }
+
+    bool solver_success = true;
+    try {
+      // warm start must be called prior to the time update so that the previous Jacobians can be used consistently
+      // throughout.
+      if (level == 0) time_ += dt;
+      warmStartDisplacement(dt, step_fraction_of_dt_remaining);
+      nonlin_solver_->solve(displacement_);
+    } catch (const std::exception& e) {
+      if (mpi_rank_ == 0) mfem::out << "Caught nonlinear solver exception: " << e.what() << std::endl;
+      displacement_ -= du_;
+      solver_success = false;
+      quasiStaticSolve(dt, 0.5 * step_fraction_of_dt_remaining, level + 1);
+      quasiStaticSolve(dt, 1.0, level + 1);
+    }
+
+    if (solver_success) {
+      if (step_fraction_of_dt_remaining == 1.0) {
+        if (mpi_rank_ == 0)
+          mfem::out << "SolidMechanics solve succeeded for time " << time_ << " dt = " << dt << std::endl;
+      } else {
+        if (mpi_rank_ == 0)
+          mfem::out << "SolidMechanics substep solve succeeded for time " << time_ << " dt = " << dt << std::endl;
+      }
+    }
   }
 
   /**
@@ -1731,29 +1774,34 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
    nonlinear solvers will ensure positive definiteness at equilibrium. *Once any parameter is changed, it is no longer
    certain to be positive definite, which will cause issues for many types linear solvers.
    */
-  void warmStartDisplacement(double dt)
+  void warmStartDisplacement(double dt, double displacement_scale_factor)
   {
     SERAC_MARK_FUNCTION;
 
     du_ = 0.0;
     for (auto& bc : bcs_.essentials()) {
       // apply the future boundary conditions, but use the most recent Jacobians stiffness.
-      bc.setDofs(du_, time_ + dt);
+      bc.setDofs(du_, time_);
     }
 
     auto& constrained_dofs = bcs_.allEssentialTrueDofs();
+
     for (int i = 0; i < constrained_dofs.Size(); i++) {
       int j = constrained_dofs[i];
       du_[j] -= displacement_(j);
     }
 
+    if (displacement_scale_factor != 1.0) {
+      du_ *= displacement_scale_factor;
+    }
+
     if (use_warm_start_) {
       // Update the linearized Jacobian matrix
-      auto r = (*residual_)(time_ + dt, shape_displacement_, displacement_, acceleration_,
+      auto r = (*residual_)(time_, shape_displacement_, displacement_, acceleration_,
                             *parameters_[parameter_indices].state...);
 
       // use the most recently evaluated Jacobian
-      auto [_, drdu] = (*residual_)(time_, shape_displacement_, differentiate_wrt(displacement_), acceleration_,
+      auto [_, drdu] = (*residual_)(time_ - dt, shape_displacement_, differentiate_wrt(displacement_), acceleration_,
                                     *parameters_[parameter_indices].previous_state...);
       J_.reset();
       J_ = assemble(drdu);
@@ -1771,11 +1819,23 @@ class SolidMechanics<order, dim, Parameters<parameter_space...>, std::integer_se
       auto& lin_solver = nonlin_solver_->linearSolver();
 
       lin_solver.SetOperator(*J_);
-
       lin_solver.Mult(r, du_);
     }
 
     displacement_ += du_;
+
+    // due to solver inaccuracies, the end of step boundary conditions may not be exact at this point
+    if (displacement_scale_factor == 1.0) {
+      du_ = 0.0;
+      for (auto& bc : bcs_.essentials()) {
+        bc.setDofs(du_, time_);
+      }
+      for (int i = 0; i < constrained_dofs.Size(); i++) {
+        int j = constrained_dofs[i];
+        du_[j] -= displacement_(j);
+      }
+      displacement_ += du_;
+    }
   }
 };
 

diff --git a/src/serac/physics/solid_mechanics_contact.hpp b/src/serac/physics/solid_mechanics_contact.hpp
@@ -221,7 +221,8 @@ class SolidMechanicsContact<order, dim, Parameters<parameter_space...>,
 
 protected:
   /// @brief Solve the Quasi-static Newton system
-  void quasiStaticSolve(double dt) override
+  void quasiStaticSolve(double dt, [[maybe_unused]] double step_fraction_of_dt_remaining,
+                        [[maybe_unused]] int level) override
   {
     // warm start must be called prior to the time update so that the previous Jacobians can be used consistently
     // throughout. warm start for contact needs to include the previous stiffness terms associated with contact

diff --git a/src/serac/physics/tests/contact_patch_tied.cpp b/src/serac/physics/tests/contact_patch_tied.cpp
@@ -55,7 +55,7 @@ TEST_P(ContactPatchTied, patch)
 //   };
 // #elif defined(MFEM_USE_STRUMPACK)
 #ifdef MFEM_USE_STRUMPACK
-  LinearSolverOptions linear_options{.linear_solver = LinearSolver::Strumpack, .print_level = 1};
+  LinearSolverOptions linear_options{.linear_solver = LinearSolver::Strumpack, .print_level = 0};
 #else
   LinearSolverOptions linear_options{};
   SLIC_INFO_ROOT("Contact requires MFEM built with strumpack.");
@@ -64,8 +64,8 @@ TEST_P(ContactPatchTied, patch)
 
   NonlinearSolverOptions nonlinear_options{.nonlin_solver  = NonlinearSolver::Newton,
                                            .relative_tol   = 1.0e-10,
-                                           .absolute_tol   = 1.0e-10,
-                                           .max_iterations = 20,
+                                           .absolute_tol   = 5.0e-10,
+                                           .max_iterations = 25,
                                            .print_level    = 1};
 
   ContactOptions contact_options{.method      = ContactMethod::SingleMortar,

diff --git a/src/serac/physics/tests/lce_Brighenti_tensile.cpp b/src/serac/physics/tests/lce_Brighenti_tensile.cpp
@@ -71,7 +71,7 @@ TEST(LiquidCrystalElastomer, Brighenti)
 
   // Construct a solid mechanics solver
   LinearSolverOptions linear_options = {
-      .linear_solver  = LinearSolver::GMRES,
+      .linear_solver  = LinearSolver::CG,
       .preconditioner = Preconditioner::HypreAMG,
       .relative_tol   = 1.0e-6,
       .absolute_tol   = 1.0e-14,
@@ -95,7 +95,7 @@ TEST(LiquidCrystalElastomer, Brighenti)
 
   SolidMechanics<p, dim, Parameters<H1<p>, L2<p> > > solid_solver(
       nonlinear_options, linear_options, solid_mechanics::default_quasistatic_options, "lce_solid_functional", mesh_tag,
-      {"temperature", "gamma"});
+      {"temperature", "gamma"}, 0, 0.0, false, true, 0);
 
   constexpr int TEMPERATURE_INDEX = 0;
   constexpr int GAMMA_INDEX       = 1;