parthenon-hpc-lab · pgrete · Dec 17, 2025 · Dec 4, 2025 · Dec 4, 2025 · Dec 11, 2025
diff --git a/example/poisson_gmg/parthenon_app_inputs.cpp b/example/poisson_gmg/parthenon_app_inputs.cpp
@@ -58,10 +58,10 @@ void ProblemGenerator(Mesh *pm, ParameterInput *pin, MeshData<Real> *md) {
         const auto &coords = pack.GetCoordinates(b);
         Real x1 = coords.X<1, te>(i);
         Real x2 = coords.X<2, te>(j);
-        Real x3 = coords.X<2, te>(k);
+        Real x3 = coords.X<3, te>(k);
         Real x1f = coords.X<1, TE::F1>(k, j, i);
         Real x2f = coords.X<2, TE::F2>(k, j, i);
-        Real x3f = coords.X<2, TE::F3>(k, j, i);
+        Real x3f = coords.X<3, TE::F3>(k, j, i);
         Real dx1 = coords.Dxc<1>(k, j, i);
         Real dx2 = coords.Dxc<2>(k, j, i);
         Real dx3 = coords.Dxc<3>(k, j, i);

diff --git a/example/poisson_gmg/poisson_equation.hpp b/example/poisson_gmg/poisson_equation.hpp
@@ -112,8 +112,8 @@ class PoissonEquation {
 
     auto desc_mat = parthenon::MakePackDescriptor<D_t>(md_mat.get());
     auto desc_diag = parthenon::MakePackDescriptor<var_t>(md_diag.get());
-    auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block);
-    auto pack_diag = desc_diag.GetPack(md_diag.get(), include_block);
+    auto pack_mat = desc_mat.GetPack(md_mat.get());
+    auto pack_diag = desc_diag.GetPack(md_diag.get());
     using TE = parthenon::TopologicalElement;
     parthenon::par_for(
         "StoreDiagonal", 0, pack_mat.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
@@ -158,9 +158,9 @@ class PoissonEquation {
     std::vector<bool> include_block(nblocks, true);
 
     auto desc = parthenon::MakePackDescriptor<var_t>(md.get(), {}, {PDOpt::WithFluxes});
-    auto pack = desc.GetPack(md.get(), include_block);
+    auto pack = desc.GetPack(md.get());
     auto desc_mat = parthenon::MakePackDescriptor<D_t>(md_mat.get(), {});
-    auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block);
+    auto pack_mat = desc_mat.GetPack(md_mat.get());
     parthenon::par_for(
         "CaclulateFluxes", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
         KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
@@ -217,8 +217,8 @@ class PoissonEquation {
 
     auto desc = parthenon::MakePackDescriptor<var_t>(md.get(), {}, {PDOpt::WithFluxes});
     auto desc_mat = parthenon::MakePackDescriptor<D_t>(md.get());
-    auto pack = desc.GetPack(md.get(), include_block);
-    auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block);
+    auto pack = desc.GetPack(md.get());
+    auto pack_mat = desc_mat.GetPack(md_mat.get());
     const std::size_t scratch_size_in_bytes = 0;
     const std::size_t scratch_level = 1;
 
@@ -298,8 +298,8 @@ class PoissonEquation {
     static auto desc =
         parthenon::MakePackDescriptor<var_t>(md.get(), {}, {PDOpt::WithFluxes});
     static auto desc_out = parthenon::MakePackDescriptor<var_t>(md_out.get());
-    auto pack = desc.GetPack(md.get(), include_block);
-    auto pack_out = desc_out.GetPack(md_out.get(), include_block);
+    auto pack = desc.GetPack(md.get());
+    auto pack_out = desc_out.GetPack(md_out.get());
     parthenon::par_for(
         "FluxMultiplyMatrix", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
         ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {

diff --git a/src/solvers/bicgstab_solver.hpp b/src/solvers/bicgstab_solver.hpp
@@ -211,8 +211,11 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
     // 1. u <- M p
     auto precon1 = reset;
     if (params_.precondition_type == Preconditioner::Multigrid) {
+      auto timer = solver_timings.GetOrAddAndRegister("BiCGSTAB: Precon setup", itl);
+      timer->StartCollectingTasks();
       auto set_rhs = itl.AddTask(precon1, TF(CopyData<FieldTL>), md_p, md_rhs);
       auto zero_u = itl.AddTask(precon1, TF(SetToZero<FieldTL>), md_u);
+      timer->StopCollectingTasks();
       precon1 =
           preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh);
     } else if (params_.precondition_type == Preconditioner::Diagonal) {
@@ -222,11 +225,19 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
     }
 
     // 2. v <- A u
+    auto timer_Auv = solver_timings.GetOrAddAndRegister("BiCGSTAB: Au -> v", itl);
+    auto timer_comm = solver_timings.GetOrAddAndRegister("BiCGSTAB: Boundary", itl);
+    timer_Auv->StartCollectingTasks();
+    timer_comm->StartCollectingTasks();
     auto comm = AddBoundaryExchangeTasks<BoundaryType::any>(precon1, itl, md_u,
                                                             multilevel, BCFunc);
+    timer_comm->StopCollectingTasks();
     auto get_v = eqs_.Ax(itl, comm, md_base, md_u, md_v);
+    timer_Auv->StopCollectingTasks();
 
     // 3. rhat0v <- (rhat0, v)
+    auto timer_alpha = solver_timings.GetOrAddAndRegister("BiCGSTAB: alpha update", itl);
+    timer_alpha->StartCollectingTasks();
     auto get_rhat0v = DotProduct<FieldTL>(get_v, itl, &rhat0v, md_rhat0, md_v);
 
     // 4. h <- x + alpha u (alpha = rhat0r_old / rhat0v)
@@ -250,23 +261,29 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
         this, md_r, md_v, md_s);
 
     // Check and print out residual
-    auto get_res = DotProduct<FieldTL>(correct_s, itl, &residual, md_s, md_s);
-
-    auto print = itl.AddTask(
-        TaskQualifier::once_per_region, get_res,
-        [&](BiCGSTABSolver *solver, Mesh *pmesh) {
-          Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells());
-          if (Globals::my_rank == 0 && solver->params_.print_per_step)
-            printf("%i %e\n", solver->iter_counter * 2 + 1, rms_res);
-          return TaskStatus::complete;
-        },
-        this, pmesh);
+    if (params_.print_per_step) {
+      auto get_res = DotProduct<FieldTL>(correct_s, itl, &residual, md_s, md_s);
+
+      auto print = itl.AddTask(
+          TaskQualifier::once_per_region, get_res,
+          [&](BiCGSTABSolver *solver, Mesh *pmesh) {
+            Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells());
+            if (Globals::my_rank == 0 && solver->params_.print_per_step)
+              printf("%i %e\n", solver->iter_counter * 2 + 1, rms_res);
+            return TaskStatus::complete;
+          },
+          this, pmesh);
+    }
+    timer_alpha->StopCollectingTasks();
 
     // 6. u <- M s
     auto precon2 = correct_s;
     if (params_.precondition_type == Preconditioner::Multigrid) {
+      auto timer = solver_timings.GetOrAddAndRegister("BiCGSTAB: Precon setup", itl);
+      timer->StartCollectingTasks();
       auto set_rhs = itl.AddTask(precon2, TF(CopyData<FieldTL>), md_s, md_rhs);
       auto zero_u = itl.AddTask(precon2, TF(SetToZero<FieldTL>), md_u);
+      timer->StopCollectingTasks();
       precon2 =
           preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh);
     } else if (params_.precondition_type == Preconditioner::Diagonal) {
@@ -276,11 +293,18 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
     }
 
     // 7. t <- A u
+    auto timer_Aut = solver_timings.GetOrAddAndRegister("BiCGSTAB: Au -> t", itl);
+    timer_comm->StartCollectingTasks();
+    timer_Aut->StartCollectingTasks();
     auto pre_t_comm = AddBoundaryExchangeTasks<BoundaryType::any>(precon2, itl, md_u,
                                                                   multilevel, BCFunc);
+    timer_comm->StopCollectingTasks();
     auto get_t = eqs_.Ax(itl, pre_t_comm, md_base, md_u, md_t);
+    timer_Aut->StopCollectingTasks();
 
     // 8. omega <- (t,s) / (t,t)
+    auto timer_omega = solver_timings.GetOrAddAndRegister("BiCGSTAB: omega update", itl);
+    timer_omega->StartCollectingTasks();
     auto get_ts = DotProduct<FieldTL>(get_t, itl, &ts, md_t, md_s);
     auto get_tt = DotProduct<FieldTL>(get_t, itl, &tt, md_t, md_t);
 
@@ -303,8 +327,11 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
           return AddFieldsAndStore<FieldTL>(md_s, md_t, md_r, 1.0, -omega);
         },
         this, md_s, md_t, md_r);
+    timer_omega->StopCollectingTasks();
 
     // Check and print out residual
+    auto timer_res = solver_timings.GetOrAddAndRegister("BiCGSTAB: residual", itl);
+    timer_res->StartCollectingTasks();
     auto get_res2 = DotProduct<FieldTL>(correct_r, itl, &residual, md_r, md_r);
 
     get_res2 = itl.AddTask(
@@ -355,7 +382,7 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
         },
         this, pmesh, params_.max_iters, params_.residual_tolerance,
         params_.relative_residual);
-
+    timer_res->StopCollectingTasks();
     return tl.AddTask(solver_id, TF(CopyData<FieldTL>), md_x, md_u);
   }
 

diff --git a/src/solvers/mg_solver.hpp b/src/solvers/mg_solver.hpp
@@ -135,6 +135,9 @@ class MGSolver : public SolverBase, MGSolverCounter {
         &iter_counter);
     auto mg_finest = AddLinearOperatorTasks(itl, update_iter, partition, pmesh);
 
+    auto timing_guard =
+        TimingAccumulatorGuard(solver_timings.GetOrAddAndRegister("MG: Residual", tl));
+
     auto partitions = pmesh->GetDefaultBlockPartitions(GridIdentifier::leaf());
     if (partition >= partitions.size())
       PARTHENON_FAIL("Does not work with non-default partitioning.");
@@ -279,6 +282,10 @@ class MGSolver : public SolverBase, MGSolverCounter {
     return TaskStatus::complete;
   }
 
+  std::string GetTimeLabel(const std::string &name, int level) {
+    return "MG: " + name + " [level = " + std::to_string(level) + "]";
+  }
+
   template <parthenon::BoundaryType comm_boundary>
   TaskID AddJacobiIteration(TaskList &tl, TaskID depends_on, bool multilevel, Real omega,
                             int partition, int level,
@@ -292,9 +299,19 @@ class MGSolver : public SolverBase, MGSolverCounter {
     auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition], sol_fields);
     auto &md_diag = pmesh->mesh_data.Add(container_diag, md_base, sol_fields);
 
+    auto time_comm =
+        solver_timings.GetOrAddAndRegister(GetTimeLabel("Boundary", level), tl);
+    time_comm->StartCollectingTasks();
     auto comm = AddBoundaryExchangeTasks<comm_boundary>(depends_on, tl, md_in, multilevel,
                                                         BCFunc);
+    time_comm->StopCollectingTasks();
+    auto time_ax =
+        solver_timings.GetOrAddAndRegister(GetTimeLabel("Jacobi Ax", level), tl);
+    time_ax->StartCollectingTasks();
     auto mat_mult = eqs_.Ax(tl, comm, md_base, md_in, md_out);
+    time_ax->StopCollectingTasks();
+    auto guard = TimingAccumulatorGuard(
+        solver_timings.GetOrAddAndRegister(GetTimeLabel("Jacobi", level), tl));
     return tl.AddTask(mat_mult, TF(&MGSolver::Jacobi), this, md_rhs, md_out, md_diag,
                       md_in, md_out, omega);
   }
@@ -319,6 +336,9 @@ class MGSolver : public SolverBase, MGSolverCounter {
     std::array<std::array<Real, 3>, 3> omega_M3{
         {{0.9372, 0.6667, 0.5173}, {1.6653, 0.8000, 0.5264}, {2.2473, 0.8571, 0.5296}}};
 
+    auto timing_guard = TimingAccumulatorGuard(
+        solver_timings.GetOrAddAndRegister(GetTimeLabel("Smooth", level), tl));
+
     if (stages == 0) return depends_on;
     auto omega = omega_M1;
     if (stages == 2) omega = omega_M2;
@@ -353,13 +373,22 @@ class MGSolver : public SolverBase, MGSolverCounter {
 
     auto task_out = dependence;
     if (level < max_level) {
+      auto timing_guard = TimingAccumulatorGuard(solver_timings.GetOrAddAndRegister(
+          GetTimeLabel("Setup restrict recv", level), tl));
       task_out =
           tl.AddTask(task_out, TF(ReceiveBoundBufs<BoundaryType::gmg_restrict_recv>), md);
       task_out = tl.AddTask(task_out, TF(SetBounds<BoundaryType::gmg_restrict_recv>), md);
     }
+    auto timer = solver_timings.GetOrAddAndRegister(
+        GetTimeLabel("Setup restrict send", level), tl);
+    timer->StartCollectingTasks();
     task_out = tl.AddTask(task_out, TF(&equations_t::SetDiagonal), &eqs_, md, md_diag);
+    timer->StopCollectingTasks();
+
     // If we are finer than the coarsest level:
     if (level > min_level) {
+      auto timing_guard = TimingAccumulatorGuard(solver_timings.GetOrAddAndRegister(
+          GetTimeLabel("Setup restrict send", level), tl));
       task_out =
           tl.AddTask(task_out, TF(SendBoundBufs<BoundaryType::gmg_restrict_send>), md);
     }
@@ -412,12 +441,18 @@ class MGSolver : public SolverBase, MGSolverCounter {
     auto &md_u0 = pmesh->mesh_data.Add(container_u0, md, sol_fields);
     auto &md_diag = pmesh->mesh_data.Add(container_diag, md, sol_fields);
 
+    auto timer_guard_total = TimingAccumulatorGuard(
+        solver_timings.GetOrAddAndRegister(GetTimeLabel("Total", level), tl));
+
     // 0. Receive residual from coarser level if there is one
     auto set_from_finer = dependence;
     if (level < max_level) {
       // Fill fields with restricted values
       // TODO(LFR): Need to make sure that this communication pattern is ok, since we are
       //            trying to concurrently communicate on two stages
+      auto timer =
+          solver_timings.GetOrAddAndRegister(GetTimeLabel("Restrict recv", level), tl);
+      timer->StartCollectingTasks();
       auto recv_from_finer = tl.AddTask(
           dependence, TF(ReceiveBoundBufs<BoundaryType::gmg_restrict_recv>), md_u);
       set_from_finer = tl.AddTask(recv_from_finer,
@@ -427,6 +462,7 @@ class MGSolver : public SolverBase, MGSolverCounter {
                      TF(ReceiveBoundBufs<BoundaryType::gmg_restrict_recv>), md_res_err);
       set_from_finer = tl.AddTask(
           recv_from_finer, BTF(SetBounds<BoundaryType::gmg_restrict_recv>), md_res_err);
+      timer->StopCollectingTasks();
       // 1. Copy residual from dual purpose communication field to the rhs, should be
       // actual RHS for finest level
       if (!do_FAS) {
@@ -439,8 +475,14 @@ class MGSolver : public SolverBase, MGSolverCounter {
         // to make sure that the boundaries of the restricted u are up to date before
         // calling Ax. That being said, at least in one case commenting this line out
         // didn't seem to impact the solution.
+        auto timer_guard = TimingAccumulatorGuard(
+            solver_timings.GetOrAddAndRegister(GetTimeLabel("Ax rhs", level), tl));
+        auto timer_comm =
+            solver_timings.GetOrAddAndRegister(GetTimeLabel("Boundary", level), tl);
+        timer_comm->StartCollectingTasks();
         set_from_finer = AddBoundaryExchangeTasks<BoundaryType::gmg_same>(
             set_from_finer, tl, md_u, multilevel, BCFunc);
+        timer_comm->StopCollectingTasks();
         set_from_finer =
             tl.AddTask(set_from_finer, BTF(CopyData<FieldTL, true>), md_u, md_u0);
         // This should set the rhs only in blocks that correspond to interior nodes, the
@@ -452,6 +494,8 @@ class MGSolver : public SolverBase, MGSolverCounter {
                                     md_temp, md_res_err, md_rhs, 1.0, 1.0, true);
       }
     } else {
+      auto timer_guard = TimingAccumulatorGuard(
+          solver_timings.GetOrAddAndRegister(GetTimeLabel("Copy u0", level), tl));
       set_from_finer =
           tl.AddTask(set_from_finer, BTF(CopyData<FieldTL, true>), md_u, md_u0);
     }
@@ -462,24 +506,39 @@ class MGSolver : public SolverBase, MGSolverCounter {
     // If we are finer than the coarsest level:
     auto post_smooth = pre_smooth;
     if (level > min_level) {
+      auto timer_ax =
+          solver_timings.GetOrAddAndRegister(GetTimeLabel("Ax residual", level), tl);
+      timer_ax->StartCollectingTasks();
       // 3. Communicate same level boundaries so that u is up to date everywhere
+      auto timer_comm =
+          solver_timings.GetOrAddAndRegister(GetTimeLabel("Boundary", level), tl);
+      timer_comm->StartCollectingTasks();
       auto comm_u = AddBoundaryExchangeTasks<BoundaryType::gmg_same>(pre_smooth, tl, md_u,
                                                                      multilevel, BCFunc);
+      timer_comm->StopCollectingTasks();
 
       // 4. Caclulate residual and store in communication field
       auto residual = eqs_.Ax(tl, comm_u, md, md_u, md_temp);
       residual = tl.AddTask(residual, BTF(AddFieldsAndStoreInteriorSelect<FieldTL, true>),
                             md_rhs, md_temp, md_res_err, 1.0, -1.0, false);
+      timer_ax->StopCollectingTasks();
 
       // 5. Restrict communication field and send to next level
       // TODO(LFR): Other place where we are receiving two stage communication
+      auto timer_res =
+          solver_timings.GetOrAddAndRegister(GetTimeLabel("Restrict send", level), tl);
+      timer_res->StartCollectingTasks();
       auto communicate_to_coarse =
           tl.AddTask(residual, BTF(SendBoundBufs<BoundaryType::gmg_restrict_send>), md_u);
       communicate_to_coarse =
           tl.AddTask(communicate_to_coarse,
                      BTF(SendBoundBufs<BoundaryType::gmg_restrict_send>), md_res_err);
+      timer_res->StopCollectingTasks();
 
       // 6. Receive error field into communication field and prolongate
+      auto timer_pro =
+          solver_timings.GetOrAddAndRegister(GetTimeLabel("Prolong recv", level), tl);
+      timer_pro->StartCollectingTasks();
       auto recv_from_coarser =
           tl.AddTask(communicate_to_coarse,
                      TF(ReceiveBoundBufs<BoundaryType::gmg_prolongate_recv>), md_res_err);
@@ -488,11 +547,16 @@ class MGSolver : public SolverBase, MGSolverCounter {
                      md_res_err);
       auto prolongate =
           prolongator_.template Prolongate<FieldTL>(tl, set_from_coarser, md_res_err);
+      timer_pro->StopCollectingTasks();
 
       // 7. Correct solution on this level with res_err field and store in
       //    communication field
+      auto timer_u =
+          solver_timings.GetOrAddAndRegister(GetTimeLabel("Update u", level), tl);
+      timer_u->StartCollectingTasks();
       auto update_sol = tl.AddTask(prolongate, BTF(AddFieldsAndStore<FieldTL, true>),
                                    md_u, md_res_err, md_u, 1.0, 1.0);
+      timer_u->StopCollectingTasks();
 
       // 8. Post smooth using communication field and stored RHS
       post_smooth = AddSRJIteration<BoundaryType::gmg_same>(
@@ -507,6 +571,8 @@ class MGSolver : public SolverBase, MGSolverCounter {
     // level)
     TaskID last_task = post_smooth;
     if (level < max_level) {
+      auto timer_guard = TimingAccumulatorGuard(
+          solver_timings.GetOrAddAndRegister(GetTimeLabel("Prolong send", level), tl));
       auto copy_over = post_smooth;
       if (!do_FAS) {
         copy_over =
@@ -518,8 +584,12 @@ class MGSolver : public SolverBase, MGSolverCounter {
       }
       // This is required to make sure boundaries of res_err are up to date before
       // prolongation
+      auto timer_comm =
+          solver_timings.GetOrAddAndRegister(GetTimeLabel("Boundary", level), tl);
+      timer_comm->StartCollectingTasks();
       auto boundary = AddBoundaryExchangeTasks<BoundaryType::gmg_same>(
           copy_over, tl, md_res_err, multilevel, BCFunc);
+      timer_comm->StopCollectingTasks();
       last_task = tl.AddTask(
           boundary, BTF(SendBoundBufs<BoundaryType::gmg_prolongate_send>), md_res_err);
     }

diff --git a/src/solvers/solver_base.hpp b/src/solvers/solver_base.hpp
@@ -66,6 +66,8 @@ class SolverBase {
 
   const std::vector<std::string> &GetFieldLabels() const { return sol_fields; }
 
+  static inline TimingAccumulatorDictionary solver_timings;
+
  protected:
   // Labels of all fields included in the vector
   std::vector<std::string> sol_fields;