Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions example/poisson_gmg/parthenon_app_inputs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ void ProblemGenerator(Mesh *pm, ParameterInput *pin, MeshData<Real> *md) {
const auto &coords = pack.GetCoordinates(b);
Real x1 = coords.X<1, te>(i);
Real x2 = coords.X<2, te>(j);
Real x3 = coords.X<2, te>(k);
Real x3 = coords.X<3, te>(k);
Real x1f = coords.X<1, TE::F1>(k, j, i);
Real x2f = coords.X<2, TE::F2>(k, j, i);
Real x3f = coords.X<2, TE::F3>(k, j, i);
Real x3f = coords.X<3, TE::F3>(k, j, i);
Comment on lines -61 to +64
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a bug in the code that needed to be fixed somewhere

Real dx1 = coords.Dxc<1>(k, j, i);
Real dx2 = coords.Dxc<2>(k, j, i);
Real dx3 = coords.Dxc<3>(k, j, i);
Expand Down
16 changes: 8 additions & 8 deletions example/poisson_gmg/poisson_equation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ class PoissonEquation {

auto desc_mat = parthenon::MakePackDescriptor<D_t>(md_mat.get());
auto desc_diag = parthenon::MakePackDescriptor<var_t>(md_diag.get());
auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block);
auto pack_diag = desc_diag.GetPack(md_diag.get(), include_block);
auto pack_mat = desc_mat.GetPack(md_mat.get());
auto pack_diag = desc_diag.GetPack(md_diag.get());
using TE = parthenon::TopologicalElement;
parthenon::par_for(
"StoreDiagonal", 0, pack_mat.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
Expand Down Expand Up @@ -158,9 +158,9 @@ class PoissonEquation {
std::vector<bool> include_block(nblocks, true);

auto desc = parthenon::MakePackDescriptor<var_t>(md.get(), {}, {PDOpt::WithFluxes});
auto pack = desc.GetPack(md.get(), include_block);
auto pack = desc.GetPack(md.get());
auto desc_mat = parthenon::MakePackDescriptor<D_t>(md_mat.get(), {});
auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block);
auto pack_mat = desc_mat.GetPack(md_mat.get());
parthenon::par_for(
"CaclulateFluxes", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
Expand Down Expand Up @@ -217,8 +217,8 @@ class PoissonEquation {

auto desc = parthenon::MakePackDescriptor<var_t>(md.get(), {}, {PDOpt::WithFluxes});
auto desc_mat = parthenon::MakePackDescriptor<D_t>(md.get());
auto pack = desc.GetPack(md.get(), include_block);
auto pack_mat = desc_mat.GetPack(md_mat.get(), include_block);
auto pack = desc.GetPack(md.get());
auto pack_mat = desc_mat.GetPack(md_mat.get());
const std::size_t scratch_size_in_bytes = 0;
const std::size_t scratch_level = 1;

Expand Down Expand Up @@ -298,8 +298,8 @@ class PoissonEquation {
static auto desc =
parthenon::MakePackDescriptor<var_t>(md.get(), {}, {PDOpt::WithFluxes});
static auto desc_out = parthenon::MakePackDescriptor<var_t>(md_out.get());
auto pack = desc.GetPack(md.get(), include_block);
auto pack_out = desc_out.GetPack(md_out.get(), include_block);
auto pack = desc.GetPack(md.get());
auto pack_out = desc_out.GetPack(md_out.get());
parthenon::par_for(
"FluxMultiplyMatrix", 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
Expand Down
51 changes: 39 additions & 12 deletions src/solvers/bicgstab_solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,11 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
// 1. u <- M p
auto precon1 = reset;
if (params_.precondition_type == Preconditioner::Multigrid) {
auto timer = solver_timings.GetOrAddAndRegister("BiCGSTAB: Precon setup", itl);
timer->StartCollectingTasks();
auto set_rhs = itl.AddTask(precon1, TF(CopyData<FieldTL>), md_p, md_rhs);
auto zero_u = itl.AddTask(precon1, TF(SetToZero<FieldTL>), md_u);
timer->StopCollectingTasks();
precon1 =
preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh);
} else if (params_.precondition_type == Preconditioner::Diagonal) {
Expand All @@ -222,11 +225,19 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
}

// 2. v <- A u
auto timer_Auv = solver_timings.GetOrAddAndRegister("BiCGSTAB: Au -> v", itl);
auto timer_comm = solver_timings.GetOrAddAndRegister("BiCGSTAB: Boundary", itl);
timer_Auv->StartCollectingTasks();
timer_comm->StartCollectingTasks();
auto comm = AddBoundaryExchangeTasks<BoundaryType::any>(precon1, itl, md_u,
multilevel, BCFunc);
timer_comm->StopCollectingTasks();
auto get_v = eqs_.Ax(itl, comm, md_base, md_u, md_v);
timer_Auv->StopCollectingTasks();

// 3. rhat0v <- (rhat0, v)
auto timer_alpha = solver_timings.GetOrAddAndRegister("BiCGSTAB: alpha update", itl);
timer_alpha->StartCollectingTasks();
auto get_rhat0v = DotProduct<FieldTL>(get_v, itl, &rhat0v, md_rhat0, md_v);

// 4. h <- x + alpha u (alpha = rhat0r_old / rhat0v)
Expand All @@ -250,23 +261,29 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
this, md_r, md_v, md_s);

// Check and print out residual
auto get_res = DotProduct<FieldTL>(correct_s, itl, &residual, md_s, md_s);

auto print = itl.AddTask(
TaskQualifier::once_per_region, get_res,
[&](BiCGSTABSolver *solver, Mesh *pmesh) {
Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells());
if (Globals::my_rank == 0 && solver->params_.print_per_step)
printf("%i %e\n", solver->iter_counter * 2 + 1, rms_res);
return TaskStatus::complete;
},
this, pmesh);
if (params_.print_per_step) {
auto get_res = DotProduct<FieldTL>(correct_s, itl, &residual, md_s, md_s);

auto print = itl.AddTask(
TaskQualifier::once_per_region, get_res,
[&](BiCGSTABSolver *solver, Mesh *pmesh) {
Real rms_res = std::sqrt(solver->residual.val / pmesh->GetTotalCells());
if (Globals::my_rank == 0 && solver->params_.print_per_step)
printf("%i %e\n", solver->iter_counter * 2 + 1, rms_res);
return TaskStatus::complete;
},
this, pmesh);
}
timer_alpha->StopCollectingTasks();

// 6. u <- M s
auto precon2 = correct_s;
if (params_.precondition_type == Preconditioner::Multigrid) {
auto timer = solver_timings.GetOrAddAndRegister("BiCGSTAB: Precon setup", itl);
timer->StartCollectingTasks();
auto set_rhs = itl.AddTask(precon2, TF(CopyData<FieldTL>), md_s, md_rhs);
auto zero_u = itl.AddTask(precon2, TF(SetToZero<FieldTL>), md_u);
timer->StopCollectingTasks();
precon2 =
preconditioner.AddLinearOperatorTasks(itl, set_rhs | zero_u, partition, pmesh);
} else if (params_.precondition_type == Preconditioner::Diagonal) {
Expand All @@ -276,11 +293,18 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
}

// 7. t <- A u
auto timer_Aut = solver_timings.GetOrAddAndRegister("BiCGSTAB: Au -> t", itl);
timer_comm->StartCollectingTasks();
timer_Aut->StartCollectingTasks();
auto pre_t_comm = AddBoundaryExchangeTasks<BoundaryType::any>(precon2, itl, md_u,
multilevel, BCFunc);
timer_comm->StopCollectingTasks();
auto get_t = eqs_.Ax(itl, pre_t_comm, md_base, md_u, md_t);
timer_Aut->StopCollectingTasks();

// 8. omega <- (t,s) / (t,t)
auto timer_omega = solver_timings.GetOrAddAndRegister("BiCGSTAB: omega update", itl);
timer_omega->StartCollectingTasks();
auto get_ts = DotProduct<FieldTL>(get_t, itl, &ts, md_t, md_s);
auto get_tt = DotProduct<FieldTL>(get_t, itl, &tt, md_t, md_t);

Expand All @@ -303,8 +327,11 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
return AddFieldsAndStore<FieldTL>(md_s, md_t, md_r, 1.0, -omega);
},
this, md_s, md_t, md_r);
timer_omega->StopCollectingTasks();

// Check and print out residual
auto timer_res = solver_timings.GetOrAddAndRegister("BiCGSTAB: residual", itl);
timer_res->StartCollectingTasks();
auto get_res2 = DotProduct<FieldTL>(correct_r, itl, &residual, md_r, md_r);

get_res2 = itl.AddTask(
Expand Down Expand Up @@ -355,7 +382,7 @@ class BiCGSTABSolver : public SolverBase, BiCGSTABSolverCounter {
},
this, pmesh, params_.max_iters, params_.residual_tolerance,
params_.relative_residual);

timer_res->StopCollectingTasks();
return tl.AddTask(solver_id, TF(CopyData<FieldTL>), md_x, md_u);
}

Expand Down
70 changes: 70 additions & 0 deletions src/solvers/mg_solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ class MGSolver : public SolverBase, MGSolverCounter {
&iter_counter);
auto mg_finest = AddLinearOperatorTasks(itl, update_iter, partition, pmesh);

auto timing_guard =
TimingAccumulatorGuard(solver_timings.GetOrAddAndRegister("MG: Residual", tl));

auto partitions = pmesh->GetDefaultBlockPartitions(GridIdentifier::leaf());
if (partition >= partitions.size())
PARTHENON_FAIL("Does not work with non-default partitioning.");
Expand Down Expand Up @@ -279,6 +282,10 @@ class MGSolver : public SolverBase, MGSolverCounter {
return TaskStatus::complete;
}

std::string GetTimeLabel(const std::string &name, int level) {
return "MG: " + name + " [level = " + std::to_string(level) + "]";
}

template <parthenon::BoundaryType comm_boundary>
TaskID AddJacobiIteration(TaskList &tl, TaskID depends_on, bool multilevel, Real omega,
int partition, int level,
Expand All @@ -292,9 +299,19 @@ class MGSolver : public SolverBase, MGSolverCounter {
auto &md_rhs = pmesh->mesh_data.Add(container_rhs, partitions[partition], sol_fields);
auto &md_diag = pmesh->mesh_data.Add(container_diag, md_base, sol_fields);

auto time_comm =
solver_timings.GetOrAddAndRegister(GetTimeLabel("Boundary", level), tl);
time_comm->StartCollectingTasks();
auto comm = AddBoundaryExchangeTasks<comm_boundary>(depends_on, tl, md_in, multilevel,
BCFunc);
time_comm->StopCollectingTasks();
auto time_ax =
solver_timings.GetOrAddAndRegister(GetTimeLabel("Jacobi Ax", level), tl);
time_ax->StartCollectingTasks();
auto mat_mult = eqs_.Ax(tl, comm, md_base, md_in, md_out);
time_ax->StopCollectingTasks();
auto guard = TimingAccumulatorGuard(
solver_timings.GetOrAddAndRegister(GetTimeLabel("Jacobi", level), tl));
return tl.AddTask(mat_mult, TF(&MGSolver::Jacobi), this, md_rhs, md_out, md_diag,
md_in, md_out, omega);
}
Expand All @@ -319,6 +336,9 @@ class MGSolver : public SolverBase, MGSolverCounter {
std::array<std::array<Real, 3>, 3> omega_M3{
{{0.9372, 0.6667, 0.5173}, {1.6653, 0.8000, 0.5264}, {2.2473, 0.8571, 0.5296}}};

auto timing_guard = TimingAccumulatorGuard(
solver_timings.GetOrAddAndRegister(GetTimeLabel("Smooth", level), tl));

if (stages == 0) return depends_on;
auto omega = omega_M1;
if (stages == 2) omega = omega_M2;
Expand Down Expand Up @@ -353,13 +373,22 @@ class MGSolver : public SolverBase, MGSolverCounter {

auto task_out = dependence;
if (level < max_level) {
auto timing_guard = TimingAccumulatorGuard(solver_timings.GetOrAddAndRegister(
GetTimeLabel("Setup restrict recv", level), tl));
task_out =
tl.AddTask(task_out, TF(ReceiveBoundBufs<BoundaryType::gmg_restrict_recv>), md);
task_out = tl.AddTask(task_out, TF(SetBounds<BoundaryType::gmg_restrict_recv>), md);
}
auto timer = solver_timings.GetOrAddAndRegister(
GetTimeLabel("Setup restrict send", level), tl);
timer->StartCollectingTasks();
task_out = tl.AddTask(task_out, TF(&equations_t::SetDiagonal), &eqs_, md, md_diag);
timer->StopCollectingTasks();

// If we are finer than the coarsest level:
if (level > min_level) {
auto timing_guard = TimingAccumulatorGuard(solver_timings.GetOrAddAndRegister(
GetTimeLabel("Setup restrict send", level), tl));
task_out =
tl.AddTask(task_out, TF(SendBoundBufs<BoundaryType::gmg_restrict_send>), md);
}
Expand Down Expand Up @@ -412,12 +441,18 @@ class MGSolver : public SolverBase, MGSolverCounter {
auto &md_u0 = pmesh->mesh_data.Add(container_u0, md, sol_fields);
auto &md_diag = pmesh->mesh_data.Add(container_diag, md, sol_fields);

auto timer_guard_total = TimingAccumulatorGuard(
solver_timings.GetOrAddAndRegister(GetTimeLabel("Total", level), tl));

// 0. Receive residual from coarser level if there is one
auto set_from_finer = dependence;
if (level < max_level) {
// Fill fields with restricted values
// TODO(LFR): Need to make sure that this communication pattern is ok, since we are
// trying to concurrently communicate on two stages
auto timer =
solver_timings.GetOrAddAndRegister(GetTimeLabel("Restrict recv", level), tl);
timer->StartCollectingTasks();
auto recv_from_finer = tl.AddTask(
dependence, TF(ReceiveBoundBufs<BoundaryType::gmg_restrict_recv>), md_u);
set_from_finer = tl.AddTask(recv_from_finer,
Expand All @@ -427,6 +462,7 @@ class MGSolver : public SolverBase, MGSolverCounter {
TF(ReceiveBoundBufs<BoundaryType::gmg_restrict_recv>), md_res_err);
set_from_finer = tl.AddTask(
recv_from_finer, BTF(SetBounds<BoundaryType::gmg_restrict_recv>), md_res_err);
timer->StopCollectingTasks();
// 1. Copy residual from dual purpose communication field to the rhs, should be
// actual RHS for finest level
if (!do_FAS) {
Expand All @@ -439,8 +475,14 @@ class MGSolver : public SolverBase, MGSolverCounter {
// to make sure that the boundaries of the restricted u are up to date before
// calling Ax. That being said, at least in one case commenting this line out
// didn't seem to impact the solution.
auto timer_guard = TimingAccumulatorGuard(
solver_timings.GetOrAddAndRegister(GetTimeLabel("Ax rhs", level), tl));
auto timer_comm =
solver_timings.GetOrAddAndRegister(GetTimeLabel("Boundary", level), tl);
timer_comm->StartCollectingTasks();
set_from_finer = AddBoundaryExchangeTasks<BoundaryType::gmg_same>(
set_from_finer, tl, md_u, multilevel, BCFunc);
timer_comm->StopCollectingTasks();
set_from_finer =
tl.AddTask(set_from_finer, BTF(CopyData<FieldTL, true>), md_u, md_u0);
// This should set the rhs only in blocks that correspond to interior nodes, the
Expand All @@ -452,6 +494,8 @@ class MGSolver : public SolverBase, MGSolverCounter {
md_temp, md_res_err, md_rhs, 1.0, 1.0, true);
}
} else {
auto timer_guard = TimingAccumulatorGuard(
solver_timings.GetOrAddAndRegister(GetTimeLabel("Copy u0", level), tl));
set_from_finer =
tl.AddTask(set_from_finer, BTF(CopyData<FieldTL, true>), md_u, md_u0);
}
Expand All @@ -462,24 +506,39 @@ class MGSolver : public SolverBase, MGSolverCounter {
// If we are finer than the coarsest level:
auto post_smooth = pre_smooth;
if (level > min_level) {
auto timer_ax =
solver_timings.GetOrAddAndRegister(GetTimeLabel("Ax residual", level), tl);
timer_ax->StartCollectingTasks();
// 3. Communicate same level boundaries so that u is up to date everywhere
auto timer_comm =
solver_timings.GetOrAddAndRegister(GetTimeLabel("Boundary", level), tl);
timer_comm->StartCollectingTasks();
auto comm_u = AddBoundaryExchangeTasks<BoundaryType::gmg_same>(pre_smooth, tl, md_u,
multilevel, BCFunc);
timer_comm->StopCollectingTasks();

// 4. Caclulate residual and store in communication field
auto residual = eqs_.Ax(tl, comm_u, md, md_u, md_temp);
residual = tl.AddTask(residual, BTF(AddFieldsAndStoreInteriorSelect<FieldTL, true>),
md_rhs, md_temp, md_res_err, 1.0, -1.0, false);
timer_ax->StopCollectingTasks();

// 5. Restrict communication field and send to next level
// TODO(LFR): Other place where we are receiving two stage communication
auto timer_res =
solver_timings.GetOrAddAndRegister(GetTimeLabel("Restrict send", level), tl);
timer_res->StartCollectingTasks();
auto communicate_to_coarse =
tl.AddTask(residual, BTF(SendBoundBufs<BoundaryType::gmg_restrict_send>), md_u);
communicate_to_coarse =
tl.AddTask(communicate_to_coarse,
BTF(SendBoundBufs<BoundaryType::gmg_restrict_send>), md_res_err);
timer_res->StopCollectingTasks();

// 6. Receive error field into communication field and prolongate
auto timer_pro =
solver_timings.GetOrAddAndRegister(GetTimeLabel("Prolong recv", level), tl);
timer_pro->StartCollectingTasks();
auto recv_from_coarser =
tl.AddTask(communicate_to_coarse,
TF(ReceiveBoundBufs<BoundaryType::gmg_prolongate_recv>), md_res_err);
Expand All @@ -488,11 +547,16 @@ class MGSolver : public SolverBase, MGSolverCounter {
md_res_err);
auto prolongate =
prolongator_.template Prolongate<FieldTL>(tl, set_from_coarser, md_res_err);
timer_pro->StopCollectingTasks();

// 7. Correct solution on this level with res_err field and store in
// communication field
auto timer_u =
solver_timings.GetOrAddAndRegister(GetTimeLabel("Update u", level), tl);
timer_u->StartCollectingTasks();
auto update_sol = tl.AddTask(prolongate, BTF(AddFieldsAndStore<FieldTL, true>),
md_u, md_res_err, md_u, 1.0, 1.0);
timer_u->StopCollectingTasks();

// 8. Post smooth using communication field and stored RHS
post_smooth = AddSRJIteration<BoundaryType::gmg_same>(
Expand All @@ -507,6 +571,8 @@ class MGSolver : public SolverBase, MGSolverCounter {
// level)
TaskID last_task = post_smooth;
if (level < max_level) {
auto timer_guard = TimingAccumulatorGuard(
solver_timings.GetOrAddAndRegister(GetTimeLabel("Prolong send", level), tl));
auto copy_over = post_smooth;
if (!do_FAS) {
copy_over =
Expand All @@ -518,8 +584,12 @@ class MGSolver : public SolverBase, MGSolverCounter {
}
// This is required to make sure boundaries of res_err are up to date before
// prolongation
auto timer_comm =
solver_timings.GetOrAddAndRegister(GetTimeLabel("Boundary", level), tl);
timer_comm->StartCollectingTasks();
auto boundary = AddBoundaryExchangeTasks<BoundaryType::gmg_same>(
copy_over, tl, md_res_err, multilevel, BCFunc);
timer_comm->StopCollectingTasks();
last_task = tl.AddTask(
boundary, BTF(SendBoundBufs<BoundaryType::gmg_prolongate_send>), md_res_err);
}
Expand Down
2 changes: 2 additions & 0 deletions src/solvers/solver_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ class SolverBase {

const std::vector<std::string> &GetFieldLabels() const { return sol_fields; }

static inline TimingAccumulatorDictionary solver_timings;

protected:
// Labels of all fields included in the vector
std::vector<std::string> sol_fields;
Expand Down
Loading