From 662947fa83f15488f2554cc1809b51c80fabaa00 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 24 Nov 2021 22:15:45 +0100
Subject: [PATCH 01/52] field boundary change and fewer kernel launches

---
 src/fields/Fields.H                           |  43 ++-
 src/fields/Fields.cpp                         | 351 +++++++++++++-----
 .../FFTPoissonSolverDirichlet.cpp             |  62 ++--
 .../fft_poisson_solver/fft/WrapCuDST.cpp      |  14 +-
 4 files changed, 328 insertions(+), 142 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index c9b2e00c82..93454c7e85 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -44,10 +44,14 @@ static std::array<std::map<std::string, int>, 5> Comps
             }}
     }};
 
-/** \brief Operation performed in the TransverseDerivative function:
- * either assign or add to the destination array
- */
-enum struct SliceOperatorType { Assign, Add };
+ struct FieldView { // pairs a MultiFab with one component index
+     amrex::MultiFab& m_mfab; // held by reference, not owned: the MultiFab must outlive the view
+     int m_comp; // component index forwarded to MultiFab::array below
+     // Array4 for the box of mfi, obtained with m_comp as the component argument
+     auto array (amrex::MFIter& mfi) const {
+         return m_mfab.array(mfi, m_comp);
+     }
+ };
 
 /** \brief Direction of each dimension. Can be used for clean handling 2D vs. 3D in the future */
 struct Direction{
@@ -94,6 +98,15 @@ public:
      * \param[in] islice slice index
      */
     amrex::MultiFab& getSlices (int lev, int islice) {return m_slices[lev][islice]; }
+
+    FieldView getField (const int lev, const int islice, const std::string comp) { // view of one named component
+        return FieldView{getSlices(lev, islice), Comps[islice][comp]}; // NOTE(review): map operator[] inserts unknown names as 0
+    }
+
+    FieldView getStagingArea (const int lev) { // view of the Poisson solver staging area of level lev
+        return FieldView{m_poisson_solver[lev]->StagingArea(), 0}; // always component 0
+    }
+
     /** Return reference to density tile arrays */
     amrex::Vector<amrex::FArrayBox>& getTmpDensities() { return m_tmp_densities; }
     /** \brief Copy between the full FArrayBox and slice MultiFab.
@@ -146,22 +159,22 @@ public:
      * \param[in] scomp compent of source MF
      * \param[in] lev level of mesh refinement
      */
-    void CopyToStagingArea (const amrex::MultiFab& src, const SliceOperatorType slice_operator,
-                            const int scomp, const int lev);
+    /*void CopyToStagingArea (const amrex::MultiFab& src, const SliceOperatorType slice_operator,
+                            const int scomp, const int lev);*/
 
     /** Compute transverse derivative of 1 slice*/
-    void TransverseDerivative (const amrex::MultiFab& src, amrex::MultiFab& dst,
-                               const int direction, const amrex::Real dx,
+    /*void TransverseDerivative (const amrex::MultiFab& src, amrex::MultiFab& dst,
+                               const int direction, const amrex::Real dx, const int lev,
                                const amrex::Real mult_coeff=1.,
                                const SliceOperatorType slice_operator=SliceOperatorType::Assign,
-                               const int scomp=0, const int dcomp=0, const bool use_offset=false);
+                               const int scomp=0, const int dcomp=0, const bool use_offset=false);*/
 
     /** Compute longitudinal derivative (difference between two slices) */
-    void LongitudinalDerivative (const amrex::MultiFab& src, const amrex::MultiFab& src2,
-                                 amrex::MultiFab& dst, const amrex::Real dz,
+    /*void LongitudinalDerivative (const amrex::MultiFab& src, const amrex::MultiFab& src2,
+                                 amrex::MultiFab& dst, const amrex::Real dz, const int lev,
                                  const amrex::Real mult_coeff=1.,
                                  const SliceOperatorType slice_operator=SliceOperatorType::Assign,
-                                 const int sc1omp=0, const int s2comp=0, const int dcomp=0);
+                                 const int sc1omp=0, const int s2comp=0, const int dcomp=0);*/
 
     /** \brief Interpolate values at boundaries from coarse grid to the fine grid
      *
@@ -288,6 +301,12 @@ public:
 
     /** Number of guard cells for slices MultiFab */
     static amrex::IntVect m_slices_nguards;
+    static amrex::IntVect m_valid_nguards;
+
+    static amrex::Vector<amrex::Box> m_box_problem;
+    static amrex::Vector<amrex::Box> m_box_source;
+    static amrex::Vector<amrex::Box> m_box_valid;
+    static amrex::Vector<amrex::Box> m_box_all;
 private:
     /** Vector over levels, array of 4 slices required to compute current slice */
     amrex::Vector<std::array<amrex::MultiFab, m_nslices>> m_slices;
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index a334093482..26f8686697 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -7,10 +7,21 @@
 #include "particles/ShapeFactors.H"
 
 amrex::IntVect Fields::m_slices_nguards = {-1, -1, -1};
+amrex::IntVect Fields::m_valid_nguards = {-1, -1, -1};
+
+amrex::Vector<amrex::Box> Fields::m_box_problem{};
+amrex::Vector<amrex::Box> Fields::m_box_source{};
+amrex::Vector<amrex::Box> Fields::m_box_valid{};
+amrex::Vector<amrex::Box> Fields::m_box_all{};
 
 Fields::Fields (Hipace const* a_hipace)
     : m_slices(a_hipace->maxLevel()+1)
 {
+    const int max_lev = a_hipace->maxLevel()+1;
+    m_box_problem.resize(max_lev);
+    m_box_source.resize(max_lev);
+    m_box_valid.resize(max_lev);
+    m_box_all.resize(max_lev);
     amrex::ParmParse ppf("fields");
     queryWithParser(ppf, "do_dirichlet_poisson", m_do_dirichlet_poisson);
 }
@@ -21,9 +32,28 @@ Fields::AllocData (
     const amrex::DistributionMapping& slice_dm, int bin_size)
 {
     HIPACE_PROFILE("Fields::AllocData()");
-    // Need at least 1 guard cell transversally for transverse derivative
+    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(slice_ba.size() == 1,
+        "Parallel field solvers not supported yet");
+
+    // Need 1 extra guard cell transversally for transverse derivative of Psi
     int nguards_xy = std::max(1, Hipace::m_depos_order_xy);
-    m_slices_nguards = {nguards_xy, nguards_xy, 0};
+    m_valid_nguards = {nguards_xy, nguards_xy, 0};
+    m_slices_nguards = {nguards_xy + 1, nguards_xy + 1, 0};
+
+    // box where the problem is defined
+    m_box_problem[lev] = slice_ba[0];
+
+    // source terms of derivatives can be only in here
+    m_box_source[lev] = m_box_problem[lev];
+    m_box_source[lev].grow({-1, -1, 0});
+
+    // contains valid fields that can be used for particles, also the box of the Poisson solver
+    m_box_valid[lev] = m_box_problem[lev];
+    m_box_valid[lev].grow(m_valid_nguards);
+
+    // valid box for Psi
+    m_box_all[lev] = m_box_problem[lev];
+    m_box_all[lev].grow(m_slices_nguards);
 
     for (int islice=0; islice<WhichSlice::N; islice++) {
         m_slices[lev][islice].define(
@@ -68,60 +98,160 @@ Fields::AllocData (
     }
 }
 
+
+struct derivative_x_GPU { // device functor: difference of the two x-neighbours times dx_inv
+    amrex::Array4<amrex::Real const> array;
+    amrex::Real dx_inv; // prefactor of the difference; derivative_x passes 1/(2*dx)
+    // reads array at i-1 and i+1, so both x-neighbours of (i,j,k) must be accessible
+    AMREX_GPU_DEVICE amrex::Real operator ()(int i, int j, int k) const {
+        return (array(i+1,j,k) - array(i-1,j,k)) * dx_inv;
+    }
+};
+
+struct derivative_y_GPU { // device functor: difference of the two y-neighbours times dy_inv
+    amrex::Array4<amrex::Real const> array;
+    amrex::Real dy_inv; // prefactor of the difference; derivative_y passes 1/(2*dy)
+    // reads array at j-1 and j+1, so both y-neighbours of (i,j,k) must be accessible
+    AMREX_GPU_DEVICE amrex::Real operator ()(int i, int j, int k) const {
+        return (array(i,j+1,k) - array(i,j-1,k)) * dy_inv;
+    }
+};
+
+struct derivative_z_GPU { // device functor: (array1 - array2) at the same cell times dz_inv
+    amrex::Array4<amrex::Real const> array1; // minuend
+    amrex::Array4<amrex::Real const> array2; // subtrahend
+    amrex::Real dz_inv; // prefactor of the difference; derivative_z passes 1/(2*dz)
+    // both arrays are read at the same (i,j,k); no neighbour cells are touched
+    AMREX_GPU_DEVICE amrex::Real operator ()(int i, int j, int k) const {
+        return (array1(i,j,k) - array2(i,j,k)) * dz_inv;
+    }
+};
+
+
+
+struct derivative_x { // host-side description of an x-derivative of one field component
+    FieldView f_view; // field that gets differentiated
+    amrex::Real dx; // spacing used in the 1/(2*dx) prefactor below
+    // builds the device functor for the box of mfi
+    derivative_x_GPU array (amrex::MFIter& mfi) const {
+        return derivative_x_GPU{f_view.array(mfi), 1/(2*dx)};
+    }
+};
+
+struct derivative_y { // host-side description of a y-derivative of one field component
+    FieldView f_view; // field that gets differentiated
+    amrex::Real dy; // spacing used in the 1/(2*dy) prefactor below
+    // builds the device functor for the box of mfi
+    derivative_y_GPU array (amrex::MFIter& mfi) const {
+        return derivative_y_GPU{f_view.array(mfi), 1/(2*dy)};
+    }
+};
+
+struct derivative_z { // host-side description of (f_view1 - f_view2) / (2*dz)
+    FieldView f_view1; // minuend field
+    FieldView f_view2; // subtrahend field
+    amrex::Real dz; // spacing used in the 1/(2*dz) prefactor below
+    // builds the device functor for the box of mfi; both views are indexed with the same mfi
+    derivative_z_GPU array (amrex::MFIter& mfi) const {
+        return derivative_z_GPU{f_view1.array(mfi), f_view2.array(mfi), 1/(2*dz)};
+    }
+};
+
+template<class FVA, class FVB> // sources only need array(mfi) giving an object callable as (i,j,k)
+void
+FieldOperation (const amrex::Box op_box, FieldView dst,
+                const amrex::Real factor_a, const FVA src_a,
+                const amrex::Real factor_b, const FVB src_b,
+                const amrex::Box valid_box)
+{
+    HIPACE_PROFILE("Fields::FieldOperation()");
+    // dst = factor_a*src_a + factor_b*src_b on op_box, forced to 0 outside valid_box in x and y
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
+    for ( amrex::MFIter mfi(dst.m_mfab, amrex::TilingIfNotGPU());
+          mfi.isValid(); ++mfi ){
+        const auto& dst_array = dst.array(mfi);
+        const auto src_a_array = src_a.array(mfi);
+        const auto src_b_array = src_b.array(mfi);
+        const amrex::Box bx = mfi.tilebox() & op_box; // only the part of the tile inside op_box
+        const int i_lo = valid_box.smallEnd(0);
+        const int i_hi = valid_box.bigEnd(0);
+        const int j_lo = valid_box.smallEnd(1);
+        const int j_hi = valid_box.bigEnd(1);
+        // select instead of multiplying by the mask: NaN*0 is NaN, so a product would leak bad cells
+        amrex::ParallelFor(
+            bx,
+            [=] AMREX_GPU_DEVICE(int i, int j, int k)
+            {
+                dst_array(i,j,k) = (i_lo<=i && i<=i_hi && j_lo<=j && j<=j_hi) ?
+                    factor_a*src_a_array(i,j,k) + factor_b*src_b_array(i,j,k) : amrex::Real(0.);
+            });
+    }
+}
+
+template<class FV> // FV only needs array(mfi) giving an object callable as (i,j,k)
+void
+UnaryFieldOperation (const amrex::Box op_box, FieldView dst,
+                     const amrex::Real factor, const FV src)
+{
+    HIPACE_PROFILE("Fields::UnaryFieldOperation()"); // own label, not shared with FieldOperation
+    // dst = factor*src on every cell of op_box; no masking box is applied here
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
+    for ( amrex::MFIter mfi(dst.m_mfab, amrex::TilingIfNotGPU());
+          mfi.isValid(); ++mfi ){
+        const auto& dst_array = dst.array(mfi);
+        const auto src_array = src.array(mfi);
+        const amrex::Box bx = mfi.tilebox() & op_box; // only the part of the tile inside op_box
+        // src may read neighbour cells (e.g. the derivative functors), dst is written at (i,j,k) only
+        amrex::ParallelFor(
+            bx,
+            [=] AMREX_GPU_DEVICE(int i, int j, int k)
+            {
+                dst_array(i,j,k) = factor*src_array(i,j,k);
+            });
+    }
+}
+
+/*
 void
 Fields::CopyToStagingArea (const amrex::MultiFab& src, const SliceOperatorType slice_operator,
                            const int scomp, const int lev)
 {
     HIPACE_PROFILE("Fields::CopyToStagingArea()");
 
-    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(src.size() == 1, "Slice MFs must be defined on one box only");
-    const amrex::FArrayBox& src_fab = src[0];
-    amrex::Box src_bx = src_fab.box();
-    src_bx.grow({-m_slices_nguards[0], -m_slices_nguards[1], 0});
-    amrex::IntVect lo = src_bx.smallEnd();
-
-    if (lo[0] == 0 && lo[1] == 0) {
-        if (slice_operator==SliceOperatorType::Assign) {
-            amrex::MultiFab::Copy(m_poisson_solver[lev]->StagingArea(),
-                                  getSlices(lev,WhichSlice::This), Comps[WhichSlice::This]["jz"], 0,
-                                  1, 0);
-        } else {
-            amrex::MultiFab::Add(m_poisson_solver[lev]->StagingArea(),
-                                 getSlices(lev,WhichSlice::This), Comps[WhichSlice::This]["rho"], 0,
-                                 1, 0);
-        }
-    } else {
-
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
 #endif
-        for ( amrex::MFIter mfi(m_poisson_solver[lev]->StagingArea(), amrex::TilingIfNotGPU());
-              mfi.isValid(); ++mfi ){
-            const amrex::Box& bx = mfi.tilebox();
-            amrex::Array4<amrex::Real const> const & src_array = src.array(mfi);
-            amrex::Array4<amrex::Real> const & dst_array = m_poisson_solver[lev]
-                                                           ->StagingArea().array(mfi);
-
-            amrex::ParallelFor(
-                bx,
-                [=] AMREX_GPU_DEVICE(int i, int j, int k)
+    for ( amrex::MFIter mfi(m_poisson_solver[lev]->StagingArea(), amrex::TilingIfNotGPU());
+          mfi.isValid(); ++mfi ){
+        const amrex::Box& bx = mfi.tilebox();
+        amrex::Array4<amrex::Real const> const & src_array = src.array(mfi);
+        amrex::Array4<amrex::Real> const & dst_array = m_poisson_solver[lev]
+                                                       ->StagingArea().array(mfi);
+        amrex::Box prob_box = m_box_source[lev];
+
+        amrex::ParallelFor(
+            bx,
+            [=] AMREX_GPU_DEVICE(int i, int j, int k)
+            {
+                if (slice_operator==SliceOperatorType::Assign) {
+                    dst_array(i,j,k,0) = src_array(i,j,k,scomp) * prob_box.contains(i,j,k);
+                }
+                else
                 {
-                    if (slice_operator==SliceOperatorType::Assign) {
-                        dst_array(i,j,k,0) = src_array(i+lo[0], j+lo[1], k, scomp);
-                    }
-                    else /* SliceOperatorType::Add */
-                    {
-                        dst_array(i,j,k,0) += src_array(i+lo[0], j+lo[1], k, scomp);
-                    }
+                    dst_array(i,j,k,0) += src_array(i,j,k,scomp) * prob_box.contains(i,j,k);
                 }
-                );
-        }
+            }
+            );
     }
 }
 
 void
 Fields::TransverseDerivative (const amrex::MultiFab& src, amrex::MultiFab& dst, const int direction,
-                              const amrex::Real dx, const amrex::Real mult_coeff,
+                              const amrex::Real dx, const int lev, const amrex::Real mult_coeff,
                               const SliceOperatorType slice_operator, const int scomp,
                               const int dcomp, const bool use_offset)
 {
@@ -129,17 +259,8 @@ Fields::TransverseDerivative (const amrex::MultiFab& src, amrex::MultiFab& dst,
     using namespace amrex::literals;
 
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(src.size() == 1, "Slice MFs must be defined on one box only");
-    amrex::IntVect lo;
-    if (use_offset) {
-        const amrex::FArrayBox& src_fab = src[0];
-        amrex::Box src_bx = src_fab.box();
-        src_bx.grow({-m_slices_nguards[0], -m_slices_nguards[1], 0});
-        lo = src_bx.smallEnd();
-    } else {
-        lo = {0, 0, 0};
-    }
-
     AMREX_ALWAYS_ASSERT((direction == Direction::x) || (direction == Direction::y));
+
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
 #endif
@@ -152,32 +273,32 @@ Fields::TransverseDerivative (const amrex::MultiFab& src, amrex::MultiFab& dst,
             [=] AMREX_GPU_DEVICE(int i, int j, int k)
             {
                 if (direction == Direction::x){
-                    /* finite difference along x */
+
                     if (slice_operator==SliceOperatorType::Assign)
                     {
                         dst_array(i,j,k,dcomp) = mult_coeff / (2.0_rt*dx) *
-                                                 (src_array(i+1+lo[0], j+lo[1], k, scomp)
-                                                  - src_array(i-1+lo[0], j+lo[1], k, scomp));
+                                                 (src_array(i+1, j, k, scomp)
+                                                  - src_array(i-1, j, k, scomp));
                     }
-                    else /* SliceOperatorType::Add */
+                    else
                     {
                         dst_array(i,j,k,dcomp) += mult_coeff / (2.0_rt*dx) *
-                                                  (src_array(i+1+lo[0], j+lo[1], k, scomp)
-                                                   - src_array(i-1+lo[0], j+lo[1], k, scomp));
+                                                  (src_array(i+1, j, k, scomp)
+                                                   - src_array(i-1, j, k, scomp));
                     }
-                } else /* Direction::y */ {
-                    /* finite difference along y */
+                } else  {
+
                     if (slice_operator==SliceOperatorType::Assign)
                     {
                         dst_array(i,j,k,dcomp) = mult_coeff / (2.0_rt*dx) *
-                                                 (src_array(i+lo[0], j+1+lo[1], k, scomp)
-                                                  - src_array(i+lo[0], j-1+lo[1], k, scomp));
+                                                 (src_array(i, j+1, k, scomp)
+                                                  - src_array(i, j-1, k, scomp));
                     }
-                    else /* SliceOperatorType::Add */
+
                     {
                         dst_array(i,j,k,dcomp) += mult_coeff / (2.0_rt*dx) *
-                                                  (src_array(i+lo[0], j+1+lo[1], k, scomp)
-                                                   - src_array(i+lo[0], j-1+lo[1], k, scomp));
+                                                  (src_array(i, j+1, k, scomp)
+                                                   - src_array(i, j-1, k, scomp));
                     }
                 }
             }
@@ -187,7 +308,7 @@ Fields::TransverseDerivative (const amrex::MultiFab& src, amrex::MultiFab& dst,
 
 void
 Fields::LongitudinalDerivative (const amrex::MultiFab& src1, const amrex::MultiFab& src2,
-                                amrex::MultiFab& dst, const amrex::Real dz,
+                                amrex::MultiFab& dst, const amrex::Real dz, const int lev,
                                 const amrex::Real mult_coeff,
                                 const SliceOperatorType slice_operator, const int s1comp,
                                 const int s2comp, const int dcomp)
@@ -196,10 +317,6 @@ Fields::LongitudinalDerivative (const amrex::MultiFab& src1, const amrex::MultiF
     using namespace amrex::literals;
 
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(src1.size() == 1, "Slice MFs must be defined on one box only");
-    const amrex::FArrayBox& src_fab = src1[0];
-    amrex::Box src_bx = src_fab.box();
-    src_bx.grow({-m_slices_nguards[0], -m_slices_nguards[1], 0});
-    amrex::IntVect lo = src_bx.smallEnd();
 
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
@@ -216,21 +333,21 @@ Fields::LongitudinalDerivative (const amrex::MultiFab& src1, const amrex::MultiF
                 if (slice_operator==SliceOperatorType::Assign)
                 {
                     dst_array(i,j,k,dcomp) = mult_coeff / (2.0_rt*dz) *
-                                             (src1_array(i+lo[0], j+lo[1], k, s1comp)
-                                              - src2_array(i+lo[0], j+lo[1], k, s2comp));
+                                             (src1_array(i, j, k, s1comp)
+                                              - src2_array(i, j, k, s2comp));
                 }
-                else /* SliceOperatorType::Add */
+                else
                 {
                     dst_array(i,j,k,dcomp) += mult_coeff / (2.0_rt*dz) *
-                                              (src1_array(i+lo[0], j+lo[1], k, s1comp)
-                                               - src2_array(i+lo[0], j+lo[1], k, s2comp));
+                                              (src1_array(i, j, k, s1comp)
+                                               - src2_array(i, j, k, s2comp));
                 }
 
             }
             );
     }
 }
-
+*/
 
 void
 Fields::Copy (const int lev, const int i_slice, const int slice_comp, const int full_comp,
@@ -359,12 +476,12 @@ Fields::AddBeamCurrents (const int lev, const int which_slice)
     amrex::MultiFab& S = getSlices(lev, which_slice);
     // we add the beam currents to the full currents, as mostly the full currents are needed
     amrex::MultiFab::Add(S, S, Comps[which_slice]["jx_beam"], Comps[which_slice]["jx"], 1,
-                         {Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, 0});
+        m_slices_nguards);
     amrex::MultiFab::Add(S, S, Comps[which_slice]["jy_beam"], Comps[which_slice]["jy"], 1,
-                         {Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, 0});
+        m_slices_nguards);
     if (which_slice == WhichSlice::This) {
         amrex::MultiFab::Add(S, S, Comps[which_slice]["jz_beam"], Comps[which_slice]["jz"], 1,
-                             {Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, 0});
+            m_slices_nguards);
     }
 }
 
@@ -648,29 +765,40 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
     InterpolateFromLev0toLev1(geom, lev, "rho", islice);
 
     // calculating the right-hand side 1/episilon0 * -(rho-Jz/c)
-    CopyToStagingArea(getSlices(lev,WhichSlice::This), SliceOperatorType::Assign,
+    FieldOperation(m_box_valid[lev], getStagingArea(lev),
+                   1./(phys_const.c*phys_const.ep0), getField(lev, WhichSlice::This, "jz"),
+                   -1./(phys_const.ep0), getField(lev, WhichSlice::This, "rho"),
+                   m_box_problem[lev]);
+
+    /*CopyToStagingArea(getSlices(lev,WhichSlice::This), SliceOperatorType::Assign,
                        Comps[WhichSlice::This]["jz"], lev);
     m_poisson_solver[lev]->StagingArea().mult(-1./phys_const.c);
     CopyToStagingArea(getSlices(lev,WhichSlice::This), SliceOperatorType::Add,
                        Comps[WhichSlice::This]["rho"], lev);
-    m_poisson_solver[lev]->StagingArea().mult(-1./phys_const.ep0);
+    m_poisson_solver[lev]->StagingArea().mult(-1./phys_const.ep0);*/
 
     InterpolateBoundaries(geom, lev, "Psi", islice);
     m_poisson_solver[lev]->SolvePoissonEquation(lhs);
 
     /* ---------- Transverse FillBoundary Psi ---------- */
-    amrex::ParallelContext::push(m_comm_xy);
-    lhs.FillBoundary(geom[lev].periodicity());
-    amrex::ParallelContext::pop();
 
     InterpolateFromLev0toLev1(geom, lev, "Psi", islice);
 
     /* Compute ExmBy and Eypbx from grad(-psi) */
-    TransverseDerivative(
+    UnaryFieldOperation(m_box_valid[lev], getField(lev, WhichSlice::This, "ExmBy"),
+                        -1., derivative_x{getField(lev, WhichSlice::This, "Psi"),
+                        geom[lev].CellSize(Direction::x)});
+
+    UnaryFieldOperation(m_box_valid[lev], getField(lev, WhichSlice::This, "EypBx"),
+                        -1., derivative_y{getField(lev, WhichSlice::This, "Psi"),
+                        geom[lev].CellSize(Direction::y)});
+
+    /*TransverseDerivative(
         getSlices(lev, WhichSlice::This),
         getSlices(lev, WhichSlice::This),
         Direction::x,
         geom[lev].CellSize(Direction::x),
+        lev,
         -1.,
         SliceOperatorType::Assign,
         Comps[WhichSlice::This]["Psi"],
@@ -681,10 +809,11 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
         getSlices(lev, WhichSlice::This),
         Direction::y,
         geom[lev].CellSize(Direction::y),
+        lev,
         -1.,
         SliceOperatorType::Assign,
         Comps[WhichSlice::This]["Psi"],
-        Comps[WhichSlice::This]["EypBx"]);
+        Comps[WhichSlice::This]["EypBx"]);*/
 }
 
 
@@ -700,11 +829,19 @@ Fields::SolvePoissonEz (amrex::Vector<amrex::Geometry> const& geom, const int le
                         Comps[WhichSlice::This]["Ez"], 1);
     // Right-Hand Side for Poisson equation: compute 1/(episilon0 *c0 )*(d_x(jx) + d_y(jy))
     // from the slice MF, and store in the staging area of poisson_solver
-    TransverseDerivative(
+    FieldOperation(m_box_valid[lev], getStagingArea(lev),
+                   1./(phys_const.ep0*phys_const.c),
+                   derivative_x{getField(lev, WhichSlice::This, "jx"), geom[lev].CellSize(Direction::x)},
+                   1./(phys_const.ep0*phys_const.c),
+                   derivative_y{getField(lev, WhichSlice::This, "jy"), geom[lev].CellSize(Direction::y)},
+                   m_box_source[lev]);
+
+    /*TransverseDerivative(
         getSlices(lev, WhichSlice::This),
         m_poisson_solver[lev]->StagingArea(),
         Direction::x,
         geom[lev].CellSize(Direction::x),
+        lev,
         1./(phys_const.ep0*phys_const.c),
         SliceOperatorType::Assign,
         Comps[WhichSlice::This]["jx"], 0, 1);
@@ -714,9 +851,10 @@ Fields::SolvePoissonEz (amrex::Vector<amrex::Geometry> const& geom, const int le
         m_poisson_solver[lev]->StagingArea(),
         Direction::y,
         geom[lev].CellSize(Direction::y),
+        lev,
         1./(phys_const.ep0*phys_const.c),
         SliceOperatorType::Add,
-        Comps[WhichSlice::This]["jy"], 0, 1);
+        Comps[WhichSlice::This]["jy"], 0, 1);*/
 
     InterpolateBoundaries(geom, lev, "Ez", islice);
     // Solve Poisson equation.
@@ -735,11 +873,20 @@ Fields::SolvePoissonBx (amrex::MultiFab& Bx_iter, amrex::Vector<amrex::Geometry>
     PhysConst phys_const = get_phys_const();
     // Right-Hand Side for Poisson equation: compute -mu_0*d_y(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    TransverseDerivative(
+    FieldOperation(m_box_valid[lev], getStagingArea(lev),
+                   -phys_const.mu0,
+                   derivative_y{getField(lev, WhichSlice::This, "jz"), geom[lev].CellSize(Direction::y)},
+                   phys_const.mu0,
+                   derivative_z{getField(lev, WhichSlice::Previous1, "jy"),
+                   getField(lev, WhichSlice::Next, "jy"), geom[lev].CellSize(Direction::z)},
+                   m_box_source[lev]);
+
+    /*TransverseDerivative(
         getSlices(lev, WhichSlice::This),
         m_poisson_solver[lev]->StagingArea(),
         Direction::y,
         geom[lev].CellSize(Direction::y),
+        lev,
         -phys_const.mu0,
         SliceOperatorType::Assign,
         Comps[WhichSlice::This]["jz"], 0, 1);
@@ -749,10 +896,11 @@ Fields::SolvePoissonBx (amrex::MultiFab& Bx_iter, amrex::Vector<amrex::Geometry>
         getSlices(lev, WhichSlice::Next),
         m_poisson_solver[lev]->StagingArea(),
         geom[lev].CellSize(Direction::z),
+        lev,
         phys_const.mu0,
         SliceOperatorType::Add,
         Comps[WhichSlice::Previous1]["jy"],
-        Comps[WhichSlice::Next]["jy"]);
+        Comps[WhichSlice::Next]["jy"]);*/
 
     InterpolateBoundaries(geom, lev, "Bx", islice);
     // Solve Poisson equation.
@@ -771,11 +919,20 @@ Fields::SolvePoissonBy (amrex::MultiFab& By_iter, amrex::Vector<amrex::Geometry>
     PhysConst phys_const = get_phys_const();
     // Right-Hand Side for Poisson equation: compute mu_0*d_x(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    TransverseDerivative(
+    FieldOperation(m_box_valid[lev], getStagingArea(lev),
+                   phys_const.mu0,
+                   derivative_x{getField(lev, WhichSlice::This, "jz"), geom[lev].CellSize(Direction::x)},
+                   -phys_const.mu0,
+                   derivative_z{getField(lev, WhichSlice::Previous1, "jx"),
+                   getField(lev, WhichSlice::Next, "jx"), geom[lev].CellSize(Direction::z)},
+                   m_box_source[lev]);
+
+    /*TransverseDerivative(
         getSlices(lev, WhichSlice::This),
         m_poisson_solver[lev]->StagingArea(),
         Direction::x,
         geom[lev].CellSize(Direction::x),
+        lev,
         phys_const.mu0,
         SliceOperatorType::Assign,
         Comps[WhichSlice::This]["jz"], 0, 1);
@@ -785,10 +942,11 @@ Fields::SolvePoissonBy (amrex::MultiFab& By_iter, amrex::Vector<amrex::Geometry>
         getSlices(lev, WhichSlice::Next),
         m_poisson_solver[lev]->StagingArea(),
         geom[lev].CellSize(Direction::z),
+        lev,
         -phys_const.mu0,
         SliceOperatorType::Add,
         Comps[WhichSlice::Previous1]["jx"],
-        Comps[WhichSlice::Next]["jx"]);
+        Comps[WhichSlice::Next]["jx"]);*/
 
     InterpolateBoundaries(geom, lev, "By", islice);
     // Solve Poisson equation.
@@ -809,11 +967,19 @@ Fields::SolvePoissonBz (amrex::Vector<amrex::Geometry> const& geom, const int le
                         Comps[WhichSlice::This]["Bz"], 1);
     // Right-Hand Side for Poisson equation: compute mu_0*(d_y(jx) - d_x(jy))
     // from the slice MF, and store in the staging area of m_poisson_solver
-    TransverseDerivative(
+    FieldOperation(m_box_valid[lev], getStagingArea(lev),
+                   phys_const.mu0,
+                   derivative_y{getField(lev, WhichSlice::This, "jx"), geom[lev].CellSize(Direction::y)},
+                   -phys_const.mu0,
+                   derivative_x{getField(lev, WhichSlice::This, "jy"), geom[lev].CellSize(Direction::x)},
+                   m_box_source[lev]);
+
+    /*TransverseDerivative(
         getSlices(lev, WhichSlice::This),
         m_poisson_solver[lev]->StagingArea(),
         Direction::y,
         geom[lev].CellSize(Direction::y),
+        lev,
         phys_const.mu0,
         SliceOperatorType::Assign,
         Comps[WhichSlice::This]["jx"], 0, 1);
@@ -823,9 +989,10 @@ Fields::SolvePoissonBz (amrex::Vector<amrex::Geometry> const& geom, const int le
         m_poisson_solver[lev]->StagingArea(),
         Direction::x,
         geom[lev].CellSize(Direction::x),
+        lev,
         -phys_const.mu0,
         SliceOperatorType::Add,
-        Comps[WhichSlice::This]["jy"], 0, 1);
+        Comps[WhichSlice::This]["jy"], 0, 1);*/
 
     InterpolateBoundaries(geom, lev, "Bz", islice);
     // Solve Poisson equation.
diff --git a/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp b/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
index ac4c5ab495..1dac56dd03 100644
--- a/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
+++ b/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
@@ -23,31 +23,25 @@ FFTPoissonSolverDirichlet::define (amrex::BoxArray const& a_realspace_ba,
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(a_realspace_ba.size() == 1, "Parallel FFT not supported yet");
 
     // Create the box array that corresponds to spectral space
-    amrex::BoxList spectral_bl; // Create empty box list
-    amrex::BoxList real_bl; // Create empty box list<
+    amrex::BoxList real_and_spectral_bl; // Create empty box list
     // Loop over boxes and fill the box list
     for (int i=0; i < a_realspace_ba.size(); i++ ) {
-        // For local FFTs, boxes in spectral space start at 0 in
-        // each direction and have the same number of points as the
-        // (cell-centered) real space box
+        // For local FFTs, boxes in spectral space
+        // are the same as real space boxes, but have one less ghost cell
         // Define the corresponding box
-        amrex::Box spectral_bx = amrex::Box( amrex::IntVect::TheZeroVector(),
-                          a_realspace_ba[i].length() - amrex::IntVect::TheUnitVector() );
-        spectral_bl.push_back( spectral_bx );
-        amrex::Box real_bx = spectral_bx;
-        real_bx.setSmall(Direction::z, a_realspace_ba[i].smallEnd(Direction::z));
-        real_bx.setBig  (Direction::z, a_realspace_ba[i].bigEnd(Direction::z));
-        real_bl.push_back( real_bx );
+        amrex::Box space_bx = a_realspace_ba[i];
+        space_bx.grow(Fields::m_valid_nguards);
+        real_and_spectral_bl.push_back( space_bx );
     }
-    m_spectralspace_ba.define( std::move(spectral_bl) );
-    amrex::BoxArray real_ba(std::move(real_bl));
+    m_spectralspace_ba.define( std::move(real_and_spectral_bl) );
 
     // Allocate temporary arrays - in real space and spectral space
     // These arrays will store the data just before/after the FFT
     // The stagingArea is also created from 0 to nx, because the real space array may have
     // an offset for levels > 0
-    m_stagingArea = amrex::MultiFab(real_ba, dm, 1, 0);
+    m_stagingArea = amrex::MultiFab(m_spectralspace_ba, dm, 1, 0);
     m_tmpSpectralField = amrex::MultiFab(m_spectralspace_ba, dm, 1, 0);
+    m_eigenvalue_matrix = amrex::MultiFab(m_spectralspace_ba, dm, 1, 0);
     m_stagingArea.setVal(0.0); // this is not required
     m_tmpSpectralField.setVal(0.0);
 
@@ -57,36 +51,35 @@ FFTPoissonSolverDirichlet::define (amrex::BoxArray const& a_realspace_ba,
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_tmpSpectralField.local_size() == 1,
                                      "There should be only one box locally.");
 
+    const amrex::Box fft_box = m_spectralspace_ba[0];
     const auto dx = gm.CellSizeArray();
     const amrex::Real dxsquared = dx[0]*dx[0];
     const amrex::Real dysquared = dx[1]*dx[1];
-    const amrex::Real sine_x_factor = MathConst::pi / ( 2. * ( real_ba[0].length(0) + 1 ));
-    const amrex::Real sine_y_factor = MathConst::pi / ( 2. * ( real_ba[0].length(1) + 1 ));
+    const amrex::Real sine_x_factor = MathConst::pi / ( 2. * ( fft_box.length(0) + 1 ));
+    const amrex::Real sine_y_factor = MathConst::pi / ( 2. * ( fft_box.length(1) + 1 ));
 
     // Normalization of FFTW's 'DST-I' discrete sine transform (FFTW_RODFT00)
     // This normalization is used regardless of the sine transform library
-    const amrex::Real norm_fac = 0.5 / ( 2 * (( real_ba[0].length(0) + 1 )
-                                             *( real_ba[0].length(1) + 1 )));
-
-    m_eigenvalue_matrix = amrex::MultiFab(m_spectralspace_ba, dm, 1, 0);
+    const amrex::Real norm_fac = 0.5 / ( 2 * (( fft_box.length(0) + 1 )
+                                             *( fft_box.length(1) + 1 )));
 
     // Calculate the array of m_eigenvalue_matrix
     for (amrex::MFIter mfi(m_eigenvalue_matrix); mfi.isValid(); ++mfi ){
         amrex::Array4<amrex::Real> eigenvalue_matrix = m_eigenvalue_matrix.array(mfi);
-        amrex::Box const& bx = mfi.validbox();  // The lower corner of the "2D" slice Box is zero.
+        amrex::IntVect lo = fft_box.smallEnd();
         amrex::ParallelFor(
-            bx, [=] AMREX_GPU_DEVICE (int i, int j, int /* k */) noexcept
+            fft_box, [=] AMREX_GPU_DEVICE (int i, int j, int /* k */) noexcept
                 {
                     /* fast poisson solver diagonal x coeffs */
-                    amrex::Real sinex_sq = sin(( i + 1 ) * sine_x_factor) * sin(( i + 1 ) * sine_x_factor);
+                    amrex::Real sinex_sq = sin(( i - lo[0] + 1 ) * sine_x_factor) * sin(( i - lo[0] + 1 ) * sine_x_factor);
                     /* fast poisson solver diagonal y coeffs */
-                    amrex::Real siney_sq = sin(( j + 1 ) * sine_y_factor) * sin(( j + 1 ) * sine_y_factor);
+                    amrex::Real siney_sq = sin(( j - lo[1] + 1 ) * sine_y_factor) * sin(( j - lo[1] + 1 ) * sine_y_factor);
 
                     if ((sinex_sq!=0) && (siney_sq!=0)) {
-                        eigenvalue_matrix(i,j,0) = norm_fac / ( -4.0 * ( sinex_sq / dxsquared + siney_sq / dysquared ));
+                        eigenvalue_matrix(i,j,lo[2]) = norm_fac / ( -4.0 * ( sinex_sq / dxsquared + siney_sq / dysquared ));
                     } else {
                         // Avoid division by 0
-                        eigenvalue_matrix(i,j,0) = 0._rt;
+                        eigenvalue_matrix(i,j,lo[2]) = 0._rt;
                     }
                 });
     }
@@ -99,7 +92,7 @@ FFTPoissonSolverDirichlet::define (amrex::BoxArray const& a_realspace_ba,
         // Note: the size of the real-space box and spectral-space box
         // differ when using real-to-complex FFT. When initializing
         // the FFT plan, the valid dimensions are those of the real-space box.
-        amrex::IntVect fft_size = mfi.validbox().length();
+        amrex::IntVect fft_size = fft_box.length();
         m_plan[mfi] = AnyDST::CreatePlan(
             fft_size, &m_stagingArea[mfi], &m_tmpSpectralField[mfi]);
     }
@@ -137,12 +130,15 @@ FFTPoissonSolverDirichlet::SolvePoissonEquation (amrex::MultiFab& lhs_mf)
                                          "Slice MFs must be defined on one box only");
         const amrex::FArrayBox& lhs_fab = lhs_mf[0];
         amrex::Box lhs_bx = lhs_fab.box();
-        lhs_bx.grow({-Fields::m_slices_nguards[0], -Fields::m_slices_nguards[1], 0});
-        const amrex::IntVect lo = lhs_bx.smallEnd();
-        amrex::ParallelFor( mfi.validbox(),
+        amrex::Box fft_box = m_spectralspace_ba[mfi];
+        amrex::ParallelFor( lhs_bx,
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
-                // Copy and normalize field
-                lhs_arr(i+lo[0],j+lo[1],k) = tmp_real_arr(i,j,k);
+                // Copy field
+                if(fft_box.contains(i,j,k)) {
+                    lhs_arr(i,j,k) = tmp_real_arr(i,j,k);
+                } else {
+                    lhs_arr(i,j,k) = 0;
+                }
             });
     }
 }
diff --git a/src/fields/fft_poisson_solver/fft/WrapCuDST.cpp b/src/fields/fft_poisson_solver/fft/WrapCuDST.cpp
index da3eddb517..7a948529cf 100644
--- a/src/fields/fft_poisson_solver/fft/WrapCuDST.cpp
+++ b/src/fields/fft_poisson_solver/fft/WrapCuDST.cpp
@@ -26,21 +26,22 @@ namespace AnyDST
         const amrex::Box bx = src.box();
         const int nx = bx.length(0);
         const int ny = bx.length(1);
+        const amrex::IntVect lo = bx.smallEnd();
         amrex::Array4<amrex::Real const> const & src_array = src.array();
         amrex::Array4<amrex::Real> const & dst_array = dst.array();
 
         amrex::ParallelFor(
             bx,
-            [=] AMREX_GPU_DEVICE(int i, int j, int k)
+            [=] AMREX_GPU_DEVICE(int i, int j, int)
             {
                 /* upper left quadrant */
-                dst_array(i+1,j+1,0,dcomp) = src_array(i, j, k, scomp);
+                dst_array(i+1,j+1,lo[2],dcomp) = src_array(i, j, lo[2], scomp);
                 /* lower left quadrant */
-                dst_array(i+1,j+ny+2,0,dcomp) = -src_array(i, ny-1-j, k, scomp);
+                dst_array(i+1,j+ny+2,lo[2],dcomp) = -src_array(i, ny-1-j+2*lo[1], lo[2], scomp);
                 /* upper right quadrant */
-                dst_array(i+nx+2,j+1,0,dcomp) = -src_array(nx-1-i, j, k, scomp);
+                dst_array(i+nx+2,j+1,lo[2],dcomp) = -src_array(nx-1-i+2*lo[0], j, lo[2], scomp);
                 /* lower right quadrant */
-                dst_array(i+nx+2,j+ny+2,0,dcomp) = src_array(nx-1-i, ny-1-j, k, scomp);
+                dst_array(i+nx+2,j+ny+2,lo[2],dcomp) = src_array(nx-1-i+2*lo[0], ny-1-j+2*lo[1], lo[2], scomp);
             }
             );
     };
@@ -243,6 +244,9 @@ namespace AnyDST
             // Allocate expanded_fourier_array Complex of size (nx+2, 2*ny+2)
             amrex::Box expanded_position_box {{0, 0, 0}, {2*nx+1, 2*ny+1, 0}};
             amrex::Box expanded_fourier_box {{0, 0, 0}, {nx+1, 2*ny+1, 0}};
+            // shift box to match rest of fields
+            expanded_position_box += position_array->box().smallEnd();
+            expanded_fourier_box += fourier_array->box().smallEnd();
             dst_plan.m_expanded_position_array =
                 std::make_unique<amrex::FArrayBox>(
                     expanded_position_box, 1);

From c61f4f8281ac61e5f62011bbf13463bf6fb8875e Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 25 Nov 2021 00:12:11 +0100
Subject: [PATCH 02/52] remove stale doc comments and drop std::max clamp on nguards_xy

---
 src/fields/Fields.H   | 9 ---------
 src/fields/Fields.cpp | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 93454c7e85..7d752e5cac 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -152,24 +152,15 @@ public:
      */
     void AddBeamCurrents (const int lev, const int which_slice);
 
-    /** Copy or add array to staging area
-     *
-     * \param[in] src multifab to be copied or added
-     * \param[in] slice_operator operation: SliceOperatorType::Assign or Add
-     * \param[in] scomp compent of source MF
-     * \param[in] lev level of mesh refinement
-     */
     /*void CopyToStagingArea (const amrex::MultiFab& src, const SliceOperatorType slice_operator,
                             const int scomp, const int lev);*/
 
-    /** Compute transverse derivative of 1 slice*/
     /*void TransverseDerivative (const amrex::MultiFab& src, amrex::MultiFab& dst,
                                const int direction, const amrex::Real dx, const int lev,
                                const amrex::Real mult_coeff=1.,
                                const SliceOperatorType slice_operator=SliceOperatorType::Assign,
                                const int scomp=0, const int dcomp=0, const bool use_offset=false);*/
 
-    /** Compute longitudinal derivative (difference between two slices) */
     /*void LongitudinalDerivative (const amrex::MultiFab& src, const amrex::MultiFab& src2,
                                  amrex::MultiFab& dst, const amrex::Real dz, const int lev,
                                  const amrex::Real mult_coeff=1.,
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 26f8686697..dc4b560a47 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -36,7 +36,7 @@ Fields::AllocData (
         "Parallel field solvers not supported yet");
 
     // Need 1 extra guard cell transversally for transverse derivative of Psi
-    int nguards_xy = std::max(1, Hipace::m_depos_order_xy);
+    int nguards_xy = Hipace::m_depos_order_xy;
     m_valid_nguards = {nguards_xy, nguards_xy, 0};
     m_slices_nguards = {nguards_xy + 1, nguards_xy + 1, 0};
 

From 1b44ecd342a2db233bdf923d42124c51edd15dd8 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 25 Nov 2021 01:43:39 +0100
Subject: [PATCH 03/52] fix tiling

---
 src/particles/deposition/PlasmaDepositCurrentInner.H | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/particles/deposition/PlasmaDepositCurrentInner.H b/src/particles/deposition/PlasmaDepositCurrentInner.H
index d507e5581a..d5b2ac6e31 100644
--- a/src/particles/deposition/PlasmaDepositCurrentInner.H
+++ b/src/particles/deposition/PlasmaDepositCurrentInner.H
@@ -145,7 +145,7 @@ void doDepositionShapeN (const PlasmaParticleIterator& pti,
         amrex::Array4<amrex::Real> const& jyy_arr =
             do_tiling ? tmp_densities[ithread].array(6) : jyy_fab.array();
 
-        const int ng = std::max(1, depos_order_xy);
+        const int ng = Fields::m_slices_nguards[0];
         int ntiley = 0;
         if (do_tiling) {
             const int ncellx = jx_fab.box().bigEnd(0)-jx_fab.box().smallEnd(0)+1-2*ng;

From 5b9dd1cb2a00dc3798843d9c4e8887587c042a3e Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Mon, 29 Nov 2021 20:14:24 +0100
Subject: [PATCH 04/52] replace source/valid/all boxes with extended/narrow boxes

---
 src/fields/Fields.H                           |   6 +-
 src/fields/Fields.cpp                         | 218 ++++++++----------
 .../FFTPoissonSolverDirichlet.cpp             |   9 +-
 3 files changed, 104 insertions(+), 129 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 7d752e5cac..8e0410e17f 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -292,12 +292,10 @@ public:
 
     /** Number of guard cells for slices MultiFab */
     static amrex::IntVect m_slices_nguards;
-    static amrex::IntVect m_valid_nguards;
 
     static amrex::Vector<amrex::Box> m_box_problem;
-    static amrex::Vector<amrex::Box> m_box_source;
-    static amrex::Vector<amrex::Box> m_box_valid;
-    static amrex::Vector<amrex::Box> m_box_all;
+    static amrex::Vector<amrex::Box> m_box_extended;
+    static amrex::Vector<amrex::Box> m_box_narrow;
 private:
     /** Vector over levels, array of 4 slices required to compute current slice */
     amrex::Vector<std::array<amrex::MultiFab, m_nslices>> m_slices;
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index dc4b560a47..e31cf2d4b8 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -7,21 +7,18 @@
 #include "particles/ShapeFactors.H"
 
 amrex::IntVect Fields::m_slices_nguards = {-1, -1, -1};
-amrex::IntVect Fields::m_valid_nguards = {-1, -1, -1};
 
 amrex::Vector<amrex::Box> Fields::m_box_problem{};
-amrex::Vector<amrex::Box> Fields::m_box_source{};
-amrex::Vector<amrex::Box> Fields::m_box_valid{};
-amrex::Vector<amrex::Box> Fields::m_box_all{};
+amrex::Vector<amrex::Box> Fields::m_box_extended{};
+amrex::Vector<amrex::Box> Fields::m_box_narrow{};
 
 Fields::Fields (Hipace const* a_hipace)
     : m_slices(a_hipace->maxLevel()+1)
 {
     const int max_lev = a_hipace->maxLevel()+1;
     m_box_problem.resize(max_lev);
-    m_box_source.resize(max_lev);
-    m_box_valid.resize(max_lev);
-    m_box_all.resize(max_lev);
+    m_box_extended.resize(max_lev);
+    m_box_narrow.resize(max_lev);
     amrex::ParmParse ppf("fields");
     queryWithParser(ppf, "do_dirichlet_poisson", m_do_dirichlet_poisson);
 }
@@ -35,25 +32,20 @@ Fields::AllocData (
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(slice_ba.size() == 1,
         "Parallel field solvers not supported yet");
 
-    // Need 1 extra guard cell transversally for transverse derivative of Psi
-    int nguards_xy = Hipace::m_depos_order_xy;
-    m_valid_nguards = {nguards_xy, nguards_xy, 0};
-    m_slices_nguards = {nguards_xy + 1, nguards_xy + 1, 0};
+    // Need 1 extra guard cell transversally for transverse derivative
+    int nguards_xy = Hipace::m_depos_order_xy + 1;
+    m_slices_nguards = {nguards_xy, nguards_xy, 0};
 
-    // box where the problem is defined
+    // box where the problem is defined, contains particles
     m_box_problem[lev] = slice_ba[0];
 
-    // source terms of derivatives can be only in here
-    m_box_source[lev] = m_box_problem[lev];
-    m_box_source[lev].grow({-1, -1, 0});
-
     // contains valid fileds that can be used for particles, also the box of the Poisson solver
-    m_box_valid[lev] = m_box_problem[lev];
-    m_box_valid[lev].grow(m_valid_nguards);
+    m_box_extended[lev] = m_box_problem[lev];
+    m_box_extended[lev].grow(m_slices_nguards);
 
-    // valid box for Psi
-    m_box_all[lev] = m_box_problem[lev];
-    m_box_all[lev].grow(m_slices_nguards);
+    // contains valid sources for lev=1
+    m_box_narrow[lev] = m_box_problem[lev];
+    m_box_narrow[lev].grow(-m_slices_nguards);
 
     for (int islice=0; islice<WhichSlice::N; islice++) {
         m_slices[lev][islice].define(
@@ -99,61 +91,57 @@ Fields::AllocData (
 }
 
 
-struct derivative_x_GPU {
+template<int dir>
+struct derivative_GPU {
     amrex::Array4<amrex::Real const> array;
     amrex::Real dx_inv;
-
-    AMREX_GPU_DEVICE amrex::Real operator ()(int i, int j, int k) const {
-        return (array(i+1,j,k) - array(i-1,j,k)) * dx_inv;
-    }
-};
-
-struct derivative_y_GPU {
-    amrex::Array4<amrex::Real const> array;
-    amrex::Real dy_inv;
-
-    AMREX_GPU_DEVICE amrex::Real operator ()(int i, int j, int k) const {
-        return (array(i,j+1,k) - array(i,j-1,k)) * dy_inv;
+    int box_lo;
+    int box_hi;
+
+    AMREX_GPU_DEVICE amrex::Real operator() (int i, int j, int k) const noexcept {
+        constexpr bool is_x_dir = dir == Direction::x;
+        constexpr bool is_y_dir = dir == Direction::y;
+        const int ij_along_dir = is_x_dir * i + is_y_dir * j;
+        const bool lo_guard = ij_along_dir != box_lo;
+        const bool hi_guard = ij_along_dir != box_hi;
+        return (array(i+is_x_dir*hi_guard,j+is_y_dir*hi_guard,k)*hi_guard
+               -array(i-is_x_dir*lo_guard,j-is_y_dir*lo_guard,k)*lo_guard) * dx_inv;
     }
 };
 
-struct derivative_z_GPU {
+template<>
+struct derivative_GPU<Direction::z> {
     amrex::Array4<amrex::Real const> array1;
     amrex::Array4<amrex::Real const> array2;
     amrex::Real dz_inv;
 
-    AMREX_GPU_DEVICE amrex::Real operator ()(int i, int j, int k) const {
+    AMREX_GPU_DEVICE amrex::Real operator() (int i, int j, int k) const noexcept {
         return (array1(i,j,k) - array2(i,j,k)) * dz_inv;
     }
 };
 
 
-
-struct derivative_x {
-    FieldView f_view;
-    amrex::Real dx;
-
-    derivative_x_GPU array (amrex::MFIter& mfi) const {
-        return derivative_x_GPU{f_view.array(mfi), 1/(2*dx)};
-    }
-};
-
-struct derivative_y {
+template<int dir>
+struct derivative {
     FieldView f_view;
-    amrex::Real dy;
+    const amrex::Geometry& geom;
+    const amrex::Box& bx;
 
-    derivative_y_GPU array (amrex::MFIter& mfi) const {
-        return derivative_y_GPU{f_view.array(mfi), 1/(2*dy)};
+    derivative_GPU<dir> array (amrex::MFIter& mfi) const {
+        return derivative_GPU<dir>{f_view.array(mfi),
+            1./(2*geom.CellSize(dir)), bx.smallEnd(dir), bx.bigEnd(dir)};
     }
 };
 
-struct derivative_z {
+template<>
+struct derivative<Direction::z> {
     FieldView f_view1;
     FieldView f_view2;
-    amrex::Real dz;
+    const amrex::Geometry& geom;
 
-    derivative_z_GPU array (amrex::MFIter& mfi) const {
-        return derivative_z_GPU{f_view1.array(mfi), f_view2.array(mfi), 1/(2*dz)};
+    derivative_GPU<Direction::z> array (amrex::MFIter& mfi) const {
+        return derivative_GPU<Direction::z>{f_view1.array(mfi), f_view2.array(mfi),
+            1./(2*geom.CellSize(Direction::z))};
     }
 };
 
@@ -161,8 +149,7 @@ template<class FVA, class FVB>
 void
 FieldOperation (const amrex::Box op_box, FieldView dst,
                 const amrex::Real factor_a, const FVA src_a,
-                const amrex::Real factor_b, const FVB src_b,
-                const amrex::Box valid_box)
+                const amrex::Real factor_b, const FVB src_b)
 {
     HIPACE_PROFILE("Fields::FieldOperation()");
 
@@ -171,46 +158,16 @@ FieldOperation (const amrex::Box op_box, FieldView dst,
 #endif
     for ( amrex::MFIter mfi(dst.m_mfab, amrex::TilingIfNotGPU());
           mfi.isValid(); ++mfi ){
-        const auto& dst_array = dst.array(mfi);
+        const auto dst_array = dst.array(mfi);
         const auto src_a_array = src_a.array(mfi);
         const auto src_b_array = src_b.array(mfi);
         const amrex::Box bx = mfi.tilebox() & op_box;
-        const int i_lo = valid_box.smallEnd(0);
-        const int i_hi = valid_box.bigEnd(0);
-        const int j_lo = valid_box.smallEnd(1);
-        const int j_hi = valid_box.bigEnd(1);
-
-        amrex::ParallelFor(
-            bx,
-            [=] AMREX_GPU_DEVICE(int i, int j, int k)
-            {
-                dst_array(i,j,k) = (factor_a*src_a_array(i,j,k) + factor_b*src_b_array(i,j,k))
-                                  *(i_lo<=i && i<=i_hi && j_lo<=j && j<=j_hi);
-            });
-    }
-}
-
-template<class FV>
-void
-UnaryFieldOperation (const amrex::Box op_box, FieldView dst,
-                     const amrex::Real factor, const FV src)
-{
-    HIPACE_PROFILE("Fields::FieldOperation()");
-
-#ifdef AMREX_USE_OMP
-#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
-#endif
-    for ( amrex::MFIter mfi(dst.m_mfab, amrex::TilingIfNotGPU());
-          mfi.isValid(); ++mfi ){
-        const auto& dst_array = dst.array(mfi);
-        const auto src_array = src.array(mfi);
-        const amrex::Box bx = mfi.tilebox() & op_box;
 
         amrex::ParallelFor(
             bx,
             [=] AMREX_GPU_DEVICE(int i, int j, int k)
             {
-                dst_array(i,j,k) = factor*src_array(i,j,k);
+                dst_array(i,j,k) = factor_a * src_a_array(i,j,k) + factor_b * src_b_array(i,j,k);
             });
     }
 }
@@ -462,10 +419,12 @@ Fields::AddRhoIons (const int lev, bool inverse)
     HIPACE_PROFILE("Fields::AddRhoIons()");
     if (!inverse){
         amrex::MultiFab::Add(getSlices(lev, WhichSlice::This), getSlices(lev, WhichSlice::RhoIons),
-                             Comps[WhichSlice::RhoIons]["rho"], Comps[WhichSlice::This]["rho"], 1, 0);
+                             Comps[WhichSlice::RhoIons]["rho"], Comps[WhichSlice::This]["rho"], 1,
+                             m_slices_nguards);
     } else {
         amrex::MultiFab::Subtract(getSlices(lev, WhichSlice::This), getSlices(lev, WhichSlice::RhoIons),
-                                  Comps[WhichSlice::RhoIons]["rho"], Comps[WhichSlice::This]["rho"], 1, 0);
+                                  Comps[WhichSlice::RhoIons]["rho"], Comps[WhichSlice::This]["rho"], 1,
+                                  m_slices_nguards);
     }
 }
 
@@ -763,12 +722,12 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
                         Comps[WhichSlice::This]["Psi"], 1);
 
     InterpolateFromLev0toLev1(geom, lev, "rho", islice);
+    // TODO: InterpolateFromLev0toLev1 jz
 
     // calculating the right-hand side 1/episilon0 * -(rho-Jz/c)
-    FieldOperation(m_box_valid[lev], getStagingArea(lev),
+    FieldOperation(m_box_extended[lev], getStagingArea(lev),
                    1./(phys_const.c*phys_const.ep0), getField(lev, WhichSlice::This, "jz"),
-                   -1./(phys_const.ep0), getField(lev, WhichSlice::This, "rho"),
-                   m_box_problem[lev]);
+                   -1./(phys_const.ep0), getField(lev, WhichSlice::This, "rho"));
 
     /*CopyToStagingArea(getSlices(lev,WhichSlice::This), SliceOperatorType::Assign,
                        Comps[WhichSlice::This]["jz"], lev);
@@ -782,16 +741,35 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
 
     /* ---------- Transverse FillBoundary Psi ---------- */
 
-    InterpolateFromLev0toLev1(geom, lev, "Psi", islice);
+    //InterpolateFromLev0toLev1(geom, lev, "Psi", islice);
 
     /* Compute ExmBy and Eypbx from grad(-psi) */
-    UnaryFieldOperation(m_box_valid[lev], getField(lev, WhichSlice::This, "ExmBy"),
-                        -1., derivative_x{getField(lev, WhichSlice::This, "Psi"),
-                        geom[lev].CellSize(Direction::x)});
+    FieldView f_ExmBy = getField(lev, WhichSlice::This, "ExmBy");
+    FieldView f_EypBx = getField(lev, WhichSlice::This, "EypBx");
+    FieldView f_Psi = getField(lev, WhichSlice::This, "Psi");
+
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
+    for ( amrex::MFIter mfi(f_ExmBy.m_mfab, amrex::TilingIfNotGPU());
+          mfi.isValid(); ++mfi ){
+        const amrex::Array4<amrex::Real> array_ExmBy = f_ExmBy.array(mfi);
+        const amrex::Array4<amrex::Real> array_EypBx = f_EypBx.array(mfi);
+        const amrex::Array4<amrex::Real const> array_Psi = f_Psi.array(mfi);
+        amrex::Box op_box = m_box_extended[lev];
+        op_box.grow({-1, -1, 0});
+        const amrex::Box bx = mfi.tilebox() & op_box;
+        const amrex::Real dx_inv = 1./(2*geom[lev].CellSize(Direction::x));
+        const amrex::Real dy_inv = 1./(2*geom[lev].CellSize(Direction::y));
 
-    UnaryFieldOperation(m_box_valid[lev], getField(lev, WhichSlice::This, "EypBx"),
-                        -1., derivative_y{getField(lev, WhichSlice::This, "Psi"),
-                        geom[lev].CellSize(Direction::y)});
+        amrex::ParallelFor(
+            bx,
+            [=] AMREX_GPU_DEVICE(int i, int j, int k)
+            {
+                array_ExmBy(i,j,k) = - (array_Psi(i+1,j,k) - array_Psi(i-1,j,k))*dx_inv;
+                array_EypBx(i,j,k) = - (array_Psi(i,j+1,k) - array_Psi(i,j-1,k))*dy_inv;
+            });
+    }
 
     /*TransverseDerivative(
         getSlices(lev, WhichSlice::This),
@@ -827,14 +805,15 @@ Fields::SolvePoissonEz (amrex::Vector<amrex::Geometry> const& geom, const int le
     // Left-Hand Side for Poisson equation is Bz in the slice MF
     amrex::MultiFab lhs(getSlices(lev, WhichSlice::This), amrex::make_alias,
                         Comps[WhichSlice::This]["Ez"], 1);
+    // TODO: InterpolateFromLev0toLev1 jx, jy
+
     // Right-Hand Side for Poisson equation: compute 1/(episilon0 *c0 )*(d_x(jx) + d_y(jy))
     // from the slice MF, and store in the staging area of poisson_solver
-    FieldOperation(m_box_valid[lev], getStagingArea(lev),
+    FieldOperation(m_box_extended[lev], getStagingArea(lev),
                    1./(phys_const.ep0*phys_const.c),
-                   derivative_x{getField(lev, WhichSlice::This, "jx"), geom[lev].CellSize(Direction::x)},
+                   derivative<Direction::x>{getField(lev, WhichSlice::This, "jx"), geom[lev], m_box_extended[lev]},
                    1./(phys_const.ep0*phys_const.c),
-                   derivative_y{getField(lev, WhichSlice::This, "jy"), geom[lev].CellSize(Direction::y)},
-                   m_box_source[lev]);
+                   derivative<Direction::y>{getField(lev, WhichSlice::This, "jy"), geom[lev], m_box_extended[lev]});
 
     /*TransverseDerivative(
         getSlices(lev, WhichSlice::This),
@@ -871,15 +850,16 @@ Fields::SolvePoissonBx (amrex::MultiFab& Bx_iter, amrex::Vector<amrex::Geometry>
     HIPACE_PROFILE("Fields::SolvePoissonBx()");
 
     PhysConst phys_const = get_phys_const();
+    // TODO: InterpolateFromLev0toLev1 jz, jy
+
     // Right-Hand Side for Poisson equation: compute -mu_0*d_y(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    FieldOperation(m_box_valid[lev], getStagingArea(lev),
+    FieldOperation(m_box_extended[lev], getStagingArea(lev),
                    -phys_const.mu0,
-                   derivative_y{getField(lev, WhichSlice::This, "jz"), geom[lev].CellSize(Direction::y)},
+                   derivative<Direction::y>{getField(lev, WhichSlice::This, "jz"), geom[lev], m_box_extended[lev]},
                    phys_const.mu0,
-                   derivative_z{getField(lev, WhichSlice::Previous1, "jy"),
-                   getField(lev, WhichSlice::Next, "jy"), geom[lev].CellSize(Direction::z)},
-                   m_box_source[lev]);
+                   derivative<Direction::z>{getField(lev, WhichSlice::Previous1, "jy"),
+                   getField(lev, WhichSlice::Next, "jy"), geom[lev]});
 
     /*TransverseDerivative(
         getSlices(lev, WhichSlice::This),
@@ -917,15 +897,16 @@ Fields::SolvePoissonBy (amrex::MultiFab& By_iter, amrex::Vector<amrex::Geometry>
     HIPACE_PROFILE("Fields::SolvePoissonBy()");
 
     PhysConst phys_const = get_phys_const();
+    // TODO: InterpolateFromLev0toLev1 jz, jx
+
     // Right-Hand Side for Poisson equation: compute mu_0*d_x(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    FieldOperation(m_box_valid[lev], getStagingArea(lev),
+    FieldOperation(m_box_extended[lev], getStagingArea(lev),
                    phys_const.mu0,
-                   derivative_x{getField(lev, WhichSlice::This, "jz"), geom[lev].CellSize(Direction::x)},
+                   derivative<Direction::x>{getField(lev, WhichSlice::This, "jz"), geom[lev], m_box_extended[lev]},
                    -phys_const.mu0,
-                   derivative_z{getField(lev, WhichSlice::Previous1, "jx"),
-                   getField(lev, WhichSlice::Next, "jx"), geom[lev].CellSize(Direction::z)},
-                   m_box_source[lev]);
+                   derivative<Direction::z>{getField(lev, WhichSlice::Previous1, "jx"),
+                   getField(lev, WhichSlice::Next, "jx"), geom[lev]});
 
     /*TransverseDerivative(
         getSlices(lev, WhichSlice::This),
@@ -965,14 +946,15 @@ Fields::SolvePoissonBz (amrex::Vector<amrex::Geometry> const& geom, const int le
     // Left-Hand Side for Poisson equation is Bz in the slice MF
     amrex::MultiFab lhs(getSlices(lev, WhichSlice::This), amrex::make_alias,
                         Comps[WhichSlice::This]["Bz"], 1);
+    // TODO: InterpolateFromLev0toLev1 jx, jy
+
     // Right-Hand Side for Poisson equation: compute mu_0*(d_y(jx) - d_x(jy))
     // from the slice MF, and store in the staging area of m_poisson_solver
-    FieldOperation(m_box_valid[lev], getStagingArea(lev),
+    FieldOperation(m_box_extended[lev], getStagingArea(lev),
                    phys_const.mu0,
-                   derivative_y{getField(lev, WhichSlice::This, "jx"), geom[lev].CellSize(Direction::y)},
+                   derivative<Direction::y>{getField(lev, WhichSlice::This, "jx"), geom[lev], m_box_extended[lev]},
                    -phys_const.mu0,
-                   derivative_x{getField(lev, WhichSlice::This, "jy"), geom[lev].CellSize(Direction::x)},
-                   m_box_source[lev]);
+                   derivative<Direction::x>{getField(lev, WhichSlice::This, "jy"), geom[lev], m_box_extended[lev]});
 
     /*TransverseDerivative(
         getSlices(lev, WhichSlice::This),
diff --git a/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp b/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
index 1dac56dd03..2964a6dfba 100644
--- a/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
+++ b/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
@@ -30,7 +30,7 @@ FFTPoissonSolverDirichlet::define (amrex::BoxArray const& a_realspace_ba,
         // are the same as real space boxes, but have one less ghoast cell
         // Define the corresponding box
         amrex::Box space_bx = a_realspace_ba[i];
-        space_bx.grow(Fields::m_valid_nguards);
+        space_bx.grow(Fields::m_slices_nguards);
         real_and_spectral_bl.push_back( space_bx );
     }
     m_spectralspace_ba.define( std::move(real_and_spectral_bl) );
@@ -130,15 +130,10 @@ FFTPoissonSolverDirichlet::SolvePoissonEquation (amrex::MultiFab& lhs_mf)
                                          "Slice MFs must be defined on one box only");
         const amrex::FArrayBox& lhs_fab = lhs_mf[0];
         amrex::Box lhs_bx = lhs_fab.box();
-        amrex::Box fft_box = m_spectralspace_ba[mfi];
         amrex::ParallelFor( lhs_bx,
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
                 // Copy field
-                if(fft_box.contains(i,j,k)) {
-                    lhs_arr(i,j,k) = tmp_real_arr(i,j,k);
-                } else {
-                    lhs_arr(i,j,k) = 0;
-                }
+                lhs_arr(i,j,k) = tmp_real_arr(i,j,k);
             });
     }
 }

From 741c2e64286cff29200d7682002fca32821ac259 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Mon, 29 Nov 2021 20:43:21 +0100
Subject: [PATCH 05/52] fix conversion and some cleaning

---
 src/fields/Fields.H   |  15 ---
 src/fields/Fields.cpp | 245 +-----------------------------------------
 2 files changed, 2 insertions(+), 258 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 8e0410e17f..c77f176fb6 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -152,21 +152,6 @@ public:
      */
     void AddBeamCurrents (const int lev, const int which_slice);
 
-    /*void CopyToStagingArea (const amrex::MultiFab& src, const SliceOperatorType slice_operator,
-                            const int scomp, const int lev);*/
-
-    /*void TransverseDerivative (const amrex::MultiFab& src, amrex::MultiFab& dst,
-                               const int direction, const amrex::Real dx, const int lev,
-                               const amrex::Real mult_coeff=1.,
-                               const SliceOperatorType slice_operator=SliceOperatorType::Assign,
-                               const int scomp=0, const int dcomp=0, const bool use_offset=false);*/
-
-    /*void LongitudinalDerivative (const amrex::MultiFab& src, const amrex::MultiFab& src2,
-                                 amrex::MultiFab& dst, const amrex::Real dz, const int lev,
-                                 const amrex::Real mult_coeff=1.,
-                                 const SliceOperatorType slice_operator=SliceOperatorType::Assign,
-                                 const int sc1omp=0, const int s2comp=0, const int dcomp=0);*/
-
     /** \brief Interpolate values at boundaries from coarse grid to the fine grid
      *
      * \param[in] geom Geometry
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index e31cf2d4b8..e6b097c729 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -129,7 +129,7 @@ struct derivative {
 
     derivative_GPU<dir> array (amrex::MFIter& mfi) const {
         return derivative_GPU<dir>{f_view.array(mfi),
-            1./(2*geom.CellSize(dir)), bx.smallEnd(dir), bx.bigEnd(dir)};
+            1/(2*geom.CellSize(dir)), bx.smallEnd(dir), bx.bigEnd(dir)};
     }
 };
 
@@ -141,7 +141,7 @@ struct derivative<Direction::z> {
 
     derivative_GPU<Direction::z> array (amrex::MFIter& mfi) const {
         return derivative_GPU<Direction::z>{f_view1.array(mfi), f_view2.array(mfi),
-            1./(2*geom.CellSize(Direction::z))};
+            1/(2*geom.CellSize(Direction::z))};
     }
 };
 
@@ -172,140 +172,6 @@ FieldOperation (const amrex::Box op_box, FieldView dst,
     }
 }
 
-/*
-void
-Fields::CopyToStagingArea (const amrex::MultiFab& src, const SliceOperatorType slice_operator,
-                           const int scomp, const int lev)
-{
-    HIPACE_PROFILE("Fields::CopyToStagingArea()");
-
-#ifdef AMREX_USE_OMP
-#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
-#endif
-    for ( amrex::MFIter mfi(m_poisson_solver[lev]->StagingArea(), amrex::TilingIfNotGPU());
-          mfi.isValid(); ++mfi ){
-        const amrex::Box& bx = mfi.tilebox();
-        amrex::Array4<amrex::Real const> const & src_array = src.array(mfi);
-        amrex::Array4<amrex::Real> const & dst_array = m_poisson_solver[lev]
-                                                       ->StagingArea().array(mfi);
-        amrex::Box prob_box = m_box_source[lev];
-
-        amrex::ParallelFor(
-            bx,
-            [=] AMREX_GPU_DEVICE(int i, int j, int k)
-            {
-                if (slice_operator==SliceOperatorType::Assign) {
-                    dst_array(i,j,k,0) = src_array(i,j,k,scomp) * prob_box.contains(i,j,k);
-                }
-                else
-                {
-                    dst_array(i,j,k,0) += src_array(i,j,k,scomp) * prob_box.contains(i,j,k);
-                }
-            }
-            );
-    }
-}
-
-void
-Fields::TransverseDerivative (const amrex::MultiFab& src, amrex::MultiFab& dst, const int direction,
-                              const amrex::Real dx, const int lev, const amrex::Real mult_coeff,
-                              const SliceOperatorType slice_operator, const int scomp,
-                              const int dcomp, const bool use_offset)
-{
-    HIPACE_PROFILE("Fields::TransverseDerivative()");
-    using namespace amrex::literals;
-
-    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(src.size() == 1, "Slice MFs must be defined on one box only");
-    AMREX_ALWAYS_ASSERT((direction == Direction::x) || (direction == Direction::y));
-
-#ifdef AMREX_USE_OMP
-#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
-#endif
-    for ( amrex::MFIter mfi(dst, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi ){
-        const amrex::Box& bx = mfi.tilebox();
-        amrex::Array4<amrex::Real const> const & src_array = src.array(mfi);
-        amrex::Array4<amrex::Real> const & dst_array = dst.array(mfi);
-        amrex::ParallelFor(
-            bx,
-            [=] AMREX_GPU_DEVICE(int i, int j, int k)
-            {
-                if (direction == Direction::x){
-
-                    if (slice_operator==SliceOperatorType::Assign)
-                    {
-                        dst_array(i,j,k,dcomp) = mult_coeff / (2.0_rt*dx) *
-                                                 (src_array(i+1, j, k, scomp)
-                                                  - src_array(i-1, j, k, scomp));
-                    }
-                    else
-                    {
-                        dst_array(i,j,k,dcomp) += mult_coeff / (2.0_rt*dx) *
-                                                  (src_array(i+1, j, k, scomp)
-                                                   - src_array(i-1, j, k, scomp));
-                    }
-                } else  {
-
-                    if (slice_operator==SliceOperatorType::Assign)
-                    {
-                        dst_array(i,j,k,dcomp) = mult_coeff / (2.0_rt*dx) *
-                                                 (src_array(i, j+1, k, scomp)
-                                                  - src_array(i, j-1, k, scomp));
-                    }
-
-                    {
-                        dst_array(i,j,k,dcomp) += mult_coeff / (2.0_rt*dx) *
-                                                  (src_array(i, j+1, k, scomp)
-                                                   - src_array(i, j-1, k, scomp));
-                    }
-                }
-            }
-            );
-    }
-}
-
-void
-Fields::LongitudinalDerivative (const amrex::MultiFab& src1, const amrex::MultiFab& src2,
-                                amrex::MultiFab& dst, const amrex::Real dz, const int lev,
-                                const amrex::Real mult_coeff,
-                                const SliceOperatorType slice_operator, const int s1comp,
-                                const int s2comp, const int dcomp)
-{
-    HIPACE_PROFILE("Fields::LongitudinalDerivative()");
-    using namespace amrex::literals;
-
-    AMREX_ALWAYS_ASSERT_WITH_MESSAGE(src1.size() == 1, "Slice MFs must be defined on one box only");
-
-#ifdef AMREX_USE_OMP
-#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
-#endif
-    for ( amrex::MFIter mfi(dst, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi ){
-        const amrex::Box& bx = mfi.tilebox();
-        amrex::Array4<amrex::Real const> const & src1_array = src1.array(mfi);
-        amrex::Array4<amrex::Real const> const & src2_array = src2.array(mfi);
-        amrex::Array4<amrex::Real> const & dst_array = dst.array(mfi);
-        amrex::ParallelFor(
-            bx,
-            [=] AMREX_GPU_DEVICE(int i, int j, int k)
-            {
-                if (slice_operator==SliceOperatorType::Assign)
-                {
-                    dst_array(i,j,k,dcomp) = mult_coeff / (2.0_rt*dz) *
-                                             (src1_array(i, j, k, s1comp)
-                                              - src2_array(i, j, k, s2comp));
-                }
-                else
-                {
-                    dst_array(i,j,k,dcomp) += mult_coeff / (2.0_rt*dz) *
-                                              (src1_array(i, j, k, s1comp)
-                                               - src2_array(i, j, k, s2comp));
-                }
-
-            }
-            );
-    }
-}
-*/
-
 void
 Fields::Copy (const int lev, const int i_slice, const int slice_comp, const int full_comp,
               const amrex::Gpu::DeviceVector<int>& diag_comps_vect,
@@ -729,13 +595,6 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
                    1./(phys_const.c*phys_const.ep0), getField(lev, WhichSlice::This, "jz"),
                    -1./(phys_const.ep0), getField(lev, WhichSlice::This, "rho"));
 
-    /*CopyToStagingArea(getSlices(lev,WhichSlice::This), SliceOperatorType::Assign,
-                       Comps[WhichSlice::This]["jz"], lev);
-    m_poisson_solver[lev]->StagingArea().mult(-1./phys_const.c);
-    CopyToStagingArea(getSlices(lev,WhichSlice::This), SliceOperatorType::Add,
-                       Comps[WhichSlice::This]["rho"], lev);
-    m_poisson_solver[lev]->StagingArea().mult(-1./phys_const.ep0);*/
-
     InterpolateBoundaries(geom, lev, "Psi", islice);
     m_poisson_solver[lev]->SolvePoissonEquation(lhs);
 
@@ -770,28 +629,6 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
                 array_EypBx(i,j,k) = - (array_Psi(i,j+1,k) - array_Psi(i,j-1,k))*dy_inv;
             });
     }
-
-    /*TransverseDerivative(
-        getSlices(lev, WhichSlice::This),
-        getSlices(lev, WhichSlice::This),
-        Direction::x,
-        geom[lev].CellSize(Direction::x),
-        lev,
-        -1.,
-        SliceOperatorType::Assign,
-        Comps[WhichSlice::This]["Psi"],
-        Comps[WhichSlice::This]["ExmBy"]);
-
-    TransverseDerivative(
-        getSlices(lev, WhichSlice::This),
-        getSlices(lev, WhichSlice::This),
-        Direction::y,
-        geom[lev].CellSize(Direction::y),
-        lev,
-        -1.,
-        SliceOperatorType::Assign,
-        Comps[WhichSlice::This]["Psi"],
-        Comps[WhichSlice::This]["EypBx"]);*/
 }
 
 
@@ -815,25 +652,6 @@ Fields::SolvePoissonEz (amrex::Vector<amrex::Geometry> const& geom, const int le
                    1./(phys_const.ep0*phys_const.c),
                    derivative<Direction::y>{getField(lev, WhichSlice::This, "jy"), geom[lev], m_box_extended[lev]});
 
-    /*TransverseDerivative(
-        getSlices(lev, WhichSlice::This),
-        m_poisson_solver[lev]->StagingArea(),
-        Direction::x,
-        geom[lev].CellSize(Direction::x),
-        lev,
-        1./(phys_const.ep0*phys_const.c),
-        SliceOperatorType::Assign,
-        Comps[WhichSlice::This]["jx"], 0, 1);
-
-    TransverseDerivative(
-        getSlices(lev, WhichSlice::This),
-        m_poisson_solver[lev]->StagingArea(),
-        Direction::y,
-        geom[lev].CellSize(Direction::y),
-        lev,
-        1./(phys_const.ep0*phys_const.c),
-        SliceOperatorType::Add,
-        Comps[WhichSlice::This]["jy"], 0, 1);*/
 
     InterpolateBoundaries(geom, lev, "Ez", islice);
     // Solve Poisson equation.
@@ -861,26 +679,6 @@ Fields::SolvePoissonBx (amrex::MultiFab& Bx_iter, amrex::Vector<amrex::Geometry>
                    derivative<Direction::z>{getField(lev, WhichSlice::Previous1, "jy"),
                    getField(lev, WhichSlice::Next, "jy"), geom[lev]});
 
-    /*TransverseDerivative(
-        getSlices(lev, WhichSlice::This),
-        m_poisson_solver[lev]->StagingArea(),
-        Direction::y,
-        geom[lev].CellSize(Direction::y),
-        lev,
-        -phys_const.mu0,
-        SliceOperatorType::Assign,
-        Comps[WhichSlice::This]["jz"], 0, 1);
-
-    LongitudinalDerivative(
-        getSlices(lev, WhichSlice::Previous1),
-        getSlices(lev, WhichSlice::Next),
-        m_poisson_solver[lev]->StagingArea(),
-        geom[lev].CellSize(Direction::z),
-        lev,
-        phys_const.mu0,
-        SliceOperatorType::Add,
-        Comps[WhichSlice::Previous1]["jy"],
-        Comps[WhichSlice::Next]["jy"]);*/
 
     InterpolateBoundaries(geom, lev, "Bx", islice);
     // Solve Poisson equation.
@@ -908,26 +706,6 @@ Fields::SolvePoissonBy (amrex::MultiFab& By_iter, amrex::Vector<amrex::Geometry>
                    derivative<Direction::z>{getField(lev, WhichSlice::Previous1, "jx"),
                    getField(lev, WhichSlice::Next, "jx"), geom[lev]});
 
-    /*TransverseDerivative(
-        getSlices(lev, WhichSlice::This),
-        m_poisson_solver[lev]->StagingArea(),
-        Direction::x,
-        geom[lev].CellSize(Direction::x),
-        lev,
-        phys_const.mu0,
-        SliceOperatorType::Assign,
-        Comps[WhichSlice::This]["jz"], 0, 1);
-
-    LongitudinalDerivative(
-        getSlices(lev, WhichSlice::Previous1),
-        getSlices(lev, WhichSlice::Next),
-        m_poisson_solver[lev]->StagingArea(),
-        geom[lev].CellSize(Direction::z),
-        lev,
-        -phys_const.mu0,
-        SliceOperatorType::Add,
-        Comps[WhichSlice::Previous1]["jx"],
-        Comps[WhichSlice::Next]["jx"]);*/
 
     InterpolateBoundaries(geom, lev, "By", islice);
     // Solve Poisson equation.
@@ -956,25 +734,6 @@ Fields::SolvePoissonBz (amrex::Vector<amrex::Geometry> const& geom, const int le
                    -phys_const.mu0,
                    derivative<Direction::x>{getField(lev, WhichSlice::This, "jy"), geom[lev], m_box_extended[lev]});
 
-    /*TransverseDerivative(
-        getSlices(lev, WhichSlice::This),
-        m_poisson_solver[lev]->StagingArea(),
-        Direction::y,
-        geom[lev].CellSize(Direction::y),
-        lev,
-        phys_const.mu0,
-        SliceOperatorType::Assign,
-        Comps[WhichSlice::This]["jx"], 0, 1);
-
-    TransverseDerivative(
-        getSlices(lev, WhichSlice::This),
-        m_poisson_solver[lev]->StagingArea(),
-        Direction::x,
-        geom[lev].CellSize(Direction::x),
-        lev,
-        -phys_const.mu0,
-        SliceOperatorType::Add,
-        Comps[WhichSlice::This]["jy"], 0, 1);*/
 
     InterpolateBoundaries(geom, lev, "Bz", islice);
     // Solve Poisson equation.

From 6a62d504d0c0d98841a8a78ac664539502c34d75 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Tue, 30 Nov 2021 00:14:38 +0100
Subject: [PATCH 06/52] use m_slices_nguards in slice ops, drop m_comm_xy argument

---
 src/Hipace.cpp                                | 38 +++++++++----------
 src/fields/Fields.H                           |  3 +-
 src/fields/Fields.cpp                         | 12 +++---
 src/particles/PlasmaParticleContainer.cpp     |  3 +-
 .../deposition/BeamDepositCurrent.cpp         |  2 +-
 .../deposition/PlasmaDepositCurrent.cpp       |  2 +-
 src/particles/pusher/BeamParticleAdvance.cpp  |  2 +-
 .../pusher/PlasmaParticleAdvance.cpp          |  3 +-
 8 files changed, 31 insertions(+), 34 deletions(-)

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index 1eec0672fa..07aa976938 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -504,10 +504,10 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                 const int iby = Comps[WhichSlice::This]["By"];
                 const int nc = Comps[WhichSlice::This]["N"];
                 AMREX_ALWAYS_ASSERT( iby == ibx+1 );
-                m_fields.getSlices(lev, WhichSlice::This).setVal(0., 0, ibx);
-                m_fields.getSlices(lev, WhichSlice::This).setVal(0., iby+1, nc-iby-1);
+                m_fields.getSlices(lev, WhichSlice::This).setVal(0., 0, ibx, m_fields.m_slices_nguards);
+                m_fields.getSlices(lev, WhichSlice::This).setVal(0., iby+1, nc-iby-1, m_fields.m_slices_nguards);
             } else {
-                m_fields.getSlices(lev, WhichSlice::This).setVal(0.);
+                m_fields.getSlices(lev, WhichSlice::This).setVal(0., m_fields.m_slices_nguards);
             }
 
             if (!m_explicit) {
@@ -553,7 +553,7 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                     amrex::make_alias, Comps[WhichSlice::This]["jx"], 7);
             j_slice.FillBoundary(Geom(lev).periodicity());
 
-            m_fields.SolvePoissonExmByAndEypBx(Geom(), m_comm_xy, lev, islice);
+            m_fields.SolvePoissonExmByAndEypBx(Geom(), lev, islice);
 
             m_grid_current.DepositCurrentSlice(m_fields, geom[lev], lev, islice);
             m_multi_beam.DepositCurrentSlice(m_fields, geom, lev, islice_local, bx, bins[lev],
@@ -603,7 +603,7 @@ Hipace::ResetAllQuantities ()
     for (int lev = 0; lev <= finestLevel(); ++lev) {
         m_multi_plasma.ResetParticles(lev, true);
         for (int islice=0; islice<WhichSlice::N; islice++) {
-            m_fields.getSlices(lev, islice).setVal(0.);
+            m_fields.getSlices(lev, islice).setVal(0., m_fields.m_slices_nguards);
         }
     }
 }
@@ -850,10 +850,10 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
 
     /* Guess Bx and By */
     m_fields.InitialBfieldGuess(relative_Bfield_error, m_predcorr_B_error_tolerance, lev);
-    amrex::ParallelContext::push(m_comm_xy);
+    //amrex::ParallelContext::push(m_comm_xy);
      // exchange ExmBy EypBx Ez Bx By Bz
-    m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
-    amrex::ParallelContext::pop();
+    //m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
+    //amrex::ParallelContext::pop();
 
     /* creating temporary Bx and By arrays for the current and previous iteration */
     amrex::MultiFab Bx_iter(m_fields.getSlices(lev, WhichSlice::This).boxArray(),
@@ -862,18 +862,18 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
     amrex::MultiFab By_iter(m_fields.getSlices(lev, WhichSlice::This).boxArray(),
                             m_fields.getSlices(lev, WhichSlice::This).DistributionMap(), 1,
                             m_fields.getSlices(lev, WhichSlice::This).nGrowVect());
-    Bx_iter.setVal(0.0);
-    By_iter.setVal(0.0);
+    Bx_iter.setVal(0.0, m_fields.m_slices_nguards);
+    By_iter.setVal(0.0, m_fields.m_slices_nguards);
     amrex::MultiFab Bx_prev_iter(m_fields.getSlices(lev, WhichSlice::This).boxArray(),
                                  m_fields.getSlices(lev, WhichSlice::This).DistributionMap(), 1,
                                  m_fields.getSlices(lev, WhichSlice::This).nGrowVect());
     amrex::MultiFab::Copy(Bx_prev_iter, m_fields.getSlices(lev, WhichSlice::This),
-                          Comps[WhichSlice::This]["Bx"], 0, 1, 0);
+                          Comps[WhichSlice::This]["Bx"], 0, 1, m_fields.m_slices_nguards);
     amrex::MultiFab By_prev_iter(m_fields.getSlices(lev, WhichSlice::This).boxArray(),
                                  m_fields.getSlices(lev, WhichSlice::This).DistributionMap(), 1,
                                  m_fields.getSlices(lev, WhichSlice::This).nGrowVect());
     amrex::MultiFab::Copy(By_prev_iter, m_fields.getSlices(lev, WhichSlice::This),
-                          Comps[WhichSlice::This]["By"], 0, 1, 0);
+                          Comps[WhichSlice::This]["By"], 0, 1, m_fields.m_slices_nguards);
 
     /* creating aliases to the current in the next slice.
      * This needs to be reset after each push to the next slice */
@@ -943,15 +943,15 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
             relative_Bfield_error_prev_iter, m_predcorr_B_mixing_factor, lev);
 
         /* resetting current in the next slice to clean temporarily used current*/
-        jx_next.setVal(0.);
-        jy_next.setVal(0.);
-        jx_beam_next.setVal(0.);
-        jy_beam_next.setVal(0.);
+        jx_next.setVal(0., m_fields.m_slices_nguards);
+        jy_next.setVal(0., m_fields.m_slices_nguards);
+        jx_beam_next.setVal(0., m_fields.m_slices_nguards);
+        jy_beam_next.setVal(0., m_fields.m_slices_nguards);
 
-        amrex::ParallelContext::push(m_comm_xy);
+        //amrex::ParallelContext::push(m_comm_xy);
          // exchange Bx By
-        m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
-        amrex::ParallelContext::pop();
+        //m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
+        //amrex::ParallelContext::pop();
 
         /* Update force terms using the calculated Bx and By */
         m_multi_plasma.AdvanceParticles(m_fields, geom[lev], false, false, true, false, lev);
diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index c77f176fb6..17895e4cba 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -185,12 +185,11 @@ public:
      * ExmBy and EypBx are solved in the same function because both rely on Psi.
      *
      * \param[in] geom Geometry
-     * \param[in] m_comm_xy transverse communicator on the slice
      * \param[in] lev current level
      * \param[in] islice longitudinal slice
      */
     void SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
-                                    const MPI_Comm& m_comm_xy, const int lev, const int islice);
+                                    const int lev, const int islice);
     /** \brief Compute Ez on the slice container from J by solving a Poisson equation
      *
      * \param[in] geom Geometry
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index e6b097c729..b743e47b68 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -574,7 +574,7 @@ Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, c
 
 void
 Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
-                                   const MPI_Comm& m_comm_xy, const int lev, const int islice)
+                                   const int lev, const int islice)
 {
     /* Solves Laplacian(Psi) =  1/episilon0 * -(rho-Jz/c) and
      * calculates Ex-c By, Ey + c Bx from  grad(-Psi)
@@ -757,13 +757,13 @@ Fields::InitialBfieldGuess (const amrex::Real relative_Bfield_error,
         getSlices(lev, WhichSlice::This),
         1+mix_factor_init_guess, getSlices(lev, WhichSlice::Previous1), Comps[WhichSlice::Previous1]["Bx"],
         -mix_factor_init_guess, getSlices(lev, WhichSlice::Previous2), Comps[WhichSlice::Previous2]["Bx"],
-        Comps[WhichSlice::This]["Bx"], 1, 0);
+        Comps[WhichSlice::This]["Bx"], 1, m_slices_nguards);
 
     amrex::MultiFab::LinComb(
         getSlices(lev, WhichSlice::This),
         1+mix_factor_init_guess, getSlices(lev, WhichSlice::Previous1), Comps[WhichSlice::Previous1]["By"],
         -mix_factor_init_guess, getSlices(lev, WhichSlice::Previous2), Comps[WhichSlice::Previous2]["By"],
-        Comps[WhichSlice::This]["By"], 1, 0);
+        Comps[WhichSlice::This]["By"], 1, m_slices_nguards);
 }
 
 void
@@ -802,17 +802,17 @@ Fields::MixAndShiftBfields (const amrex::MultiFab& B_iter, amrex::MultiFab& B_pr
         B_prev_iter,
         weight_B_iter, B_iter, 0,
         weight_B_prev_iter, B_prev_iter, 0,
-        0, 1, 0);
+        0, 1, m_slices_nguards);
 
     /* calculating the mixed B field  B = a*B + (1-a)*B_prev_iter */
     amrex::MultiFab::LinComb(
         getSlices(lev, WhichSlice::This),
         1-predcorr_B_mixing_factor, getSlices(lev, WhichSlice::This), field_comp,
         predcorr_B_mixing_factor, B_prev_iter, 0,
-        field_comp, 1, 0);
+        field_comp, 1, m_slices_nguards);
 
     /* Shifting the B field from the current iteration to the previous iteration */
-    amrex::MultiFab::Copy(B_prev_iter, B_iter, 0, 0, 1, 0);
+    amrex::MultiFab::Copy(B_prev_iter, B_iter, 0, 0, 1, m_slices_nguards);
 
 }
 
diff --git a/src/particles/PlasmaParticleContainer.cpp b/src/particles/PlasmaParticleContainer.cpp
index fd8c2cb00c..8c5e145744 100644
--- a/src/particles/PlasmaParticleContainer.cpp
+++ b/src/particles/PlasmaParticleContainer.cpp
@@ -164,8 +164,7 @@ IonizationModule (const int lev,
     {
         // Extract properties associated with the extent of the current box
         // Grow to capture the extent of the particle shape
-        amrex::Box tilebox = mfi_ion.tilebox().grow(
-            {Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, 0});
+        amrex::Box tilebox = mfi_ion.tilebox().grow(Fields::m_slices_nguards);
 
         amrex::RealBox const grid_box{tilebox, geom.CellSize(), geom.ProbLo()};
         amrex::Real const * AMREX_RESTRICT xyzmin = grid_box.lo();
diff --git a/src/particles/deposition/BeamDepositCurrent.cpp b/src/particles/deposition/BeamDepositCurrent.cpp
index ae0949e0d5..9c939bd91f 100644
--- a/src/particles/deposition/BeamDepositCurrent.cpp
+++ b/src/particles/deposition/BeamDepositCurrent.cpp
@@ -31,7 +31,7 @@ DepositCurrentSlice (BeamParticleContainer& beam, Fields& fields,
 
     // Extract properties associated with the extent of the current box
     amrex::Box tilebox = bx;
-    tilebox.grow({Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, Hipace::m_depos_order_z});
+    tilebox.grow(Fields::m_slices_nguards + amrex::IntVect{0, 0, Hipace::m_depos_order_z});
 
     amrex::RealBox const grid_box{tilebox, gm[lev].CellSize(), gm[lev].ProbLo()};
     amrex::Real const * AMREX_RESTRICT xyzmin = grid_box.lo();
diff --git a/src/particles/deposition/PlasmaDepositCurrent.cpp b/src/particles/deposition/PlasmaDepositCurrent.cpp
index b10a8c8df0..15d3db0ee1 100644
--- a/src/particles/deposition/PlasmaDepositCurrent.cpp
+++ b/src/particles/deposition/PlasmaDepositCurrent.cpp
@@ -37,7 +37,7 @@ DepositCurrent (PlasmaParticleContainer& plasma, Fields & fields,
     {
         // Extract properties associated with the extent of the current box
         amrex::Box tilebox = pti.tilebox().grow(
-            {Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, 0});
+            Fields::m_slices_nguards);
 
         amrex::RealBox const grid_box{tilebox, gm.CellSize(), gm.ProbLo()};
         amrex::Real const * AMREX_RESTRICT xyzmin = grid_box.lo();
diff --git a/src/particles/pusher/BeamParticleAdvance.cpp b/src/particles/pusher/BeamParticleAdvance.cpp
index a565f14aa3..a3832976ce 100644
--- a/src/particles/pusher/BeamParticleAdvance.cpp
+++ b/src/particles/pusher/BeamParticleAdvance.cpp
@@ -28,7 +28,7 @@ AdvanceBeamParticlesSlice (BeamParticleContainer& beam, Fields& fields, amrex::G
     // Extract properties associated with the extent of the current box
     const int depos_order_xy = Hipace::m_depos_order_xy;
     amrex::Box tilebox = box;
-    tilebox.grow({depos_order_xy, depos_order_xy, Hipace::m_depos_order_z});
+    tilebox.grow(Fields::m_slices_nguards + amrex::IntVect{0, 0, Hipace::m_depos_order_z});
 
     amrex::RealBox const grid_box{tilebox, gm.CellSize(), gm.ProbLo()};
     amrex::Real const * AMREX_RESTRICT xyzmin = grid_box.lo();
diff --git a/src/particles/pusher/PlasmaParticleAdvance.cpp b/src/particles/pusher/PlasmaParticleAdvance.cpp
index 25cecdfba9..12e50e29e3 100644
--- a/src/particles/pusher/PlasmaParticleAdvance.cpp
+++ b/src/particles/pusher/PlasmaParticleAdvance.cpp
@@ -34,8 +34,7 @@ AdvancePlasmaParticles (PlasmaParticleContainer& plasma, Fields & fields,
     {
         // Extract properties associated with the extent of the current box
         // Grow to capture the extent of the particle shape
-        amrex::Box tilebox = pti.tilebox().grow(
-            {Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, 0});
+        amrex::Box tilebox = pti.tilebox().grow(Fields::m_slices_nguards);
 
         amrex::RealBox const grid_box{tilebox, gm.CellSize(), gm.ProbLo()};
         amrex::Real const * AMREX_RESTRICT xyzmin = grid_box.lo();

From 755252ed7a57577d3cde089b4bb07e7a25efcc19 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sun, 5 Dec 2021 19:11:03 +0100
Subject: [PATCH 07/52] more fixes and decrease B mixing factor

---
 examples/blowout_wake/inputs_SI               |  2 +-
 examples/blowout_wake/inputs_ionization_SI    |  3 +-
 examples/blowout_wake/inputs_normalized       |  2 +-
 src/Hipace.cpp                                | 11 ++--
 src/diagnostics/Diagnostic.H                  |  2 +
 src/diagnostics/Diagnostic.cpp                |  6 ++
 src/fields/Fields.cpp                         | 58 +++++++++++--------
 .../FFTPoissonSolverDirichlet.cpp             | 31 +++-------
 .../deposition/BeamDepositCurrent.cpp         |  2 +-
 .../deposition/PlasmaDepositCurrent.cpp       |  2 +-
 src/particles/pusher/BeamParticleAdvance.cpp  |  2 +-
 .../pusher/PlasmaParticleAdvance.cpp          |  3 +-
 12 files changed, 65 insertions(+), 59 deletions(-)

diff --git a/examples/blowout_wake/inputs_SI b/examples/blowout_wake/inputs_SI
index fb69a21500..14a35f0142 100644
--- a/examples/blowout_wake/inputs_SI
+++ b/examples/blowout_wake/inputs_SI
@@ -1,7 +1,7 @@
 amr.n_cell = 64 64 100
 
 hipace.predcorr_max_iterations = 1
-hipace.predcorr_B_mixing_factor = 0.12
+hipace.predcorr_B_mixing_factor = 0.10
 hipace.predcorr_B_error_tolerance = -1
 
 my_constants.kp_inv = 10.e-6
diff --git a/examples/blowout_wake/inputs_ionization_SI b/examples/blowout_wake/inputs_ionization_SI
index 5611476255..f079ede241 100644
--- a/examples/blowout_wake/inputs_ionization_SI
+++ b/examples/blowout_wake/inputs_ionization_SI
@@ -1,8 +1,7 @@
-
 amr.n_cell = 64 64 100
 
 hipace.predcorr_max_iterations = 1
-hipace.predcorr_B_mixing_factor = 0.12
+hipace.predcorr_B_mixing_factor = 0.10
 hipace.predcorr_B_error_tolerance = -1
 
 my_constants.ne = 1.25e24
diff --git a/examples/blowout_wake/inputs_normalized b/examples/blowout_wake/inputs_normalized
index c973902a3e..36ac28c2e1 100644
--- a/examples/blowout_wake/inputs_normalized
+++ b/examples/blowout_wake/inputs_normalized
@@ -2,7 +2,7 @@ amr.n_cell = 64 64 100
 
 hipace.normalized_units=1
 hipace.predcorr_max_iterations = 1
-hipace.predcorr_B_mixing_factor = 0.12
+hipace.predcorr_B_mixing_factor = 0.10
 hipace.predcorr_B_error_tolerance = -1
 
 amr.blocking_factor = 2
diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index 07aa976938..5fc9828824 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -531,7 +531,7 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                                  WhichSlice::Next);
                 m_fields.AddBeamCurrents(lev, WhichSlice::Next);
                 // need to exchange jx jy jx_beam jy_beam
-                j_slice_next.FillBoundary(Geom(lev).periodicity());
+                //j_slice_next.FillBoundary(Geom(lev).periodicity());
             }
 
             m_fields.AddRhoIons(lev);
@@ -551,7 +551,7 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                  ijz == ijx+4 && ijz_beam == ijx+5 && irho == ijx+6 );
             amrex::MultiFab j_slice(m_fields.getSlices(lev, WhichSlice::This),
                                     amrex::make_alias, Comps[WhichSlice::This]["jx"], 7);
-            j_slice.FillBoundary(Geom(lev).periodicity());
+            //j_slice.FillBoundary(Geom(lev).periodicity());
 
             m_fields.SolvePoissonExmByAndEypBx(Geom(), lev, islice);
 
@@ -561,7 +561,7 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                              WhichSlice::This);
             m_fields.AddBeamCurrents(lev, WhichSlice::This);
 
-            j_slice.FillBoundary(Geom(lev).periodicity());
+            //j_slice.FillBoundary(Geom(lev).periodicity());
 
             m_fields.SolvePoissonEz(Geom(), lev, islice);
             m_fields.SolvePoissonBz(Geom(), lev, islice);
@@ -918,7 +918,7 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
         // need to exchange jx jy jx_beam jy_beam
         amrex::MultiFab j_slice_next(m_fields.getSlices(lev, WhichSlice::Next),
                                      amrex::make_alias, Comps[WhichSlice::Next]["jx"], 4);
-        j_slice_next.FillBoundary(Geom(lev).periodicity());
+        //j_slice_next.FillBoundary(Geom(lev).periodicity());
         amrex::ParallelContext::pop();
 
         /* Calculate Bx and By */
@@ -1301,6 +1301,9 @@ Hipace::ResizeFDiagFAB (const int it)
 {
     for (int lev = 0; lev <= finestLevel(); ++lev) {
         amrex::Box bx = boxArray(lev)[it];
+        if(Diagnostic::m_include_ghost_cells) {
+            bx.grow(Fields::m_slices_nguards);
+        }
 
         if (lev == 1) {
             const amrex::Box& bx_lev0 = boxArray(0)[it];
diff --git a/src/diagnostics/Diagnostic.H b/src/diagnostics/Diagnostic.H
index ed32856e01..561113443e 100644
--- a/src/diagnostics/Diagnostic.H
+++ b/src/diagnostics/Diagnostic.H
@@ -67,6 +67,8 @@ public:
      */
     void ResizeFDiagFAB (const amrex::Box box, const int lev);
 
+    static bool m_include_ghost_cells; /**< Include ghost cells in diagnostic output */
+
 private:
 
     /** Vector over levels, all fields */
diff --git a/src/diagnostics/Diagnostic.cpp b/src/diagnostics/Diagnostic.cpp
index 9d4348df11..bb70bbb5ae 100644
--- a/src/diagnostics/Diagnostic.cpp
+++ b/src/diagnostics/Diagnostic.cpp
@@ -2,6 +2,8 @@
 #include "Hipace.H"
 #include <AMReX_ParmParse.H>
 
+bool Diagnostic::m_include_ghost_cells = false;
+
 Diagnostic::Diagnostic (int nlev)
     : m_F(nlev),
       m_diag_coarsen(nlev),
@@ -33,6 +35,10 @@ Diagnostic::Diagnostic (int nlev)
         m_diag_coarsen[ilev] = amrex::IntVect(diag_coarsen_arr);
         AMREX_ALWAYS_ASSERT_WITH_MESSAGE( m_diag_coarsen[ilev].min() >= 1,
             "Coarsening ratio must be >= 1");
+
+        if(diag_coarsen_arr == amrex::Array<int,3>{1,1,1}) {
+           queryWithParser(ppd, "include_ghost_cells", m_include_ghost_cells);
+        }
     }
 
     queryWithParser(ppd, "field_data", m_comps_output);
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index b743e47b68..165d5766fc 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -125,9 +125,9 @@ template<int dir>
 struct derivative {
     FieldView f_view;
     const amrex::Geometry& geom;
-    const amrex::Box& bx;
 
     derivative_GPU<dir> array (amrex::MFIter& mfi) const {
+        amrex::Box bx = f_view.m_mfab[mfi].box();
         return derivative_GPU<dir>{f_view.array(mfi),
             1/(2*geom.CellSize(dir)), bx.smallEnd(dir), bx.bigEnd(dir)};
     }
@@ -147,7 +147,7 @@ struct derivative<Direction::z> {
 
 template<class FVA, class FVB>
 void
-FieldOperation (const amrex::Box op_box, FieldView dst,
+FieldOperation (const amrex::IntVect box_grow, FieldView dst,
                 const amrex::Real factor_a, const FVA src_a,
                 const amrex::Real factor_b, const FVB src_b)
 {
@@ -161,8 +161,7 @@ FieldOperation (const amrex::Box op_box, FieldView dst,
         const auto dst_array = dst.array(mfi);
         const auto src_a_array = src_a.array(mfi);
         const auto src_b_array = src_b.array(mfi);
-        const amrex::Box bx = mfi.tilebox() & op_box;
-
+        const amrex::Box bx = mfi.growntilebox(box_grow);
         amrex::ParallelFor(
             bx,
             [=] AMREX_GPU_DEVICE(int i, int j, int k)
@@ -185,14 +184,20 @@ Fields::Copy (const int lev, const int i_slice, const int slice_comp, const int
     for (amrex::MFIter mfi(slice_mf); mfi.isValid(); ++mfi) {
         auto& slice_fab = slice_mf[mfi];
         amrex::Box slice_box = slice_fab.box();
-        slice_box.setSmall(Direction::z, i_slice);
-        slice_box.setBig  (Direction::z, i_slice);
+        slice_box -= amrex::IntVect(slice_box.smallEnd());
+        if (!Diagnostic::m_include_ghost_cells) {
+            slice_box -= m_slices_nguards;
+        }
         slice_array = amrex::makeArray4(slice_fab.dataPtr(), slice_box, slice_fab.nComp());
-        // slice_array's longitude index is i_slice.
+        // slice_array's longitude index is 0.
     }
 
     const int full_array_z = i_slice / diag_coarsen[2];
-    const amrex::IntVect ncells_global = geom.Domain().length();
+    amrex::Box domain = geom.Domain();
+    if (Diagnostic::m_include_ghost_cells) {
+        domain.grow(m_slices_nguards);
+    }
+    const amrex::IntVect ncells_global = domain.length();
 
     amrex::Box const& vbx = fab.box();
     if (vbx.smallEnd(Direction::z) <= full_array_z and
@@ -216,12 +221,17 @@ Fields::Copy (const int lev, const int i_slice, const int slice_comp, const int
         const int ncells_x = ncells_global[0];
         const int ncells_y = ncells_global[1];
 
+        const int cpyboxlo_x = copy_box.smallEnd(0);
+        const int cpyboxlo_y = copy_box.smallEnd(1);
+
         const int *diag_comps = diag_comps_vect.data();
 
         amrex::ParallelFor(copy_box, ncomp,
-        [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+        [=] AMREX_GPU_DEVICE (int i_l, int j_l, int k, int n) noexcept
         {
             const int m = n[diag_comps];
+            const int i = i_l - cpyboxlo_x;
+            const int j = j_l - cpyboxlo_y;
 
             // coarsening in slice direction is always 1
             const int i_c_start = amrex::min(i*coarse_x +(coarse_x-1)/2 -even_slice_x, ncells_x-1);
@@ -234,12 +244,12 @@ Fields::Copy (const int lev, const int i_slice, const int slice_comp, const int
 
             for (int j_c = j_c_start; j_c != j_c_stop; ++j_c) {
                 for (int i_c = i_c_start; i_c != i_c_stop; ++i_c) {
-                    field_value += slice_array(i_c, j_c, i_slice, m+slice_comp);
+                    field_value += slice_array(i_c, j_c, 0, m+slice_comp);
                     ++n_values;
                 }
             }
 
-            full_array(i,j,k,n+full_comp) = field_value / amrex::max(n_values,1);
+            full_array(i_l,j_l,k,n+full_comp) = field_value / amrex::max(n_values,1);
         });
     }
 }
@@ -591,7 +601,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
     // TODO: InterpolateFromLev0toLev1 jz
 
     // calculating the right-hand side 1/episilon0 * -(rho-Jz/c)
-    FieldOperation(m_box_extended[lev], getStagingArea(lev),
+    FieldOperation(m_slices_nguards, getStagingArea(lev),
                    1./(phys_const.c*phys_const.ep0), getField(lev, WhichSlice::This, "jz"),
                    -1./(phys_const.ep0), getField(lev, WhichSlice::This, "rho"));
 
@@ -615,9 +625,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
         const amrex::Array4<amrex::Real> array_ExmBy = f_ExmBy.array(mfi);
         const amrex::Array4<amrex::Real> array_EypBx = f_EypBx.array(mfi);
         const amrex::Array4<amrex::Real const> array_Psi = f_Psi.array(mfi);
-        amrex::Box op_box = m_box_extended[lev];
-        op_box.grow({-1, -1, 0});
-        const amrex::Box bx = mfi.tilebox() & op_box;
+        const amrex::Box bx = mfi.growntilebox(m_slices_nguards - amrex::IntVect{1, 1, 0});
         const amrex::Real dx_inv = 1./(2*geom[lev].CellSize(Direction::x));
         const amrex::Real dy_inv = 1./(2*geom[lev].CellSize(Direction::y));
 
@@ -646,11 +654,11 @@ Fields::SolvePoissonEz (amrex::Vector<amrex::Geometry> const& geom, const int le
 
     // Right-Hand Side for Poisson equation: compute 1/(episilon0 *c0 )*(d_x(jx) + d_y(jy))
     // from the slice MF, and store in the staging area of poisson_solver
-    FieldOperation(m_box_extended[lev], getStagingArea(lev),
+    FieldOperation(m_slices_nguards, getStagingArea(lev),
                    1./(phys_const.ep0*phys_const.c),
-                   derivative<Direction::x>{getField(lev, WhichSlice::This, "jx"), geom[lev], m_box_extended[lev]},
+                   derivative<Direction::x>{getField(lev, WhichSlice::This, "jx"), geom[lev]},
                    1./(phys_const.ep0*phys_const.c),
-                   derivative<Direction::y>{getField(lev, WhichSlice::This, "jy"), geom[lev], m_box_extended[lev]});
+                   derivative<Direction::y>{getField(lev, WhichSlice::This, "jy"), geom[lev]});
 
 
     InterpolateBoundaries(geom, lev, "Ez", islice);
@@ -672,9 +680,9 @@ Fields::SolvePoissonBx (amrex::MultiFab& Bx_iter, amrex::Vector<amrex::Geometry>
 
     // Right-Hand Side for Poisson equation: compute -mu_0*d_y(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    FieldOperation(m_box_extended[lev], getStagingArea(lev),
+    FieldOperation(m_slices_nguards, getStagingArea(lev),
                    -phys_const.mu0,
-                   derivative<Direction::y>{getField(lev, WhichSlice::This, "jz"), geom[lev], m_box_extended[lev]},
+                   derivative<Direction::y>{getField(lev, WhichSlice::This, "jz"), geom[lev]},
                    phys_const.mu0,
                    derivative<Direction::z>{getField(lev, WhichSlice::Previous1, "jy"),
                    getField(lev, WhichSlice::Next, "jy"), geom[lev]});
@@ -699,9 +707,9 @@ Fields::SolvePoissonBy (amrex::MultiFab& By_iter, amrex::Vector<amrex::Geometry>
 
     // Right-Hand Side for Poisson equation: compute mu_0*d_x(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    FieldOperation(m_box_extended[lev], getStagingArea(lev),
+    FieldOperation(m_slices_nguards, getStagingArea(lev),
                    phys_const.mu0,
-                   derivative<Direction::x>{getField(lev, WhichSlice::This, "jz"), geom[lev], m_box_extended[lev]},
+                   derivative<Direction::x>{getField(lev, WhichSlice::This, "jz"), geom[lev]},
                    -phys_const.mu0,
                    derivative<Direction::z>{getField(lev, WhichSlice::Previous1, "jx"),
                    getField(lev, WhichSlice::Next, "jx"), geom[lev]});
@@ -728,11 +736,11 @@ Fields::SolvePoissonBz (amrex::Vector<amrex::Geometry> const& geom, const int le
 
     // Right-Hand Side for Poisson equation: compute mu_0*(d_y(jx) - d_x(jy))
     // from the slice MF, and store in the staging area of m_poisson_solver
-    FieldOperation(m_box_extended[lev], getStagingArea(lev),
+    FieldOperation(m_slices_nguards, getStagingArea(lev),
                    phys_const.mu0,
-                   derivative<Direction::y>{getField(lev, WhichSlice::This, "jx"), geom[lev], m_box_extended[lev]},
+                   derivative<Direction::y>{getField(lev, WhichSlice::This, "jx"), geom[lev]},
                    -phys_const.mu0,
-                   derivative<Direction::x>{getField(lev, WhichSlice::This, "jy"), geom[lev], m_box_extended[lev]});
+                   derivative<Direction::x>{getField(lev, WhichSlice::This, "jy"), geom[lev]});
 
 
     InterpolateBoundaries(geom, lev, "Bz", islice);
diff --git a/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp b/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
index 2964a6dfba..98d17f4e7f 100644
--- a/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
+++ b/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
@@ -22,28 +22,17 @@ FFTPoissonSolverDirichlet::define (amrex::BoxArray const& a_realspace_ba,
     // If we are going to support parallel FFT, the constructor needs to take a communicator.
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(a_realspace_ba.size() == 1, "Parallel FFT not supported yet");
 
-    // Create the box array that corresponds to spectral space
-    amrex::BoxList real_and_spectral_bl; // Create empty box list
-    // Loop over boxes and fill the box list
-    for (int i=0; i < a_realspace_ba.size(); i++ ) {
-        // For local FFTs, boxes in spectral space
-        // are the same as real space boxes, but have one less ghoast cell
-        // Define the corresponding box
-        amrex::Box space_bx = a_realspace_ba[i];
-        space_bx.grow(Fields::m_slices_nguards);
-        real_and_spectral_bl.push_back( space_bx );
-    }
-    m_spectralspace_ba.define( std::move(real_and_spectral_bl) );
+    m_spectralspace_ba = a_realspace_ba;
 
     // Allocate temporary arrays - in real space and spectral space
     // These arrays will store the data just before/after the FFT
     // The stagingArea is also created from 0 to nx, because the real space array may have
     // an offset for levels > 0
-    m_stagingArea = amrex::MultiFab(m_spectralspace_ba, dm, 1, 0);
-    m_tmpSpectralField = amrex::MultiFab(m_spectralspace_ba, dm, 1, 0);
-    m_eigenvalue_matrix = amrex::MultiFab(m_spectralspace_ba, dm, 1, 0);
-    m_stagingArea.setVal(0.0); // this is not required
-    m_tmpSpectralField.setVal(0.0);
+    m_stagingArea = amrex::MultiFab(a_realspace_ba, dm, 1, Fields::m_slices_nguards);
+    m_tmpSpectralField = amrex::MultiFab(a_realspace_ba, dm, 1, Fields::m_slices_nguards);
+    m_eigenvalue_matrix = amrex::MultiFab(a_realspace_ba, dm, 1, Fields::m_slices_nguards);
+    m_stagingArea.setVal(0.0, Fields::m_slices_nguards); // this is not required
+    m_tmpSpectralField.setVal(0.0, Fields::m_slices_nguards);
 
     // This must be true even for parallel FFT.
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_stagingArea.local_size() == 1,
@@ -51,7 +40,7 @@ FFTPoissonSolverDirichlet::define (amrex::BoxArray const& a_realspace_ba,
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_tmpSpectralField.local_size() == 1,
                                      "There should be only one box locally.");
 
-    const amrex::Box fft_box = m_spectralspace_ba[0];
+    const amrex::Box fft_box = m_stagingArea[0].box();
     const auto dx = gm.CellSizeArray();
     const amrex::Real dxsquared = dx[0]*dx[0];
     const amrex::Real dysquared = dx[1]*dx[1];
@@ -115,7 +104,7 @@ FFTPoissonSolverDirichlet::SolvePoissonEquation (amrex::MultiFab& lhs_mf)
         amrex::Array4<amrex::Real> tmp_cmplx_arr = m_tmpSpectralField.array(mfi);
         amrex::Array4<amrex::Real> eigenvalue_matrix = m_eigenvalue_matrix.array(mfi);
 
-        amrex::ParallelFor( m_spectralspace_ba[mfi],
+        amrex::ParallelFor( m_tmpSpectralField[mfi].box(),
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
                 tmp_cmplx_arr(i,j,k) *= eigenvalue_matrix(i,j,k);
             });
@@ -128,9 +117,7 @@ FFTPoissonSolverDirichlet::SolvePoissonEquation (amrex::MultiFab& lhs_mf)
         amrex::Array4<amrex::Real> lhs_arr = lhs_mf.array(mfi);
         AMREX_ALWAYS_ASSERT_WITH_MESSAGE(lhs_mf.size() == 1,
                                          "Slice MFs must be defined on one box only");
-        const amrex::FArrayBox& lhs_fab = lhs_mf[0];
-        amrex::Box lhs_bx = lhs_fab.box();
-        amrex::ParallelFor( lhs_bx,
+        amrex::ParallelFor( lhs_mf[mfi].box(),
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
                 // Copy field
                 lhs_arr(i,j,k) = tmp_real_arr(i,j,k);
diff --git a/src/particles/deposition/BeamDepositCurrent.cpp b/src/particles/deposition/BeamDepositCurrent.cpp
index 9c939bd91f..ae0949e0d5 100644
--- a/src/particles/deposition/BeamDepositCurrent.cpp
+++ b/src/particles/deposition/BeamDepositCurrent.cpp
@@ -31,7 +31,7 @@ DepositCurrentSlice (BeamParticleContainer& beam, Fields& fields,
 
     // Extract properties associated with the extent of the current box
     amrex::Box tilebox = bx;
-    tilebox.grow(Fields::m_slices_nguards + amrex::IntVect{0, 0, Hipace::m_depos_order_z});
+    tilebox.grow({Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, Hipace::m_depos_order_z});
 
     amrex::RealBox const grid_box{tilebox, gm[lev].CellSize(), gm[lev].ProbLo()};
     amrex::Real const * AMREX_RESTRICT xyzmin = grid_box.lo();
diff --git a/src/particles/deposition/PlasmaDepositCurrent.cpp b/src/particles/deposition/PlasmaDepositCurrent.cpp
index 15d3db0ee1..b10a8c8df0 100644
--- a/src/particles/deposition/PlasmaDepositCurrent.cpp
+++ b/src/particles/deposition/PlasmaDepositCurrent.cpp
@@ -37,7 +37,7 @@ DepositCurrent (PlasmaParticleContainer& plasma, Fields & fields,
     {
         // Extract properties associated with the extent of the current box
         amrex::Box tilebox = pti.tilebox().grow(
-            Fields::m_slices_nguards);
+            {Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, 0});
 
         amrex::RealBox const grid_box{tilebox, gm.CellSize(), gm.ProbLo()};
         amrex::Real const * AMREX_RESTRICT xyzmin = grid_box.lo();
diff --git a/src/particles/pusher/BeamParticleAdvance.cpp b/src/particles/pusher/BeamParticleAdvance.cpp
index a3832976ce..a565f14aa3 100644
--- a/src/particles/pusher/BeamParticleAdvance.cpp
+++ b/src/particles/pusher/BeamParticleAdvance.cpp
@@ -28,7 +28,7 @@ AdvanceBeamParticlesSlice (BeamParticleContainer& beam, Fields& fields, amrex::G
     // Extract properties associated with the extent of the current box
     const int depos_order_xy = Hipace::m_depos_order_xy;
     amrex::Box tilebox = box;
-    tilebox.grow(Fields::m_slices_nguards + amrex::IntVect{0, 0, Hipace::m_depos_order_z});
+    tilebox.grow({depos_order_xy, depos_order_xy, Hipace::m_depos_order_z});
 
     amrex::RealBox const grid_box{tilebox, gm.CellSize(), gm.ProbLo()};
     amrex::Real const * AMREX_RESTRICT xyzmin = grid_box.lo();
diff --git a/src/particles/pusher/PlasmaParticleAdvance.cpp b/src/particles/pusher/PlasmaParticleAdvance.cpp
index 12e50e29e3..25cecdfba9 100644
--- a/src/particles/pusher/PlasmaParticleAdvance.cpp
+++ b/src/particles/pusher/PlasmaParticleAdvance.cpp
@@ -34,7 +34,8 @@ AdvancePlasmaParticles (PlasmaParticleContainer& plasma, Fields & fields,
     {
         // Extract properties associated with the extent of the current box
         // Grow to capture the extent of the particle shape
-        amrex::Box tilebox = pti.tilebox().grow(Fields::m_slices_nguards);
+        amrex::Box tilebox = pti.tilebox().grow(
+            {Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, 0});
 
         amrex::RealBox const grid_box{tilebox, gm.CellSize(), gm.ProbLo()};
         amrex::Real const * AMREX_RESTRICT xyzmin = grid_box.lo();

From 7997b272b68cc3fb2c1321c20115a2779dbd8a5a Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sun, 5 Dec 2021 22:50:29 +0100
Subject: [PATCH 08/52] fix output slicing

---
 src/fields/Fields.cpp                     | 4 ++--
 src/particles/PlasmaParticleContainer.cpp | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 165d5766fc..3296376be0 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -221,8 +221,8 @@ Fields::Copy (const int lev, const int i_slice, const int slice_comp, const int
         const int ncells_x = ncells_global[0];
         const int ncells_y = ncells_global[1];
 
-        const int cpyboxlo_x = copy_box.smallEnd(0);
-        const int cpyboxlo_y = copy_box.smallEnd(1);
+        const int cpyboxlo_x = Diagnostic::m_include_ghost_cells ? -m_slices_nguards[0] : 0;
+        const int cpyboxlo_y = Diagnostic::m_include_ghost_cells ? -m_slices_nguards[1] : 0;
 
         const int *diag_comps = diag_comps_vect.data();
 
diff --git a/src/particles/PlasmaParticleContainer.cpp b/src/particles/PlasmaParticleContainer.cpp
index 8c5e145744..fd8c2cb00c 100644
--- a/src/particles/PlasmaParticleContainer.cpp
+++ b/src/particles/PlasmaParticleContainer.cpp
@@ -164,7 +164,8 @@ IonizationModule (const int lev,
     {
         // Extract properties associated with the extent of the current box
         // Grow to capture the extent of the particle shape
-        amrex::Box tilebox = mfi_ion.tilebox().grow(Fields::m_slices_nguards);
+        amrex::Box tilebox = mfi_ion.tilebox().grow(
+            {Hipace::m_depos_order_xy, Hipace::m_depos_order_xy, 0});
 
         amrex::RealBox const grid_box{tilebox, geom.CellSize(), geom.ProbLo()};
         amrex::Real const * AMREX_RESTRICT xyzmin = grid_box.lo();

From 8466220750a7723d4ac784c4978eb0bff7484190 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 9 Dec 2021 03:43:48 +0100
Subject: [PATCH 09/52] fix? mesh refinement

---
 src/fields/Fields.H   |  16 +-
 src/fields/Fields.cpp | 437 +++++++++++++++++-------------------------
 2 files changed, 173 insertions(+), 280 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 17895e4cba..18725e579e 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -159,8 +159,8 @@ public:
      * \param[in] component which can be Psi, Ez, By, Bx ...
      * \param[in] islice longitudinal slice
      */
-    void InterpolateBoundaries (amrex::Vector<amrex::Geometry> const& geom, const int lev,
-                                std::string component, const int islice);
+    void SetRefinedBoundaries (amrex::Vector<amrex::Geometry> const& geom, const int lev,
+                               std::string component, const int islice);
 
     /** \brief Interpolate values from coarse grid to the fine grid
      *
@@ -172,14 +172,6 @@ public:
     void InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, const int lev,
                                     std::string component, const int islice);
 
-    /** \brief Interpolate values from coarse grid to the fine grid
-     *
-     * \param[in] geom Geometry
-     * \param[in] lev current level
-     * \param[in] component which can be Psi or rho
-     */
-    void InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, const int lev,
-                                    std::string component);
 
     /** \brief Compute ExmBy and EypBx on the slice container from J by solving a Poisson equation
      * ExmBy and EypBx are solved in the same function because both rely on Psi.
@@ -276,10 +268,6 @@ public:
 
     /** Number of guard cells for slices MultiFab */
     static amrex::IntVect m_slices_nguards;
-
-    static amrex::Vector<amrex::Box> m_box_problem;
-    static amrex::Vector<amrex::Box> m_box_extended;
-    static amrex::Vector<amrex::Box> m_box_narrow;
 private:
     /** Vector over levels, array of 4 slices required to compute current slice */
     amrex::Vector<std::array<amrex::MultiFab, m_nslices>> m_slices;
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 3296376be0..d47adf22a7 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -8,17 +8,10 @@
 
 amrex::IntVect Fields::m_slices_nguards = {-1, -1, -1};
 
-amrex::Vector<amrex::Box> Fields::m_box_problem{};
-amrex::Vector<amrex::Box> Fields::m_box_extended{};
-amrex::Vector<amrex::Box> Fields::m_box_narrow{};
-
 Fields::Fields (Hipace const* a_hipace)
     : m_slices(a_hipace->maxLevel()+1)
 {
     const int max_lev = a_hipace->maxLevel()+1;
-    m_box_problem.resize(max_lev);
-    m_box_extended.resize(max_lev);
-    m_box_narrow.resize(max_lev);
     amrex::ParmParse ppf("fields");
     queryWithParser(ppf, "do_dirichlet_poisson", m_do_dirichlet_poisson);
 }
@@ -36,17 +29,6 @@ Fields::AllocData (
     int nguards_xy = Hipace::m_depos_order_xy + 1;
     m_slices_nguards = {nguards_xy, nguards_xy, 0};
 
-    // box where the problem is defined, contains particles
-    m_box_problem[lev] = slice_ba[0];
-
-    // contains valid fileds that can be used for particles, also the box of the Poisson solver
-    m_box_extended[lev] = m_box_problem[lev];
-    m_box_extended[lev].grow(m_slices_nguards);
-
-    // contains valid sources for lev=1
-    m_box_narrow[lev] = m_box_problem[lev];
-    m_box_narrow[lev].grow(-m_slices_nguards);
-
     for (int islice=0; islice<WhichSlice::N; islice++) {
         m_slices[lev][islice].define(
             slice_ba, slice_dm, Comps[islice]["N"], m_slices_nguards,
@@ -90,6 +72,13 @@ Fields::AllocData (
     }
 }
 
+// Index <-> position offset: x = i * dx + GetPosOffset(0, geom, box);  i = (x - offset)/dx;
+// The mid index (lo+hi)/2 of box maps to the midpoint of [ProbLo, ProbHi] in that direction.
+amrex::Real GetPosOffset (const int direction, const amrex::Geometry& geom, const amrex::Box& box) {
+    using namespace amrex::literals; // provides the _rt literal suffix used below
+    return 0.5_rt*(geom.ProbLo(direction) + geom.ProbHi(direction)
+           - geom.CellSize(direction) * (box.smallEnd(direction) + box.bigEnd(direction)));
+}
 
 template<int dir>
 struct derivative_GPU {
@@ -145,6 +134,62 @@ struct derivative<Direction::z> {
     }
 };
 
+template<int interp_order_xy>
+struct interpolated_field_GPU { // device functor: interpolated field value at a position (x, y)
+    amrex::Array4<amrex::Real const> arr_this; // sampled with weight (1 - rel_z)
+    amrex::Array4<amrex::Real const> arr_prev; // sampled with weight rel_z
+    amrex::Real dx_inv; // scales (x - offset0) into an index coordinate
+    amrex::Real dy_inv; // scales (y - offset1) into an index coordinate
+    amrex::Real offset0; // position offset subtracted from x
+    amrex::Real offset1; // position offset subtracted from y
+    amrex::Real rel_z; // blending weight between arr_this and arr_prev
+    int lo2; // third (z) index used for every array access
+
+    AMREX_GPU_DEVICE amrex::Real operator() (amrex::Real x, amrex::Real y) const noexcept {
+        using namespace amrex::literals;
+
+        // x direction: index coordinate, then interp_order_xy + 1 weights starting at i_cell
+        const amrex::Real xmid = (x - offset0)*dx_inv;
+        amrex::Real sx_cell[interp_order_xy + 1];
+        const int i_cell = compute_shape_factor<interp_order_xy>(sx_cell, xmid);
+
+        // y direction: index coordinate, then interp_order_xy + 1 weights starting at j_cell
+        const amrex::Real ymid = (y - offset1)*dy_inv;
+        amrex::Real sy_cell[interp_order_xy + 1];
+        const int j_cell = compute_shape_factor<interp_order_xy>(sy_cell, ymid);
+
+        amrex::Real field_value = 0.0_rt;
+        // accumulate the weighted stencil, blending the two arrays linearly with rel_z
+        for (int iy=0; iy<=interp_order_xy; iy++){
+            for (int ix=0; ix<=interp_order_xy; ix++){
+                field_value += sx_cell[ix]*sy_cell[iy]*
+                    ((1.0_rt-rel_z)*arr_this(i_cell+ix,
+                                             j_cell+iy, lo2)
+                             +rel_z*arr_prev(i_cell+ix,
+                                             j_cell+iy, lo2));
+            }
+        }
+        return field_value;
+    }
+};
+
+template<int interp_order_xy> // order of the transverse shape-factor interpolation
+struct interpolated_field { // host-side factory that builds an interpolated_field_GPU per MFIter
+    FieldView f_view_this; // becomes arr_this, weighted with (1 - rel_z)
+    FieldView f_view_prev; // becomes arr_prev, weighted with rel_z
+    const amrex::Geometry& geom; // supplies the cell sizes and the position offsets
+    amrex::Real rel_z; // blending weight between the two views
+
+    interpolated_field_GPU<interp_order_xy> array (amrex::MFIter& mfi) const {
+        amrex::Box bx = f_view_this.m_mfab[mfi].box(); // full fab box of the "this" view
+        return interpolated_field_GPU<interp_order_xy>{
+            f_view_this.array(mfi), f_view_prev.array(mfi),
+            1/geom.CellSize(0), 1/geom.CellSize(1),
+            GetPosOffset(0, geom, bx), GetPosOffset(1, geom, bx), // x offset, then y offset
+            rel_z, bx.smallEnd(2)};
+    }
+};
+
 template<class FVA, class FVB>
 void
 FieldOperation (const amrex::IntVect box_grow, FieldView dst,
@@ -156,8 +201,7 @@ FieldOperation (const amrex::IntVect box_grow, FieldView dst,
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
 #endif
-    for ( amrex::MFIter mfi(dst.m_mfab, amrex::TilingIfNotGPU());
-          mfi.isValid(); ++mfi ){
+    for ( amrex::MFIter mfi(dst.m_mfab, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi ){
         const auto dst_array = dst.array(mfi);
         const auto src_a_array = src_a.array(mfi);
         const auto src_b_array = src_b.array(mfi);
@@ -320,262 +364,124 @@ Fields::AddBeamCurrents (const int lev, const int which_slice)
     }
 }
 
+// Adds -boundary_value(x, y)/h^2 to each outermost cell of solver_size in dst, with (x, y) the
+// position one cell outside that edge and h the cell size normal to it (corners get two terms).
+template<class Functional>
+void
+SetDirichletBoundaries (amrex::Array4<amrex::Real> dst, const amrex::Box& solver_size,
+                        const amrex::Geometry& geom, const Functional boundary_value)
+{
+    const int box_len0 = solver_size.length(0);
+    const int box_len1 = solver_size.length(1);
+    const int box_lo0 = solver_size.smallEnd(0);
+    const int box_lo1 = solver_size.smallEnd(1);
+    const int box_lo2 = solver_size.smallEnd(2);
+    const amrex::Real dx = geom.CellSize(0);
+    const amrex::Real dy = geom.CellSize(1);
+    const amrex::Real offset0 = GetPosOffset(0, geom, solver_size);
+    const amrex::Real offset1 = GetPosOffset(1, geom, solver_size);
+    // one iteration per edge cell: i < box_len0 walks x, the rest walk y; j = 0 low / 1 high edge
+    const amrex::Box edge_box = {{0, 0, 0}, {box_len0 + box_len1 - 1, 1, 0}};
+
+    amrex::ParallelFor(edge_box,
+        [=] AMREX_GPU_DEVICE (int i, int j, int) noexcept
+        {
+            const bool i_is_changing = (i < box_len0); // on a low/high-y edge, moving along x
+            const bool i_lo_edge = (!i_is_changing)*(!j); // low-x edge
+            const bool i_hi_edge = (!i_is_changing)*j; // high-x edge
+            const bool j_lo_edge = i_is_changing*(!j); // low-y edge
+            const bool j_hi_edge = i_is_changing*j; // high-y edge
+
+            const int i_idx = box_lo0 + i_hi_edge*(box_len0-1) + i_is_changing*i;
+            const int j_idx = box_lo1 + j_hi_edge*(box_len1-1) + (!i_is_changing)*(i-box_len0);
+            // shift one cell outward, normal to the edge, to where the boundary value is sampled
+            const int i_idx_offset = i_idx - i_lo_edge + i_hi_edge;
+            const int j_idx_offset = j_idx - j_lo_edge + j_hi_edge;
+
+            const amrex::Real x = i_idx_offset * dx + offset0;
+            const amrex::Real y = j_idx_offset * dy + offset1;
+            // squared cell size in the direction normal to the edge
+            const amrex::Real dxdx = dx*dx*(!i_is_changing) + dy*dy*i_is_changing;
+            // NOTE: a corner cell is updated by two iterations (one per adjacent edge)
+            amrex::Gpu::Atomic::AddNoRet(&(dst(i_idx, j_idx, box_lo2)),
+                                         - boundary_value(x, y) / dxdx);
+        });
+}
+
 void
-Fields::InterpolateBoundaries (amrex::Vector<amrex::Geometry> const& geom, const int lev,
-                               std::string component, const int islice)
+Fields::SetRefinedBoundaries (amrex::Vector<amrex::Geometry> const& geom, const int lev,
+                              std::string component, const int islice)
 {
-    // To solve a Poisson equation with non-zero Dirichlet boundary conditions, the source term
-    // must be corrected at the outmost grid points in x by -field_value_at_guard_cell / dx^2 and
-    // in y by -field_value_at_guard_cell / dy^2, where dx and dy are those of the fine grid
-    // This follows Van Loan, C. (1992). Computational frameworks for the fast Fourier transform.
-    // Page 254 ff.
-    // The interpolation is done in second order transversely and linearly in longitudinal direction
-
-    HIPACE_PROFILE("Fields::InterpolateBoundaries()");
+    HIPACE_PROFILE("Fields::SetRefinedBoundaries()");
     if (lev == 0) return; // only interpolate boundaries to lev 1
-    using namespace amrex::literals;
-    const auto plo = geom[lev].ProbLoArray();
-    const auto dx = geom[lev].CellSizeArray();
-    const auto plo_coarse = geom[lev-1].ProbLoArray();
-    const auto dx_coarse = geom[lev-1].CellSizeArray();
-constexpr int interp_order = 2;
-
-    // get relative position of fine grid slice between coarse grids for longitudinal lin. interpol.
-     const amrex::Real z = plo_coarse[2] + (islice+0.5_rt)*dx[2];
-     const int idz_coarse = (z-plo_coarse[2])/dx_coarse[2];
-     const amrex::Real rel_z = (z - (plo_coarse[2] + (idz_coarse)*dx_coarse[2])) / dx_coarse[2];
-
-    // get level 0 for interpolation to source term of level 1
-    amrex::MultiFab lhs_coarse(getSlices(lev-1, WhichSlice::This), amrex::make_alias,
-                               Comps[WhichSlice::This][component], 1);
-    amrex::MultiFab lhs_coarse_prev(getSlices(lev-1, WhichSlice::Previous1), amrex::make_alias,
-                               Comps[WhichSlice::Previous1][component], 1);
-    amrex::FArrayBox& lhs_fab = lhs_coarse[0];
-    amrex::Box lhs_bx = lhs_fab.box();
-    lhs_bx.grow({-m_slices_nguards[0], -m_slices_nguards[1], 0});
-    // low end of the coarse grid excluding guard cells
-    const amrex::IntVect lo_coarse = lhs_bx.smallEnd();
-
-    // get offset of level 1 w.r.t. the staging area
-    amrex::MultiFab lhs_fine(getSlices(lev, WhichSlice::This), amrex::make_alias,
-                              Comps[WhichSlice::This][component], 1);
-    amrex::FArrayBox& lhs_fine_fab = lhs_fine[0];
-    amrex::Box lhs_fine_bx = lhs_fine_fab.box();
-    lhs_fine_bx.grow({-m_slices_nguards[0], -m_slices_nguards[1], 0});
-    // low end of the fine grid excluding guard cells, in units of fine cells.
-    const amrex::IntVect lo = lhs_fine_bx.smallEnd();
-
-    for (amrex::MFIter mfi( m_poisson_solver[lev]->StagingArea(),false); mfi.isValid(); ++mfi)
+    constexpr int interp_order = 2;
+
+    const amrex::Real ref_ratio_z = geom[lev-1].CellSize(2) / geom[lev].CellSize(2);
+    const amrex::Real islice_coarse_real = islice / ref_ratio_z;
+    const int islice_coarse_int = islice_coarse_real;
+    const amrex::Real rel_z = islice_coarse_real - islice_coarse_int;
+
+    auto solution_interp = interpolated_field<interp_order>{
+        getField(lev-1, WhichSlice::This, component),
+        getField(lev-1, WhichSlice::Previous1, component),
+        geom[lev-1], rel_z};
+    FieldView staging_area = getStagingArea(lev);
+
+    for (amrex::MFIter mfi(staging_area.m_mfab, false); mfi.isValid(); ++mfi)
     {
-        const amrex::Box & bx = mfi.tilebox();
-        // Get the big end of the Box
-        const amrex::IntVect& big = bx.bigEnd();
-        // highest valid index (not counting guard cells) of the staging area in x and y
-        const int nx_fine_high = big[0];
-        const int ny_fine_high = big[1];
-        amrex::Array4<amrex::Real>  data_array = m_poisson_solver[lev]->StagingArea().array(mfi);
-        amrex::Array4<amrex::Real>  arr_coarse = lhs_coarse.array(mfi);
-        amrex::Array4<amrex::Real>  arr_coarse_prev = lhs_coarse_prev.array(mfi);
-
-        // Loop over the valid indices on the fine grid and interpolate the value of the coarse grid
-        // at the location of the guard cell on the fine grid to the first/last valid grid point on
-        // the fine grid
-        amrex::ParallelFor(
-            bx,
-            [=] AMREX_GPU_DEVICE(int i, int j , int k) noexcept
-            {
-                if (i==0 || i== nx_fine_high || j==0 || j == ny_fine_high) {
-                    // Compute coordinate on fine grid
-                    amrex::Real x, y;
-
-                    // handling of the left and right boundary of the staging area
-                    if ((i==0) || (i==nx_fine_high)) {
-                        if (i==0) {
-                            // position of guard cell left of first valid grid point
-                            x = plo[0] + (i+lo[0]-0.5_rt)*dx[0];
-                        } else if (i== nx_fine_high) {
-                            // position of guard cell right of last valid grid point
-                            x = plo[0] + (i+lo[0]+1.5_rt)*dx[0];
-                        }
-                        y = plo[1] + (j+lo[1]+0.5_rt)*dx[1];
-
-                        // --- Compute shape factors
-                        // x direction
-                        // j_cell leftmost cell in x that the particle touches.
-                        // sx_cell shape factor along x
-                        const amrex::Real xmid = (x - plo_coarse[0])/dx_coarse[0];
-                        amrex::Real sx_cell[interp_order + 1];
-                        const int j_cell = compute_shape_factor<interp_order>(sx_cell, xmid-0.5_rt);
-
-                        // y direction
-                        const amrex::Real ymid = (y - plo_coarse[1])/dx_coarse[1];
-                        amrex::Real sy_cell[interp_order + 1];
-                        const int k_cell = compute_shape_factor<interp_order>(sy_cell, ymid-0.5_rt);
-
-                        amrex::Real boundary_value = 0.0_rt;
-                        // add interpolated contribution to boundary value
-                        for (int iy=0; iy<=interp_order; iy++){
-                            for (int ix=0; ix<=interp_order; ix++){
-                                boundary_value += sx_cell[ix]*sy_cell[iy]*
-                                  ((1.0_rt-rel_z)*arr_coarse(lo_coarse[0]+j_cell+ix,
-                                                             lo_coarse[1]+k_cell+iy, lo_coarse[2])
-                                     + rel_z*arr_coarse_prev(lo_coarse[0]+j_cell+ix,
-                                                             lo_coarse[1]+k_cell+iy, lo_coarse[2]));
-                            }
-                        }
-
-                        // adjusting source term to get non-zero Dirichlet boundary condition
-                        data_array(i,j,k) -= boundary_value/(dx[0]*dx[0]);
-                    }
-
-                    // handling of the bottom and top boundary of the staging area
-                    if ((j==0) || (j==ny_fine_high)) {
-                        if (j==0) {
-                            // position of guard cell below of first valid grid point
-                            y = plo[1] + (j+lo[1]-0.5_rt)*dx[1];
-                        } else if (j== ny_fine_high) {
-                            // position of guard cell above of last valid grid point
-                            y = plo[1] + (j+lo[1]+1.5_rt)*dx[1];
-                        }
-                        x = plo[0] + (i+lo[0]+0.5_rt)*dx[0];
-
-                        // --- Compute shape factors
-                        // x direction
-                        // j_cell leftmost cell in x that the particle touches.
-                        // sx_cell shape factor along x
-                        const amrex::Real xmid = (x - plo_coarse[0])/dx_coarse[0];
-                        amrex::Real sx_cell[interp_order + 1];
-                        const int j_cell = compute_shape_factor<interp_order>(sx_cell, xmid-0.5_rt);
-
-                        // y direction
-                        const amrex::Real ymid = (y - plo_coarse[1])/dx_coarse[1];
-                        amrex::Real sy_cell[interp_order + 1];
-                        const int k_cell = compute_shape_factor<interp_order>(sy_cell, ymid-0.5_rt);
-
-                        amrex::Real boundary_value = 0.0_rt;
-                        // add interpolated contribution to boundary value
-                        for (int iy=0; iy<=interp_order; iy++){
-                            for (int ix=0; ix<=interp_order; ix++){
-                                boundary_value += sx_cell[ix]*sy_cell[iy]*
-                                  ((1.0_rt-rel_z)*arr_coarse(lo_coarse[0]+j_cell+ix,
-                                                             lo_coarse[1]+k_cell+iy, lo_coarse[2])
-                                     + rel_z*arr_coarse_prev(lo_coarse[0]+j_cell+ix,
-                                                             lo_coarse[1]+k_cell+iy, lo_coarse[2]));
-                            }
-                        }
-
-                        // adjusting source term to get non-zero Dirichlet boundary condition
-                        data_array(i,j,k) -= boundary_value/(dx[1]*dx[1]);
-                    }
-                }
-            });
+        const auto arr_solution_interp = solution_interp.array(mfi);
+        auto arr_staging_area = staging_area.array(mfi);
+        const amrex::Box fine_box = staging_area.m_mfab[mfi].box();
+
+        SetDirichletBoundaries(arr_staging_area, fine_box, geom[lev], arr_solution_interp);
     }
 }
 
+
 void
 Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, const int lev,
                                    std::string component, const int islice)
 {
-    // This function interpolates values from the coarse to the fine grid with second order.
-    // This is required for rho to fix the incomplete deposition close to the boundary and for Psi
-    // to fill the guard cell, which is needed for the transverse derivative
-    // The interpolation is done in second order transversely and linearly in longitudinal direction
-
-    HIPACE_PROFILE("Fields::InterpolateFromLev0toLev1()");
     if (lev == 0) return; // only interpolate boundaries to lev 1
     using namespace amrex::literals;
-    const auto plo = geom[lev].ProbLoArray();
-    const auto dx = geom[lev].CellSizeArray();
-    const auto plo_coarse = geom[lev-1].ProbLoArray();
-    const auto dx_coarse = geom[lev-1].CellSizeArray();
     constexpr int interp_order = 2;
 
-    // get relative position of fine grid slice between coarse grids for longitudinal lin. interpol.
-     const amrex::Real z = plo_coarse[2] + (islice+0.5_rt)*dx[2];
-     const int idz_coarse = (z-plo_coarse[2])/dx_coarse[2];
-     const amrex::Real rel_z = (z - (plo_coarse[2] + (idz_coarse)*dx_coarse[2])) / dx_coarse[2];
-
-    // get level 0 array
-    amrex::MultiFab lhs_coarse(getSlices(lev-1, WhichSlice::This), amrex::make_alias,
-                               Comps[WhichSlice::This][component], 1);
-    amrex::MultiFab lhs_coarse_prev(getSlices(lev-1, WhichSlice::Previous1), amrex::make_alias,
-                              Comps[WhichSlice::Previous1][component], 1);
-    amrex::FArrayBox& lhs_fab = lhs_coarse[0];
-    amrex::Box lhs_bx = lhs_fab.box();
-    // lhs_bx should only have valid cells
-    lhs_bx.grow({-m_slices_nguards[0], -m_slices_nguards[1], 0});
-    // low end of the coarse grid excluding guard cells, in units of coarse cells.
-    const amrex::IntVect lo_coarse = lhs_bx.smallEnd();
-
-    // get level 1 array
-    amrex::MultiFab lhs_fine(getSlices(lev, WhichSlice::This), amrex::make_alias,
-                              Comps[WhichSlice::This][component], 1);
-
-    for (amrex::MFIter mfi( lhs_fine,false); mfi.isValid(); ++mfi)
+    const amrex::Real ref_ratio_z = geom[lev-1].CellSize(2) / geom[lev].CellSize(2);
+    const amrex::Real islice_coarse_real = islice / ref_ratio_z;
+    const int islice_coarse_int = islice_coarse_real;
+    const amrex::Real rel_z = islice_coarse_real - islice_coarse_int;
+
+    auto field_coarse_interp = interpolated_field<interp_order>{
+        getField(lev-1, WhichSlice::This, component),
+        getField(lev-1, WhichSlice::Previous1, component),
+        geom[lev-1], rel_z};
+    FieldView field_fine = getField(lev, WhichSlice::This, component);
+
+    for (amrex::MFIter mfi( field_fine.m_mfab, false); mfi.isValid(); ++mfi)
     {
-        amrex::Box bx = mfi.tilebox();
-        // psi needs the guard cells, as these are the cells we need to fill
-        if (component == "Psi") bx.grow(m_slices_nguards);
-        // Get the small end of the Box
-        const amrex::IntVect& small = bx.smallEnd();
-        // the interpolation of rho at the low end starts at the lowest valid cell,
-        // for Psi at the guard cell below the first valid cell
-        const int nx_fine_low = (component == "rho") ? small[0] : small[0]+m_slices_nguards[0]-1;
-        const int ny_fine_low = (component == "rho") ? small[1] : small[1]+m_slices_nguards[1]-1;
-        // Get the big end of the Box
-        const amrex::IntVect& big = bx.bigEnd();
-        // the interpolation of rho at the high end starts at the highest valid cell,
-        // for Psi at the guard cell above the last valid cell
-        const int nx_fine_high = (component == "rho") ? big[0] : big[0]-m_slices_nguards[0]+1;
-        const int ny_fine_high = (component == "rho") ? big[1] : big[1]-m_slices_nguards[0]+1;
-        // rho needs to be interpolated for the number of guard cells, Psi just for one guard cell
-        const int x_range = (component == "rho") ? m_slices_nguards[0] : 1;
-        const int y_range = (component == "rho") ? m_slices_nguards[1] : 1;
-
-        amrex::Array4<amrex::Real>  data_array = lhs_fine.array(mfi);
-        amrex::Array4<amrex::Real>  arr_coarse = lhs_coarse.array(mfi);
-        amrex::Array4<amrex::Real>  arr_coarse_prev = lhs_coarse_prev.array(mfi);
-
-        // Loop over the valid indices on the fine grid and interpolate the value of the coarse grid
-        amrex::ParallelFor(
-            bx,
-            [=] AMREX_GPU_DEVICE(int i, int j , int k) noexcept
+        auto arr_field_coarse_interp = field_coarse_interp.array(mfi);
+        auto arr_field_fine = field_fine.array(mfi);
+
+        const amrex::Box fine_box_ghost = field_fine.m_mfab[mfi].box();
+        amrex::Box fine_box_narrow = fine_box_ghost;
+        fine_box_narrow.grow(-2*m_slices_nguards);
+        const int narrow_i_lo = fine_box_narrow.smallEnd(0);
+        const int narrow_i_hi = fine_box_narrow.bigEnd(0);
+        const int narrow_j_lo = fine_box_narrow.smallEnd(1);
+        const int narrow_j_hi = fine_box_narrow.bigEnd(1);
+
+        const amrex::Real dx = geom[lev].CellSize(0);
+        const amrex::Real dy = geom[lev].CellSize(1);
+        const amrex::Real offset0 = GetPosOffset(0, geom[lev], fine_box_ghost);
+        const amrex::Real offset1 = GetPosOffset(1, geom[lev], fine_box_ghost);
+
+        amrex::ParallelFor(fine_box_ghost,
+            [=] AMREX_GPU_DEVICE (int i, int j , int k) noexcept
             {
-                if ((i >= nx_fine_low  && i < nx_fine_low  + x_range) ||
-                    (i <= nx_fine_high && i > nx_fine_high - x_range) ||
-                    (j >= ny_fine_low  && j < ny_fine_low  + y_range) ||
-                    (j <= ny_fine_high && j > ny_fine_high - y_range) ) {
-
-                    const amrex::Real x = plo[0] + (i+0.5_rt)*dx[0];
-                    const amrex::Real y = plo[1] + (j+0.5_rt)*dx[1];
-
-                    // --- Compute shape factors
-                    // x direction
-                    // j_cell leftmost cell in x that the particle touches.
-                    // sx_cell shape factor along x
-                    const amrex::Real xmid = (x - plo_coarse[0])/dx_coarse[0];
-                    amrex::Real sx_cell[interp_order + 1];
-                    const int j_cell = compute_shape_factor<interp_order>(sx_cell, xmid-0.5_rt);
-
-                    // y direction
-                    const amrex::Real ymid = (y - plo_coarse[1])/dx_coarse[1];
-                    amrex::Real sy_cell[interp_order + 1];
-                    const int k_cell = compute_shape_factor<interp_order>(sy_cell, ymid-0.5_rt);
-
-                    amrex::Real coarse_value = 0.0_rt;
-                    // sum interpolated contributions
-                    for (int iy=0; iy<=interp_order; iy++){
-                        for (int ix=0; ix<=interp_order; ix++){
-                            coarse_value += sx_cell[ix]*sy_cell[iy]*
-                                ((1.0_rt-rel_z)*arr_coarse(lo_coarse[0]+j_cell+ix,
-                                                           lo_coarse[1]+k_cell+iy, lo_coarse[2])
-                                   + rel_z*arr_coarse_prev(lo_coarse[0]+j_cell+ix,
-                                                           lo_coarse[1]+k_cell+iy, lo_coarse[2]));
-                        }
-                    }
-
-                    // set value on the fine grid to the interpolated value of the coarse grid
-                    data_array(i,j,k) = coarse_value;
+                if(i<narrow_i_lo || i>narrow_i_hi || j<narrow_j_lo ||j>narrow_j_hi) {
+                    amrex::Real x = i * dx + offset0;
+                    amrex::Real y = j * dy + offset1;
+                    arr_field_fine(i,j,k) = arr_field_coarse_interp(x,y);
                 }
             });
     }
@@ -605,7 +511,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
                    1./(phys_const.c*phys_const.ep0), getField(lev, WhichSlice::This, "jz"),
                    -1./(phys_const.ep0), getField(lev, WhichSlice::This, "rho"));
 
-    InterpolateBoundaries(geom, lev, "Psi", islice);
+    SetRefinedBoundaries(geom, lev, "Psi", islice);
     m_poisson_solver[lev]->SolvePoissonEquation(lhs);
 
     /* ---------- Transverse FillBoundary Psi ---------- */
@@ -620,8 +526,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
 #endif
-    for ( amrex::MFIter mfi(f_ExmBy.m_mfab, amrex::TilingIfNotGPU());
-          mfi.isValid(); ++mfi ){
+    for ( amrex::MFIter mfi(f_ExmBy.m_mfab, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi ){
         const amrex::Array4<amrex::Real> array_ExmBy = f_ExmBy.array(mfi);
         const amrex::Array4<amrex::Real> array_EypBx = f_EypBx.array(mfi);
         const amrex::Array4<amrex::Real const> array_Psi = f_Psi.array(mfi);
@@ -661,7 +566,7 @@ Fields::SolvePoissonEz (amrex::Vector<amrex::Geometry> const& geom, const int le
                    derivative<Direction::y>{getField(lev, WhichSlice::This, "jy"), geom[lev]});
 
 
-    InterpolateBoundaries(geom, lev, "Ez", islice);
+    SetRefinedBoundaries(geom, lev, "Ez", islice);
     // Solve Poisson equation.
     // The RHS is in the staging area of poisson_solver.
     // The LHS will be returned as lhs.
@@ -688,7 +593,7 @@ Fields::SolvePoissonBx (amrex::MultiFab& Bx_iter, amrex::Vector<amrex::Geometry>
                    getField(lev, WhichSlice::Next, "jy"), geom[lev]});
 
 
-    InterpolateBoundaries(geom, lev, "Bx", islice);
+    SetRefinedBoundaries(geom, lev, "Bx", islice);
     // Solve Poisson equation.
     // The RHS is in the staging area of poisson_solver.
     // The LHS will be returned as lhs.
@@ -715,7 +620,7 @@ Fields::SolvePoissonBy (amrex::MultiFab& By_iter, amrex::Vector<amrex::Geometry>
                    getField(lev, WhichSlice::Next, "jx"), geom[lev]});
 
 
-    InterpolateBoundaries(geom, lev, "By", islice);
+    SetRefinedBoundaries(geom, lev, "By", islice);
     // Solve Poisson equation.
     // The RHS is in the staging area of poisson_solver.
     // The LHS will be returned as lhs.
@@ -743,7 +648,7 @@ Fields::SolvePoissonBz (amrex::Vector<amrex::Geometry> const& geom, const int le
                    derivative<Direction::x>{getField(lev, WhichSlice::This, "jy"), geom[lev]});
 
 
-    InterpolateBoundaries(geom, lev, "Bz", islice);
+    SetRefinedBoundaries(geom, lev, "Bz", islice);
     // Solve Poisson equation.
     // The RHS is in the staging area of m_poisson_solver.
     // The LHS will be returned as lhs.

From fc7fccdcc0c58430dc63f0461ca3788d72b5c28c Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 9 Dec 2021 14:28:51 +0100
Subject: [PATCH 10/52] shrink fft back to original size

---
 examples/blowout_wake/inputs_SI               |  2 +-
 examples/blowout_wake/inputs_ionization_SI    |  2 +-
 examples/blowout_wake/inputs_normalized       |  2 +-
 src/fields/Fields.H                           |  8 ++-
 src/fields/Fields.cpp                         | 56 +++++++++----------
 .../FFTPoissonSolverDirichlet.cpp             | 12 ++--
 6 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/examples/blowout_wake/inputs_SI b/examples/blowout_wake/inputs_SI
index 14a35f0142..fb69a21500 100644
--- a/examples/blowout_wake/inputs_SI
+++ b/examples/blowout_wake/inputs_SI
@@ -1,7 +1,7 @@
 amr.n_cell = 64 64 100
 
 hipace.predcorr_max_iterations = 1
-hipace.predcorr_B_mixing_factor = 0.10
+hipace.predcorr_B_mixing_factor = 0.12
 hipace.predcorr_B_error_tolerance = -1
 
 my_constants.kp_inv = 10.e-6
diff --git a/examples/blowout_wake/inputs_ionization_SI b/examples/blowout_wake/inputs_ionization_SI
index f079ede241..33f827cd63 100644
--- a/examples/blowout_wake/inputs_ionization_SI
+++ b/examples/blowout_wake/inputs_ionization_SI
@@ -1,7 +1,7 @@
 amr.n_cell = 64 64 100
 
 hipace.predcorr_max_iterations = 1
-hipace.predcorr_B_mixing_factor = 0.10
+hipace.predcorr_B_mixing_factor = 0.12
 hipace.predcorr_B_error_tolerance = -1
 
 my_constants.ne = 1.25e24
diff --git a/examples/blowout_wake/inputs_normalized b/examples/blowout_wake/inputs_normalized
index 36ac28c2e1..c973902a3e 100644
--- a/examples/blowout_wake/inputs_normalized
+++ b/examples/blowout_wake/inputs_normalized
@@ -2,7 +2,7 @@ amr.n_cell = 64 64 100
 
 hipace.normalized_units=1
 hipace.predcorr_max_iterations = 1
-hipace.predcorr_B_mixing_factor = 0.10
+hipace.predcorr_B_mixing_factor = 0.12
 hipace.predcorr_B_error_tolerance = -1
 
 amr.blocking_factor = 2
diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 18725e579e..8cf3722ed2 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -168,10 +168,13 @@ public:
      * \param[in] lev current level
      * \param[in] component which can be Psi or rho
      * \param[in] islice longitudinal slice
+     * \param[in] outer_edge number of ghost cells to write to
+     * \param[in] inner_edge number of problem cells to write to
      */
     void InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, const int lev,
-                                    std::string component, const int islice);
-
+                                    std::string component, const int islice,
+                                    const amrex::IntVect outer_edge,
+                                    const amrex::IntVect inner_edge);
 
     /** \brief Compute ExmBy and EypBx on the slice container from J by solving a Poisson equation
      * ExmBy and EypBx are solved in the same function because both rely on Psi.
@@ -268,6 +271,7 @@ public:
 
     /** Number of guard cells for slices MultiFab */
     static amrex::IntVect m_slices_nguards;
+    static amrex::IntVect m_poisson_nguards;
 private:
     /** Vector over levels, array of 4 slices required to compute current slice */
     amrex::Vector<std::array<amrex::MultiFab, m_nslices>> m_slices;
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index d47adf22a7..a23dac348d 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -7,6 +7,7 @@
 #include "particles/ShapeFactors.H"
 
 amrex::IntVect Fields::m_slices_nguards = {-1, -1, -1};
+amrex::IntVect Fields::m_poisson_nguards = {-1, -1, -1};
 
 Fields::Fields (Hipace const* a_hipace)
     : m_slices(a_hipace->maxLevel()+1)
@@ -26,8 +27,9 @@ Fields::AllocData (
         "Parallel field solvers not supported yet");
 
     // Need 1 extra guard cell transversally for transverse derivative
-    int nguards_xy = Hipace::m_depos_order_xy + 1;
+    int nguards_xy = std::max(1, Hipace::m_depos_order_xy);
     m_slices_nguards = {nguards_xy, nguards_xy, 0};
+    m_poisson_nguards = {0, 0, 0};
 
     for (int islice=0; islice<WhichSlice::N; islice++) {
         m_slices[lev][islice].define(
@@ -193,8 +195,8 @@ struct interpolated_field {
 template<class FVA, class FVB>
 void
 FieldOperation (const amrex::IntVect box_grow, FieldView dst,
-                const amrex::Real factor_a, const FVA src_a,
-                const amrex::Real factor_b, const FVB src_b)
+                const amrex::Real factor_a, const FVA& src_a,
+                const amrex::Real factor_b, const FVB& src_b)
 {
     HIPACE_PROFILE("Fields::FieldOperation()");
 
@@ -369,7 +371,7 @@ Fields::AddBeamCurrents (const int lev, const int which_slice)
 template<class Functional>
 void
 SetDirichletBoundaries (amrex::Array4<amrex::Real> dst, const amrex::Box& solver_size,
-                        const amrex::Geometry& geom, const Functional boundary_value)
+                        const amrex::Geometry& geom, const Functional& boundary_value)
 {
     const int box_len0 = solver_size.length(0);
     const int box_len1 = solver_size.length(1);
@@ -430,21 +432,22 @@ Fields::SetRefinedBoundaries (amrex::Vector<amrex::Geometry> const& geom, const
     for (amrex::MFIter mfi(staging_area.m_mfab, false); mfi.isValid(); ++mfi)
     {
         const auto arr_solution_interp = solution_interp.array(mfi);
-        auto arr_staging_area = staging_area.array(mfi);
-        const amrex::Box fine_box = staging_area.m_mfab[mfi].box();
+        const auto arr_staging_area = staging_area.array(mfi);
+        const amrex::Box fine_staging_box = staging_area.m_mfab[mfi].box();
 
-        SetDirichletBoundaries(arr_staging_area, fine_box, geom[lev], arr_solution_interp);
+        SetDirichletBoundaries(arr_staging_area, fine_staging_box, geom[lev], arr_solution_interp);
     }
 }
 
 
 void
 Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, const int lev,
-                                   std::string component, const int islice)
+                                   std::string component, const int islice,
+                                   const amrex::IntVect outer_edge, const amrex::IntVect inner_edge)
 {
     if (lev == 0) return; // only interpolate boundaries to lev 1
-    using namespace amrex::literals;
     constexpr int interp_order = 2;
+    if (outer_edge == inner_edge) return;
 
     const amrex::Real ref_ratio_z = geom[lev-1].CellSize(2) / geom[lev].CellSize(2);
     const amrex::Real islice_coarse_real = islice / ref_ratio_z;
@@ -461,10 +464,9 @@ Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, c
     {
         auto arr_field_coarse_interp = field_coarse_interp.array(mfi);
         auto arr_field_fine = field_fine.array(mfi);
+        const amrex::Box fine_box_extended = mfi.growntilebox(outer_edge);
+        const amrex::Box fine_box_narrow = mfi.growntilebox(inner_edge);
 
-        const amrex::Box fine_box_ghost = field_fine.m_mfab[mfi].box();
-        amrex::Box fine_box_narrow = fine_box_ghost;
-        fine_box_narrow.grow(-2*m_slices_nguards);
         const int narrow_i_lo = fine_box_narrow.smallEnd(0);
         const int narrow_i_hi = fine_box_narrow.bigEnd(0);
         const int narrow_j_lo = fine_box_narrow.smallEnd(1);
@@ -472,13 +474,13 @@ Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, c
 
         const amrex::Real dx = geom[lev].CellSize(0);
         const amrex::Real dy = geom[lev].CellSize(1);
-        const amrex::Real offset0 = GetPosOffset(0, geom[lev], fine_box_ghost);
-        const amrex::Real offset1 = GetPosOffset(1, geom[lev], fine_box_ghost);
+        const amrex::Real offset0 = GetPosOffset(0, geom[lev], fine_box_extended);
+        const amrex::Real offset1 = GetPosOffset(1, geom[lev], fine_box_extended);
 
-        amrex::ParallelFor(fine_box_ghost,
+        amrex::ParallelFor(fine_box_extended,
             [=] AMREX_GPU_DEVICE (int i, int j , int k) noexcept
             {
-                if(i<narrow_i_lo || i>narrow_i_hi || j<narrow_j_lo ||j>narrow_j_hi) {
+                if(i<narrow_i_lo || i>narrow_i_hi || j<narrow_j_lo || j>narrow_j_hi) {
                     amrex::Real x = i * dx + offset0;
                     amrex::Real y = j * dy + offset1;
                     arr_field_fine(i,j,k) = arr_field_coarse_interp(x,y);
@@ -503,11 +505,10 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
     amrex::MultiFab lhs(getSlices(lev, WhichSlice::This), amrex::make_alias,
                         Comps[WhichSlice::This]["Psi"], 1);
 
-    InterpolateFromLev0toLev1(geom, lev, "rho", islice);
-    // TODO: InterpolateFromLev0toLev1 jz
+    InterpolateFromLev0toLev1(geom, lev, "rho", islice, m_poisson_nguards, -m_slices_nguards);
 
     // calculating the right-hand side 1/episilon0 * -(rho-Jz/c)
-    FieldOperation(m_slices_nguards, getStagingArea(lev),
+    FieldOperation(m_poisson_nguards, getStagingArea(lev),
                    1./(phys_const.c*phys_const.ep0), getField(lev, WhichSlice::This, "jz"),
                    -1./(phys_const.ep0), getField(lev, WhichSlice::This, "rho"));
 
@@ -516,7 +517,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
 
     /* ---------- Transverse FillBoundary Psi ---------- */
 
-    //InterpolateFromLev0toLev1(geom, lev, "Psi", islice);
+    InterpolateFromLev0toLev1(geom, lev, "Psi", islice, m_slices_nguards, m_poisson_nguards);
 
     /* Compute ExmBy and Eypbx from grad(-psi) */
     FieldView f_ExmBy = getField(lev, WhichSlice::This, "ExmBy");
@@ -534,8 +535,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
         const amrex::Real dx_inv = 1./(2*geom[lev].CellSize(Direction::x));
         const amrex::Real dy_inv = 1./(2*geom[lev].CellSize(Direction::y));
 
-        amrex::ParallelFor(
-            bx,
+        amrex::ParallelFor(bx,
             [=] AMREX_GPU_DEVICE(int i, int j, int k)
             {
                 array_ExmBy(i,j,k) = - (array_Psi(i+1,j,k) - array_Psi(i-1,j,k))*dx_inv;
@@ -555,11 +555,10 @@ Fields::SolvePoissonEz (amrex::Vector<amrex::Geometry> const& geom, const int le
     // Left-Hand Side for Poisson equation is Bz in the slice MF
     amrex::MultiFab lhs(getSlices(lev, WhichSlice::This), amrex::make_alias,
                         Comps[WhichSlice::This]["Ez"], 1);
-    // TODO: InterpolateFromLev0toLev1 jx, jy
 
     // Right-Hand Side for Poisson equation: compute 1/(episilon0 *c0 )*(d_x(jx) + d_y(jy))
     // from the slice MF, and store in the staging area of poisson_solver
-    FieldOperation(m_slices_nguards, getStagingArea(lev),
+    FieldOperation(m_poisson_nguards, getStagingArea(lev),
                    1./(phys_const.ep0*phys_const.c),
                    derivative<Direction::x>{getField(lev, WhichSlice::This, "jx"), geom[lev]},
                    1./(phys_const.ep0*phys_const.c),
@@ -581,11 +580,10 @@ Fields::SolvePoissonBx (amrex::MultiFab& Bx_iter, amrex::Vector<amrex::Geometry>
     HIPACE_PROFILE("Fields::SolvePoissonBx()");
 
     PhysConst phys_const = get_phys_const();
-    // TODO: InterpolateFromLev0toLev1 jz, jy
 
     // Right-Hand Side for Poisson equation: compute -mu_0*d_y(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    FieldOperation(m_slices_nguards, getStagingArea(lev),
+    FieldOperation(m_poisson_nguards, getStagingArea(lev),
                    -phys_const.mu0,
                    derivative<Direction::y>{getField(lev, WhichSlice::This, "jz"), geom[lev]},
                    phys_const.mu0,
@@ -608,11 +606,10 @@ Fields::SolvePoissonBy (amrex::MultiFab& By_iter, amrex::Vector<amrex::Geometry>
     HIPACE_PROFILE("Fields::SolvePoissonBy()");
 
     PhysConst phys_const = get_phys_const();
-    // TODO: InterpolateFromLev0toLev1 jz, jx
 
     // Right-Hand Side for Poisson equation: compute mu_0*d_x(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    FieldOperation(m_slices_nguards, getStagingArea(lev),
+    FieldOperation(m_poisson_nguards, getStagingArea(lev),
                    phys_const.mu0,
                    derivative<Direction::x>{getField(lev, WhichSlice::This, "jz"), geom[lev]},
                    -phys_const.mu0,
@@ -637,11 +634,10 @@ Fields::SolvePoissonBz (amrex::Vector<amrex::Geometry> const& geom, const int le
     // Left-Hand Side for Poisson equation is Bz in the slice MF
     amrex::MultiFab lhs(getSlices(lev, WhichSlice::This), amrex::make_alias,
                         Comps[WhichSlice::This]["Bz"], 1);
-    // TODO: InterpolateFromLev0toLev1 jx, jy
 
     // Right-Hand Side for Poisson equation: compute mu_0*(d_y(jx) - d_x(jy))
     // from the slice MF, and store in the staging area of m_poisson_solver
-    FieldOperation(m_slices_nguards, getStagingArea(lev),
+    FieldOperation(m_poisson_nguards, getStagingArea(lev),
                    phys_const.mu0,
                    derivative<Direction::y>{getField(lev, WhichSlice::This, "jx"), geom[lev]},
                    -phys_const.mu0,
diff --git a/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp b/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
index 98d17f4e7f..2994e73140 100644
--- a/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
+++ b/src/fields/fft_poisson_solver/FFTPoissonSolverDirichlet.cpp
@@ -28,11 +28,11 @@ FFTPoissonSolverDirichlet::define (amrex::BoxArray const& a_realspace_ba,
     // These arrays will store the data just before/after the FFT
     // The stagingArea is also created from 0 to nx, because the real space array may have
     // an offset for levels > 0
-    m_stagingArea = amrex::MultiFab(a_realspace_ba, dm, 1, Fields::m_slices_nguards);
-    m_tmpSpectralField = amrex::MultiFab(a_realspace_ba, dm, 1, Fields::m_slices_nguards);
-    m_eigenvalue_matrix = amrex::MultiFab(a_realspace_ba, dm, 1, Fields::m_slices_nguards);
-    m_stagingArea.setVal(0.0, Fields::m_slices_nguards); // this is not required
-    m_tmpSpectralField.setVal(0.0, Fields::m_slices_nguards);
+    m_stagingArea = amrex::MultiFab(a_realspace_ba, dm, 1, Fields::m_poisson_nguards);
+    m_tmpSpectralField = amrex::MultiFab(a_realspace_ba, dm, 1, Fields::m_poisson_nguards);
+    m_eigenvalue_matrix = amrex::MultiFab(a_realspace_ba, dm, 1, Fields::m_poisson_nguards);
+    m_stagingArea.setVal(0.0, Fields::m_poisson_nguards); // this is not required
+    m_tmpSpectralField.setVal(0.0, Fields::m_poisson_nguards);
 
     // This must be true even for parallel FFT.
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(m_stagingArea.local_size() == 1,
@@ -117,7 +117,7 @@ FFTPoissonSolverDirichlet::SolvePoissonEquation (amrex::MultiFab& lhs_mf)
         amrex::Array4<amrex::Real> lhs_arr = lhs_mf.array(mfi);
         AMREX_ALWAYS_ASSERT_WITH_MESSAGE(lhs_mf.size() == 1,
                                          "Slice MFs must be defined on one box only");
-        amrex::ParallelFor( lhs_mf[mfi].box(),
+        amrex::ParallelFor( lhs_mf[mfi].box() & m_stagingArea[mfi].box(),
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
                 // Copy field
                 lhs_arr(i,j,k) = tmp_real_arr(i,j,k);

From 7281c99c4b07349ed8060b8cb45adad7692248ca Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 9 Dec 2021 16:04:59 +0100
Subject: [PATCH 11/52] add FillBoundary back in

---
 src/Hipace.cpp        | 22 +++++++++++-----------
 src/fields/Fields.H   |  2 +-
 src/fields/Fields.cpp | 14 ++++++++------
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index 5fc9828824..dd4329edbe 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -531,7 +531,7 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                                  WhichSlice::Next);
                 m_fields.AddBeamCurrents(lev, WhichSlice::Next);
                 // need to exchange jx jy jx_beam jy_beam
-                //j_slice_next.FillBoundary(Geom(lev).periodicity());
+                j_slice_next.FillBoundary(Geom(lev).periodicity());
             }
 
             m_fields.AddRhoIons(lev);
@@ -551,9 +551,9 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                  ijz == ijx+4 && ijz_beam == ijx+5 && irho == ijx+6 );
             amrex::MultiFab j_slice(m_fields.getSlices(lev, WhichSlice::This),
                                     amrex::make_alias, Comps[WhichSlice::This]["jx"], 7);
-            //j_slice.FillBoundary(Geom(lev).periodicity());
+            j_slice.FillBoundary(Geom(lev).periodicity());
 
-            m_fields.SolvePoissonExmByAndEypBx(Geom(), lev, islice);
+            m_fields.SolvePoissonExmByAndEypBx(Geom(), m_comm_xy, lev, islice);
 
             m_grid_current.DepositCurrentSlice(m_fields, geom[lev], lev, islice);
             m_multi_beam.DepositCurrentSlice(m_fields, geom, lev, islice_local, bx, bins[lev],
@@ -561,7 +561,7 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                              WhichSlice::This);
             m_fields.AddBeamCurrents(lev, WhichSlice::This);
 
-            //j_slice.FillBoundary(Geom(lev).periodicity());
+            j_slice.FillBoundary(Geom(lev).periodicity());
 
             m_fields.SolvePoissonEz(Geom(), lev, islice);
             m_fields.SolvePoissonBz(Geom(), lev, islice);
@@ -850,10 +850,10 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
 
     /* Guess Bx and By */
     m_fields.InitialBfieldGuess(relative_Bfield_error, m_predcorr_B_error_tolerance, lev);
-    //amrex::ParallelContext::push(m_comm_xy);
+    amrex::ParallelContext::push(m_comm_xy);
      // exchange ExmBy EypBx Ez Bx By Bz
-    //m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
-    //amrex::ParallelContext::pop();
+    m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
+    amrex::ParallelContext::pop();
 
     /* creating temporary Bx and By arrays for the current and previous iteration */
     amrex::MultiFab Bx_iter(m_fields.getSlices(lev, WhichSlice::This).boxArray(),
@@ -918,7 +918,7 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
         // need to exchange jx jy jx_beam jy_beam
         amrex::MultiFab j_slice_next(m_fields.getSlices(lev, WhichSlice::Next),
                                      amrex::make_alias, Comps[WhichSlice::Next]["jx"], 4);
-        //j_slice_next.FillBoundary(Geom(lev).periodicity());
+        j_slice_next.FillBoundary(Geom(lev).periodicity());
         amrex::ParallelContext::pop();
 
         /* Calculate Bx and By */
@@ -948,10 +948,10 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
         jx_beam_next.setVal(0., m_fields.m_slices_nguards);
         jy_beam_next.setVal(0., m_fields.m_slices_nguards);
 
-        //amrex::ParallelContext::push(m_comm_xy);
+        amrex::ParallelContext::push(m_comm_xy);
          // exchange Bx By
-        //m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
-        //amrex::ParallelContext::pop();
+        m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
+        amrex::ParallelContext::pop();
 
         /* Update force terms using the calculated Bx and By */
         m_multi_plasma.AdvanceParticles(m_fields, geom[lev], false, false, true, false, lev);
diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 8cf3722ed2..1534ed61e0 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -184,7 +184,7 @@ public:
      * \param[in] islice longitudinal slice
      */
     void SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
-                                    const int lev, const int islice);
+                                    const MPI_Comm& m_comm_xy, const int lev, const int islice);
     /** \brief Compute Ez on the slice container from J by solving a Poisson equation
      *
      * \param[in] geom Geometry
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index a23dac348d..4743c9a559 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -12,7 +12,6 @@ amrex::IntVect Fields::m_poisson_nguards = {-1, -1, -1};
 Fields::Fields (Hipace const* a_hipace)
     : m_slices(a_hipace->maxLevel()+1)
 {
-    const int max_lev = a_hipace->maxLevel()+1;
     amrex::ParmParse ppf("fields");
     queryWithParser(ppf, "do_dirichlet_poisson", m_do_dirichlet_poisson);
 }
@@ -389,10 +388,10 @@ SetDirichletBoundaries (amrex::Array4<amrex::Real> dst, const amrex::Box& solver
         [=] AMREX_GPU_DEVICE (int i, int j, int) noexcept
         {
             const bool i_is_changing = (i < box_len0);
-            const bool i_lo_edge = (!i_is_changing)*(!j);
-            const bool i_hi_edge = (!i_is_changing)*j;
-            const bool j_lo_edge = i_is_changing*(!j);
-            const bool j_hi_edge = i_is_changing*j;
+            const bool i_lo_edge = (!i_is_changing) && (!j);
+            const bool i_hi_edge = (!i_is_changing) && j;
+            const bool j_lo_edge = i_is_changing && (!j);
+            const bool j_hi_edge = i_is_changing && j;
 
             const int i_idx = box_lo0 + i_hi_edge*(box_len0-1) + i_is_changing*i;
             const int j_idx = box_lo1 + j_hi_edge*(box_len1-1) + (!i_is_changing)*(i-box_len0);
@@ -492,7 +491,7 @@ Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, c
 
 void
 Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
-                                   const int lev, const int islice)
+                                   const MPI_Comm& m_comm_xy, const int lev, const int islice)
 {
     /* Solves Laplacian(Psi) =  1/episilon0 * -(rho-Jz/c) and
      * calculates Ex-c By, Ey + c Bx from  grad(-Psi)
@@ -516,6 +515,9 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
     m_poisson_solver[lev]->SolvePoissonEquation(lhs);
 
     /* ---------- Transverse FillBoundary Psi ---------- */
+    amrex::ParallelContext::push(m_comm_xy);
+    lhs.FillBoundary(geom[lev].periodicity());
+    amrex::ParallelContext::pop();
 
     InterpolateFromLev0toLev1(geom, lev, "Psi", islice, m_slices_nguards, m_poisson_nguards);
 

From 272b402809a4eb4c240b14ce82ee040e64278597 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 9 Dec 2021 17:06:11 +0100
Subject: [PATCH 12/52] put error back to pass tests

---
 examples/blowout_wake/inputs_ionization_SI | 1 +
 src/Hipace.cpp                             | 4 ++--
 src/fields/Fields.H                        | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/blowout_wake/inputs_ionization_SI b/examples/blowout_wake/inputs_ionization_SI
index 33f827cd63..5611476255 100644
--- a/examples/blowout_wake/inputs_ionization_SI
+++ b/examples/blowout_wake/inputs_ionization_SI
@@ -1,3 +1,4 @@
+
 amr.n_cell = 64 64 100
 
 hipace.predcorr_max_iterations = 1
diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index dd4329edbe..d985a22201 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -504,8 +504,8 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                 const int iby = Comps[WhichSlice::This]["By"];
                 const int nc = Comps[WhichSlice::This]["N"];
                 AMREX_ALWAYS_ASSERT( iby == ibx+1 );
-                m_fields.getSlices(lev, WhichSlice::This).setVal(0., 0, ibx, m_fields.m_slices_nguards);
-                m_fields.getSlices(lev, WhichSlice::This).setVal(0., iby+1, nc-iby-1, m_fields.m_slices_nguards);
+                m_fields.getSlices(lev, WhichSlice::This).setVal(0., 0, ibx /*, m_fields.m_slices_nguards*/);
+                m_fields.getSlices(lev, WhichSlice::This).setVal(0., iby+1, nc-iby-1 /*, m_fields.m_slices_nguards*/);
             } else {
                 m_fields.getSlices(lev, WhichSlice::This).setVal(0., m_fields.m_slices_nguards);
             }
diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 1534ed61e0..b4542dd207 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -180,6 +180,7 @@ public:
      * ExmBy and EypBx are solved in the same function because both rely on Psi.
      *
      * \param[in] geom Geometry
+     * \param[in] m_comm_xy transverse communicator on the slice
      * \param[in] lev current level
      * \param[in] islice longitudinal slice
      */

From 8a13ccffb42de6e75a0881e28e197a9be59838d2 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 9 Dec 2021 21:10:07 +0100
Subject: [PATCH 13/52] slightly change box

---
 src/fields/Fields.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 4743c9a559..cb7c1cdf8a 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -533,7 +533,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
         const amrex::Array4<amrex::Real> array_ExmBy = f_ExmBy.array(mfi);
         const amrex::Array4<amrex::Real> array_EypBx = f_EypBx.array(mfi);
         const amrex::Array4<amrex::Real const> array_Psi = f_Psi.array(mfi);
-        const amrex::Box bx = mfi.growntilebox(m_slices_nguards - amrex::IntVect{1, 1, 0});
+        const amrex::Box bx = mfi.growntilebox(amrex::IntVect{0, 0, 0});
         const amrex::Real dx_inv = 1./(2*geom[lev].CellSize(Direction::x));
         const amrex::Real dy_inv = 1./(2*geom[lev].CellSize(Direction::y));
 

From 99875d8916217fcf3a1f528aa6ba28c2ddcdd0a5 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sat, 11 Dec 2021 17:04:46 +0100
Subject: [PATCH 14/52] add header doc

---
 src/fields/Fields.H   | 53 ++++++++++++++++++++++++++++----------
 src/fields/Fields.cpp | 60 +++++++++++++++++--------------------------
 2 files changed, 64 insertions(+), 49 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index b4542dd207..de4e6a0595 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -44,20 +44,41 @@ static std::array<std::map<std::string, int>, 5> Comps
             }}
     }};
 
- struct FieldView {
-     amrex::MultiFab& m_mfab;
-     int m_comp;
-
-     auto array (amrex::MFIter& mfi) const {
-         return m_mfab.array(mfi, m_comp);
-     }
- };
-
 /** \brief Direction of each dimension. Can be used for clean handling 2D vs. 3D in the future */
 struct Direction{
     enum dir{x=0, y, z};
 };
 
+/** \brief Helper class to pass non-owning, lightweight views of specific Fields around */
+struct FieldView {
+    // use brace initialization or getField/getStagingArea as constructor
+    amrex::MultiFab& m_mfab;
+    int m_comp;
+
+    /** \brief get amrex::Array4<const? amrex::Real> of the specific Field component
+     * \param[in] lev MR level
+     */
+    auto array (const amrex::MFIter& mfi) const {
+        return m_mfab.array(mfi, m_comp);
+    }
+};
+
+/** \brief Helper for converting field indices to positions and back. Usage:
+ * x = i * dx + GetPosOffset(0, geom, box)
+ * i = round( (x - GetPosOffset(0, geom, box)) / dx )
+ *
+ * \param[in] direction 0, 1 or 2 for x, y or z respectively
+ * \param[in] geom geometry of the field
+ * \param[in] box box the filed, can be with or without ghost cells
+ */
+amrex::Real GetPosOffset (const int direction, const amrex::Geometry& geom, const amrex::Box& box) {
+    using namespace amrex::literals;
+    // match boxes at center point
+    return 0.5_rt * (geom.ProbLo(direction) + geom.ProbHi(direction)
+           - geom.CellSize(direction) * (box.smallEnd(direction) + box.bigEnd(direction)));
+}
+
+
 /** \brief Main class handling all field data structures and operations
  *
  * This is a fundamental class of Hipace, handling initialization of the fields,
@@ -98,15 +119,20 @@ public:
      * \param[in] islice slice index
      */
     amrex::MultiFab& getSlices (int lev, int islice) {return m_slices[lev][islice]; }
-
+    /** get FieldView of a field in a slice
+     * \param[in] lev MR level
+     * \param[in] islice slice index
+     * \param[in] comp component name of field (see Comps)
+     */
     FieldView getField (const int lev, const int islice, const std::string comp) {
         return FieldView{getSlices(lev, islice), Comps[islice][comp]};
     }
-
+    /** get FieldView of the poisson staging area
+     * \param[in] lev MR level
+     */
     FieldView getStagingArea (const int lev) {
         return FieldView{m_poisson_solver[lev]->StagingArea(), 0};
     }
-
     /** Return reference to density tile arrays */
     amrex::Vector<amrex::FArrayBox>& getTmpDensities() { return m_tmp_densities; }
     /** \brief Copy between the full FArrayBox and slice MultiFab.
@@ -159,7 +185,7 @@ public:
      * \param[in] component which can be Psi, Ez, By, Bx ...
      * \param[in] islice longitudinal slice
      */
-    void SetRefinedBoundaries (amrex::Vector<amrex::Geometry> const& geom, const int lev,
+    void SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const int lev,
                                std::string component, const int islice);
 
     /** \brief Interpolate values from coarse grid to the fine grid
@@ -272,6 +298,7 @@ public:
 
     /** Number of guard cells for slices MultiFab */
     static amrex::IntVect m_slices_nguards;
+    /** Number of guard cells for poisson solver MultiFab */
     static amrex::IntVect m_poisson_nguards;
 private:
     /** Vector over levels, array of 4 slices required to compute current slice */
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index cb7c1cdf8a..30ede29ed6 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -28,6 +28,7 @@ Fields::AllocData (
     // Need 1 extra guard cell transversally for transverse derivative
     int nguards_xy = std::max(1, Hipace::m_depos_order_xy);
     m_slices_nguards = {nguards_xy, nguards_xy, 0};
+    // Poisson solver same size as domain, no ghost cells
     m_poisson_nguards = {0, 0, 0};
 
     for (int islice=0; islice<WhichSlice::N; islice++) {
@@ -73,13 +74,6 @@ Fields::AllocData (
     }
 }
 
-// x = i * dx + GetPosOffset(0, geom, box);
-// i = (x - GetPosOffset(0, geom, box))/dx;
-amrex::Real GetPosOffset (const int direction, const amrex::Geometry& geom, const amrex::Box& box) {
-    using namespace amrex::literals;
-    return 0.5_rt*(geom.ProbLo(direction) + geom.ProbHi(direction)
-           - geom.CellSize(direction) * (box.smallEnd(direction) + box.bigEnd(direction)));
-}
 
 template<int dir>
 struct derivative_GPU {
@@ -193,11 +187,11 @@ struct interpolated_field {
 
 template<class FVA, class FVB>
 void
-FieldOperation (const amrex::IntVect box_grow, FieldView dst,
+LinCombination (const amrex::IntVect box_grow, FieldView dst,
                 const amrex::Real factor_a, const FVA& src_a,
                 const amrex::Real factor_b, const FVB& src_b)
 {
-    HIPACE_PROFILE("Fields::FieldOperation()");
+    HIPACE_PROFILE("Fields::LinCombination()");
 
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
@@ -207,8 +201,7 @@ FieldOperation (const amrex::IntVect box_grow, FieldView dst,
         const auto src_a_array = src_a.array(mfi);
         const auto src_b_array = src_b.array(mfi);
         const amrex::Box bx = mfi.growntilebox(box_grow);
-        amrex::ParallelFor(
-            bx,
+        amrex::ParallelFor(bx,
             [=] AMREX_GPU_DEVICE(int i, int j, int k)
             {
                 dst_array(i,j,k) = factor_a * src_a_array(i,j,k) + factor_b * src_b_array(i,j,k);
@@ -410,17 +403,16 @@ SetDirichletBoundaries (amrex::Array4<amrex::Real> dst, const amrex::Box& solver
 }
 
 void
-Fields::SetRefinedBoundaries (amrex::Vector<amrex::Geometry> const& geom, const int lev,
+Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const int lev,
                               std::string component, const int islice)
 {
-    HIPACE_PROFILE("Fields::SetRefinedBoundaries()");
-    if (lev == 0) return; // only interpolate boundaries to lev 1
+    if (lev == 0) return; // keep lev=0 boundaries zero
+    HIPACE_PROFILE("Fields::SetBoundaryCondition()");
     constexpr int interp_order = 2;
 
     const amrex::Real ref_ratio_z = geom[lev-1].CellSize(2) / geom[lev].CellSize(2);
-    const amrex::Real islice_coarse_real = islice / ref_ratio_z;
-    const int islice_coarse_int = islice_coarse_real;
-    const amrex::Real rel_z = islice_coarse_real - islice_coarse_int;
+    const amrex::Real islice_coarse = islice / ref_ratio_z;
+    const amrex::Real rel_z = islice_coarse - static_cast<int>(amrex::Math::floor(islice_coarse));
 
     auto solution_interp = interpolated_field<interp_order>{
         getField(lev-1, WhichSlice::This, component),
@@ -445,13 +437,13 @@ Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, c
                                    const amrex::IntVect outer_edge, const amrex::IntVect inner_edge)
 {
     if (lev == 0) return; // only interpolate boundaries to lev 1
+    HIPACE_PROFILE("Fields::InterpolateFromLev0toLev1()");
     constexpr int interp_order = 2;
     if (outer_edge == inner_edge) return;
 
     const amrex::Real ref_ratio_z = geom[lev-1].CellSize(2) / geom[lev].CellSize(2);
-    const amrex::Real islice_coarse_real = islice / ref_ratio_z;
-    const int islice_coarse_int = islice_coarse_real;
-    const amrex::Real rel_z = islice_coarse_real - islice_coarse_int;
+    const amrex::Real islice_coarse = islice / ref_ratio_z;
+    const amrex::Real rel_z = islice_coarse - static_cast<int>(amrex::Math::floor(islice_coarse));
 
     auto field_coarse_interp = interpolated_field<interp_order>{
         getField(lev-1, WhichSlice::This, component),
@@ -507,11 +499,11 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
     InterpolateFromLev0toLev1(geom, lev, "rho", islice, m_poisson_nguards, -m_slices_nguards);
 
     // calculating the right-hand side 1/episilon0 * -(rho-Jz/c)
-    FieldOperation(m_poisson_nguards, getStagingArea(lev),
+    LinCombination(m_poisson_nguards, getStagingArea(lev),
                    1./(phys_const.c*phys_const.ep0), getField(lev, WhichSlice::This, "jz"),
                    -1./(phys_const.ep0), getField(lev, WhichSlice::This, "rho"));
 
-    SetRefinedBoundaries(geom, lev, "Psi", islice);
+    SetBoundaryCondition(geom, lev, "Psi", islice);
     m_poisson_solver[lev]->SolvePoissonEquation(lhs);
 
     /* ---------- Transverse FillBoundary Psi ---------- */
@@ -560,14 +552,13 @@ Fields::SolvePoissonEz (amrex::Vector<amrex::Geometry> const& geom, const int le
 
     // Right-Hand Side for Poisson equation: compute 1/(episilon0 *c0 )*(d_x(jx) + d_y(jy))
     // from the slice MF, and store in the staging area of poisson_solver
-    FieldOperation(m_poisson_nguards, getStagingArea(lev),
+    LinCombination(m_poisson_nguards, getStagingArea(lev),
                    1./(phys_const.ep0*phys_const.c),
                    derivative<Direction::x>{getField(lev, WhichSlice::This, "jx"), geom[lev]},
                    1./(phys_const.ep0*phys_const.c),
                    derivative<Direction::y>{getField(lev, WhichSlice::This, "jy"), geom[lev]});
 
-
-    SetRefinedBoundaries(geom, lev, "Ez", islice);
+    SetBoundaryCondition(geom, lev, "Ez", islice);
     // Solve Poisson equation.
     // The RHS is in the staging area of poisson_solver.
     // The LHS will be returned as lhs.
@@ -585,18 +576,17 @@ Fields::SolvePoissonBx (amrex::MultiFab& Bx_iter, amrex::Vector<amrex::Geometry>
 
     // Right-Hand Side for Poisson equation: compute -mu_0*d_y(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    FieldOperation(m_poisson_nguards, getStagingArea(lev),
+    LinCombination(m_poisson_nguards, getStagingArea(lev),
                    -phys_const.mu0,
                    derivative<Direction::y>{getField(lev, WhichSlice::This, "jz"), geom[lev]},
                    phys_const.mu0,
                    derivative<Direction::z>{getField(lev, WhichSlice::Previous1, "jy"),
                    getField(lev, WhichSlice::Next, "jy"), geom[lev]});
 
-
-    SetRefinedBoundaries(geom, lev, "Bx", islice);
+    SetBoundaryCondition(geom, lev, "Bx", islice);
     // Solve Poisson equation.
     // The RHS is in the staging area of poisson_solver.
-    // The LHS will be returned as lhs.
+    // The LHS will be returned as Bx_iter.
     m_poisson_solver[lev]->SolvePoissonEquation(Bx_iter);
 }
 
@@ -611,18 +601,17 @@ Fields::SolvePoissonBy (amrex::MultiFab& By_iter, amrex::Vector<amrex::Geometry>
 
     // Right-Hand Side for Poisson equation: compute mu_0*d_x(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    FieldOperation(m_poisson_nguards, getStagingArea(lev),
+    LinCombination(m_poisson_nguards, getStagingArea(lev),
                    phys_const.mu0,
                    derivative<Direction::x>{getField(lev, WhichSlice::This, "jz"), geom[lev]},
                    -phys_const.mu0,
                    derivative<Direction::z>{getField(lev, WhichSlice::Previous1, "jx"),
                    getField(lev, WhichSlice::Next, "jx"), geom[lev]});
 
-
-    SetRefinedBoundaries(geom, lev, "By", islice);
+    SetBoundaryCondition(geom, lev, "By", islice);
     // Solve Poisson equation.
     // The RHS is in the staging area of poisson_solver.
-    // The LHS will be returned as lhs.
+    // The LHS will be returned as By_iter.
     m_poisson_solver[lev]->SolvePoissonEquation(By_iter);
 }
 
@@ -639,14 +628,13 @@ Fields::SolvePoissonBz (amrex::Vector<amrex::Geometry> const& geom, const int le
 
     // Right-Hand Side for Poisson equation: compute mu_0*(d_y(jx) - d_x(jy))
     // from the slice MF, and store in the staging area of m_poisson_solver
-    FieldOperation(m_poisson_nguards, getStagingArea(lev),
+    LinCombination(m_poisson_nguards, getStagingArea(lev),
                    phys_const.mu0,
                    derivative<Direction::y>{getField(lev, WhichSlice::This, "jx"), geom[lev]},
                    -phys_const.mu0,
                    derivative<Direction::x>{getField(lev, WhichSlice::This, "jy"), geom[lev]});
 
-
-    SetRefinedBoundaries(geom, lev, "Bz", islice);
+    SetBoundaryCondition(geom, lev, "Bz", islice);
     // Solve Poisson equation.
     // The RHS is in the staging area of m_poisson_solver.
     // The LHS will be returned as lhs.

From 244955c22d23a78ed42c478ca6f48282c848d4f3 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sat, 11 Dec 2021 17:19:38 +0100
Subject: [PATCH 15/52] fix header doc

---
 src/fields/Fields.H | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index de4e6a0595..69d6cd2844 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -56,7 +56,7 @@ struct FieldView {
     int m_comp;
 
     /** \brief get amrex::Array4<const? amrex::Real> of the specific Field component
-     * \param[in] lev MR level
+     * \param[in] mfi MFIter object of this or a related field
      */
     auto array (const amrex::MFIter& mfi) const {
         return m_mfab.array(mfi, m_comp);
@@ -71,7 +71,8 @@ struct FieldView {
  * \param[in] geom geometry of the field
  * \param[in] box box the filed, can be with or without ghost cells
  */
-amrex::Real GetPosOffset (const int direction, const amrex::Geometry& geom, const amrex::Box& box) {
+inline amrex::Real
+GetPosOffset (const int direction, const amrex::Geometry& geom, const amrex::Box& box) {
     using namespace amrex::literals;
     // match boxes at center point
     return 0.5_rt * (geom.ProbLo(direction) + geom.ProbHi(direction)

From fc392b562d2f2351637cbed4db6eb4cb3d947e07 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sat, 11 Dec 2021 18:44:11 +0100
Subject: [PATCH 16/52] add doc in Fields.cpp

---
 src/fields/Fields.H   |  8 ++++--
 src/fields/Fields.cpp | 66 +++++++++++++++++++++++++++++++++----------
 2 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 69d6cd2844..bd6c527f17 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -179,7 +179,9 @@ public:
      */
     void AddBeamCurrents (const int lev, const int which_slice);
 
-    /** \brief Interpolate values at boundaries from coarse grid to the fine grid
+    /** \brief Set up boundary conditions before poisson solve
+     * lev==0: leave at zero
+     * lev!=0: interpolate boundaries from lev-1
      *
      * \param[in] geom Geometry
      * \param[in] lev current level
@@ -195,8 +197,8 @@ public:
      * \param[in] lev current level
      * \param[in] component which can be Psi or rho
      * \param[in] islice longitudinal slice
-     * \param[in] outer_edge number of ghost cells to write to
-     * \param[in] inner_edge number of problem cells to write to
+     * \param[in] outer_edge start writing interpolated values at domain + outer_edge
+     * \param[in] inner_edge stop writing interpolated values at domain + inner_edge
      */
     void InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, const int lev,
                                     std::string component, const int islice,
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 30ede29ed6..39fab8aff0 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -74,14 +74,16 @@ Fields::AllocData (
     }
 }
 
-
 template<int dir>
 struct derivative_GPU {
+    // captured variables for GPU
     amrex::Array4<amrex::Real const> array;
     amrex::Real dx_inv;
     int box_lo;
     int box_hi;
 
+    // derivative of field in dir direction (x or y)
+    // the field is zero-extended such that this derivative can be accessed on the same box
     AMREX_GPU_DEVICE amrex::Real operator() (int i, int j, int k) const noexcept {
         constexpr bool is_x_dir = dir == Direction::x;
         constexpr bool is_y_dir = dir == Direction::y;
@@ -95,21 +97,24 @@ struct derivative_GPU {
 
 template<>
 struct derivative_GPU<Direction::z> {
+    // captured variables for GPU
     amrex::Array4<amrex::Real const> array1;
     amrex::Array4<amrex::Real const> array2;
     amrex::Real dz_inv;
 
+    // derivative of field in z direction
     AMREX_GPU_DEVICE amrex::Real operator() (int i, int j, int k) const noexcept {
         return (array1(i,j,k) - array2(i,j,k)) * dz_inv;
     }
 };
 
-
 template<int dir>
 struct derivative {
-    FieldView f_view;
-    const amrex::Geometry& geom;
+    // use brace initialization as constructor
+    FieldView f_view; // field to calculate its derivative
+    const amrex::Geometry& geom; // geometry of field
 
+    // use .array(mfi) like with amrex::MultiFab or FieldView
     derivative_GPU<dir> array (amrex::MFIter& mfi) const {
         amrex::Box bx = f_view.m_mfab[mfi].box();
         return derivative_GPU<dir>{f_view.array(mfi),
@@ -119,10 +124,12 @@ struct derivative {
 
 template<>
 struct derivative<Direction::z> {
-    FieldView f_view1;
-    FieldView f_view2;
-    const amrex::Geometry& geom;
+    // use brace initialization as constructor
+    FieldView f_view1; // field on previous slice to calculate its derivative
+    FieldView f_view2; // field on next slice to calculate its derivative
+    const amrex::Geometry& geom; // geometry of field
 
+    // use .array(mfi) like with amrex::MultiFab or FieldView
     derivative_GPU<Direction::z> array (amrex::MFIter& mfi) const {
         return derivative_GPU<Direction::z>{f_view1.array(mfi), f_view2.array(mfi),
             1/(2*geom.CellSize(Direction::z))};
@@ -131,6 +138,7 @@ struct derivative<Direction::z> {
 
 template<int interp_order_xy>
 struct interpolated_field_GPU {
+    // captured variables for GPU
     amrex::Array4<amrex::Real const> arr_this;
     amrex::Array4<amrex::Real const> arr_prev;
     amrex::Real dx_inv;
@@ -140,6 +148,8 @@ struct interpolated_field_GPU {
     amrex::Real rel_z;
     int lo2;
 
+    // interpolate field in x, y with <interp_order_xy> order transversely
+    // and linear order longitudinally. x and y must be inside field box
     AMREX_GPU_DEVICE amrex::Real operator() (amrex::Real x, amrex::Real y) const noexcept {
         using namespace amrex::literals;
 
@@ -154,7 +164,6 @@ struct interpolated_field_GPU {
         const int j_cell = compute_shape_factor<interp_order_xy>(sy_cell, ymid);
 
         amrex::Real field_value = 0.0_rt;
-        // add interpolated contribution to boundary value
         for (int iy=0; iy<=interp_order_xy; iy++){
             for (int ix=0; ix<=interp_order_xy; ix++){
                 field_value += sx_cell[ix]*sy_cell[iy]*
@@ -170,11 +179,13 @@ struct interpolated_field_GPU {
 
 template<int interp_order_xy>
 struct interpolated_field {
-    FieldView f_view_this;
-    FieldView f_view_prev;
-    const amrex::Geometry& geom;
-    amrex::Real rel_z;
+    // use brace initialization as constructor
+    FieldView f_view_this; // field to interpolate on this slice
+    FieldView f_view_prev; // field to interpolate on previous slice
+    const amrex::Geometry& geom; // geometry of field
+    amrex::Real rel_z; // mixing factor between f_view_this and f_view_prev for z interpolation
 
+    // use .array(mfi) like with amrex::MultiFab or FieldView
     interpolated_field_GPU<interp_order_xy> array (amrex::MFIter& mfi) const {
         amrex::Box bx = f_view_this.m_mfab[mfi].box();
         return interpolated_field_GPU<interp_order_xy>{
@@ -185,6 +196,15 @@ struct interpolated_field {
     }
 };
 
+/** \brief Calculates dst = factor_a*src_a + factor_b*src_b. src_a and src_b can be derivatives
+ *
+ * \param[in] box_grow how much the domain of dst should be grown
+ * \param[in] dst destination
+ * \param[in] factor_a factor before src_a
+ * \param[in] src_a first source
+ * \param[in] factor_b factor before src_b
+ * \param[in] src_a second source
+ */
 template<class FVA, class FVB>
 void
 LinCombination (const amrex::IntVect box_grow, FieldView dst,
@@ -358,13 +378,25 @@ Fields::AddBeamCurrents (const int lev, const int which_slice)
     }
 }
 
-
-
+/** \brief Sets non zero Dirichlet Boundary conditions in dst which is the source of the Poisson
+ * equation: laplace potential = dst
+ *
+ * \param[in] dst source of the Poisson equation: laplace potential = dst
+ * \param[in] solver_size size of dst/poisson solver (no tiling)
+ * \param[in] geom geometry of dst/poisson solver
+ * \param[in] boundary_value functional object (Real x, Real y) -> Real value_of_potential
+ */
 template<class Functional>
 void
 SetDirichletBoundaries (amrex::Array4<amrex::Real> dst, const amrex::Box& solver_size,
                         const amrex::Geometry& geom, const Functional& boundary_value)
 {
+    // To solve a Poisson equation with non-zero Dirichlet boundary conditions, the source term
+    // must be corrected at the outmost grid points in x by -field_value_at_guard_cell / dx^2 and
+    // in y by -field_value_at_guard_cell / dy^2, where dx and dy are those of the fine grid
+    // This follows Van Loan, C. (1992). Computational frameworks for the fast Fourier transform.
+    // Page 254 ff.
+    // The interpolation is done in second order transversely and linearly in longitudinal direction
     const int box_len0 = solver_size.length(0);
     const int box_len1 = solver_size.length(1);
     const int box_lo0 = solver_size.smallEnd(0);
@@ -377,6 +409,7 @@ SetDirichletBoundaries (amrex::Array4<amrex::Real> dst, const amrex::Box& solver
 
     const amrex::Box edge_box = {{0, 0, 0}, {box_len0 + box_len1 - 1, 1, 0}};
 
+    // ParallelFor only over the edge of the box
     amrex::ParallelFor(edge_box,
         [=] AMREX_GPU_DEVICE (int i, int j, int) noexcept
         {
@@ -397,6 +430,7 @@ SetDirichletBoundaries (amrex::Array4<amrex::Real> dst, const amrex::Box& solver
 
             const amrex::Real dxdx = dx*dx*(!i_is_changing) + dy*dy*i_is_changing;
 
+            // atomic add because the corners of dst get two values
             amrex::Gpu::Atomic::AddNoRet(&(dst(i_idx, j_idx, box_lo2)),
                                          - boundary_value(x, y) / dxdx);
         });
@@ -406,7 +440,7 @@ void
 Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const int lev,
                               std::string component, const int islice)
 {
-    if (lev == 0) return; // keep lev=0 boundaries zero
+    if (lev == 0) return; // keep lev==0 boundaries zero
     HIPACE_PROFILE("Fields::SetBoundaryCondition()");
     constexpr int interp_order = 2;
 
@@ -471,6 +505,8 @@ Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, c
         amrex::ParallelFor(fine_box_extended,
             [=] AMREX_GPU_DEVICE (int i, int j , int k) noexcept
             {
+                // set interpolated values near edge of fine field between outer_edge and inner_edge
+                // to compensate for incomplete charge/current deposition in those cells
                 if(i<narrow_i_lo || i>narrow_i_hi || j<narrow_j_lo || j>narrow_j_hi) {
                     amrex::Real x = i * dx + offset0;
                     amrex::Real y = j * dy + offset1;

From 79a5f191a7bfde387a997198906543b2ec0b4d20 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sat, 11 Dec 2021 19:11:50 +0100
Subject: [PATCH 17/52] add changes ROCm FFT as well

---
 src/fields/Fields.cpp                            |  2 +-
 src/fields/fft_poisson_solver/fft/WrapRocDST.cpp | 14 +++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 39fab8aff0..1e8ec2fbbc 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -203,7 +203,7 @@ struct interpolated_field {
  * \param[in] factor_a factor before src_a
  * \param[in] src_a first source
  * \param[in] factor_b factor before src_b
- * \param[in] src_a second source
+ * \param[in] src_b second source
  */
 template<class FVA, class FVB>
 void
diff --git a/src/fields/fft_poisson_solver/fft/WrapRocDST.cpp b/src/fields/fft_poisson_solver/fft/WrapRocDST.cpp
index e090dc9248..fe54054673 100644
--- a/src/fields/fft_poisson_solver/fft/WrapRocDST.cpp
+++ b/src/fields/fft_poisson_solver/fft/WrapRocDST.cpp
@@ -21,21 +21,22 @@ namespace AnyDST
         const amrex::Box bx = src.box();
         const int nx = bx.length(0);
         const int ny = bx.length(1);
+        const amrex::IntVect lo = bx.smallEnd();
         amrex::Array4<amrex::Real const> const & src_array = src.array();
         amrex::Array4<amrex::Real> const & dst_array = dst.array();
 
         amrex::ParallelFor(
             bx,
-            [=] AMREX_GPU_DEVICE(int i, int j, int k)
+            [=] AMREX_GPU_DEVICE(int i, int j, int)
             {
                 /* upper left quadrant */
-                dst_array(i+1,j+1,0,dcomp) = src_array(i, j, k, scomp);
+                dst_array(i+1,j+1,lo[2],dcomp) = src_array(i, j, lo[2], scomp);
                 /* lower left quadrant */
-                dst_array(i+1,j+ny+2,0,dcomp) = -src_array(i, ny-1-j, k, scomp);
+                dst_array(i+1,j+ny+2,lo[2],dcomp) = -src_array(i, ny-1-j+2*lo[1], lo[2], scomp);
                 /* upper right quadrant */
-                dst_array(i+nx+2,j+1,0,dcomp) = -src_array(nx-1-i, j, k, scomp);
+                dst_array(i+nx+2,j+1,lo[2],dcomp) = -src_array(nx-1-i+2*lo[0], j, lo[2], scomp);
                 /* lower right quadrant */
-                dst_array(i+nx+2,j+ny+2,0,dcomp) = src_array(nx-1-i, ny-1-j, k, scomp);
+                dst_array(i+nx+2,j+ny+2,lo[2],dcomp) = src_array(nx-1-i+2*lo[0], ny-1-j+2*lo[1], lo[2], scomp);
             }
             );
     }
@@ -244,6 +245,9 @@ namespace AnyDST
         // Allocate expanded_fourier_array Complex of size (nx+2, 2*ny+2)
         amrex::Box expanded_position_box {{0, 0, 0}, {2*nx+1, 2*ny+1, 0}};
         amrex::Box expanded_fourier_box {{0, 0, 0}, {nx+1, 2*ny+1, 0}};
+        // shift box to match rest of fields
+        expanded_position_box += position_array->box().smallEnd();
+        expanded_fourier_box += fourier_array->box().smallEnd();
         dst_plan.m_expanded_position_array =
             std::make_unique<amrex::FArrayBox>(
                 expanded_position_box, 1);

From e4afa2de1eec3b04602f0a4ae35916972c684422 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Tue, 14 Dec 2021 00:20:12 +0100
Subject: [PATCH 18/52] remove ghost cell output feature

---
 src/Hipace.cpp                 |  3 ---
 src/diagnostics/Diagnostic.H   |  2 --
 src/diagnostics/Diagnostic.cpp |  6 ------
 src/fields/Fields.H            |  2 +-
 src/fields/Fields.cpp          | 30 +++++++-----------------------
 5 files changed, 8 insertions(+), 35 deletions(-)

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index d985a22201..56b9d2f507 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -1301,9 +1301,6 @@ Hipace::ResizeFDiagFAB (const int it)
 {
     for (int lev = 0; lev <= finestLevel(); ++lev) {
         amrex::Box bx = boxArray(lev)[it];
-        if(Diagnostic::m_include_ghost_cells) {
-            bx.grow(Fields::m_slices_nguards);
-        }
 
         if (lev == 1) {
             const amrex::Box& bx_lev0 = boxArray(0)[it];
diff --git a/src/diagnostics/Diagnostic.H b/src/diagnostics/Diagnostic.H
index 561113443e..ed32856e01 100644
--- a/src/diagnostics/Diagnostic.H
+++ b/src/diagnostics/Diagnostic.H
@@ -67,8 +67,6 @@ public:
      */
     void ResizeFDiagFAB (const amrex::Box box, const int lev);
 
-    static bool m_include_ghost_cells; /**< Include ghost cells in diagnostic output */
-
 private:
 
     /** Vector over levels, all fields */
diff --git a/src/diagnostics/Diagnostic.cpp b/src/diagnostics/Diagnostic.cpp
index bb70bbb5ae..9d4348df11 100644
--- a/src/diagnostics/Diagnostic.cpp
+++ b/src/diagnostics/Diagnostic.cpp
@@ -2,8 +2,6 @@
 #include "Hipace.H"
 #include <AMReX_ParmParse.H>
 
-bool Diagnostic::m_include_ghost_cells = false;
-
 Diagnostic::Diagnostic (int nlev)
     : m_F(nlev),
       m_diag_coarsen(nlev),
@@ -35,10 +33,6 @@ Diagnostic::Diagnostic (int nlev)
         m_diag_coarsen[ilev] = amrex::IntVect(diag_coarsen_arr);
         AMREX_ALWAYS_ASSERT_WITH_MESSAGE( m_diag_coarsen[ilev].min() >= 1,
             "Coarsening ratio must be >= 1");
-
-        if(diag_coarsen_arr == amrex::Array<int,3>{1,1,1}) {
-           queryWithParser(ppd, "include_ghost_cells", m_include_ghost_cells);
-        }
     }
 
     queryWithParser(ppd, "field_data", m_comps_output);
diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index bd6c527f17..af405ff543 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -69,7 +69,7 @@ struct FieldView {
  *
  * \param[in] direction 0, 1 or 2 for x, y or z respectively
  * \param[in] geom geometry of the field
- * \param[in] box box the filed, can be with or without ghost cells
+ * \param[in] box box of the filed, can be with or without ghost cells
  */
 inline amrex::Real
 GetPosOffset (const int direction, const amrex::Geometry& geom, const amrex::Box& box) {
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 1e8ec2fbbc..abe59fa40b 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -242,20 +242,14 @@ Fields::Copy (const int lev, const int i_slice, const int slice_comp, const int
     for (amrex::MFIter mfi(slice_mf); mfi.isValid(); ++mfi) {
         auto& slice_fab = slice_mf[mfi];
         amrex::Box slice_box = slice_fab.box();
-        slice_box -= amrex::IntVect(slice_box.smallEnd());
-        if (!Diagnostic::m_include_ghost_cells) {
-            slice_box -= m_slices_nguards;
-        }
+        slice_box.setSmall(Direction::z, i_slice);
+        slice_box.setBig  (Direction::z, i_slice);
         slice_array = amrex::makeArray4(slice_fab.dataPtr(), slice_box, slice_fab.nComp());
-        // slice_array's longitude index is 0.
+        // slice_array's longitudinal index is i_slice.
     }
 
     const int full_array_z = i_slice / diag_coarsen[2];
-    amrex::Box domain = geom.Domain();
-    if (Diagnostic::m_include_ghost_cells) {
-        domain.grow(m_slices_nguards);
-    }
-    const amrex::IntVect ncells_global = domain.length();
+    const amrex::IntVect ncells_global = geom.Domain().length();
 
     amrex::Box const& vbx = fab.box();
     if (vbx.smallEnd(Direction::z) <= full_array_z and
@@ -267,47 +261,37 @@ Fields::Copy (const int lev, const int i_slice, const int slice_comp, const int
         amrex::Box copy_box = vbx;
         copy_box.setSmall(Direction::z, full_array_z);
         copy_box.setBig  (Direction::z, full_array_z);
-
         amrex::Array4<amrex::Real> const& full_array = fab.array();
-
         const int even_slice_x = ncells_global[0] % 2 == 0 and slice_dir == 0;
         const int even_slice_y = ncells_global[1] % 2 == 0 and slice_dir == 1;
-
         const int coarse_x = diag_coarsen[0];
         const int coarse_y = diag_coarsen[1];
-
         const int ncells_x = ncells_global[0];
         const int ncells_y = ncells_global[1];
 
-        const int cpyboxlo_x = Diagnostic::m_include_ghost_cells ? -m_slices_nguards[0] : 0;
-        const int cpyboxlo_y = Diagnostic::m_include_ghost_cells ? -m_slices_nguards[1] : 0;
-
         const int *diag_comps = diag_comps_vect.data();
 
         amrex::ParallelFor(copy_box, ncomp,
-        [=] AMREX_GPU_DEVICE (int i_l, int j_l, int k, int n) noexcept
+        [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
         {
             const int m = n[diag_comps];
-            const int i = i_l - cpyboxlo_x;
-            const int j = j_l - cpyboxlo_y;
 
             // coarsening in slice direction is always 1
             const int i_c_start = amrex::min(i*coarse_x +(coarse_x-1)/2 -even_slice_x, ncells_x-1);
             const int i_c_stop  = amrex::min(i*coarse_x +coarse_x/2+1, ncells_x);
             const int j_c_start = amrex::min(j*coarse_y +(coarse_y-1)/2 -even_slice_y, ncells_y-1);
             const int j_c_stop  = amrex::min(j*coarse_y +coarse_y/2+1, ncells_y);
-
             amrex::Real field_value = 0._rt;
             int n_values = 0;
 
             for (int j_c = j_c_start; j_c != j_c_stop; ++j_c) {
                 for (int i_c = i_c_start; i_c != i_c_stop; ++i_c) {
-                    field_value += slice_array(i_c, j_c, 0, m+slice_comp);
+                    field_value += slice_array(i_c, j_c, i_slice, m+slice_comp);
                     ++n_values;
                 }
             }
 
-            full_array(i_l,j_l,k,n+full_comp) = field_value / amrex::max(n_values,1);
+            full_array(i,j,k,n+full_comp) = field_value / amrex::max(n_values,1);
         });
     }
 }

From 3ecb1a482ef81352fad82b11edc82071f37359d6 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 15 Dec 2021 19:47:59 +0100
Subject: [PATCH 19/52] update FFTPoissonSolverPeriodic

---
 .../fft_poisson_solver/FFTPoissonSolverPeriodic.cpp    | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/fields/fft_poisson_solver/FFTPoissonSolverPeriodic.cpp b/src/fields/fft_poisson_solver/FFTPoissonSolverPeriodic.cpp
index 814775b161..64b98dee55 100644
--- a/src/fields/fft_poisson_solver/FFTPoissonSolverPeriodic.cpp
+++ b/src/fields/fft_poisson_solver/FFTPoissonSolverPeriodic.cpp
@@ -30,6 +30,7 @@ FFTPoissonSolverPeriodic::define ( amrex::BoxArray const& realspace_ba,
         // each direction and have the same number of points as the
         // (cell-centered) real space box
         amrex::Box realspace_bx = realspace_ba[i];
+        realspace_bx.grow(Fields::m_poisson_nguards);
         amrex::IntVect fft_size = realspace_bx.length();
         // Because the spectral solver uses real-to-complex FFTs, we only
         // need the positive k values along the fastest axis
@@ -47,7 +48,7 @@ FFTPoissonSolverPeriodic::define ( amrex::BoxArray const& realspace_ba,
 
     // Allocate temporary arrays - in real space and spectral space
     // These arrays will store the data just before/after the FFT
-    m_stagingArea = amrex::MultiFab(realspace_ba, dm, 1, 0);
+    m_stagingArea = amrex::MultiFab(realspace_ba, dm, 1, Fields::m_poisson_nguards);
     m_tmpSpectralField = SpectralField(m_spectralspace_ba, dm, 1, 0);
 
     // This must be true even for parallel FFT.
@@ -90,7 +91,7 @@ FFTPoissonSolverPeriodic::define ( amrex::BoxArray const& realspace_ba,
         // Note: the size of the real-space box and spectral-space box
         // differ when using real-to-complex FFT. When initializing
         // the FFT plan, the valid dimensions are those of the real-space box.
-        amrex::IntVect fft_size = mfi.validbox().length();
+        amrex::IntVect fft_size = m_stagingArea[mfi].box().length();
         m_forward_plan[mfi] = AnyFFT::CreatePlan(
             fft_size, m_stagingArea[mfi].dataPtr(),
             reinterpret_cast<AnyFFT::Complex*>( m_tmpSpectralField[mfi].dataPtr()),
@@ -130,8 +131,9 @@ FFTPoissonSolverPeriodic::SolvePoissonEquation (amrex::MultiFab& lhs_mf)
         // Copy from the staging area to output array (and normalize)
         amrex::Array4<amrex::Real> tmp_real_arr = m_stagingArea.array(mfi);
         amrex::Array4<amrex::Real> lhs_arr = lhs_mf.array(mfi);
-        const amrex::Real inv_N = 1./mfi.validbox().numPts();
-        amrex::ParallelFor( mfi.validbox(),
+        const amrex::Box fft_box = m_stagingArea[mfi].box();
+        const amrex::Real inv_N = 1./fft_box.numPts();
+        amrex::ParallelFor( fft_box,
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
                 // Copy and normalize field
                 lhs_arr(i,j,k) = inv_N*tmp_real_arr(i,j,k);

From 3d51ba58c3777cbccaf9096eee26fa7b50cffaec Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 5 Jan 2022 18:08:25 +0100
Subject: [PATCH 20/52] add some suggestions

---
 src/Hipace.cpp        |  5 +++--
 src/fields/Fields.H   |  6 +++---
 src/fields/Fields.cpp | 40 +++++++++++++++++++++-------------------
 3 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index f6e0b07dbb..fff918bcf5 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -504,8 +504,9 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                 const int iby = Comps[WhichSlice::This]["By"];
                 const int nc = Comps[WhichSlice::This]["N"];
                 AMREX_ALWAYS_ASSERT( iby == ibx+1 );
-                m_fields.getSlices(lev, WhichSlice::This).setVal(0., 0, ibx /*, m_fields.m_slices_nguards*/);
-                m_fields.getSlices(lev, WhichSlice::This).setVal(0., iby+1, nc-iby-1 /*, m_fields.m_slices_nguards*/);
+                // TODO: add m_fields.m_slices_nguards to setVal
+                m_fields.getSlices(lev, WhichSlice::This).setVal(0., 0, ibx);
+                m_fields.getSlices(lev, WhichSlice::This).setVal(0., iby+1, nc-iby-1);
             } else {
                 m_fields.getSlices(lev, WhichSlice::This).setVal(0., m_fields.m_slices_nguards);
             }
diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index af405ff543..06fc0d8bca 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -72,11 +72,11 @@ struct FieldView {
  * \param[in] box box of the filed, can be with or without ghost cells
  */
 inline amrex::Real
-GetPosOffset (const int direction, const amrex::Geometry& geom, const amrex::Box& box) {
+GetPosOffset (const int dir, const amrex::Geometry& geom, const amrex::Box& box) {
     using namespace amrex::literals;
     // match boxes at center point
-    return 0.5_rt * (geom.ProbLo(direction) + geom.ProbHi(direction)
-           - geom.CellSize(direction) * (box.smallEnd(direction) + box.bigEnd(direction)));
+    return 0.5_rt * (geom.ProbLo(dir) + geom.ProbHi(dir)
+           - geom.CellSize(dir) * (box.smallEnd(dir) + box.bigEnd(dir)));
 }
 
 
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index abe59fa40b..acde870ee8 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -75,7 +75,7 @@ Fields::AllocData (
 }
 
 template<int dir>
-struct derivative_GPU {
+struct derivative_inner {
     // captured variables for GPU
     amrex::Array4<amrex::Real const> array;
     amrex::Real dx_inv;
@@ -96,7 +96,7 @@ struct derivative_GPU {
 };
 
 template<>
-struct derivative_GPU<Direction::z> {
+struct derivative_inner<Direction::z> {
     // captured variables for GPU
     amrex::Array4<amrex::Real const> array1;
     amrex::Array4<amrex::Real const> array2;
@@ -115,9 +115,9 @@ struct derivative {
     const amrex::Geometry& geom; // geometry of field
 
     // use .array(mfi) like with amrex::MultiFab or FieldView
-    derivative_GPU<dir> array (amrex::MFIter& mfi) const {
+    derivative_inner<dir> array (amrex::MFIter& mfi) const {
         amrex::Box bx = f_view.m_mfab[mfi].box();
-        return derivative_GPU<dir>{f_view.array(mfi),
+        return derivative_inner<dir>{f_view.array(mfi),
             1/(2*geom.CellSize(dir)), bx.smallEnd(dir), bx.bigEnd(dir)};
     }
 };
@@ -130,14 +130,14 @@ struct derivative<Direction::z> {
     const amrex::Geometry& geom; // geometry of field
 
     // use .array(mfi) like with amrex::MultiFab or FieldView
-    derivative_GPU<Direction::z> array (amrex::MFIter& mfi) const {
-        return derivative_GPU<Direction::z>{f_view1.array(mfi), f_view2.array(mfi),
+    derivative_inner<Direction::z> array (amrex::MFIter& mfi) const {
+        return derivative_inner<Direction::z>{f_view1.array(mfi), f_view2.array(mfi),
             1/(2*geom.CellSize(Direction::z))};
     }
 };
 
 template<int interp_order_xy>
-struct interpolated_field_GPU {
+struct interpolated_field_inner {
     // captured variables for GPU
     amrex::Array4<amrex::Real const> arr_this;
     amrex::Array4<amrex::Real const> arr_prev;
@@ -186,12 +186,12 @@ struct interpolated_field {
     amrex::Real rel_z; // mixing factor between f_view_this and f_view_prev for z interpolation
 
     // use .array(mfi) like with amrex::MultiFab or FieldView
-    interpolated_field_GPU<interp_order_xy> array (amrex::MFIter& mfi) const {
+    interpolated_field_inner<interp_order_xy> array (amrex::MFIter& mfi) const {
         amrex::Box bx = f_view_this.m_mfab[mfi].box();
-        return interpolated_field_GPU<interp_order_xy>{
+        return interpolated_field_inner<interp_order_xy>{
             f_view_this.array(mfi), f_view_prev.array(mfi),
             1/geom.CellSize(0), 1/geom.CellSize(1),
-            GetPosOffset(0, geom, bx), GetPosOffset(0, geom, bx),
+            GetPosOffset(0, geom, bx), GetPosOffset(1, geom, bx),
             rel_z, bx.smallEnd(2)};
     }
 };
@@ -362,17 +362,17 @@ Fields::AddBeamCurrents (const int lev, const int which_slice)
     }
 }
 
-/** \brief Sets non zero Dirichlet Boundary conditions in dst which is the source of the Poisson
- * equation: laplace potential = dst
+/** \brief Sets non zero Dirichlet Boundary conditions in RHS which is the source of the Poisson
+ * equation: laplace LHS = RHS
  *
- * \param[in] dst source of the Poisson equation: laplace potential = dst
- * \param[in] solver_size size of dst/poisson solver (no tiling)
- * \param[in] geom geometry of of dst/poisson solver
+ * \param[in] RHS source of the Poisson equation: laplace LHS = RHS
+ * \param[in] solver_size size of RHS/poisson solver (no tiling)
+ * \param[in] geom geometry of RHS/poisson solver
  * \param[in] boundary_value functional object (Real x, Real y) -> Real value_of_potential
  */
 template<class Functional>
 void
-SetDirichletBoundaries (amrex::Array4<amrex::Real> dst, const amrex::Box& solver_size,
+SetDirichletBoundaries (amrex::Array4<amrex::Real> RHS, const amrex::Box& solver_size,
                         const amrex::Geometry& geom, const Functional& boundary_value)
 {
     // To solve a Poisson equation with non-zero Dirichlet boundary conditions, the source term
@@ -414,8 +414,8 @@ SetDirichletBoundaries (amrex::Array4<amrex::Real> dst, const amrex::Box& solver
 
             const amrex::Real dxdx = dx*dx*(!i_is_changing) + dy*dy*i_is_changing;
 
-            // atomic add because the corners of dst get two values
-            amrex::Gpu::Atomic::AddNoRet(&(dst(i_idx, j_idx, box_lo2)),
+            // atomic add because the corners of RHS get two values
+            amrex::Gpu::Atomic::AddNoRet(&(RHS(i_idx, j_idx, box_lo2)),
                                          - boundary_value(x, y) / dxdx);
         });
 }
@@ -533,7 +533,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
 
     InterpolateFromLev0toLev1(geom, lev, "Psi", islice, m_slices_nguards, m_poisson_nguards);
 
-    /* Compute ExmBy and Eypbx from grad(-psi) */
+    // Compute ExmBy = -d/dx psi and EypBx = -d/dy psi
     FieldView f_ExmBy = getField(lev, WhichSlice::This, "ExmBy");
     FieldView f_EypBx = getField(lev, WhichSlice::This, "EypBx");
     FieldView f_Psi = getField(lev, WhichSlice::This, "Psi");
@@ -545,6 +545,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
         const amrex::Array4<amrex::Real> array_ExmBy = f_ExmBy.array(mfi);
         const amrex::Array4<amrex::Real> array_EypBx = f_EypBx.array(mfi);
         const amrex::Array4<amrex::Real const> array_Psi = f_Psi.array(mfi);
+        // number of ghost cells where ExmBy and EypBx are calculated is 0 for now
         const amrex::Box bx = mfi.growntilebox(amrex::IntVect{0, 0, 0});
         const amrex::Real dx_inv = 1./(2*geom[lev].CellSize(Direction::x));
         const amrex::Real dy_inv = 1./(2*geom[lev].CellSize(Direction::y));
@@ -552,6 +553,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
         amrex::ParallelFor(bx,
             [=] AMREX_GPU_DEVICE(int i, int j, int k)
             {
+                // derivatives in x and y direction, no guards needed
                 array_ExmBy(i,j,k) = - (array_Psi(i+1,j,k) - array_Psi(i-1,j,k))*dx_inv;
                 array_EypBx(i,j,k) = - (array_Psi(i,j+1,k) - array_Psi(i,j-1,k))*dy_inv;
             });

From ee504144e7bdaf562e0515e6ec41959562f906b5 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 5 Jan 2022 18:11:36 +0100
Subject: [PATCH 21/52] fix doc

---
 src/fields/Fields.H | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 06fc0d8bca..12e95c8151 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -67,7 +67,7 @@ struct FieldView {
  * x = i * dx + GetPosOffset(0, geom, box)
  * i = round( (x - GetPosOffset(0, geom, box)) / dx )
  *
- * \param[in] direction 0, 1 or 2 for x, y or z respectively
+ * \param[in] dir 0, 1 or 2 for x, y or z respectively
  * \param[in] geom geometry of the field
  * \param[in] box box of the filed, can be with or without ghost cells
  */

From d8c98d0357fc945479c41f591832df268cad714a Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 19 Jan 2022 18:33:32 +0100
Subject: [PATCH 22/52] remove FieldView and add +0.5 back to fix striping

---
 src/fields/Fields.H   | 26 +++++----------------
 src/fields/Fields.cpp | 54 ++++++++++++++++++++++---------------------
 2 files changed, 34 insertions(+), 46 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 937799a77a..73ae10c3b6 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -49,20 +49,6 @@ struct Direction{
     enum dir{x=0, y, z};
 };
 
-/** \brief Helper class to pass non-owning, lightweight views of specific Fields around */
-struct FieldView {
-    // use brace initialization or getField/getStagingArea as constructor
-    amrex::MultiFab& m_mfab;
-    int m_comp;
-
-    /** \brief get amrex::Array4<const? amrex::Real> of the specific Field component
-     * \param[in] mfi MFIter object of this or a related field
-     */
-    auto array (const amrex::MFIter& mfi) const {
-        return m_mfab.array(mfi, m_comp);
-    }
-};
-
 /** \brief Function to help converting field indexes to positions and backwards. Usage:
  * x = i * dx + GetPosOffset(0, geom, box)
  * i = round( (x - GetPosOffset(0, geom, box)) / dx )
@@ -120,19 +106,19 @@ public:
      * \param[in] islice slice index
      */
     amrex::MultiFab& getSlices (int lev, int islice) {return m_slices[lev][islice]; }
-    /** get FieldView of a field in a slice
+    /** get amrex::MultiFab of a field in a slice
      * \param[in] lev MR level
      * \param[in] islice slice index
      * \param[in] comp component name of field (see Comps)
      */
-    FieldView getField (const int lev, const int islice, const std::string comp) {
-        return FieldView{getSlices(lev, islice), Comps[islice][comp]};
+    amrex::MultiFab getField (const int lev, const int islice, const std::string comp) {
+        return amrex::MultiFab(getSlices(lev, islice), amrex::make_alias, Comps[islice][comp], 1);
     }
-    /** get FieldView of the poisson staging area
+    /** get amrex::MultiFab of the poisson staging area
      * \param[in] lev MR level
      */
-    FieldView getStagingArea (const int lev) {
-        return FieldView{m_poisson_solver[lev]->StagingArea(), 0};
+    amrex::MultiFab getStagingArea (const int lev) {
+        return amrex::MultiFab(m_poisson_solver[lev]->StagingArea(), amrex::make_alias, 0, 1);
     }
     /** Return reference to density tile arrays */
     amrex::Vector<amrex::FArrayBox>& getTmpDensities() { return m_tmp_densities; }
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index d8746ef044..29abeaf641 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -111,12 +111,12 @@ struct derivative_inner<Direction::z> {
 template<int dir>
 struct derivative {
     // use brace initialization as constructor
-    FieldView f_view; // field to calculate its derivative
+    amrex::MultiFab f_view; // field to calculate its derivative
     const amrex::Geometry& geom; // geometry of field
 
-    // use .array(mfi) like with amrex::MultiFab or FieldView
+    // use .array(mfi) like with amrex::MultiFab
     derivative_inner<dir> array (amrex::MFIter& mfi) const {
-        amrex::Box bx = f_view.m_mfab[mfi].box();
+        amrex::Box bx = f_view[mfi].box();
         return derivative_inner<dir>{f_view.array(mfi),
             1/(2*geom.CellSize(dir)), bx.smallEnd(dir), bx.bigEnd(dir)};
     }
@@ -125,11 +125,11 @@ struct derivative {
 template<>
 struct derivative<Direction::z> {
     // use brace initialization as constructor
-    FieldView f_view1; // field on previous slice to calculate its derivative
-    FieldView f_view2; // field on next slice to calculate its derivative
+    amrex::MultiFab f_view1; // field on previous slice to calculate its derivative
+    amrex::MultiFab f_view2; // field on next slice to calculate its derivative
     const amrex::Geometry& geom; // geometry of field
 
-    // use .array(mfi) like with amrex::MultiFab or FieldView
+    // use .array(mfi) like with amrex::MultiFab
     derivative_inner<Direction::z> array (amrex::MFIter& mfi) const {
         return derivative_inner<Direction::z>{f_view1.array(mfi), f_view2.array(mfi),
             1/(2*geom.CellSize(Direction::z))};
@@ -180,14 +180,14 @@ struct interpolated_field_inner {
 template<int interp_order_xy>
 struct interpolated_field {
     // use brace initialization as constructor
-    FieldView f_view_this; // field to interpolate on this slice
-    FieldView f_view_prev; // field to interpolate on previous slice
+    amrex::MultiFab f_view_this; // field to interpolate on this slice
+    amrex::MultiFab f_view_prev; // field to interpolate on previous slice
     const amrex::Geometry& geom; // geometry of field
     amrex::Real rel_z; // mixing factor between f_view_this and f_view_prev for z interpolation
 
-    // use .array(mfi) like with amrex::MultiFab or FieldView
+    // use .array(mfi) like with amrex::MultiFab
     interpolated_field_inner<interp_order_xy> array (amrex::MFIter& mfi) const {
-        amrex::Box bx = f_view_this.m_mfab[mfi].box();
+        amrex::Box bx = f_view_this[mfi].box();
         return interpolated_field_inner<interp_order_xy>{
             f_view_this.array(mfi), f_view_prev.array(mfi),
             1/geom.CellSize(0), 1/geom.CellSize(1),
@@ -207,7 +207,7 @@ struct interpolated_field {
  */
 template<class FVA, class FVB>
 void
-LinCombination (const amrex::IntVect box_grow, FieldView dst,
+LinCombination (const amrex::IntVect box_grow, amrex::MultiFab dst,
                 const amrex::Real factor_a, const FVA& src_a,
                 const amrex::Real factor_b, const FVB& src_b)
 {
@@ -216,7 +216,7 @@ LinCombination (const amrex::IntVect box_grow, FieldView dst,
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
 #endif
-    for ( amrex::MFIter mfi(dst.m_mfab, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi ){
+    for ( amrex::MFIter mfi(dst, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi ){
         const auto dst_array = dst.array(mfi);
         const auto src_a_array = src_a.array(mfi);
         const auto src_b_array = src_b.array(mfi);
@@ -424,23 +424,24 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
 {
     if (lev == 0) return; // keep lev==0 boundaries zero
     HIPACE_PROFILE("Fields::SetBoundaryCondition()");
+    using namespace amrex::literals;
     constexpr int interp_order = 2;
 
-    const amrex::Real ref_ratio_z = geom[lev-1].CellSize(2) / geom[lev].CellSize(2);
-    const amrex::Real islice_coarse = islice / ref_ratio_z;
+    const amrex::Real ref_ratio_z = Hipace::GetRefRatio(lev)[2];
+    const amrex::Real islice_coarse = (islice + 0.5_rt) / ref_ratio_z;
     const amrex::Real rel_z = islice_coarse - static_cast<int>(amrex::Math::floor(islice_coarse));
 
     auto solution_interp = interpolated_field<interp_order>{
         getField(lev-1, WhichSlice::This, component),
         getField(lev-1, WhichSlice::Previous1, component),
         geom[lev-1], rel_z};
-    FieldView staging_area = getStagingArea(lev);
+    amrex::MultiFab staging_area = getStagingArea(lev);
 
-    for (amrex::MFIter mfi(staging_area.m_mfab, false); mfi.isValid(); ++mfi)
+    for (amrex::MFIter mfi(staging_area, false); mfi.isValid(); ++mfi)
     {
         const auto arr_solution_interp = solution_interp.array(mfi);
         const auto arr_staging_area = staging_area.array(mfi);
-        const amrex::Box fine_staging_box = staging_area.m_mfab[mfi].box();
+        const amrex::Box fine_staging_box = staging_area[mfi].box();
 
         SetDirichletBoundaries(arr_staging_area, fine_staging_box, geom[lev], arr_solution_interp);
     }
@@ -453,21 +454,22 @@ Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, c
                                    const amrex::IntVect outer_edge, const amrex::IntVect inner_edge)
 {
     if (lev == 0) return; // only interpolate boundaries to lev 1
+    if (outer_edge == inner_edge) return;
     HIPACE_PROFILE("Fields::InterpolateFromLev0toLev1()");
+    using namespace amrex::literals;
     constexpr int interp_order = 2;
-    if (outer_edge == inner_edge) return;
 
-    const amrex::Real ref_ratio_z = geom[lev-1].CellSize(2) / geom[lev].CellSize(2);
-    const amrex::Real islice_coarse = islice / ref_ratio_z;
+    const amrex::Real ref_ratio_z = Hipace::GetRefRatio(lev)[2];
+    const amrex::Real islice_coarse = (islice + 0.5_rt) / ref_ratio_z;
     const amrex::Real rel_z = islice_coarse - static_cast<int>(amrex::Math::floor(islice_coarse));
 
     auto field_coarse_interp = interpolated_field<interp_order>{
         getField(lev-1, WhichSlice::This, component),
         getField(lev-1, WhichSlice::Previous1, component),
         geom[lev-1], rel_z};
-    FieldView field_fine = getField(lev, WhichSlice::This, component);
+    amrex::MultiFab field_fine = getField(lev, WhichSlice::This, component);
 
-    for (amrex::MFIter mfi( field_fine.m_mfab, false); mfi.isValid(); ++mfi)
+    for (amrex::MFIter mfi( field_fine, false); mfi.isValid(); ++mfi)
     {
         auto arr_field_coarse_interp = field_coarse_interp.array(mfi);
         auto arr_field_fine = field_fine.array(mfi);
@@ -532,14 +534,14 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
     InterpolateFromLev0toLev1(geom, lev, "Psi", islice, m_slices_nguards, m_poisson_nguards);
 
     // Compute ExmBy = -d/dx psi and EypBx = -d/dy psi
-    FieldView f_ExmBy = getField(lev, WhichSlice::This, "ExmBy");
-    FieldView f_EypBx = getField(lev, WhichSlice::This, "EypBx");
-    FieldView f_Psi = getField(lev, WhichSlice::This, "Psi");
+    amrex::MultiFab f_ExmBy = getField(lev, WhichSlice::This, "ExmBy");
+    amrex::MultiFab f_EypBx = getField(lev, WhichSlice::This, "EypBx");
+    amrex::MultiFab f_Psi = getField(lev, WhichSlice::This, "Psi");
 
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
 #endif
-    for ( amrex::MFIter mfi(f_ExmBy.m_mfab, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi ){
+    for ( amrex::MFIter mfi(f_ExmBy, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi ){
         const amrex::Array4<amrex::Real> array_ExmBy = f_ExmBy.array(mfi);
         const amrex::Array4<amrex::Real> array_EypBx = f_EypBx.array(mfi);
         const amrex::Array4<amrex::Real const> array_Psi = f_Psi.array(mfi);

From 0e46aa07933e7f409c5df3935646966030b58748 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 20 Jan 2022 04:27:38 +0100
Subject: [PATCH 23/52] enable z interpolation for output

---
 src/Hipace.cpp                    |   4 +-
 src/diagnostics/Diagnostic.cpp    |   1 +
 src/diagnostics/OpenPMDWriter.H   |   2 +-
 src/diagnostics/OpenPMDWriter.cpp |   3 +-
 src/fields/Fields.H               |  17 +--
 src/fields/Fields.cpp             | 211 ++++++++++++++++++------------
 6 files changed, 139 insertions(+), 99 deletions(-)

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index 40cb1acef5..bc5e1afd4d 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -1331,8 +1331,8 @@ Hipace::GetRefRatio (int lev)
 void
 Hipace::FillDiagnostics (const int lev, int i_slice)
 {
-    m_fields.Copy(lev, i_slice, 0, 0, m_diags.getCompsIdx(), m_diags.getNFields(),
-                  m_diags.getF(lev), m_diags.sliceDir(), m_diags.getCoarsening(lev), Geom(lev));
+    m_fields.Copy(lev, i_slice, m_diags.getGeom()[lev], m_diags.getF(lev), m_diags.getF(lev).box(),
+                  Geom(lev), m_diags.getCompsIdx(), m_diags.getNFields());
 }
 
 void
diff --git a/src/diagnostics/Diagnostic.cpp b/src/diagnostics/Diagnostic.cpp
index 9d4348df11..16778b14e6 100644
--- a/src/diagnostics/Diagnostic.cpp
+++ b/src/diagnostics/Diagnostic.cpp
@@ -121,6 +121,7 @@ Diagnostic::ResizeFDiagFAB (const amrex::Box box, const int lev)
     amrex::Box io_box = TrimIOBox(box);
     io_box.coarsen(m_diag_coarsen[lev]);
     m_F[lev].resize(io_box, m_nfields);
+    m_F[lev].setVal<amrex::RunOn::Device>(0);
 }
 
 amrex::Box
diff --git a/src/diagnostics/OpenPMDWriter.H b/src/diagnostics/OpenPMDWriter.H
index 162198d901..3d3b5a3ae5 100644
--- a/src/diagnostics/OpenPMDWriter.H
+++ b/src/diagnostics/OpenPMDWriter.H
@@ -140,7 +140,7 @@ public:
      * \param[in] call_type whether the beams or the fields should be written to file
      */
     void WriteDiagnostics(
-        amrex::Vector<amrex::FArrayBox> const& a_mf, MultiBeam& a_multi_beam,
+        amrex::Vector<amrex::FArrayBox>& a_mf, MultiBeam& a_multi_beam,
         amrex::Vector<amrex::Geometry> const& geom,
         const amrex::Real physical_time, const int output_step, const int nlev,
         const int slice_dir, const amrex::Vector< std::string > varnames,
diff --git a/src/diagnostics/OpenPMDWriter.cpp b/src/diagnostics/OpenPMDWriter.cpp
index 6612472d08..3f3ff8cd38 100644
--- a/src/diagnostics/OpenPMDWriter.cpp
+++ b/src/diagnostics/OpenPMDWriter.cpp
@@ -65,7 +65,7 @@ OpenPMDWriter::InitDiagnostics (const int output_step, const int output_period,
 
 void
 OpenPMDWriter::WriteDiagnostics (
-    amrex::Vector<amrex::FArrayBox> const& a_mf, MultiBeam& a_multi_beam,
+    amrex::Vector<amrex::FArrayBox>& a_mf, MultiBeam& a_multi_beam,
     amrex::Vector<amrex::Geometry> const& geom,
     const amrex::Real physical_time, const int output_step, const int nlev,
     const int slice_dir, const amrex::Vector< std::string > varnames,
@@ -86,6 +86,7 @@ OpenPMDWriter::WriteDiagnostics (
         } else if (call_type == OpenPMDWriterCallType::fields ) {
             WriteFieldData(a_mf[lev], geom, slice_dir, varnames, iteration, output_step, lev);
             m_outputSeries[lev]->flush();
+            a_mf[lev].setVal<amrex::RunOn::Device>(0);
             m_last_output_dumped[lev] = output_step;
         }
     }
diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index b87745036f..aa626d4348 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -134,19 +134,16 @@ public:
      *
      * \param[in] lev MR level
      * \param[in] i_slice z slice in which to write the data
-     * \param[in] slice_comp first component of the xy slice to copy from/to
-     * \param[in] full_comp first component of the full array to copy from/to
+     * \param[in] diag_geom diagnostics geometry
+     * \param[in,out] diag_fab full FArrayBox of diagnostics
+     * \param[in] diag_box box on which diag_fab is defined
+     * \param[in] calc_geom main geometry
      * \param[in] diag_comps_vect the field components to copy
      * \param[in] ncomp number of components to copy
-     * \param[in,out] fab full FArrayBox
-     * \param[in] slice_dir slicing direction. 0=x, 1=y, -1=no slicing (full 3D)
-     * \param[in] diag_coarsen coarsening ratio of diagnostics
-     * \param[in] geom main geometry
      */
-    void Copy (const int lev, const int i_slice, const int slice_comp, const int full_comp,
-               const amrex::Gpu::DeviceVector<int>& diag_comps_vect,
-               const int ncomp, amrex::FArrayBox& fab, const int slice_dir,
-               const amrex::IntVect diag_coarsen, const amrex::Geometry geom);
+     void Copy (const int lev, const int i_slice, const amrex::Geometry& diag_geom,
+                amrex::FArrayBox& diag_fab, amrex::Box diag_box, const amrex::Geometry& calc_geom,
+                const amrex::Gpu::DeviceVector<int>& diag_comps_vect, const int ncomp);
 
     /** \brief Shift slices by 1 element: slices (1,2) are then stored in (2,3).
      *
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 29abeaf641..217565d100 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -136,21 +136,19 @@ struct derivative<Direction::z> {
     }
 };
 
-template<int interp_order_xy>
-struct interpolated_field_inner {
+template<int interp_order_xy, class ArrayType>
+struct interpolated_field_xy_inner {
     // captured variables for GPU
-    amrex::Array4<amrex::Real const> arr_this;
-    amrex::Array4<amrex::Real const> arr_prev;
+    ArrayType array;
     amrex::Real dx_inv;
     amrex::Real dy_inv;
     amrex::Real offset0;
     amrex::Real offset1;
-    amrex::Real rel_z;
-    int lo2;
 
     // interpolate field in x, y with <interp_order_xy> order transversely
-    // and linear order longitudinally. x and y must be inside field box
-    AMREX_GPU_DEVICE amrex::Real operator() (amrex::Real x, amrex::Real y) const noexcept {
+    // x and y must be inside field box
+    template<class...Args> AMREX_GPU_DEVICE
+    amrex::Real operator() (amrex::Real x, amrex::Real y, Args...args) const noexcept {
         using namespace amrex::literals;
 
         // x direction
@@ -166,36 +164,80 @@ struct interpolated_field_inner {
         amrex::Real field_value = 0.0_rt;
         for (int iy=0; iy<=interp_order_xy; iy++){
             for (int ix=0; ix<=interp_order_xy; ix++){
-                field_value += sx_cell[ix]*sy_cell[iy]*
-                    ((1.0_rt-rel_z)*arr_this(i_cell+ix,
-                                             j_cell+iy, lo2)
-                             +rel_z*arr_prev(i_cell+ix,
-                                             j_cell+iy, lo2));
+                field_value += sx_cell[ix] * sy_cell[iy] * array(i_cell+ix, j_cell+iy, args...);
             }
         }
         return field_value;
     }
 };
 
-template<int interp_order_xy>
-struct interpolated_field {
+template<int interp_order_xy, class MfabType>
+struct interpolated_field_xy {
     // use brace initialization as constructor
-    amrex::MultiFab f_view_this; // field to interpolate on this slice
-    amrex::MultiFab f_view_prev; // field to interpolate on previous slice
-    const amrex::Geometry& geom; // geometry of field
+    MfabType mfab; // MultiFab type object of the field
+    amrex::Geometry geom; // geometry of field
+
+    // use .array(mfi) like with amrex::MultiFab
+    auto array (amrex::MFIter& mfi) const {
+        auto mfab_array = mfab.array(mfi);
+        return interpolated_field_xy_inner<interp_order_xy, decltype(mfab_array)>{
+            mfab_array, 1/geom.CellSize(0), 1/geom.CellSize(1),
+            GetPosOffset(0, geom, geom.Domain()), GetPosOffset(1, geom, geom.Domain())};
+    }
+};
+
+struct interpolated_field_z_inner {
+    // captured variables for GPU
+    amrex::Array4<amrex::Real const> arr_this;
+    amrex::Array4<amrex::Real const> arr_prev;
+    amrex::Real rel_z;
+    int lo2;
+
+    // linear longitudinal field interpolation
+    AMREX_GPU_DEVICE amrex::Real operator() (int i, int j) const noexcept {
+        using namespace amrex::literals;
+        return (1.0_rt-rel_z)*arr_this(i, j, lo2) + rel_z*arr_prev(i, j, lo2);
+    }
+};
+
+struct interpolated_field_z {
+    // use brace initialization as constructor
+    amrex::MultiFab mfab_this; // field to interpolate on this slice
+    amrex::MultiFab mfab_prev; // field to interpolate on previous slice
     amrex::Real rel_z; // mixing factor between f_view_this and f_view_prev for z interpolation
 
     // use .array(mfi) like with amrex::MultiFab
-    interpolated_field_inner<interp_order_xy> array (amrex::MFIter& mfi) const {
-        amrex::Box bx = f_view_this[mfi].box();
-        return interpolated_field_inner<interp_order_xy>{
-            f_view_this.array(mfi), f_view_prev.array(mfi),
-            1/geom.CellSize(0), 1/geom.CellSize(1),
-            GetPosOffset(0, geom, bx), GetPosOffset(1, geom, bx),
-            rel_z, bx.smallEnd(2)};
+    interpolated_field_z_inner array (amrex::MFIter& mfi) const {
+        return interpolated_field_z_inner{
+            mfab_this.array(mfi), mfab_prev.array(mfi), rel_z, mfab_this[mfi].box().smallEnd(2)};
     }
 };
 
+template<int interp_order_xy>
+using interpolated_field_xyz = interpolated_field_xy<interp_order_xy, interpolated_field_z>;
+
+struct guarded_field_inner {
+    amrex::Array4<amrex::Real const> array;
+    amrex::Box bx;
+
+    template<class...Args>
+    AMREX_GPU_DEVICE amrex::Real operator() (int i, int j, int k, Args...args) const noexcept {
+        using namespace amrex::literals;
+        if (bx.contains(i,j,k)) {
+            return array(i,j,k,args...);
+        } else return 0._rt;
+    }
+};
+
+struct guarded_field {
+    amrex::MultiFab& mfab;
+
+    guarded_field_inner array (amrex::MFIter& mfi) const {
+        return guarded_field_inner{mfab.array(mfi), mfab[mfi].box()};
+    }
+
+};
+
 /** \brief Calculates dst = factor_a*src_a + factor_b*src_b. src_a and src_b can be derivatives
  *
  * \param[in] box_grow how much the domain of dst should be grown
@@ -222,7 +264,7 @@ LinCombination (const amrex::IntVect box_grow, amrex::MultiFab dst,
         const auto src_b_array = src_b.array(mfi);
         const amrex::Box bx = mfi.growntilebox(box_grow);
         amrex::ParallelFor(bx,
-            [=] AMREX_GPU_DEVICE(int i, int j, int k)
+            [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
             {
                 dst_array(i,j,k) = factor_a * src_a_array(i,j,k) + factor_b * src_b_array(i,j,k);
             });
@@ -230,72 +272,71 @@ LinCombination (const amrex::IntVect box_grow, amrex::MultiFab dst,
 }
 
 void
-Fields::Copy (const int lev, const int i_slice, const int slice_comp, const int full_comp,
-              const amrex::Gpu::DeviceVector<int>& diag_comps_vect,
-              const int ncomp, amrex::FArrayBox& fab, const int slice_dir,
-              const amrex::IntVect diag_coarsen, const amrex::Geometry geom)
+Fields::Copy (const int lev, const int i_slice, const amrex::Geometry& diag_geom,
+              amrex::FArrayBox& diag_fab, amrex::Box diag_box, const amrex::Geometry& calc_geom,
+              const amrex::Gpu::DeviceVector<int>& diag_comps_vect, const int ncomp)
 {
-    using namespace amrex::literals;
     HIPACE_PROFILE("Fields::Copy()");
-    auto& slice_mf = m_slices[lev][WhichSlice::This]; // copy from the current slice
-    amrex::Array4<amrex::Real> slice_array; // There is only one Box.
-    for (amrex::MFIter mfi(slice_mf); mfi.isValid(); ++mfi) {
-        auto& slice_fab = slice_mf[mfi];
-        amrex::Box slice_box = slice_fab.box();
-        slice_box.setSmall(Direction::z, i_slice);
-        slice_box.setBig  (Direction::z, i_slice);
-        slice_array = amrex::makeArray4(slice_fab.dataPtr(), slice_box, slice_fab.nComp());
-        // slice_array's longitude index is i_slice.
+    constexpr int depos_order_xy = 1;
+    constexpr int depos_order_z = 1;
+    constexpr int depos_order_offset = depos_order_z / 2 + 1;
+
+    const amrex::Real poff_calc_z = GetPosOffset(2, calc_geom, calc_geom.Domain());
+    const amrex::Real poff_diag_x = GetPosOffset(0, diag_geom, diag_geom.Domain());
+    const amrex::Real poff_diag_y = GetPosOffset(1, diag_geom, diag_geom.Domain());
+    const amrex::Real poff_diag_z = GetPosOffset(2, diag_geom, diag_geom.Domain());
+
+    const int i_slice_min = i_slice - depos_order_offset;
+    const int i_slice_max = i_slice + depos_order_offset;
+
+    const amrex::Real pos_slice_min = i_slice_min * calc_geom.CellSize(2) + poff_calc_z;
+    const amrex::Real pos_slice_max = i_slice_max * calc_geom.CellSize(2) + poff_calc_z;
+    const int k_min = static_cast<int>(amrex::Math::round((pos_slice_min - poff_diag_z)/diag_geom.CellSize(2)));
+    const int k_max = static_cast<int>(amrex::Math::round((pos_slice_max - poff_diag_z)/diag_geom.CellSize(2)));
+
+    amrex::Gpu::DeviceVector<amrex::Real> rel_z_vec(k_max+1-k_min, 0.);
+
+    for (int k=k_min; k<=k_max; ++k ) {
+        amrex::Real pos = k * diag_geom.CellSize(2) + poff_diag_z;
+        amrex::Real mid_i_slice = (pos - poff_calc_z)/calc_geom.CellSize(2);
+        amrex::Real sz_cell[depos_order_z + 1];
+        int k_cell = compute_shape_factor<depos_order_z>(sz_cell, mid_i_slice);
+        for (int i=0; i<=depos_order_z; ++i) {
+            if (k_cell+i == i_slice) {
+                rel_z_vec[k-k_min] = sz_cell[i];
+            }
+        }
     }
 
-    const int full_array_z = i_slice / diag_coarsen[2];
-    const amrex::IntVect ncells_global = geom.Domain().length();
+    diag_box.setSmall(2, amrex::max(diag_box.smallEnd(2), k_min));
+    diag_box.setBig(2, amrex::min(diag_box.bigEnd(2), k_max));
 
-    amrex::Box const& vbx = fab.box();
-    if (vbx.smallEnd(Direction::z) <= full_array_z and
-        vbx.bigEnd  (Direction::z) >= full_array_z and
-        ( i_slice % diag_coarsen[2] == diag_coarsen[2]/2 or
-        ( i_slice == ncells_global[2] - 1 and
-        ( ncells_global[2] - 1 ) % diag_coarsen[2] < diag_coarsen[2]/2 )))
-    {
-        amrex::Box copy_box = vbx;
-        copy_box.setSmall(Direction::z, full_array_z);
-        copy_box.setBig  (Direction::z, full_array_z);
-        amrex::Array4<amrex::Real> const& full_array = fab.array();
-        const int even_slice_x = ncells_global[0] % 2 == 0 and slice_dir == 0;
-        const int even_slice_y = ncells_global[1] % 2 == 0 and slice_dir == 1;
-        const int coarse_x = diag_coarsen[0];
-        const int coarse_y = diag_coarsen[1];
-        const int ncells_x = ncells_global[0];
-        const int ncells_y = ncells_global[1];
+    auto& slice_mf = m_slices[lev][WhichSlice::This];
 
-        const int *diag_comps = diag_comps_vect.data();
+    auto slice_func = interpolated_field_xy<depos_order_xy, guarded_field>{slice_mf, calc_geom};
 
-        amrex::ParallelFor(copy_box, ncomp,
-        [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
-        {
-            const int m = n[diag_comps];
-
-            // coarsening in slice direction is always 1
-            const int i_c_start = amrex::min(i*coarse_x +(coarse_x-1)/2 -even_slice_x, ncells_x-1);
-            const int i_c_stop  = amrex::min(i*coarse_x +coarse_x/2+1, ncells_x);
-            const int j_c_start = amrex::min(j*coarse_y +(coarse_y-1)/2 -even_slice_y, ncells_y-1);
-            const int j_c_stop  = amrex::min(j*coarse_y +coarse_y/2+1, ncells_y);
-            amrex::Real field_value = 0._rt;
-            int n_values = 0;
-
-            for (int j_c = j_c_start; j_c != j_c_stop; ++j_c) {
-                for (int i_c = i_c_start; i_c != i_c_stop; ++i_c) {
-                    field_value += slice_array(i_c, j_c, i_slice, m+slice_comp);
-                    ++n_values;
-                }
-            }
+    for (amrex::MFIter mfi(slice_mf); mfi.isValid(); ++mfi) {
+        auto slice_array = slice_func.array(mfi);
+        amrex::Array4<amrex::Real> diag_array = diag_fab.array();
 
-            full_array(i,j,k,n+full_comp) = field_value / amrex::max(n_values,1);
-        });
+        const int *diag_comps = diag_comps_vect.data();
+        const amrex::Real *rel_z_data = rel_z_vec.data();
+        const int lo2 = slice_mf[mfi].box().smallEnd(2);
+        const amrex::Real dx = diag_geom.CellSize(0);
+        const amrex::Real dy = diag_geom.CellSize(1);
+
+        amrex::ParallelFor(diag_box, ncomp,
+            [=] AMREX_GPU_DEVICE(int i, int j, int k, int n) noexcept
+            {
+                const amrex::Real x = i * dx + poff_diag_x;
+                const amrex::Real y = j * dy + poff_diag_y;
+                const int m = n[diag_comps];
+                diag_array(i,j,k,n) += rel_z_data[k-k_min] * slice_array(x,y,lo2,m);
+            });
     }
 }
 
+
 void
 Fields::ShiftSlices (int nlev, int islice, amrex::Geometry geom, amrex::Real patch_lo,
                      amrex::Real patch_hi)
@@ -431,10 +472,10 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
     const amrex::Real islice_coarse = (islice + 0.5_rt) / ref_ratio_z;
     const amrex::Real rel_z = islice_coarse - static_cast<int>(amrex::Math::floor(islice_coarse));
 
-    auto solution_interp = interpolated_field<interp_order>{
+    auto solution_interp = interpolated_field_xyz<interp_order>{
         getField(lev-1, WhichSlice::This, component),
         getField(lev-1, WhichSlice::Previous1, component),
-        geom[lev-1], rel_z};
+        rel_z, geom[lev-1]};
     amrex::MultiFab staging_area = getStagingArea(lev);
 
     for (amrex::MFIter mfi(staging_area, false); mfi.isValid(); ++mfi)
@@ -463,10 +504,10 @@ Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, c
     const amrex::Real islice_coarse = (islice + 0.5_rt) / ref_ratio_z;
     const amrex::Real rel_z = islice_coarse - static_cast<int>(amrex::Math::floor(islice_coarse));
 
-    auto field_coarse_interp = interpolated_field<interp_order>{
+    auto field_coarse_interp = interpolated_field_xyz<interp_order>{
         getField(lev-1, WhichSlice::This, component),
         getField(lev-1, WhichSlice::Previous1, component),
-        geom[lev-1], rel_z};
+        rel_z, geom[lev-1]};
     amrex::MultiFab field_fine = getField(lev, WhichSlice::This, component);
 
     for (amrex::MFIter mfi( field_fine, false); mfi.isValid(); ++mfi)

From 762b2375d2f54323a75516c3569c1df68b6b3945 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Fri, 21 Jan 2022 18:51:01 +0100
Subject: [PATCH 24/52] fix MR lev1 output size

---
 src/Hipace.cpp                    | 21 +++++---
 src/diagnostics/Diagnostic.H      | 23 ++++++---
 src/diagnostics/Diagnostic.cpp    | 81 ++++++++++++++++++-------------
 src/diagnostics/OpenPMDWriter.H   |  3 +-
 src/diagnostics/OpenPMDWriter.cpp | 10 ++--
 5 files changed, 81 insertions(+), 57 deletions(-)

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index bc5e1afd4d..58cc5b1018 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -304,8 +304,7 @@ Hipace::MakeNewLevelFromScratch (
     }
     SetDistributionMap(lev, dm); // Let AmrCore know
     DefineSliceGDB(lev, ba, dm);
-    // Note: we pass ba[0] as a dummy box, it will be resized properly in the loop over boxes in Evolve
-    m_diags.AllocData(lev, ba[0], Geom(lev));
+    m_diags.AllocData(lev);
     m_fields.AllocData(lev, Geom(), m_slice_ba[lev], m_slice_dm[lev],
                        m_multi_plasma.m_sort_bin_size);
 }
@@ -1304,17 +1303,24 @@ void
 Hipace::ResizeFDiagFAB (const int it)
 {
     for (int lev = 0; lev <= finestLevel(); ++lev) {
-        amrex::Box bx = boxArray(lev)[it];
+        amrex::Box local_box = boxArray(lev)[it];
+        amrex::Box domain = boxArray(lev).minimalBox();
 
         if (lev == 1) {
             const amrex::Box& bx_lev0 = boxArray(0)[it];
             const int ref_ratio_z = GetRefRatio(lev)[Direction::z];
+
+            domain.setBig(Direction::z, domain.bigEnd(Direction::z) - ref_ratio_z); // ???
+
             // Ensuring the IO boxes on level 1 are aligned with the boxes on level 0
-            bx.setSmall(Direction::z, ref_ratio_z*bx_lev0.smallEnd(Direction::z));
-            bx.setBig  (Direction::z, ref_ratio_z*bx_lev0.bigEnd(Direction::z)+(ref_ratio_z-1));
+            local_box.setSmall(Direction::z, amrex::max(domain.smallEnd(Direction::z),
+                               ref_ratio_z*bx_lev0.smallEnd(Direction::z)));
+            local_box.setBig  (Direction::z, amrex::min(domain.bigEnd(Direction::z),
+                               ref_ratio_z*bx_lev0.bigEnd(Direction::z)+(ref_ratio_z-1)));
         }
 
-        m_diags.ResizeFDiagFAB(bx, lev);
+        std::cout << "lev: " << lev << "\nGeom: " << Geom(lev) << "\nlocal_box: " <<  local_box.smallEnd() << " " << local_box.bigEnd() << "\ndomain: " <<  domain.smallEnd() << " " << domain.bigEnd();
+        m_diags.ResizeFDiagFAB(local_box, domain, lev, Geom(lev));
     }
 }
 
@@ -1331,6 +1337,7 @@ Hipace::GetRefRatio (int lev)
 void
 Hipace::FillDiagnostics (const int lev, int i_slice)
 {
+    if (!m_diags.isActive()[lev]) return;
     m_fields.Copy(lev, i_slice, m_diags.getGeom()[lev], m_diags.getF(lev), m_diags.getF(lev).box(),
                   Geom(lev), m_diags.getCompsIdx(), m_diags.getNFields());
 }
@@ -1349,7 +1356,7 @@ Hipace::WriteDiagnostics (int output_step, const int it, const OpenPMDWriterCall
     const amrex::Vector< std::string > beamnames = getDiagBeamNames();
 
 #ifdef HIPACE_USE_OPENPMD
-    m_openpmd_writer.WriteDiagnostics(getDiagF(), m_multi_beam, getDiagGeom(),
+    m_openpmd_writer.WriteDiagnostics(getDiagF(), m_multi_beam, getDiagGeom(), m_diags.isActive(),
                         m_physical_time, output_step, finestLevel()+1, getDiagSliceDir(), varnames, beamnames,
                         it, m_box_sorters, geom, call_type);
 #else
diff --git a/src/diagnostics/Diagnostic.H b/src/diagnostics/Diagnostic.H
index ed32856e01..372a487ae1 100644
--- a/src/diagnostics/Diagnostic.H
+++ b/src/diagnostics/Diagnostic.H
@@ -19,10 +19,8 @@ public:
     /** \brief allocate arrays of this MF
      *
      * \param[in] lev MR level
-     * \param[in] bx Box for initialization
-     * \param[in] geom geometry of the full simulation domain
      */
-    void AllocData (int lev, const amrex::Box& bx, amrex::Geometry const& geom);
+    void AllocData (int lev);
 
     /** \brief return the main diagnostics multifab */
     amrex::Vector<amrex::FArrayBox>& getF () { return m_F; }
@@ -51,21 +49,28 @@ public:
     /** return slice direction of the diagnostics */
     int sliceDir () {return m_slice_dir;}
 
+    /** return whether there is field output to write (one entry per MR level) */
+    std::vector<bool>& isActive () {return m_is_active;}
+
     /** return coarsening ratio of diagnostics */
     amrex::IntVect getCoarsening (int lev) { return m_diag_coarsen[lev]; }
 
-    /** \brief return box which possibly was trimmed in case of slice IO
+    /** \brief trim the boxes to a single cell in the slice direction in case of slice IO
      *
-     * \param[in] box_3d box to be possibly trimmed to a slice box
+     * \param[in,out] box_3d local box to be possibly trimmed to a slice box
+     * \param[in,out] domain_3d domain box to be possibly trimmed to a slice box
+     * \param[in,out] rbox_3d real box to be possibly trimmed to a slice box
      */
-    amrex::Box TrimIOBox (const amrex::Box box_3d);
+    void TrimIOBox (amrex::Box& box_3d, amrex::Box& domain_3d, amrex::RealBox& rbox_3d);
 
     /** \brief resizes the FArrayBox of the diagnostics to the currently calculated box
      *
-     * \param[in] box box to which the FArrayBox of the diagnostics will be resized to
+     * \param[in] local_box box to which the FArrayBox of the diagnostics will be resized
+     * \param[in] domain box to which the Geometry of the diagnostics will be resized
      * \param[in] lev MR level
+     * \param[in] geom geometry of the full simulation domain
      */
-    void ResizeFDiagFAB (const amrex::Box box, const int lev);
+    void ResizeFDiagFAB (amrex::Box local_box, amrex::Box domain, const int lev, amrex::Geometry const& geom);
 
 private:
 
@@ -80,6 +85,8 @@ private:
     amrex::Vector<std::string> m_output_beam_names; /**< Component names to Write to output file */
     int m_nfields; /**< Number of physical fields to write */
     amrex::Vector<amrex::Geometry> m_geom_io; /**< Diagnostics geometry */
+    bool m_include_ghost_cells = false; /**< if ghost cells are included in output */
+    std::vector<bool> m_is_active; /**< if there is field output to write */
 };
 
 #endif // DIAGNOSTIC_H_
diff --git a/src/diagnostics/Diagnostic.cpp b/src/diagnostics/Diagnostic.cpp
index 16778b14e6..fd2e5fb601 100644
--- a/src/diagnostics/Diagnostic.cpp
+++ b/src/diagnostics/Diagnostic.cpp
@@ -5,7 +5,8 @@
 Diagnostic::Diagnostic (int nlev)
     : m_F(nlev),
       m_diag_coarsen(nlev),
-      m_geom_io(nlev)
+      m_geom_io(nlev),
+      m_is_active(nlev)
 {
     amrex::ParmParse ppd("diagnostic");
     std::string str_type;
@@ -23,6 +24,8 @@ Diagnostic::Diagnostic (int nlev)
         amrex::Abort("Unknown diagnostics type: must be xyz, xz or yz.");
     }
 
+    queryWithParser(ppd, "include_ghost_cells", m_include_ghost_cells);
+
     for(int ilev = 0; ilev<nlev; ++ilev) {
         amrex::Array<int,3> diag_coarsen_arr{1,1,1};
         // set all levels the same for now
@@ -93,50 +96,58 @@ Diagnostic::Diagnostic (int nlev)
 }
 
 void
-Diagnostic::AllocData (int lev, const amrex::Box& bx, amrex::Geometry const& geom)
+Diagnostic::AllocData (int lev)
+{
+    // only usable after ResizeFDiagFAB
+    amrex::Box dummy_bx = {{0,0,0}, {0,0,0}};
+    m_F.push_back(amrex::FArrayBox(dummy_bx, m_nfields, amrex::The_Pinned_Arena()));
+}
+
+void
+Diagnostic::ResizeFDiagFAB (amrex::Box local_box, amrex::Box domain, const int lev, amrex::Geometry const& geom)
 {
+    if (m_include_ghost_cells) {
+        local_box.grow(Fields::m_slices_nguards);
+        domain.grow(Fields::m_slices_nguards);
+    }
+
+    amrex::RealBox diag_domain = geom.ProbDomain();
+    for(int dir=0; dir<=2; ++dir) {
+        // make diag_domain the physical extent that corresponds to the index box domain
+        diag_domain.setLo(dir, geom.ProbLo(dir)
+            + (domain.smallEnd(dir) - geom.Domain().smallEnd(dir)) * geom.CellSize(dir));
+        diag_domain.setHi(dir, geom.ProbHi(dir)
+            + (domain.bigEnd(dir) - geom.Domain().bigEnd(dir)) * geom.CellSize(dir));
+    }
     // trim the 3D box to slice box for slice IO
-    amrex::Box F_bx = TrimIOBox(bx);
+    TrimIOBox(local_box, domain, diag_domain);
 
-    F_bx.coarsen(m_diag_coarsen[lev]);
+    local_box.coarsen(m_diag_coarsen[lev]);
+    domain.coarsen(m_diag_coarsen[lev]);
 
-    m_F.push_back(amrex::FArrayBox(F_bx, m_nfields, amrex::The_Pinned_Arena()));
+    m_geom_io[lev] = amrex::Geometry(domain, &diag_domain, geom.Coord());
 
-    m_geom_io[lev] = geom;
-    amrex::RealBox prob_domain = geom.ProbDomain();
-    amrex::Box domain = geom.Domain();
-    // Define slice box
-    if (m_slice_dir >= 0){
-        int const icenter = domain.length(m_slice_dir)/2;
-        domain.setSmall(m_slice_dir, icenter);
-        domain.setBig(m_slice_dir, icenter);
-        m_geom_io[lev] = amrex::Geometry(domain, &prob_domain, geom.Coord());
+    std::cout << " Diag Domain: " << m_geom_io[lev].Domain().smallEnd() << " " << m_geom_io[lev].Domain().bigEnd() << std::endl;
+
+    m_is_active[lev] = local_box.ok();
+
+    if(m_is_active[lev]) {
+        m_F[lev].resize(local_box, m_nfields);
+        m_F[lev].setVal<amrex::RunOn::Device>(0);
     }
-    m_geom_io[lev].coarsen(m_diag_coarsen[lev]);
 }
 
 void
-Diagnostic::ResizeFDiagFAB (const amrex::Box box, const int lev)
+Diagnostic::TrimIOBox (amrex::Box& box_3d, amrex::Box& domain_3d, amrex::RealBox& rbox_3d)
 {
-    amrex::Box io_box = TrimIOBox(box);
-    io_box.coarsen(m_diag_coarsen[lev]);
-    m_F[lev].resize(io_box, m_nfields);
-    m_F[lev].setVal<amrex::RunOn::Device>(0);
-}
-
-amrex::Box
-Diagnostic::TrimIOBox (const amrex::Box box_3d)
-{
-    // Create a xz slice Box
-    amrex::Box slice_bx = box_3d;
     if (m_slice_dir >= 0){
-            // Flatten the box down to 1 cell in the approprate direction.
-            const int idx = box_3d.smallEnd(m_slice_dir) + box_3d.length(m_slice_dir)/2;
-            slice_bx.setSmall(m_slice_dir, idx);
-            slice_bx.setBig  (m_slice_dir, idx);
+        // Flatten the box down to 1 cell in the appropriate direction.
+        box_3d.setSmall(m_slice_dir, 0);
+        box_3d.setBig  (m_slice_dir, 0);
+        domain_3d.setSmall(m_slice_dir, 0);
+        domain_3d.setBig  (m_slice_dir, 0);
+        const amrex::Real mid = (rbox_3d.lo(m_slice_dir) + rbox_3d.hi(m_slice_dir))/2;
+        rbox_3d.setLo(m_slice_dir, mid);
+        rbox_3d.setHi(m_slice_dir, mid);
     }
-    // m_F is defined on F_bx, the full or the slice Box
-    amrex::Box F_bx = m_slice_dir >= 0 ? slice_bx : box_3d;
-
-    return F_bx;
 }
diff --git a/src/diagnostics/OpenPMDWriter.H b/src/diagnostics/OpenPMDWriter.H
index 3d3b5a3ae5..f14e460971 100644
--- a/src/diagnostics/OpenPMDWriter.H
+++ b/src/diagnostics/OpenPMDWriter.H
@@ -128,6 +128,7 @@ public:
      * \param[in] a_mf Vector (levels) of FArrayBoxes
      * \param[in] a_multi_beams multi beam container which is written to openPMD file
      * \param[in] geom Geometry of the simulation, to get the cell size etc.
+     * \param[in] write_fields if there are fields to write
      * \param[in] physical_time Physical time of the currenerationt it.
      * \param[in] output_step current iteration to be written to file
      * \param[in] nlev number of MR levels
@@ -141,7 +142,7 @@ public:
      */
     void WriteDiagnostics(
         amrex::Vector<amrex::FArrayBox>& a_mf, MultiBeam& a_multi_beam,
-        amrex::Vector<amrex::Geometry> const& geom,
+        amrex::Vector<amrex::Geometry> const& geom, std::vector<bool> const& write_fields,
         const amrex::Real physical_time, const int output_step, const int nlev,
         const int slice_dir, const amrex::Vector< std::string > varnames,
         const amrex::Vector< std::string > beamnames, const int it,
diff --git a/src/diagnostics/OpenPMDWriter.cpp b/src/diagnostics/OpenPMDWriter.cpp
index 3f3ff8cd38..0ee85a16bc 100644
--- a/src/diagnostics/OpenPMDWriter.cpp
+++ b/src/diagnostics/OpenPMDWriter.cpp
@@ -66,7 +66,7 @@ OpenPMDWriter::InitDiagnostics (const int output_step, const int output_period,
 void
 OpenPMDWriter::WriteDiagnostics (
     amrex::Vector<amrex::FArrayBox>& a_mf, MultiBeam& a_multi_beam,
-    amrex::Vector<amrex::Geometry> const& geom,
+    amrex::Vector<amrex::Geometry> const& geom, std::vector<bool> const& write_fields,
     const amrex::Real physical_time, const int output_step, const int nlev,
     const int slice_dir, const amrex::Vector< std::string > varnames,
     const amrex::Vector< std::string > beamnames, const int it,
@@ -83,10 +83,9 @@ OpenPMDWriter::WriteDiagnostics (
             }
             m_outputSeries[lev]->flush();
 
-        } else if (call_type == OpenPMDWriterCallType::fields ) {
+        } else if (call_type == OpenPMDWriterCallType::fields && write_fields[lev]) {
             WriteFieldData(a_mf[lev], geom, slice_dir, varnames, iteration, output_step, lev);
             m_outputSeries[lev]->flush();
-            a_mf[lev].setVal<amrex::RunOn::Device>(0);
             m_last_output_dumped[lev] = output_step;
         }
     }
@@ -120,11 +119,10 @@ OpenPMDWriter::WriteFieldData (
         //   labels, spacing and offsets
         std::vector< std::string > axisLabels {"z", "y", "x"};
         auto dCells = utils::getReversedVec(geom[lev].CellSize()); // dx, dy, dz
-        amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> correct_problo = GetDomainLev(geom[lev], data_box, 1, lev);
         amrex::Vector<double> finalproblo = {AMREX_D_DECL(
                      static_cast<double>(geom[lev].ProbLo()[2]),
-                     static_cast<double>(correct_problo[1]),
-                     static_cast<double>(correct_problo[0])
+                     static_cast<double>(geom[lev].ProbLo()[1]),
+                     static_cast<double>(geom[lev].ProbLo()[0])
                       )};
         auto offWindow = finalproblo;
         if (slice_dir >= 0) {

From 6f82a70135e8dd354f7a53cd1ddda18b0255cf6f Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 26 Jan 2022 07:21:10 +0100
Subject: [PATCH 25/52] reduce time used for Copy

---
 src/fields/Fields.H   |  2 ++
 src/fields/Fields.cpp | 35 +++++++++++++++++++++++++++++------
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index aa626d4348..df3636f49a 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -301,6 +301,8 @@ private:
     bool m_do_dirichlet_poisson = true;
     /** Temporary density arrays. one per OpenMP thread, used when tiling is on. */
     amrex::Vector<amrex::FArrayBox> m_tmp_densities;
+    /** Stores temporary values for z interpolation in Fields::Copy */
+    amrex::Gpu::DeviceVector<amrex::Real> m_rel_z_vec;
 };
 
 #endif
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 217565d100..67c2cae0c7 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -294,33 +294,56 @@ Fields::Copy (const int lev, const int i_slice, const amrex::Geometry& diag_geom
     const int k_min = static_cast<int>(amrex::Math::round((pos_slice_min - poff_diag_z)/diag_geom.CellSize(2)));
     const int k_max = static_cast<int>(amrex::Math::round((pos_slice_max - poff_diag_z)/diag_geom.CellSize(2)));
 
-    amrex::Gpu::DeviceVector<amrex::Real> rel_z_vec(k_max+1-k_min, 0.);
+    m_rel_z_vec.resize(k_max+1-k_min);
 
-    for (int k=k_min; k<=k_max; ++k ) {
+    for (int k=k_min; k<=k_max; ++k) {
         amrex::Real pos = k * diag_geom.CellSize(2) + poff_diag_z;
         amrex::Real mid_i_slice = (pos - poff_calc_z)/calc_geom.CellSize(2);
         amrex::Real sz_cell[depos_order_z + 1];
         int k_cell = compute_shape_factor<depos_order_z>(sz_cell, mid_i_slice);
+        m_rel_z_vec[k-k_min] = 0;
         for (int i=0; i<=depos_order_z; ++i) {
             if (k_cell+i == i_slice) {
-                rel_z_vec[k-k_min] = sz_cell[i];
+                m_rel_z_vec[k-k_min] = sz_cell[i];
             }
         }
     }
 
-    diag_box.setSmall(2, amrex::max(diag_box.smallEnd(2), k_min));
-    diag_box.setBig(2, amrex::min(diag_box.bigEnd(2), k_max));
+    int k_start = k_min;
+    int k_stop = k_max;
+
+    for (int k=k_min; k<=k_max; ++k) {
+        if (m_rel_z_vec[k-k_min] == 0) ++k_start;
+        else break;
+    }
+
+    for (int k=k_max; k>=k_min; --k) {
+        if (m_rel_z_vec[k-k_min] == 0) --k_stop;
+        else break;
+    }
+
+    diag_box.setSmall(2, amrex::max(diag_box.smallEnd(2), k_start));
+    diag_box.setBig(2, amrex::min(diag_box.bigEnd(2), k_stop));
+
+    if (diag_box.isEmpty()) return;
 
     auto& slice_mf = m_slices[lev][WhichSlice::This];
 
     auto slice_func = interpolated_field_xy<depos_order_xy, guarded_field>{slice_mf, calc_geom};
 
+    std::cout << "Copy rel_z:";
+    for(int k=k_start; k<=k_stop; ++k) {
+        std::cout << " (" << k << ", " << m_rel_z_vec[k-k_min] << ")";
+    }
+    std::cout << std::endl;
+
+
     for (amrex::MFIter mfi(slice_mf); mfi.isValid(); ++mfi) {
         auto slice_array = slice_func.array(mfi);
         amrex::Array4<amrex::Real> diag_array = diag_fab.array();
 
         const int *diag_comps = diag_comps_vect.data();
-        const amrex::Real *rel_z_data = rel_z_vec.data();
+        const amrex::Real *rel_z_data = m_rel_z_vec.data();
         const int lo2 = slice_mf[mfi].box().smallEnd(2);
         const amrex::Real dx = diag_geom.CellSize(0);
         const amrex::Real dy = diag_geom.CellSize(1);

From c3f7be6a11e29359cf0d7fefa81b8f45355daade Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 26 Jan 2022 22:28:31 +0100
Subject: [PATCH 26/52] clean up

---
 src/Hipace.cpp                    | 20 +++++++++++-------
 src/diagnostics/Diagnostic.H      |  7 ++++---
 src/diagnostics/Diagnostic.cpp    | 15 +++++++------
 src/diagnostics/OpenPMDWriter.H   |  2 +-
 src/diagnostics/OpenPMDWriter.cpp |  5 +++--
 src/fields/Fields.cpp             | 35 ++++++++++++++-----------------
 6 files changed, 43 insertions(+), 41 deletions(-)

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index 58cc5b1018..cafe1d5e1c 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -1307,10 +1307,13 @@ Hipace::ResizeFDiagFAB (const int it)
         amrex::Box domain = boxArray(lev).minimalBox();
 
         if (lev == 1) {
+            // boxArray(1) is not correct in z direction. We need to manually enforce a
+            // parent/child relationship between lev_0 and lev_1 boxes in z
             const amrex::Box& bx_lev0 = boxArray(0)[it];
             const int ref_ratio_z = GetRefRatio(lev)[Direction::z];
 
-            domain.setBig(Direction::z, domain.bigEnd(Direction::z) - ref_ratio_z); // ???
+            // This seems to be required for some reason
+            domain.setBig(Direction::z, domain.bigEnd(Direction::z) - ref_ratio_z);
 
             // Ensuring the IO boxes on level 1 are aligned with the boxes on level 0
             local_box.setSmall(Direction::z, amrex::max(domain.smallEnd(Direction::z),
@@ -1319,7 +1322,6 @@ Hipace::ResizeFDiagFAB (const int it)
                                ref_ratio_z*bx_lev0.bigEnd(Direction::z)+(ref_ratio_z-1)));
         }
 
-        std::cout << "lev: " << lev << "\nGeom: " << Geom(lev) << "\nlocal_box: " <<  local_box.smallEnd() << " " << local_box.bigEnd() << "\ndomain: " <<  domain.smallEnd() << " " << domain.bigEnd();
         m_diags.ResizeFDiagFAB(local_box, domain, lev, Geom(lev));
     }
 }
@@ -1337,9 +1339,11 @@ Hipace::GetRefRatio (int lev)
 void
 Hipace::FillDiagnostics (const int lev, int i_slice)
 {
-    if (!m_diags.isActive()[lev]) return;
-    m_fields.Copy(lev, i_slice, m_diags.getGeom()[lev], m_diags.getF(lev), m_diags.getF(lev).box(),
-                  Geom(lev), m_diags.getCompsIdx(), m_diags.getNFields());
+    if (m_diags.hasField()[lev]) {
+        m_fields.Copy(lev, i_slice, m_diags.getGeom()[lev], m_diags.getF(lev),
+                      m_diags.getF(lev).box(), Geom(lev),
+                      m_diags.getCompsIdx(), m_diags.getNFields());
+    }
 }
 
 void
@@ -1356,9 +1360,9 @@ Hipace::WriteDiagnostics (int output_step, const int it, const OpenPMDWriterCall
     const amrex::Vector< std::string > beamnames = getDiagBeamNames();
 
 #ifdef HIPACE_USE_OPENPMD
-    m_openpmd_writer.WriteDiagnostics(getDiagF(), m_multi_beam, getDiagGeom(), m_diags.isActive(),
-                        m_physical_time, output_step, finestLevel()+1, getDiagSliceDir(), varnames, beamnames,
-                        it, m_box_sorters, geom, call_type);
+    m_openpmd_writer.WriteDiagnostics(getDiagF(), m_multi_beam, getDiagGeom(), m_diags.hasField(),
+                        m_physical_time, output_step, finestLevel()+1, getDiagSliceDir(), varnames,
+                        beamnames, it, m_box_sorters, geom, call_type);
 #else
     amrex::ignore_unused(it, call_type);
     amrex::Print()<<"WARNING: HiPACE++ compiled without openPMD support, the simulation has no I/O.\n";
diff --git a/src/diagnostics/Diagnostic.H b/src/diagnostics/Diagnostic.H
index 372a487ae1..5fa6f14de6 100644
--- a/src/diagnostics/Diagnostic.H
+++ b/src/diagnostics/Diagnostic.H
@@ -50,7 +50,7 @@ public:
     int sliceDir () {return m_slice_dir;}
 
     /**< return if there is field output to write */
-    std::vector<bool>& isActive () {return m_is_active;}
+    std::vector<bool>& hasField () {return m_has_field;}
 
     /** return coarsening ratio of diagnostics */
     amrex::IntVect getCoarsening (int lev) { return m_diag_coarsen[lev]; }
@@ -70,7 +70,8 @@ public:
      * \param[in] lev MR level
      * \param[in] geom geometry of the full simulation domain
      */
-    void ResizeFDiagFAB (amrex::Box local_box, amrex::Box domain, const int lev, amrex::Geometry const& geom);
+    void ResizeFDiagFAB (amrex::Box local_box, amrex::Box domain, const int lev,
+                         amrex::Geometry const& geom);
 
 private:
 
@@ -86,7 +87,7 @@ private:
     int m_nfields; /**< Number of physical fields to write */
     amrex::Vector<amrex::Geometry> m_geom_io; /**< Diagnostics geometry */
     bool m_include_ghost_cells = false; /**< if ghost cells are included in output */
-    std::vector<bool> m_is_active; /**< if there is field output to write */
+    std::vector<bool> m_has_field; /**< if there is field output to write */
 };
 
 #endif // DIAGNOSTIC_H_
diff --git a/src/diagnostics/Diagnostic.cpp b/src/diagnostics/Diagnostic.cpp
index fd2e5fb601..8adbf47938 100644
--- a/src/diagnostics/Diagnostic.cpp
+++ b/src/diagnostics/Diagnostic.cpp
@@ -6,7 +6,7 @@ Diagnostic::Diagnostic (int nlev)
     : m_F(nlev),
       m_diag_coarsen(nlev),
       m_geom_io(nlev),
-      m_is_active(nlev)
+      m_has_field(nlev)
 {
     amrex::ParmParse ppd("diagnostic");
     std::string str_type;
@@ -99,12 +99,13 @@ void
 Diagnostic::AllocData (int lev)
 {
     // only usable after ResizeFDiagFAB
-    amrex::Box dummy_bx = {{0,0,0}, {0,0,0}};
+    const amrex::Box dummy_bx = {{0,0,0}, {0,0,0}};
     m_F.push_back(amrex::FArrayBox(dummy_bx, m_nfields, amrex::The_Pinned_Arena()));
 }
 
 void
-Diagnostic::ResizeFDiagFAB (amrex::Box local_box, amrex::Box domain, const int lev, amrex::Geometry const& geom)
+Diagnostic::ResizeFDiagFAB (amrex::Box local_box, amrex::Box domain, const int lev,
+                            amrex::Geometry const& geom)
 {
     if (m_include_ghost_cells) {
         local_box.grow(Fields::m_slices_nguards);
@@ -127,13 +128,11 @@ Diagnostic::ResizeFDiagFAB (amrex::Box local_box, amrex::Box domain, const int l
 
     m_geom_io[lev] = amrex::Geometry(domain, &diag_domain, geom.Coord());
 
-    std::cout << " Diag Domain: " << m_geom_io[lev].Domain().smallEnd() << " " << m_geom_io[lev].Domain().bigEnd() << std::endl;
+    m_has_field[lev] = local_box.ok();
 
-    m_is_active[lev] = local_box.ok();
-
-    if(m_is_active[lev]) {
+    if(m_has_field[lev]) {
         m_F[lev].resize(local_box, m_nfields);
-        m_F[lev].setVal<amrex::RunOn::Device>(0);
+        m_F[lev].setVal<amrex::RunOn::Host>(0);
     }
 }
 
diff --git a/src/diagnostics/OpenPMDWriter.H b/src/diagnostics/OpenPMDWriter.H
index f14e460971..799d72d734 100644
--- a/src/diagnostics/OpenPMDWriter.H
+++ b/src/diagnostics/OpenPMDWriter.H
@@ -141,7 +141,7 @@ public:
      * \param[in] call_type whether the beams or the fields should be written to file
      */
     void WriteDiagnostics(
-        amrex::Vector<amrex::FArrayBox>& a_mf, MultiBeam& a_multi_beam,
+        amrex::Vector<amrex::FArrayBox> const& a_mf, MultiBeam& a_multi_beam,
         amrex::Vector<amrex::Geometry> const& geom, std::vector<bool> const& write_fields,
         const amrex::Real physical_time, const int output_step, const int nlev,
         const int slice_dir, const amrex::Vector< std::string > varnames,
diff --git a/src/diagnostics/OpenPMDWriter.cpp b/src/diagnostics/OpenPMDWriter.cpp
index 0ee85a16bc..1640eb8706 100644
--- a/src/diagnostics/OpenPMDWriter.cpp
+++ b/src/diagnostics/OpenPMDWriter.cpp
@@ -65,7 +65,7 @@ OpenPMDWriter::InitDiagnostics (const int output_step, const int output_period,
 
 void
 OpenPMDWriter::WriteDiagnostics (
-    amrex::Vector<amrex::FArrayBox>& a_mf, MultiBeam& a_multi_beam,
+    amrex::Vector<amrex::FArrayBox> const& a_mf, MultiBeam& a_multi_beam,
     amrex::Vector<amrex::Geometry> const& geom, std::vector<bool> const& write_fields,
     const amrex::Real physical_time, const int output_step, const int nlev,
     const int slice_dir, const amrex::Vector< std::string > varnames,
@@ -79,7 +79,8 @@ OpenPMDWriter::WriteDiagnostics (
         if (call_type == OpenPMDWriterCallType::beams ) {
             iteration.setTime(physical_time);
             if (lev == 0) {
-                WriteBeamParticleData(a_multi_beam, iteration, output_step, it, a_box_sorter_vec, geom3D[lev], beamnames, lev);
+                WriteBeamParticleData(a_multi_beam, iteration, output_step, it, a_box_sorter_vec,
+                                      geom3D[lev], beamnames, lev);
             }
             m_outputSeries[lev]->flush();
 
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 7006140ee6..48a2b372f7 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -217,6 +217,7 @@ template<int interp_order_xy>
 using interpolated_field_xyz = interpolated_field_xy<interp_order_xy, interpolated_field_z>;
 
 struct guarded_field_inner {
+    // captured variables for GPU
     amrex::Array4<amrex::Real const> array;
     amrex::Box bx;
 
@@ -230,8 +231,10 @@ struct guarded_field_inner {
 };
 
 struct guarded_field {
-    amrex::MultiFab& mfab;
+    // use brace initialization as constructor
+    amrex::MultiFab& mfab; // field to be guarded (zero extended)
 
+    // use .array(mfi) like with amrex::MultiFab
     guarded_field_inner array (amrex::MFIter& mfi) const {
         return guarded_field_inner{mfab.array(mfi), mfab[mfi].box()};
     }
@@ -285,21 +288,24 @@ Fields::Copy (const int lev, const int i_slice, const amrex::Geometry& diag_geom
     const amrex::Real poff_diag_y = GetPosOffset(1, diag_geom, diag_geom.Domain());
     const amrex::Real poff_diag_z = GetPosOffset(2, diag_geom, diag_geom.Domain());
 
+    // Interpolation in z Direction, done as if looped over diag_fab not i_slice
+    // Calculate to which diag_fab slices this slice could contribute
     const int i_slice_min = i_slice - depos_order_offset;
     const int i_slice_max = i_slice + depos_order_offset;
-
     const amrex::Real pos_slice_min = i_slice_min * calc_geom.CellSize(2) + poff_calc_z;
     const amrex::Real pos_slice_max = i_slice_max * calc_geom.CellSize(2) + poff_calc_z;
-    const int k_min = static_cast<int>(amrex::Math::round((pos_slice_min - poff_diag_z)/diag_geom.CellSize(2)));
-    const int k_max = static_cast<int>(amrex::Math::round((pos_slice_max - poff_diag_z)/diag_geom.CellSize(2)));
+    const int k_min = static_cast<int>(amrex::Math::round((pos_slice_min - poff_diag_z)
+                                                          / diag_geom.CellSize(2)));
+    const int k_max = static_cast<int>(amrex::Math::round((pos_slice_max - poff_diag_z)
+                                                          / diag_geom.CellSize(2)));
 
+    // Put contributions from i_slice to different diag_fab slices in GPU vector
     m_rel_z_vec.resize(k_max+1-k_min);
-
     for (int k=k_min; k<=k_max; ++k) {
-        amrex::Real pos = k * diag_geom.CellSize(2) + poff_diag_z;
-        amrex::Real mid_i_slice = (pos - poff_calc_z)/calc_geom.CellSize(2);
+        const amrex::Real pos = k * diag_geom.CellSize(2) + poff_diag_z;
+        const amrex::Real mid_i_slice = (pos - poff_calc_z)/calc_geom.CellSize(2);
         amrex::Real sz_cell[depos_order_z + 1];
-        int k_cell = compute_shape_factor<depos_order_z>(sz_cell, mid_i_slice);
+        const int k_cell = compute_shape_factor<depos_order_z>(sz_cell, mid_i_slice);
         m_rel_z_vec[k-k_min] = 0;
         for (int i=0; i<=depos_order_z; ++i) {
             if (k_cell+i == i_slice) {
@@ -308,34 +314,25 @@ Fields::Copy (const int lev, const int i_slice, const amrex::Geometry& diag_geom
         }
     }
 
+    // Optimization: don't loop over diag_fab slices with 0 contribution
     int k_start = k_min;
     int k_stop = k_max;
-
     for (int k=k_min; k<=k_max; ++k) {
         if (m_rel_z_vec[k-k_min] == 0) ++k_start;
         else break;
     }
-
     for (int k=k_max; k>=k_min; --k) {
         if (m_rel_z_vec[k-k_min] == 0) --k_stop;
         else break;
     }
-
     diag_box.setSmall(2, amrex::max(diag_box.smallEnd(2), k_start));
     diag_box.setBig(2, amrex::min(diag_box.bigEnd(2), k_stop));
-
     if (diag_box.isEmpty()) return;
 
     auto& slice_mf = m_slices[lev][WhichSlice::This];
-
     auto slice_func = interpolated_field_xy<depos_order_xy, guarded_field>{slice_mf, calc_geom};
 
-    std::cout << "Copy rel_z:";
-    for(int k=k_start; k<=k_stop; ++k) {
-        std::cout << " (" << k << ", " << m_rel_z_vec[k-k_min] << ")";
-    }
-    std::cout << std::endl;
-
+    // Finally actual kernel: Interpolation in x, y, z of zero-extended fields
     for (amrex::MFIter mfi(slice_mf); mfi.isValid(); ++mfi) {
         auto slice_array = slice_func.array(mfi);
         amrex::Array4<amrex::Real> diag_array = diag_fab.array();

From a77a335e20876ab7c59df2464157d25d4f87f874 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 26 Jan 2022 23:03:55 +0100
Subject: [PATCH 27/52] add doc

---
 docs/source/run/parameters.rst | 7 +++++--
 src/fields/Fields.cpp          | 1 -
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/source/run/parameters.rst b/docs/source/run/parameters.rst
index 4f31bfb455..c66f237ccc 100644
--- a/docs/source/run/parameters.rst
+++ b/docs/source/run/parameters.rst
@@ -357,8 +357,11 @@ Diagnostic parameters
     between the two inner grid points.
 
 * ``diagnostic.coarsening`` (3 `int`) optional (default `1 1 1`)
-    Coarsening ratio of field output in x, y and z direction respectively. For x and y, the
-    value is subsampled at the cell center. For z, the slice nearest to the cell center is taken.
+    Coarsening ratio of field output in x, y and z direction respectively. The coarsened output is
+    obtained through first order interpolation.
+
+* ``diagnostic.include_ghost_cells`` (`bool`) optional (default `0`)
+    Whether the field diagnostics should include ghost cells.
 
 * ``diagnostic.field_data`` (`string`) optional (default `all`)
     Names of the fields written to file, separated by a space. The field names need to be `all`,
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 48a2b372f7..38b3062325 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -354,7 +354,6 @@ Fields::Copy (const int lev, const int i_slice, const amrex::Geometry& diag_geom
     }
 }
 
-
 void
 Fields::ShiftSlices (int nlev, int islice, amrex::Geometry geom, amrex::Real patch_lo,
                      amrex::Real patch_hi)

From 1368e591c343d0a1f3c907423fff03e198fe567c Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Fri, 28 Jan 2022 05:59:27 +0100
Subject: [PATCH 28/52] add open boundary

---
 src/Hipace.cpp            |  39 +++++++------
 src/fields/Fields.H       |   9 +++
 src/fields/Fields.cpp     | 117 +++++++++++++++++++++++++++++---------
 src/fields/OpenBoundary.H | 115 +++++++++++++++++++++++++++++++++++++
 4 files changed, 238 insertions(+), 42 deletions(-)
 create mode 100644 src/fields/OpenBoundary.H

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index cafe1d5e1c..f2c62bb2f6 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -552,7 +552,7 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                  ijz == ijx+4 && ijz_beam == ijx+5 && irho == ijx+6 );
             amrex::MultiFab j_slice(m_fields.getSlices(lev, WhichSlice::This),
                                     amrex::make_alias, Comps[WhichSlice::This]["jx"], 7);
-            j_slice.FillBoundary(Geom(lev).periodicity());
+            if (!Fields::m_extended_solve) j_slice.FillBoundary(Geom(lev).periodicity());
 
             m_fields.SolvePoissonExmByAndEypBx(Geom(), m_comm_xy, lev, islice);
 
@@ -562,7 +562,7 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                              WhichSlice::This);
             m_fields.AddBeamCurrents(lev, WhichSlice::This);
 
-            j_slice.FillBoundary(Geom(lev).periodicity());
+            if (!Fields::m_extended_solve) j_slice.FillBoundary(Geom(lev).periodicity());
 
             m_fields.SolvePoissonEz(Geom(), lev, islice);
             m_fields.SolvePoissonBz(Geom(), lev, islice);
@@ -853,10 +853,13 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
 
     /* Guess Bx and By */
     m_fields.InitialBfieldGuess(relative_Bfield_error, m_predcorr_B_error_tolerance, lev);
-    amrex::ParallelContext::push(m_comm_xy);
-     // exchange ExmBy EypBx Ez Bx By Bz
-    m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
-    amrex::ParallelContext::pop();
+
+    if (!Fields::m_extended_solve) {
+        amrex::ParallelContext::push(m_comm_xy);
+        // exchange ExmBy EypBx Ez Bx By Bz
+        m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
+        amrex::ParallelContext::pop();
+    }
 
     /* creating temporary Bx and By arrays for the current and previous iteration */
     amrex::MultiFab Bx_iter(m_fields.getSlices(lev, WhichSlice::This).boxArray(),
@@ -917,12 +920,14 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
                                          ibox, m_do_beam_jx_jy_deposition, WhichSlice::Next);
         m_fields.AddBeamCurrents(lev, WhichSlice::Next);
 
-        amrex::ParallelContext::push(m_comm_xy);
-        // need to exchange jx jy jx_beam jy_beam
-        amrex::MultiFab j_slice_next(m_fields.getSlices(lev, WhichSlice::Next),
-                                     amrex::make_alias, Comps[WhichSlice::Next]["jx"], 4);
-        j_slice_next.FillBoundary(Geom(lev).periodicity());
-        amrex::ParallelContext::pop();
+        if (!Fields::m_extended_solve) {
+            amrex::ParallelContext::push(m_comm_xy);
+            // need to exchange jx jy jx_beam jy_beam
+            amrex::MultiFab j_slice_next(m_fields.getSlices(lev, WhichSlice::Next),
+                                         amrex::make_alias, Comps[WhichSlice::Next]["jx"], 4);
+            j_slice_next.FillBoundary(Geom(lev).periodicity());
+            amrex::ParallelContext::pop();
+        }
 
         /* Calculate Bx and By */
         m_fields.SolvePoissonBx(Bx_iter, Geom(), lev, islice);
@@ -951,10 +956,12 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
         jx_beam_next.setVal(0., m_fields.m_slices_nguards);
         jy_beam_next.setVal(0., m_fields.m_slices_nguards);
 
-        amrex::ParallelContext::push(m_comm_xy);
-         // exchange Bx By
-        m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
-        amrex::ParallelContext::pop();
+        if (!Fields::m_extended_solve) {
+            amrex::ParallelContext::push(m_comm_xy);
+            // exchange Bx By
+            m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
+            amrex::ParallelContext::pop();
+        }
 
         /* Update force terms using the calculated Bx and By */
         m_multi_plasma.AdvanceParticles(m_fields, geom[lev], false, false, true, false, lev);
diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index df3636f49a..cb212798bb 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -294,6 +294,13 @@ public:
     static amrex::IntVect m_slices_nguards;
     /** Number of guard cells for poisson solver MultiFab */
     static amrex::IntVect m_poisson_nguards;
+    /** Number of guard cells where ExmBy and EypBx are calculated */
+    static amrex::IntVect m_exmby_eypbx_grow;
+    /** If the poisson solver should include the guard cells */
+    static bool m_extended_solve;
+    /** If lev_0 should be solved with open boundary conditions */
+    static bool m_open_boundary;
+
 private:
     /** Vector over levels, array of 4 slices required to compute current slice */
     amrex::Vector<std::array<amrex::MultiFab, m_nslices>> m_slices;
@@ -303,6 +310,8 @@ private:
     amrex::Vector<amrex::FArrayBox> m_tmp_densities;
     /** Stores temporary values for z interpolation in Fields::Copy */
     amrex::Gpu::DeviceVector<amrex::Real> m_rel_z_vec;
+    /** Stores multipole coefficients for open boundaries */
+    amrex::Gpu::DeviceVector<amrex::Real> m_multipole_coeffs;
 };
 
 #endif
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 38b3062325..7690c3bd5b 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -2,18 +2,24 @@
 #include "fft_poisson_solver/FFTPoissonSolverPeriodic.H"
 #include "fft_poisson_solver/FFTPoissonSolverDirichlet.H"
 #include "Hipace.H"
+#include "OpenBoundary.H"
 #include "utils/HipaceProfilerWrapper.H"
 #include "utils/Constants.H"
 #include "particles/ShapeFactors.H"
 
 amrex::IntVect Fields::m_slices_nguards = {-1, -1, -1};
 amrex::IntVect Fields::m_poisson_nguards = {-1, -1, -1};
+amrex::IntVect Fields::m_exmby_eypbx_grow = {-1, -1, -1};
+bool Fields::m_extended_solve = false;
+bool Fields::m_open_boundary = false;
 
 Fields::Fields (Hipace const* a_hipace)
     : m_slices(a_hipace->maxLevel()+1)
 {
     amrex::ParmParse ppf("fields");
     queryWithParser(ppf, "do_dirichlet_poisson", m_do_dirichlet_poisson);
+    queryWithParser(ppf, "extended_solve", m_extended_solve);
+    queryWithParser(ppf, "open_boundary", m_open_boundary);
 }
 
 void
@@ -25,11 +31,20 @@ Fields::AllocData (
     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(slice_ba.size() == 1,
         "Parallel field solvers not supported yet");
 
-    // Need 1 extra guard cell transversally for transverse derivative
-    int nguards_xy = std::max(1, Hipace::m_depos_order_xy);
-    m_slices_nguards = {nguards_xy, nguards_xy, 0};
-    // Poisson solver same size as domain, no ghost cells
-    m_poisson_nguards = {0, 0, 0};
+    if (m_extended_solve) {
+        // Need 1 extra guard cell transversally for transverse derivative
+        int nguards_xy = (Hipace::m_depos_order_xy + 1) / 2 + 1;
+        m_slices_nguards = {nguards_xy, nguards_xy, 0};
+        m_poisson_nguards = m_slices_nguards;
+        m_exmby_eypbx_grow = m_slices_nguards - amrex::IntVect{1, 1, 0};
+    } else {
+        // Need 1 extra guard cell transversally for transverse derivative
+        int nguards_xy = std::max(1, Hipace::m_depos_order_xy);
+        m_slices_nguards = {nguards_xy, nguards_xy, 0};
+        // Poisson solver same size as domain, no ghost cells
+        m_poisson_nguards = {0, 0, 0};
+        m_exmby_eypbx_grow = {0, 0, 0};
+    }
 
     for (int islice=0; islice<WhichSlice::N; islice++) {
         m_slices[lev][islice].define(
@@ -480,28 +495,76 @@ void
 Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const int lev,
                               std::string component, const int islice)
 {
-    if (lev == 0) return; // keep lev==0 boundaries zero
     HIPACE_PROFILE("Fields::SetBoundaryCondition()");
     using namespace amrex::literals;
-    constexpr int interp_order = 2;
+    if (lev == 0 && m_open_boundary) {
+        m_multipole_coeffs.resize(37);
+        for (amrex::Real& coeff : m_multipole_coeffs) {
+            coeff = 0;
+        }
 
-    const amrex::Real ref_ratio_z = Hipace::GetRefRatio(lev)[2];
-    const amrex::Real islice_coarse = (islice + 0.5_rt) / ref_ratio_z;
-    const amrex::Real rel_z = islice_coarse - static_cast<int>(amrex::Math::floor(islice_coarse));
+        amrex::MultiFab staging_area = getStagingArea(lev);
+        for (amrex::MFIter mfi(staging_area, false); mfi.isValid(); ++mfi)
+        {
+            const auto arr_staging_area = staging_area.array(mfi);
+            const amrex::Box staging_box = staging_area[mfi].box();
+            const int lo2 = staging_box.smallEnd(2);
+            amrex::Real * coeffs_ptr = m_multipole_coeffs.data();
+
+            const amrex::Real poff_x = GetPosOffset(0, geom[lev], staging_box);
+            const amrex::Real poff_y = GetPosOffset(1, geom[lev], staging_box);
+            const amrex::Real dx = geom[lev].CellSize(0);
+            const amrex::Real dy = geom[lev].CellSize(1);
+            const amrex::Real scale = 3._rt/std::sqrt(geom[lev].ProbLength(0)*
+                geom[lev].ProbLength(0) + geom[lev].ProbLength(1)*geom[lev].ProbLength(1));
+
+            amrex::ParallelFor(amrex::Gpu::KernelInfo().setReduction(true), staging_box,
+                [=] AMREX_GPU_DEVICE (int i, int j, int, amrex::Gpu::Handler const& handler) noexcept
+                {
+                    amrex::Real x = (i * dx + poff_x) * scale;
+                    amrex::Real y = (j * dy + poff_y) * scale;
+                    amrex::Real s_v = arr_staging_area(i, j, lo2);
+                    auto coeffs = GetMultipoleCoeffs<amrex::GpuArray<amrex::Real, 37>>(s_v, x, y);
+
+                    for (int n=0; n<37; ++n) {
+                        amrex::Gpu::deviceReduceSum(coeffs_ptr + n, coeffs[n], handler);
+                    }
+                }
+            );
+            amrex::Gpu::Device::synchronize();
 
-    auto solution_interp = interpolated_field_xyz<interp_order>{
-        getField(lev-1, WhichSlice::This, component),
-        getField(lev-1, WhichSlice::Previous1, component),
-        rel_z, geom[lev-1]};
-    amrex::MultiFab staging_area = getStagingArea(lev);
+            std::cout << "Mcoeff: " << m_multipole_coeffs[0] << " " << m_multipole_coeffs[1] << " " << m_multipole_coeffs[2];
+            std::cout << std::endl;
 
-    for (amrex::MFIter mfi(staging_area, false); mfi.isValid(); ++mfi)
-    {
-        const auto arr_solution_interp = solution_interp.array(mfi);
-        const auto arr_staging_area = staging_area.array(mfi);
-        const amrex::Box fine_staging_box = staging_area[mfi].box();
+            SetDirichletBoundaries(arr_staging_area, staging_box, geom[lev],
+                [=] AMREX_GPU_DEVICE (amrex::Real x, amrex::Real y) noexcept
+                {
+                    return dx*dy*GetFieldMultipole(coeffs_ptr, x*scale, y*scale);
+                }
+            );
+        }
+
+    } else if (lev == 1) {
+        constexpr int interp_order = 2;
+
+        const amrex::Real ref_ratio_z = Hipace::GetRefRatio(lev)[2];
+        const amrex::Real islice_coarse = (islice + 0.5_rt) / ref_ratio_z;
+        const amrex::Real rel_z = islice_coarse-static_cast<int>(amrex::Math::floor(islice_coarse));
 
-        SetDirichletBoundaries(arr_staging_area, fine_staging_box, geom[lev], arr_solution_interp);
+        auto solution_interp = interpolated_field_xyz<interp_order>{
+            getField(lev-1, WhichSlice::This, component),
+            getField(lev-1, WhichSlice::Previous1, component),
+            rel_z, geom[lev-1]};
+        amrex::MultiFab staging_area = getStagingArea(lev);
+
+        for (amrex::MFIter mfi(staging_area, false); mfi.isValid(); ++mfi)
+        {
+            const auto arr_solution_interp = solution_interp.array(mfi);
+            const auto arr_staging_area = staging_area.array(mfi);
+            const amrex::Box fine_staging_box = staging_area[mfi].box();
+
+            SetDirichletBoundaries(arr_staging_area,fine_staging_box,geom[lev],arr_solution_interp);
+        }
     }
 }
 
@@ -584,10 +647,12 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
     SetBoundaryCondition(geom, lev, "Psi", islice);
     m_poisson_solver[lev]->SolvePoissonEquation(lhs);
 
-    /* ---------- Transverse FillBoundary Psi ---------- */
-    amrex::ParallelContext::push(m_comm_xy);
-    lhs.FillBoundary(geom[lev].periodicity());
-    amrex::ParallelContext::pop();
+    if (!m_extended_solve) {
+        /* ---------- Transverse FillBoundary Psi ---------- */
+        amrex::ParallelContext::push(m_comm_xy);
+        lhs.FillBoundary(geom[lev].periodicity());
+        amrex::ParallelContext::pop();
+    }
 
     InterpolateFromLev0toLev1(geom, lev, "Psi", islice, m_slices_nguards, m_poisson_nguards);
 
@@ -604,7 +669,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
         const amrex::Array4<amrex::Real> array_EypBx = f_EypBx.array(mfi);
         const amrex::Array4<amrex::Real const> array_Psi = f_Psi.array(mfi);
         // number of ghost cells where ExmBy and EypBx are calculated is 0 for now
-        const amrex::Box bx = mfi.growntilebox(amrex::IntVect{0, 0, 0});
+        const amrex::Box bx = mfi.growntilebox(m_exmby_eypbx_grow);
         const amrex::Real dx_inv = 1./(2*geom[lev].CellSize(Direction::x));
         const amrex::Real dy_inv = 1./(2*geom[lev].CellSize(Direction::y));
 
diff --git a/src/fields/OpenBoundary.H b/src/fields/OpenBoundary.H
new file mode 100644
index 0000000000..42d9a2d616
--- /dev/null
+++ b/src/fields/OpenBoundary.H
@@ -0,0 +1,115 @@
+#ifndef OPEN_BOUNDARY_H_
+#define OPEN_BOUNDARY_H_
+
+#include <AMReX_AmrCore.H>
+#include <cmath>
+/** \brief base raised to the compile-time exponent exp, evaluated by template recursion over exp */
+template<unsigned int exp> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+amrex::Real pow (amrex::Real base) {
+    return pow<exp-1>(base) * base; // recursion ends at the pow<0> / pow<1> specializations
+}
+/** \brief recursion anchor: base^0 == 1; the argument is left unnamed because it is never read */
+template<> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+amrex::Real pow<0> (amrex::Real /*base*/) {
+    return 1;
+}
+/** \brief recursion anchor: base^1 == base, returned directly without a multiplication */
+template<> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+amrex::Real pow<1> (amrex::Real base) {
+    return base;
+}
+/** \brief Return, brace-initialized as T, 37 terms s_v*P_i(x,y): P_i are polynomials of order 0 (one term) and 1 to 18 (two terms each) */
+template<class T> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+T GetMultipoleCoeffs (amrex::Real s_v, amrex::Real x, amrex::Real y)
+{
+    using namespace amrex::literals; // provides the _rt suffix used by the fractional factors below
+    return { // NOTE(review): entries appear to pair index-by-index with m_c[] in GetFieldMultipole - confirm at the call site
+    s_v,
+    s_v*x,
+    s_v*y,
+    s_v*(-pow<2>(x) + pow<2>(y)),
+    s_v*x*y,
+    s_v*(pow<3>(x) - 3*x*pow<2>(y)),
+    s_v*(-3*pow<2>(x)*y + pow<3>(y)),
+    s_v*(pow<4>(x) - 6*pow<2>(x)*pow<2>(y) + pow<4>(y)),
+    s_v*(-pow<3>(x)*y + x*pow<3>(y)),
+    s_v*(pow<5>(x) - 10*pow<3>(x)*pow<2>(y) + 5*x*pow<4>(y)),
+    s_v*(5*pow<4>(x)*y - 10*pow<2>(x)*pow<3>(y) + pow<5>(y)),
+    s_v*(-pow<6>(x) + 15*pow<4>(x)*pow<2>(y) - 15*pow<2>(x)*pow<4>(y) + pow<6>(y)),
+    s_v*(pow<5>(x)*y - 10.0_rt/3.0_rt*pow<3>(x)*pow<3>(y) + x*pow<5>(y)),
+    s_v*(pow<7>(x) - 21*pow<5>(x)*pow<2>(y) + 35*pow<3>(x)*pow<4>(y) - 7*x*pow<6>(y)),
+    s_v*(-7*pow<6>(x)*y + 35*pow<4>(x)*pow<3>(y) - 21*pow<2>(x)*pow<5>(y) + pow<7>(y)),
+    s_v*(pow<8>(x) - 28*pow<6>(x)*pow<2>(y) + 70*pow<4>(x)*pow<4>(y) - 28*pow<2>(x)*pow<6>(y) + pow<8>(y)),
+    s_v*(-pow<7>(x)*y + 7*pow<5>(x)*pow<3>(y) - 7*pow<3>(x)*pow<5>(y) + x*pow<7>(y)),
+    s_v*(pow<9>(x) - 36*pow<7>(x)*pow<2>(y) + 126*pow<5>(x)*pow<4>(y) - 84*pow<3>(x)*pow<6>(y) + 9*x*pow<8>(y)),
+    s_v*(9*pow<8>(x)*y - 84*pow<6>(x)*pow<3>(y) + 126*pow<4>(x)*pow<5>(y) - 36*pow<2>(x)*pow<7>(y) + pow<9>(y)),
+    s_v*(-pow<10>(x) + 45*pow<8>(x)*pow<2>(y) - 210*pow<6>(x)*pow<4>(y) + 210*pow<4>(x)*pow<6>(y) - 45*pow<2>(x)*pow<8>(y) + pow<10>(y)),
+    s_v*(pow<9>(x)*y - 12*pow<7>(x)*pow<3>(y) + (126.0_rt/5.0_rt)*pow<5>(x)*pow<5>(y) - 12*pow<3>(x)*pow<7>(y) + x*pow<9>(y)),
+    s_v*(pow<11>(x) - 55*pow<9>(x)*pow<2>(y) + 330*pow<7>(x)*pow<4>(y) - 462*pow<5>(x)*pow<6>(y) + 165*pow<3>(x)*pow<8>(y) - 11*x*pow<10>(y)),
+    s_v*(-11*pow<10>(x)*y + 165*pow<8>(x)*pow<3>(y) - 462*pow<6>(x)*pow<5>(y) + 330*pow<4>(x)*pow<7>(y) - 55*pow<2>(x)*pow<9>(y) + pow<11>(y)),
+    s_v*(pow<12>(x) - 66*pow<10>(x)*pow<2>(y) + 495*pow<8>(x)*pow<4>(y) - 924*pow<6>(x)*pow<6>(y) + 495*pow<4>(x)*pow<8>(y) - 66*pow<2>(x)*pow<10>(y) + pow<12>(y)),
+    s_v*(-pow<11>(x)*y + (55.0_rt/3.0_rt)*pow<9>(x)*pow<3>(y) - 66*pow<7>(x)*pow<5>(y) + 66*pow<5>(x)*pow<7>(y) - 55.0_rt/3.0_rt*pow<3>(x)*pow<9>(y) + x*pow<11>(y)),
+    s_v*(pow<13>(x) - 78*pow<11>(x)*pow<2>(y) + 715*pow<9>(x)*pow<4>(y) - 1716*pow<7>(x)*pow<6>(y) + 1287*pow<5>(x)*pow<8>(y) - 286*pow<3>(x)*pow<10>(y) + 13*x*pow<12>(y)),
+    s_v*(13*pow<12>(x)*y - 286*pow<10>(x)*pow<3>(y) + 1287*pow<8>(x)*pow<5>(y) - 1716*pow<6>(x)*pow<7>(y) + 715*pow<4>(x)*pow<9>(y) - 78*pow<2>(x)*pow<11>(y) + pow<13>(y)),
+    s_v*(-pow<14>(x) + 91*pow<12>(x)*pow<2>(y) - 1001*pow<10>(x)*pow<4>(y) + 3003*pow<8>(x)*pow<6>(y) - 3003*pow<6>(x)*pow<8>(y) + 1001*pow<4>(x)*pow<10>(y) - 91*pow<2>(x)*pow<12>(y) + pow<14>(y)),
+    s_v*(pow<13>(x)*y - 26*pow<11>(x)*pow<3>(y) + 143*pow<9>(x)*pow<5>(y) - 1716.0_rt/7.0_rt*pow<7>(x)*pow<7>(y) + 143*pow<5>(x)*pow<9>(y) - 26*pow<3>(x)*pow<11>(y) + x*pow<13>(y)),
+    s_v*(pow<15>(x) - 105*pow<13>(x)*pow<2>(y) + 1365*pow<11>(x)*pow<4>(y) - 5005*pow<9>(x)*pow<6>(y) + 6435*pow<7>(x)*pow<8>(y) - 3003*pow<5>(x)*pow<10>(y) + 455*pow<3>(x)*pow<12>(y) - 15*x*pow<14>(y)),
+    s_v*(-15*pow<14>(x)*y + 455*pow<12>(x)*pow<3>(y) - 3003*pow<10>(x)*pow<5>(y) + 6435*pow<8>(x)*pow<7>(y) - 5005*pow<6>(x)*pow<9>(y) + 1365*pow<4>(x)*pow<11>(y) - 105*pow<2>(x)*pow<13>(y) + pow<15>(y)),
+    s_v*(pow<16>(x) - 120*pow<14>(x)*pow<2>(y) + 1820*pow<12>(x)*pow<4>(y) - 8008*pow<10>(x)*pow<6>(y) + 12870*pow<8>(x)*pow<8>(y) - 8008*pow<6>(x)*pow<10>(y) + 1820*pow<4>(x)*pow<12>(y) - 120*pow<2>(x)*pow<14>(y) + pow<16>(y)),
+    s_v*(-pow<15>(x)*y + 35*pow<13>(x)*pow<3>(y) - 273*pow<11>(x)*pow<5>(y) + 715*pow<9>(x)*pow<7>(y) - 715*pow<7>(x)*pow<9>(y) + 273*pow<5>(x)*pow<11>(y) - 35*pow<3>(x)*pow<13>(y) + x*pow<15>(y)),
+    s_v*(pow<17>(x) - 136*pow<15>(x)*pow<2>(y) + 2380*pow<13>(x)*pow<4>(y) - 12376*pow<11>(x)*pow<6>(y) + 24310*pow<9>(x)*pow<8>(y) - 19448*pow<7>(x)*pow<10>(y) + 6188*pow<5>(x)*pow<12>(y) - 680*pow<3>(x)*pow<14>(y) + 17*x*pow<16>(y)),
+    s_v*(17*pow<16>(x)*y - 680*pow<14>(x)*pow<3>(y) + 6188*pow<12>(x)*pow<5>(y) - 19448*pow<10>(x)*pow<7>(y) + 24310*pow<8>(x)*pow<9>(y) - 12376*pow<6>(x)*pow<11>(y) + 2380*pow<4>(x)*pow<13>(y) - 136*pow<2>(x)*pow<15>(y) + pow<17>(y)),
+    s_v*(-pow<18>(x) + 153*pow<16>(x)*pow<2>(y) - 3060*pow<14>(x)*pow<4>(y) + 18564*pow<12>(x)*pow<6>(y) - 43758*pow<10>(x)*pow<8>(y) + 43758*pow<8>(x)*pow<10>(y) - 18564*pow<6>(x)*pow<12>(y) + 3060*pow<4>(x)*pow<14>(y) - 153*pow<2>(x)*pow<16>(y) + pow<18>(y)),
+    s_v*(pow<17>(x)*y - 136.0_rt/3.0_rt*pow<15>(x)*pow<3>(y) + 476*pow<13>(x)*pow<5>(y) - 1768*pow<11>(x)*pow<7>(y) + (24310.0_rt/9.0_rt)*pow<9>(x)*pow<9>(y) - 1768*pow<7>(x)*pow<11>(y) + 476*pow<5>(x)*pow<13>(y) - 136.0_rt/3.0_rt*pow<3>(x)*pow<15>(y) + x*pow<17>(y))
+    };
+}
+/** \brief Sum the multipole series at (x_domain, y_domain): m_c[0]*log(r^2) plus m_c[1..36] times polynomials in x/r^2, y/r^2; m_c must hold 37 values */
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+amrex::Real GetFieldMultipole (const amrex::Real* m_c, amrex::Real x_domain, amrex::Real y_domain)
+{
+    using namespace amrex::literals; // provides the _rt suffix used by the fractional factors below
+    amrex::Real radius_2 = pow<2>(x_domain) + pow<2>(y_domain); // squared radius r^2; r^2 == 0 is not guarded (division and log below)
+    amrex::Real x = x_domain / radius_2; // every polynomial below is written in these
+    amrex::Real y = y_domain / radius_2; // coordinates divided by r^2, not in x_domain / y_domain
+    return
+      m_c[0] * (std::log(radius_2))
+    + m_c[1] * (-2*x)
+    + m_c[2] * (-2*y)
+    + m_c[3] * (pow<2>(x) - pow<2>(y))
+    + m_c[4] * (-4*x*y)
+    + m_c[5] * (-2.0_rt/3.0_rt*pow<3>(x) + 2*x*pow<2>(y))
+    + m_c[6] * (2*pow<2>(x)*y - 2.0_rt/3.0_rt*pow<3>(y))
+    + m_c[7] * (-1.0_rt/2.0_rt*pow<4>(x) + 3*pow<2>(x)*pow<2>(y) - 1.0_rt/2.0_rt*pow<4>(y))
+    + m_c[8] * (8*pow<3>(x)*y - 8*x*pow<3>(y))
+    + m_c[9] * (-2.0_rt/5.0_rt*pow<5>(x) + 4*pow<3>(x)*pow<2>(y) - 2*x*pow<4>(y))
+    + m_c[10] * (-2*pow<4>(x)*y + 4*pow<2>(x)*pow<3>(y) - 2.0_rt/5.0_rt*pow<5>(y))
+    + m_c[11] * ((1.0_rt/3.0_rt)*pow<6>(x) - 5*pow<4>(x)*pow<2>(y) + 5*pow<2>(x)*pow<4>(y) - 1.0_rt/3.0_rt*pow<6>(y))
+    + m_c[12] * (-12*pow<5>(x)*y + 40*pow<3>(x)*pow<3>(y) - 12*x*pow<5>(y))
+    + m_c[13] * (-2.0_rt/7.0_rt*pow<7>(x) + 6*pow<5>(x)*pow<2>(y) - 10*pow<3>(x)*pow<4>(y) + 2*x*pow<6>(y))
+    + m_c[14] * (2*pow<6>(x)*y - 10*pow<4>(x)*pow<3>(y) + 6*pow<2>(x)*pow<5>(y) - 2.0_rt/7.0_rt*pow<7>(y))
+    + m_c[15] * (-1.0_rt/4.0_rt*pow<8>(x) + 7*pow<6>(x)*pow<2>(y) - 35.0_rt/2.0_rt*pow<4>(x)*pow<4>(y) + 7*pow<2>(x)*pow<6>(y) - 1.0_rt/4.0_rt*pow<8>(y))
+    + m_c[16] * (16*pow<7>(x)*y - 112*pow<5>(x)*pow<3>(y) + 112*pow<3>(x)*pow<5>(y) - 16*x*pow<7>(y))
+    + m_c[17] * (-2.0_rt/9.0_rt*pow<9>(x) + 8*pow<7>(x)*pow<2>(y) - 28*pow<5>(x)*pow<4>(y) + (56.0_rt/3.0_rt)*pow<3>(x)*pow<6>(y) - 2*x*pow<8>(y))
+    + m_c[18] * (-2*pow<8>(x)*y + (56.0_rt/3.0_rt)*pow<6>(x)*pow<3>(y) - 28*pow<4>(x)*pow<5>(y) + 8*pow<2>(x)*pow<7>(y) - 2.0_rt/9.0_rt*pow<9>(y))
+    + m_c[19] * ((1.0_rt/5.0_rt)*pow<10>(x) - 9*pow<8>(x)*pow<2>(y) + 42*pow<6>(x)*pow<4>(y) - 42*pow<4>(x)*pow<6>(y) + 9*pow<2>(x)*pow<8>(y) - 1.0_rt/5.0_rt*pow<10>(y))
+    + m_c[20] * (-20*pow<9>(x)*y + 240*pow<7>(x)*pow<3>(y) - 504*pow<5>(x)*pow<5>(y) + 240*pow<3>(x)*pow<7>(y) - 20*x*pow<9>(y))
+    + m_c[21] * (-2.0_rt/11.0_rt*pow<11>(x) + 10*pow<9>(x)*pow<2>(y) - 60*pow<7>(x)*pow<4>(y) + 84*pow<5>(x)*pow<6>(y) - 30*pow<3>(x)*pow<8>(y) + 2*x*pow<10>(y))
+    + m_c[22] * (2*pow<10>(x)*y - 30*pow<8>(x)*pow<3>(y) + 84*pow<6>(x)*pow<5>(y) - 60*pow<4>(x)*pow<7>(y) + 10*pow<2>(x)*pow<9>(y) - 2.0_rt/11.0_rt*pow<11>(y))
+    + m_c[23] * (-1.0_rt/6.0_rt*pow<12>(x) + 11*pow<10>(x)*pow<2>(y) - 165.0_rt/2.0_rt*pow<8>(x)*pow<4>(y) + 154*pow<6>(x)*pow<6>(y) - 165.0_rt/2.0_rt*pow<4>(x)*pow<8>(y) + 11*pow<2>(x)*pow<10>(y) - 1.0_rt/6.0_rt*pow<12>(y))
+    + m_c[24] * (24*pow<11>(x)*y - 440*pow<9>(x)*pow<3>(y) + 1584*pow<7>(x)*pow<5>(y) - 1584*pow<5>(x)*pow<7>(y) + 440*pow<3>(x)*pow<9>(y) - 24*x*pow<11>(y))
+    + m_c[25] * (-2.0_rt/13.0_rt*pow<13>(x) + 12*pow<11>(x)*pow<2>(y) - 110*pow<9>(x)*pow<4>(y) + 264*pow<7>(x)*pow<6>(y) - 198*pow<5>(x)*pow<8>(y) + 44*pow<3>(x)*pow<10>(y) - 2*x*pow<12>(y))
+    + m_c[26] * (-2*pow<12>(x)*y + 44*pow<10>(x)*pow<3>(y) - 198*pow<8>(x)*pow<5>(y) + 264*pow<6>(x)*pow<7>(y) - 110*pow<4>(x)*pow<9>(y) + 12*pow<2>(x)*pow<11>(y) - 2.0_rt/13.0_rt*pow<13>(y))
+    + m_c[27] * ((1.0_rt/7.0_rt)*pow<14>(x) - 13*pow<12>(x)*pow<2>(y) + 143*pow<10>(x)*pow<4>(y) - 429*pow<8>(x)*pow<6>(y) + 429*pow<6>(x)*pow<8>(y) - 143*pow<4>(x)*pow<10>(y) + 13*pow<2>(x)*pow<12>(y) - 1.0_rt/7.0_rt*pow<14>(y))
+    + m_c[28] * (-28*pow<13>(x)*y + 728*pow<11>(x)*pow<3>(y) - 4004*pow<9>(x)*pow<5>(y) + 6864*pow<7>(x)*pow<7>(y) - 4004*pow<5>(x)*pow<9>(y) + 728*pow<3>(x)*pow<11>(y) - 28*x*pow<13>(y))
+    + m_c[29] * (-2.0_rt/15.0_rt*pow<15>(x) + 14*pow<13>(x)*pow<2>(y) - 182*pow<11>(x)*pow<4>(y) + (2002.0_rt/3.0_rt)*pow<9>(x)*pow<6>(y) - 858*pow<7>(x)*pow<8>(y) + (2002.0_rt/5.0_rt)*pow<5>(x)*pow<10>(y) - 182.0_rt/3.0_rt*pow<3>(x)*pow<12>(y) + 2*x*pow<14>(y))
+    + m_c[30] * (2*pow<14>(x)*y - 182.0_rt/3.0_rt*pow<12>(x)*pow<3>(y) + (2002.0_rt/5.0_rt)*pow<10>(x)*pow<5>(y) - 858*pow<8>(x)*pow<7>(y) + (2002.0_rt/3.0_rt)*pow<6>(x)*pow<9>(y) - 182*pow<4>(x)*pow<11>(y) + 14*pow<2>(x)*pow<13>(y) - 2.0_rt/15.0_rt*pow<15>(y))
+    + m_c[31] * (-1.0_rt/8.0_rt*pow<16>(x) + 15*pow<14>(x)*pow<2>(y) - 455.0_rt/2.0_rt*pow<12>(x)*pow<4>(y) + 1001*pow<10>(x)*pow<6>(y) - 6435.0_rt/4.0_rt*pow<8>(x)*pow<8>(y) + 1001*pow<6>(x)*pow<10>(y) - 455.0_rt/2.0_rt*pow<4>(x)*pow<12>(y) + 15*pow<2>(x)*pow<14>(y) - 1.0_rt/8.0_rt*pow<16>(y))
+    + m_c[32] * (32*pow<15>(x)*y - 1120*pow<13>(x)*pow<3>(y) + 8736*pow<11>(x)*pow<5>(y) - 22880*pow<9>(x)*pow<7>(y) + 22880*pow<7>(x)*pow<9>(y) - 8736*pow<5>(x)*pow<11>(y) + 1120*pow<3>(x)*pow<13>(y) - 32*x*pow<15>(y))
+    + m_c[33] * (-2.0_rt/17.0_rt*pow<17>(x) + 16*pow<15>(x)*pow<2>(y) - 280*pow<13>(x)*pow<4>(y) + 1456*pow<11>(x)*pow<6>(y) - 2860*pow<9>(x)*pow<8>(y) + 2288*pow<7>(x)*pow<10>(y) - 728*pow<5>(x)*pow<12>(y) + 80*pow<3>(x)*pow<14>(y) - 2*x*pow<16>(y))
+    + m_c[34] * (-2*pow<16>(x)*y + 80*pow<14>(x)*pow<3>(y) - 728*pow<12>(x)*pow<5>(y) + 2288*pow<10>(x)*pow<7>(y) - 2860*pow<8>(x)*pow<9>(y) + 1456*pow<6>(x)*pow<11>(y) - 280*pow<4>(x)*pow<13>(y) + 16*pow<2>(x)*pow<15>(y) - 2.0_rt/17.0_rt*pow<17>(y))
+    + m_c[35] * ((1.0_rt/9.0_rt)*pow<18>(x) - 17*pow<16>(x)*pow<2>(y) + 340*pow<14>(x)*pow<4>(y) - 6188.0_rt/3.0_rt*pow<12>(x)*pow<6>(y) + 4862*pow<10>(x)*pow<8>(y) - 4862*pow<8>(x)*pow<10>(y) + (6188.0_rt/3.0_rt)*pow<6>(x)*pow<12>(y) - 340*pow<4>(x)*pow<14>(y) + 17*pow<2>(x)*pow<16>(y) - 1.0_rt/9.0_rt*pow<18>(y))
+    + m_c[36] * (-36*pow<17>(x)*y + 1632*pow<15>(x)*pow<3>(y) - 17136*pow<13>(x)*pow<5>(y) + 63648*pow<11>(x)*pow<7>(y) - 97240*pow<9>(x)*pow<9>(y) + 63648*pow<7>(x)*pow<11>(y) - 17136*pow<5>(x)*pow<13>(y) + 1632*pow<3>(x)*pow<15>(y) - 36*x*pow<17>(y))
+    ;
+}
+
+#endif

From 94266c74ec674ff3dea12f130fb2c685610d9b80 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Mon, 31 Jan 2022 21:17:36 +0100
Subject: [PATCH 29/52] fix critical allocation bug and include vector

---
 src/Hipace.cpp                    |  1 -
 src/diagnostics/Diagnostic.H      |  8 ++------
 src/diagnostics/Diagnostic.cpp    | 10 +---------
 src/diagnostics/OpenPMDWriter.cpp |  1 -
 4 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index cafe1d5e1c..6ae74893c8 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -304,7 +304,6 @@ Hipace::MakeNewLevelFromScratch (
     }
     SetDistributionMap(lev, dm); // Let AmrCore know
     DefineSliceGDB(lev, ba, dm);
-    m_diags.AllocData(lev);
     m_fields.AllocData(lev, Geom(), m_slice_ba[lev], m_slice_dm[lev],
                        m_multi_plasma.m_sort_bin_size);
 }
diff --git a/src/diagnostics/Diagnostic.H b/src/diagnostics/Diagnostic.H
index 5fa6f14de6..d46b762986 100644
--- a/src/diagnostics/Diagnostic.H
+++ b/src/diagnostics/Diagnostic.H
@@ -4,6 +4,8 @@
 #include <AMReX_MultiFab.H>
 #include <AMReX_Vector.H>
 
+#include <vector>
+
 /** type of diagnostics: full xyz array or xz slice or yz slice */
 enum struct DiagType{xyz, xz, yz};
 
@@ -16,12 +18,6 @@ public:
     /** \brief Constructor */
     explicit Diagnostic (int nlev);
 
-    /** \brief allocate arrays of this MF
-     *
-     * \param[in] lev MR level
-     */
-    void AllocData (int lev);
-
     /** \brief return the main diagnostics multifab */
     amrex::Vector<amrex::FArrayBox>& getF () { return m_F; }
 
diff --git a/src/diagnostics/Diagnostic.cpp b/src/diagnostics/Diagnostic.cpp
index 8adbf47938..27d7716221 100644
--- a/src/diagnostics/Diagnostic.cpp
+++ b/src/diagnostics/Diagnostic.cpp
@@ -95,14 +95,6 @@ Diagnostic::Diagnostic (int nlev)
     }
 }
 
-void
-Diagnostic::AllocData (int lev)
-{
-    // only usable after ResizeFDiagFAB
-    const amrex::Box dummy_bx = {{0,0,0}, {0,0,0}};
-    m_F.push_back(amrex::FArrayBox(dummy_bx, m_nfields, amrex::The_Pinned_Arena()));
-}
-
 void
 Diagnostic::ResizeFDiagFAB (amrex::Box local_box, amrex::Box domain, const int lev,
                             amrex::Geometry const& geom)
@@ -131,7 +123,7 @@ Diagnostic::ResizeFDiagFAB (amrex::Box local_box, amrex::Box domain, const int l
     m_has_field[lev] = local_box.ok();
 
     if(m_has_field[lev]) {
-        m_F[lev].resize(local_box, m_nfields);
+        m_F[lev].resize(local_box, m_nfields, amrex::The_Pinned_Arena());
         m_F[lev].setVal<amrex::RunOn::Host>(0);
     }
 }
diff --git a/src/diagnostics/OpenPMDWriter.cpp b/src/diagnostics/OpenPMDWriter.cpp
index 1640eb8706..f6687b116f 100644
--- a/src/diagnostics/OpenPMDWriter.cpp
+++ b/src/diagnostics/OpenPMDWriter.cpp
@@ -3,7 +3,6 @@
 #include "utils/HipaceProfilerWrapper.H"
 #include "utils/Constants.H"
 #include "utils/IOUtil.H"
-#include "particles/pusher/GetDomainLev.H"
 
 #ifdef HIPACE_USE_OPENPMD
 

From 4424c864718764dfa421c1572955870ac3a2b25d Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 2 Feb 2022 20:44:38 +0100
Subject: [PATCH 30/52] add _rt

---
 src/fields/Fields.cpp | 57 ++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 30 deletions(-)

diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 38b3062325..1fa4b7da68 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -6,6 +6,8 @@
 #include "utils/Constants.H"
 #include "particles/ShapeFactors.H"
 
+using namespace amrex::literals;
+
 amrex::IntVect Fields::m_slices_nguards = {-1, -1, -1};
 amrex::IntVect Fields::m_poisson_nguards = {-1, -1, -1};
 
@@ -35,7 +37,7 @@ Fields::AllocData (
         m_slices[lev][islice].define(
             slice_ba, slice_dm, Comps[islice]["N"], m_slices_nguards,
             amrex::MFInfo().SetArena(amrex::The_Arena()));
-        m_slices[lev][islice].setVal(0.0, m_slices_nguards);
+        m_slices[lev][islice].setVal(0._rt, m_slices_nguards);
     }
 
     // The Poisson solver operates on transverse slices only.
@@ -118,7 +120,7 @@ struct derivative {
     derivative_inner<dir> array (amrex::MFIter& mfi) const {
         amrex::Box bx = f_view[mfi].box();
         return derivative_inner<dir>{f_view.array(mfi),
-            1/(2*geom.CellSize(dir)), bx.smallEnd(dir), bx.bigEnd(dir)};
+            1._rt/(2._rt*geom.CellSize(dir)), bx.smallEnd(dir), bx.bigEnd(dir)};
     }
 };
 
@@ -132,7 +134,7 @@ struct derivative<Direction::z> {
     // use .array(mfi) like with amrex::MultiFab
     derivative_inner<Direction::z> array (amrex::MFIter& mfi) const {
         return derivative_inner<Direction::z>{f_view1.array(mfi), f_view2.array(mfi),
-            1/(2*geom.CellSize(Direction::z))};
+            1._rt/(2._rt*geom.CellSize(Direction::z))};
     }
 };
 
@@ -149,7 +151,6 @@ struct interpolated_field_xy_inner {
     // x and y must be inside field box
     template<class...Args> AMREX_GPU_DEVICE
     amrex::Real operator() (amrex::Real x, amrex::Real y, Args...args) const noexcept {
-        using namespace amrex::literals;
 
         // x direction
         const amrex::Real xmid = (x - offset0)*dx_inv;
@@ -161,7 +162,7 @@ struct interpolated_field_xy_inner {
         amrex::Real sy_cell[interp_order_xy + 1];
         const int j_cell = compute_shape_factor<interp_order_xy>(sy_cell, ymid);
 
-        amrex::Real field_value = 0.0_rt;
+        amrex::Real field_value = 0._rt;
         for (int iy=0; iy<=interp_order_xy; iy++){
             for (int ix=0; ix<=interp_order_xy; ix++){
                 field_value += sx_cell[ix] * sy_cell[iy] * array(i_cell+ix, j_cell+iy, args...);
@@ -181,7 +182,7 @@ struct interpolated_field_xy {
     auto array (amrex::MFIter& mfi) const {
         auto mfab_array = mfab.array(mfi);
         return interpolated_field_xy_inner<interp_order_xy, decltype(mfab_array)>{
-            mfab_array, 1/geom.CellSize(0), 1/geom.CellSize(1),
+            mfab_array, 1._rt/geom.CellSize(0), 1._rt/geom.CellSize(1),
             GetPosOffset(0, geom, geom.Domain()), GetPosOffset(1, geom, geom.Domain())};
     }
 };
@@ -195,8 +196,7 @@ struct interpolated_field_z_inner {
 
     // linear longitudinal field interpolation
     AMREX_GPU_DEVICE amrex::Real operator() (int i, int j) const noexcept {
-        using namespace amrex::literals;
-        return (1.0_rt-rel_z)*arr_this(i, j, lo2) + rel_z*arr_prev(i, j, lo2);
+        return (1._rt-rel_z)*arr_this(i, j, lo2) + rel_z*arr_prev(i, j, lo2);
     }
 };
 
@@ -223,7 +223,6 @@ struct guarded_field_inner {
 
     template<class...Args>
     AMREX_GPU_DEVICE amrex::Real operator() (int i, int j, int k, Args...args) const noexcept {
-        using namespace amrex::literals;
         if (bx.contains(i,j,k)) {
             return array(i,j,k,args...);
         } else return 0._rt;
@@ -367,7 +366,7 @@ Fields::ShiftSlices (int nlev, int islice, amrex::Geometry geom, amrex::Real pat
         // use geometry of coarse grid to determine whether slice is to be solved
         const amrex::Real* problo = geom.ProbLo();
         const amrex::Real* dx = geom.CellSize();
-        const amrex::Real pos = (islice+0.5)*dx[2]+problo[2];
+        const amrex::Real pos = (islice+0.5_rt)*dx[2]+problo[2];
         if (pos < patch_lo || pos > patch_hi) continue;
     }
 
@@ -482,7 +481,6 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
 {
     if (lev == 0) return; // keep lev==0 boundaries zero
     HIPACE_PROFILE("Fields::SetBoundaryCondition()");
-    using namespace amrex::literals;
     constexpr int interp_order = 2;
 
     const amrex::Real ref_ratio_z = Hipace::GetRefRatio(lev)[2];
@@ -514,7 +512,6 @@ Fields::InterpolateFromLev0toLev1 (amrex::Vector<amrex::Geometry> const& geom, c
     if (lev == 0) return; // only interpolate boundaries to lev 1
     if (outer_edge == inner_edge) return;
     HIPACE_PROFILE("Fields::InterpolateFromLev0toLev1()");
-    using namespace amrex::literals;
     constexpr int interp_order = 2;
 
     const amrex::Real ref_ratio_z = Hipace::GetRefRatio(lev)[2];
@@ -578,8 +575,8 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
 
     // calculating the right-hand side 1/episilon0 * -(rho-Jz/c)
     LinCombination(m_poisson_nguards, getStagingArea(lev),
-                   1./(phys_const.c*phys_const.ep0), getField(lev, WhichSlice::This, "jz"),
-                   -1./(phys_const.ep0), getField(lev, WhichSlice::This, "rho"));
+                   1._rt/(phys_const.c*phys_const.ep0), getField(lev, WhichSlice::This, "jz"),
+                   -1._rt/(phys_const.ep0), getField(lev, WhichSlice::This, "rho"));
 
     SetBoundaryCondition(geom, lev, "Psi", islice);
     m_poisson_solver[lev]->SolvePoissonEquation(lhs);
@@ -605,8 +602,8 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
         const amrex::Array4<amrex::Real const> array_Psi = f_Psi.array(mfi);
         // number of ghost cells where ExmBy and EypBx are calculated is 0 for now
         const amrex::Box bx = mfi.growntilebox(amrex::IntVect{0, 0, 0});
-        const amrex::Real dx_inv = 1./(2*geom[lev].CellSize(Direction::x));
-        const amrex::Real dy_inv = 1./(2*geom[lev].CellSize(Direction::y));
+        const amrex::Real dx_inv = 1._rt/(2._rt*geom[lev].CellSize(Direction::x));
+        const amrex::Real dy_inv = 1._rt/(2._rt*geom[lev].CellSize(Direction::y));
 
         amrex::ParallelFor(bx,
             [=] AMREX_GPU_DEVICE(int i, int j, int k)
@@ -633,9 +630,9 @@ Fields::SolvePoissonEz (amrex::Vector<amrex::Geometry> const& geom, const int le
     // Right-Hand Side for Poisson equation: compute 1/(episilon0 *c0 )*(d_x(jx) + d_y(jy))
     // from the slice MF, and store in the staging area of poisson_solver
     LinCombination(m_poisson_nguards, getStagingArea(lev),
-                   1./(phys_const.ep0*phys_const.c),
+                   1._rt/(phys_const.ep0*phys_const.c),
                    derivative<Direction::x>{getField(lev, WhichSlice::This, "jx"), geom[lev]},
-                   1./(phys_const.ep0*phys_const.c),
+                   1._rt/(phys_const.ep0*phys_const.c),
                    derivative<Direction::y>{getField(lev, WhichSlice::This, "jy"), geom[lev]});
 
     SetBoundaryCondition(geom, lev, "Ez", islice);
@@ -729,18 +726,18 @@ Fields::InitialBfieldGuess (const amrex::Real relative_Bfield_error,
      */
     HIPACE_PROFILE("Fields::InitialBfieldGuess()");
 
-    const amrex::Real mix_factor_init_guess = exp(-0.5 * pow(relative_Bfield_error /
-                                              ( 2.5 * predcorr_B_error_tolerance ), 2));
+    const amrex::Real mix_factor_init_guess = exp(-0.5_rt * pow(relative_Bfield_error /
+                                              ( 2.5_rt * predcorr_B_error_tolerance ), 2));
 
     amrex::MultiFab::LinComb(
         getSlices(lev, WhichSlice::This),
-        1+mix_factor_init_guess, getSlices(lev, WhichSlice::Previous1), Comps[WhichSlice::Previous1]["Bx"],
+        1._rt+mix_factor_init_guess, getSlices(lev, WhichSlice::Previous1), Comps[WhichSlice::Previous1]["Bx"],
         -mix_factor_init_guess, getSlices(lev, WhichSlice::Previous2), Comps[WhichSlice::Previous2]["Bx"],
         Comps[WhichSlice::This]["Bx"], 1, m_slices_nguards);
 
     amrex::MultiFab::LinComb(
         getSlices(lev, WhichSlice::This),
-        1+mix_factor_init_guess, getSlices(lev, WhichSlice::Previous1), Comps[WhichSlice::Previous1]["By"],
+        1._rt+mix_factor_init_guess, getSlices(lev, WhichSlice::Previous1), Comps[WhichSlice::Previous1]["By"],
         -mix_factor_init_guess, getSlices(lev, WhichSlice::Previous2), Comps[WhichSlice::Previous2]["By"],
         Comps[WhichSlice::This]["By"], 1, m_slices_nguards);
 }
@@ -761,7 +758,7 @@ Fields::MixAndShiftBfields (const amrex::MultiFab& B_iter, amrex::MultiFab& B_pr
     amrex::Real weight_B_prev_iter;
     /* calculating the weight for mixing the current and previous iteration based
      * on their respective errors. Large errors will induce a small weight of and vice-versa  */
-    if (relative_Bfield_error != 0.0 || relative_Bfield_error_prev_iter != 0.0)
+    if (relative_Bfield_error != 0._rt || relative_Bfield_error_prev_iter != 0._rt)
     {
         weight_B_iter = relative_Bfield_error_prev_iter /
                         ( relative_Bfield_error + relative_Bfield_error_prev_iter );
@@ -770,8 +767,8 @@ Fields::MixAndShiftBfields (const amrex::MultiFab& B_iter, amrex::MultiFab& B_pr
     }
     else
     {
-        weight_B_iter = 0.5;
-        weight_B_prev_iter = 0.5;
+        weight_B_iter = 0.5_rt;
+        weight_B_prev_iter = 0.5_rt;
     }
 
     /* calculating the mixed temporary B field  B_prev_iter = c*B_iter + d*B_prev_iter.
@@ -786,7 +783,7 @@ Fields::MixAndShiftBfields (const amrex::MultiFab& B_iter, amrex::MultiFab& B_pr
     /* calculating the mixed B field  B = a*B + (1-a)*B_prev_iter */
     amrex::MultiFab::LinComb(
         getSlices(lev, WhichSlice::This),
-        1-predcorr_B_mixing_factor, getSlices(lev, WhichSlice::This), field_comp,
+        1._rt-predcorr_B_mixing_factor, getSlices(lev, WhichSlice::This), field_comp,
         predcorr_B_mixing_factor, B_prev_iter, 0,
         field_comp, 1, m_slices_nguards);
 
@@ -805,11 +802,11 @@ Fields::ComputeRelBFieldError (
     // for both Bx and By simultaneously
     HIPACE_PROFILE("Fields::ComputeRelBFieldError()");
 
-    amrex::Real norm_Bdiff = 0;
+    amrex::Real norm_Bdiff = 0._rt;
     amrex::Gpu::DeviceScalar<amrex::Real> gpu_norm_Bdiff(norm_Bdiff);
     amrex::Real* p_norm_Bdiff = gpu_norm_Bdiff.dataPtr();
 
-    amrex::Real norm_B = 0;
+    amrex::Real norm_B = 0._rt;
     amrex::Gpu::DeviceScalar<amrex::Real> gpu_norm_B(norm_B);
     amrex::Real* p_norm_B = gpu_norm_B.dataPtr();
 
@@ -844,8 +841,8 @@ Fields::ComputeRelBFieldError (
 
     // calculating the relative error
     // Warning: this test might be not working in SI units!
-    const amrex::Real relative_Bfield_error = (norm_B/numPts_transverse > 1e-10)
-                                               ? norm_Bdiff/norm_B : 0.;
+    const amrex::Real relative_Bfield_error = (norm_B/numPts_transverse > 1e-10_rt)
+                                               ? norm_Bdiff/norm_B : 0._rt;
 
     return relative_Bfield_error;
 }

From d4856a8c31762f2265c570e62839faacd4110076 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 2 Feb 2022 21:25:55 +0100
Subject: [PATCH 31/52] add doc

---
 src/fields/Fields.cpp | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 1fa4b7da68..0ab0138cbe 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -76,6 +76,7 @@ Fields::AllocData (
     }
 }
 
+/** \brief inner version of derivative */
 template<int dir>
 struct derivative_inner {
     // captured variables for GPU
@@ -97,6 +98,7 @@ struct derivative_inner {
     }
 };
 
+/** \brief inner version of derivative */
 template<>
 struct derivative_inner<Direction::z> {
     // captured variables for GPU
@@ -110,6 +112,8 @@ struct derivative_inner<Direction::z> {
     }
 };
 
+/** \brief derivative in x or y direction. Field is zero-extended by one cell such that this
+ * derivative can be accessed on the same box as the field */
 template<int dir>
 struct derivative {
     // use brace initialization as constructor
@@ -124,6 +128,7 @@ struct derivative {
     }
 };
 
+/** \brief derivative in z direction. Use fields from previous and next slice */
 template<>
 struct derivative<Direction::z> {
     // use brace initialization as constructor
@@ -138,6 +143,7 @@ struct derivative<Direction::z> {
     }
 };
 
+/** \brief inner version of interpolated_field_xy */
 template<int interp_order_xy, class ArrayType>
 struct interpolated_field_xy_inner {
     // captured variables for GPU
@@ -147,7 +153,7 @@ struct interpolated_field_xy_inner {
     amrex::Real offset0;
     amrex::Real offset1;
 
-    // interpolate field in x, y with <interp_order_xy> order transversely
+    // interpolate field in x, y with <interp_order_xy> order transversely,
     // x and y must be inside field box
     template<class...Args> AMREX_GPU_DEVICE
     amrex::Real operator() (amrex::Real x, amrex::Real y, Args...args) const noexcept {
@@ -172,6 +178,8 @@ struct interpolated_field_xy_inner {
     }
 };
 
+/** \brief interpolate field in x, y with <interp_order_xy> order transversely,
+ * x and y must be inside field box */
 template<int interp_order_xy, class MfabType>
 struct interpolated_field_xy {
     // use brace initialization as constructor
@@ -187,6 +195,7 @@ struct interpolated_field_xy {
     }
 };
 
+/** \brief inner version of interpolated_field_z */
 struct interpolated_field_z_inner {
     // captured variables for GPU
     amrex::Array4<amrex::Real const> arr_this;
@@ -200,6 +209,7 @@ struct interpolated_field_z_inner {
     }
 };
 
+/** \brief linear longitudinal field interpolation */
 struct interpolated_field_z {
     // use brace initialization as constructor
     amrex::MultiFab mfab_this; // field to interpolate on this slice
@@ -213,9 +223,12 @@ struct interpolated_field_z {
     }
 };
 
+/** \brief interpolate field in <interp_order_xy> order transversely and
+ * first order (linear) longitudinally */
 template<int interp_order_xy>
 using interpolated_field_xyz = interpolated_field_xy<interp_order_xy, interpolated_field_z>;
 
+/** \brief inner version of guarded_field */
 struct guarded_field_inner {
     // captured variables for GPU
     amrex::Array4<amrex::Real const> array;
@@ -229,6 +242,7 @@ struct guarded_field_inner {
     }
 };
 
+/** \brief if indices are outside of the fields box zero is returned */
 struct guarded_field {
     // use brace initialization as constructor
     amrex::MultiFab& mfab; // field to be guarded (zero extended)

From 043d5a5f0342a001d83f571f4316b3dc1f3b636a Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 2 Feb 2022 21:29:41 +0100
Subject: [PATCH 32/52] fix doc

---
 src/fields/Fields.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 0ab0138cbe..9822688f18 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -153,7 +153,7 @@ struct interpolated_field_xy_inner {
     amrex::Real offset0;
     amrex::Real offset1;
 
-    // interpolate field in x, y with <interp_order_xy> order transversely,
+    // interpolate field in x, y with interp_order_xy order transversely,
     // x and y must be inside field box
     template<class...Args> AMREX_GPU_DEVICE
     amrex::Real operator() (amrex::Real x, amrex::Real y, Args...args) const noexcept {
@@ -178,7 +178,7 @@ struct interpolated_field_xy_inner {
     }
 };
 
-/** \brief interpolate field in x, y with <interp_order_xy> order transversely,
+/** \brief interpolate field in x, y with interp_order_xy order transversely,
  * x and y must be inside field box */
 template<int interp_order_xy, class MfabType>
 struct interpolated_field_xy {
@@ -223,7 +223,7 @@ struct interpolated_field_z {
     }
 };
 
-/** \brief interpolate field in <interp_order_xy> order transversely and
+/** \brief interpolate field in interp_order_xy order transversely and
  * first order (linear) longitudinally */
 template<int interp_order_xy>
 using interpolated_field_xyz = interpolated_field_xy<interp_order_xy, interpolated_field_z>;

From a13bce34fcc2fed9473bb88599194fffe4b7d63a Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 3 Feb 2022 06:40:36 +0100
Subject: [PATCH 33/52] speed test: compare deviceReduceSum and ParReduce for multipole coeffs

---
 src/fields/Fields.cpp | 190 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 182 insertions(+), 8 deletions(-)

diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 50782483fa..0259cfc3c9 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -512,9 +512,8 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
     using namespace amrex::literals;
     if (lev == 0 && m_open_boundary) {
         m_multipole_coeffs.resize(37);
-        for (amrex::Real& coeff : m_multipole_coeffs) {
-            coeff = 0;
-        }
+
+        amrex::MFIter::allowMultipleMFIters(true);
 
         amrex::MultiFab staging_area = getStagingArea(lev);
         for (amrex::MFIter mfi(staging_area, false); mfi.isValid(); ++mfi)
@@ -528,14 +527,186 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
             const amrex::Real poff_y = GetPosOffset(1, geom[lev], staging_box);
             const amrex::Real dx = geom[lev].CellSize(0);
             const amrex::Real dy = geom[lev].CellSize(1);
-            const amrex::Real scale = 3._rt/std::sqrt(geom[lev].ProbLength(0)*
-                geom[lev].ProbLength(0) + geom[lev].ProbLength(1)*geom[lev].ProbLength(1));
+            const amrex::Real scale = 3._rt/std::sqrt(pow<2>(geom[lev].ProbLength(0)) +
+                                                      pow<2>(geom[lev].ProbLength(1)));
+
+            const amrex::Real radius = amrex::min(geom[lev].ProbLo(0), geom[lev].ProbHi(0),
+                                                  geom[lev].ProbLo(1), geom[lev].ProbHi(1));
+            const amrex::Real cutoff_sq = pow<2>(0.95_rt * radius * scale) * 100;
+
+            amrex::ParallelFor(37,
+                [=] AMREX_GPU_DEVICE (int i) noexcept {
+                    coeffs_ptr[i] = 0;
+                });
+
+            {HIPACE_PROFILE("BoundaryCutoffDeviceReduceSum()");
+            amrex::ParallelFor(amrex::Gpu::KernelInfo().setReduction(true), staging_box,
+                [=] AMREX_GPU_DEVICE (int i, int j, int, amrex::Gpu::Handler const& handler) noexcept
+                {
+                    amrex::Real x = (i * dx + poff_x) * scale;
+                    amrex::Real y = (j * dy + poff_y) * scale;
+                    if (x*x + y*y > cutoff_sq) return;
+                    amrex::Real s_v = arr_staging_area(i, j, lo2);
+                    auto coeffs = GetMultipoleCoeffs<amrex::GpuArray<amrex::Real, 37>>(s_v, x, y);
+
+                    for (int n=0; n<37; ++n) {
+                        amrex::Gpu::deviceReduceSum(coeffs_ptr + n, coeffs[n], handler);
+                    }
+                }
+            );
+            amrex::Gpu::Device::synchronize();
+            std::cout << "MReduceSum: " << m_multipole_coeffs[0] << std::endl;
+            }
+
+            amrex::ParallelFor(37,
+                [=] AMREX_GPU_DEVICE (int i) noexcept {
+                    coeffs_ptr[i] = 0;
+                });
+
+            {HIPACE_PROFILE("BoundaryNoCutoffDeviceReduceSum()");
+            amrex::ParallelFor(amrex::Gpu::KernelInfo().setReduction(true), staging_box,
+                [=] AMREX_GPU_DEVICE (int i, int j, int, amrex::Gpu::Handler const& handler) noexcept
+                {
+                    amrex::Real x = (i * dx + poff_x) * scale;
+                    amrex::Real y = (j * dy + poff_y) * scale;
+                    amrex::Real s_v = arr_staging_area(i, j, lo2);
+                    auto coeffs = GetMultipoleCoeffs<amrex::GpuArray<amrex::Real, 37>>(s_v, x, y);
+
+                    for (int n=0; n<37; ++n) {
+                        amrex::Gpu::deviceReduceSum(coeffs_ptr + n, coeffs[n], handler);
+                    }
+                }
+            );
+            amrex::Gpu::Device::synchronize();
+            std::cout << "MReduceSum: " << m_multipole_coeffs[0] << std::endl;
+            }
+
+            amrex::ParallelFor(37,
+                [=] AMREX_GPU_DEVICE (int i) noexcept {
+                    coeffs_ptr[i] = 0;
+                });
+
+            {HIPACE_PROFILE("BoundaryCutoffParReduce()");
+            auto result =
+            amrex::ParReduce(amrex::TypeList<amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum>{},
+                             amrex::TypeList<amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real>{},
+                             staging_area, m_poisson_nguards,
+                [=] AMREX_GPU_DEVICE (int, int i, int j, int k) noexcept
+                {
+                    const amrex::Real x = (i * dx + poff_x) * scale;
+                    const amrex::Real y = (j * dy + poff_y) * scale;
+                    amrex::Real s_v = arr_staging_area(i, j, k);
+                    if (x*x + y*y > cutoff_sq) s_v = 0._rt;
+                    return GetMultipoleCoeffs<amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real>>(s_v, x, y);
+                }
+            );
+            amrex::Gpu::Device::synchronize();
+            std::cout << "MParReduce: " << amrex::get<0>(result) << std::endl;
+            }
+
+
+            {HIPACE_PROFILE("BoundaryNoCutoffParReduce()");
+            auto result =
+            amrex::ParReduce(amrex::TypeList<amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                             amrex::ReduceOpSum>{},
+                             amrex::TypeList<amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real, amrex::Real, amrex::Real,
+                                             amrex::Real>{},
+                             staging_area, m_poisson_nguards,
+                [=] AMREX_GPU_DEVICE (int, int i, int j, int k) noexcept
+                {
+                    const amrex::Real x = (i * dx + poff_x) * scale;
+                    const amrex::Real y = (j * dy + poff_y) * scale;
+                    amrex::Real s_v = arr_staging_area(i, j, k);
+                    return GetMultipoleCoeffs<amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real, amrex::Real, amrex::Real,
+                                                              amrex::Real>>(s_v, x, y);
+                }
+            );
+            amrex::Gpu::Device::synchronize();
+            std::cout << "MParReduce: " << amrex::get<0>(result) << std::endl;
+            }
+
+            amrex::ParallelFor(37,
+                [=] AMREX_GPU_DEVICE (int i) noexcept {
+                    coeffs_ptr[i] = 0;
+                });
 
+
+            {HIPACE_PROFILE("BoundaryCutoffDeviceReduceSumControl()");
             amrex::ParallelFor(amrex::Gpu::KernelInfo().setReduction(true), staging_box,
                 [=] AMREX_GPU_DEVICE (int i, int j, int, amrex::Gpu::Handler const& handler) noexcept
                 {
                     amrex::Real x = (i * dx + poff_x) * scale;
                     amrex::Real y = (j * dy + poff_y) * scale;
+                    if (x*x + y*y > cutoff_sq) return;
                     amrex::Real s_v = arr_staging_area(i, j, lo2);
                     auto coeffs = GetMultipoleCoeffs<amrex::GpuArray<amrex::Real, 37>>(s_v, x, y);
 
@@ -545,16 +716,19 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
                 }
             );
             amrex::Gpu::Device::synchronize();
+            std::cout << "MControl: " << m_multipole_coeffs[0] << std::endl;
+            }
 
-            std::cout << "Mcoeff: " << m_multipole_coeffs[0] << " " << m_multipole_coeffs[1] << " " << m_multipole_coeffs[2];
-            std::cout << std::endl;
 
+
+
+            {HIPACE_PROFILE("BoundarySetDirichlet()");
             SetDirichletBoundaries(arr_staging_area, staging_box, geom[lev],
                 [=] AMREX_GPU_DEVICE (amrex::Real x, amrex::Real y) noexcept
                 {
                     return dx*dy*GetFieldMultipole(coeffs_ptr, x*scale, y*scale);
                 }
-            );
+            );}
         }
 
     } else if (lev == 1) {

From 244b2ed6a589651cdff2c65a6e311bd964a3feda Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 3 Feb 2022 07:38:09 +0100
Subject: [PATCH 34/52] ugly mess: replace reduction benchmarks with a single ParReduce

---
 src/fields/Fields.cpp     | 339 ++++++++++++++------------------------
 src/fields/OpenBoundary.H |   4 +-
 2 files changed, 130 insertions(+), 213 deletions(-)

diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 0259cfc3c9..1fe61a190c 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -511,226 +511,143 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
     HIPACE_PROFILE("Fields::SetBoundaryCondition()");
     using namespace amrex::literals;
     if (lev == 0 && m_open_boundary) {
-        m_multipole_coeffs.resize(37);
-
-        amrex::MFIter::allowMultipleMFIters(true);
 
         amrex::MultiFab staging_area = getStagingArea(lev);
-        for (amrex::MFIter mfi(staging_area, false); mfi.isValid(); ++mfi)
-        {
-            const auto arr_staging_area = staging_area.array(mfi);
-            const amrex::Box staging_box = staging_area[mfi].box();
-            const int lo2 = staging_box.smallEnd(2);
-            amrex::Real * coeffs_ptr = m_multipole_coeffs.data();
-
-            const amrex::Real poff_x = GetPosOffset(0, geom[lev], staging_box);
-            const amrex::Real poff_y = GetPosOffset(1, geom[lev], staging_box);
-            const amrex::Real dx = geom[lev].CellSize(0);
-            const amrex::Real dy = geom[lev].CellSize(1);
-            const amrex::Real scale = 3._rt/std::sqrt(pow<2>(geom[lev].ProbLength(0)) +
-                                                      pow<2>(geom[lev].ProbLength(1)));
-
-            const amrex::Real radius = amrex::min(geom[lev].ProbLo(0), geom[lev].ProbHi(0),
-                                                  geom[lev].ProbLo(1), geom[lev].ProbHi(1));
-            const amrex::Real cutoff_sq = pow<2>(0.95_rt * radius * scale) * 100;
-
-            amrex::ParallelFor(37,
-                [=] AMREX_GPU_DEVICE (int i) noexcept {
-                    coeffs_ptr[i] = 0;
-                });
-
-            {HIPACE_PROFILE("BoundaryCutoffDeviceReduceSum()");
-            amrex::ParallelFor(amrex::Gpu::KernelInfo().setReduction(true), staging_box,
-                [=] AMREX_GPU_DEVICE (int i, int j, int, amrex::Gpu::Handler const& handler) noexcept
-                {
-                    amrex::Real x = (i * dx + poff_x) * scale;
-                    amrex::Real y = (j * dy + poff_y) * scale;
-                    if (x*x + y*y > cutoff_sq) return;
-                    amrex::Real s_v = arr_staging_area(i, j, lo2);
-                    auto coeffs = GetMultipoleCoeffs<amrex::GpuArray<amrex::Real, 37>>(s_v, x, y);
-
-                    for (int n=0; n<37; ++n) {
-                        amrex::Gpu::deviceReduceSum(coeffs_ptr + n, coeffs[n], handler);
-                    }
-                }
-            );
-            amrex::Gpu::Device::synchronize();
-            std::cout << "MReduceSum: " << m_multipole_coeffs[0] << std::endl;
-            }
-
-            amrex::ParallelFor(37,
-                [=] AMREX_GPU_DEVICE (int i) noexcept {
-                    coeffs_ptr[i] = 0;
-                });
-
-            {HIPACE_PROFILE("BoundaryNoCutoffDeviceReduceSum()");
-            amrex::ParallelFor(amrex::Gpu::KernelInfo().setReduction(true), staging_box,
-                [=] AMREX_GPU_DEVICE (int i, int j, int, amrex::Gpu::Handler const& handler) noexcept
-                {
-                    amrex::Real x = (i * dx + poff_x) * scale;
-                    amrex::Real y = (j * dy + poff_y) * scale;
-                    amrex::Real s_v = arr_staging_area(i, j, lo2);
-                    auto coeffs = GetMultipoleCoeffs<amrex::GpuArray<amrex::Real, 37>>(s_v, x, y);
-
-                    for (int n=0; n<37; ++n) {
-                        amrex::Gpu::deviceReduceSum(coeffs_ptr + n, coeffs[n], handler);
-                    }
-                }
-            );
-            amrex::Gpu::Device::synchronize();
-            std::cout << "MReduceSum: " << m_multipole_coeffs[0] << std::endl;
-            }
-
-            amrex::ParallelFor(37,
-                [=] AMREX_GPU_DEVICE (int i) noexcept {
-                    coeffs_ptr[i] = 0;
-                });
-
-            {HIPACE_PROFILE("BoundaryCutoffParReduce()");
-            auto result =
-            amrex::ParReduce(amrex::TypeList<amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum>{},
-                             amrex::TypeList<amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real>{},
-                             staging_area, m_poisson_nguards,
-                [=] AMREX_GPU_DEVICE (int, int i, int j, int k) noexcept
-                {
-                    const amrex::Real x = (i * dx + poff_x) * scale;
-                    const amrex::Real y = (j * dy + poff_y) * scale;
-                    amrex::Real s_v = arr_staging_area(i, j, k);
-                    if (x*x + y*y > cutoff_sq) s_v = 0._rt;
-                    return GetMultipoleCoeffs<amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real>>(s_v, x, y);
-                }
-            );
-            amrex::Gpu::Device::synchronize();
-            std::cout << "MParReduce: " << amrex::get<0>(result) << std::endl;
-            }
+        amrex::FArrayBox& staging_area_fab = staging_area[0];
 
+        const auto arr_staging_area = staging_area_fab.array();
+        const amrex::Box staging_box = staging_area_fab.box();
 
-            {HIPACE_PROFILE("BoundaryNoCutoffParReduce()");
-            auto result =
-            amrex::ParReduce(amrex::TypeList<amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                             amrex::ReduceOpSum>{},
-                             amrex::TypeList<amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real, amrex::Real, amrex::Real,
-                                             amrex::Real>{},
-                             staging_area, m_poisson_nguards,
-                [=] AMREX_GPU_DEVICE (int, int i, int j, int k) noexcept
-                {
-                    const amrex::Real x = (i * dx + poff_x) * scale;
-                    const amrex::Real y = (j * dy + poff_y) * scale;
-                    amrex::Real s_v = arr_staging_area(i, j, k);
-                    return GetMultipoleCoeffs<amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real, amrex::Real, amrex::Real,
-                                                              amrex::Real>>(s_v, x, y);
-                }
-            );
-            amrex::Gpu::Device::synchronize();
-            std::cout << "MParReduce: " << amrex::get<0>(result) << std::endl;
+        const amrex::Real poff_x = GetPosOffset(0, geom[lev], staging_box);
+        const amrex::Real poff_y = GetPosOffset(1, geom[lev], staging_box);
+        const amrex::Real dx = geom[lev].CellSize(0);
+        const amrex::Real dy = geom[lev].CellSize(1);
+        const amrex::Real scale = 3._rt/std::sqrt(pow<2>(geom[lev].ProbLength(0)) +
+                                                  pow<2>(geom[lev].ProbLength(1)));
+
+        const amrex::Real radius = amrex::min(geom[lev].ProbLo(0), geom[lev].ProbHi(0),
+                                              geom[lev].ProbLo(1), geom[lev].ProbHi(1));
+        const amrex::Real cutoff_sq = pow<2>(0.95_rt * radius * scale) * 100;
+
+        amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real, amrex::Real, amrex::Real,
+                        amrex::Real> coeff_tuple{};
+        {HIPACE_PROFILE("Boundary::ParReduce()");
+        coeff_tuple =
+        amrex::ParReduce(amrex::TypeList<amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+                                         amrex::ReduceOpSum>{},
+                         amrex::TypeList<amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real, amrex::Real, amrex::Real,
+                                         amrex::Real>{},
+                         staging_area, m_poisson_nguards,
+            [=] AMREX_GPU_DEVICE (int, int i, int j, int k) noexcept
+            {
+                const amrex::Real x = (i * dx + poff_x) * scale;
+                const amrex::Real y = (j * dy + poff_y) * scale;
+                amrex::Real s_v = arr_staging_area(i, j, k);
+                if (x*x + y*y > cutoff_sq) s_v = 0._rt;
+                return GetMultipoleCoeffs<amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real, amrex::Real, amrex::Real,
+                                                          amrex::Real>>(s_v, x, y);
             }
+        );}
+
+        amrex::GpuArray<amrex::Real, 37> coeff_array{};
+
+        coeff_array[0] = amrex::get<0>(coeff_tuple);
+        coeff_array[1] = amrex::get<1>(coeff_tuple);
+        coeff_array[2] = amrex::get<2>(coeff_tuple);
+        coeff_array[3] = amrex::get<3>(coeff_tuple);
+        coeff_array[4] = amrex::get<4>(coeff_tuple);
+        coeff_array[5] = amrex::get<5>(coeff_tuple);
+        coeff_array[6] = amrex::get<6>(coeff_tuple);
+        coeff_array[7] = amrex::get<7>(coeff_tuple);
+        coeff_array[8] = amrex::get<8>(coeff_tuple);
+        coeff_array[9] = amrex::get<9>(coeff_tuple);
+        coeff_array[10] = amrex::get<10>(coeff_tuple);
+        coeff_array[11] = amrex::get<11>(coeff_tuple);
+        coeff_array[12] = amrex::get<12>(coeff_tuple);
+        coeff_array[13] = amrex::get<13>(coeff_tuple);
+        coeff_array[14] = amrex::get<14>(coeff_tuple);
+        coeff_array[15] = amrex::get<15>(coeff_tuple);
+        coeff_array[16] = amrex::get<16>(coeff_tuple);
+        coeff_array[17] = amrex::get<17>(coeff_tuple);
+        coeff_array[18] = amrex::get<18>(coeff_tuple);
+        coeff_array[19] = amrex::get<19>(coeff_tuple);
+        coeff_array[20] = amrex::get<20>(coeff_tuple);
+        coeff_array[21] = amrex::get<21>(coeff_tuple);
+        coeff_array[22] = amrex::get<22>(coeff_tuple);
+        coeff_array[23] = amrex::get<23>(coeff_tuple);
+        coeff_array[24] = amrex::get<24>(coeff_tuple);
+        coeff_array[25] = amrex::get<25>(coeff_tuple);
+        coeff_array[26] = amrex::get<26>(coeff_tuple);
+        coeff_array[27] = amrex::get<27>(coeff_tuple);
+        coeff_array[28] = amrex::get<28>(coeff_tuple);
+        coeff_array[29] = amrex::get<29>(coeff_tuple);
+        coeff_array[30] = amrex::get<30>(coeff_tuple);
+        coeff_array[31] = amrex::get<31>(coeff_tuple);
+        coeff_array[32] = amrex::get<32>(coeff_tuple);
+        coeff_array[33] = amrex::get<33>(coeff_tuple);
+        coeff_array[34] = amrex::get<34>(coeff_tuple);
+        coeff_array[35] = amrex::get<35>(coeff_tuple);
+        coeff_array[36] = amrex::get<36>(coeff_tuple);
+
+        std::cout << "Mcoeff:";
+        for (amrex::Real num : coeff_array) {
+            std::cout << " " << num;
+        }
+        std::cout << std::endl;
 
-            amrex::ParallelFor(37,
-                [=] AMREX_GPU_DEVICE (int i) noexcept {
-                    coeffs_ptr[i] = 0;
-                });
-
-
-            {HIPACE_PROFILE("BoundaryCutoffDeviceReduceSumControl()");
-            amrex::ParallelFor(amrex::Gpu::KernelInfo().setReduction(true), staging_box,
-                [=] AMREX_GPU_DEVICE (int i, int j, int, amrex::Gpu::Handler const& handler) noexcept
-                {
-                    amrex::Real x = (i * dx + poff_x) * scale;
-                    amrex::Real y = (j * dy + poff_y) * scale;
-                    if (x*x + y*y > cutoff_sq) return;
-                    amrex::Real s_v = arr_staging_area(i, j, lo2);
-                    auto coeffs = GetMultipoleCoeffs<amrex::GpuArray<amrex::Real, 37>>(s_v, x, y);
-
-                    for (int n=0; n<37; ++n) {
-                        amrex::Gpu::deviceReduceSum(coeffs_ptr + n, coeffs[n], handler);
-                    }
-                }
-            );
-            amrex::Gpu::Device::synchronize();
-            std::cout << "MControl: " << m_multipole_coeffs[0] << std::endl;
+        {HIPACE_PROFILE("Boundary::Dirichlet()");
+        SetDirichletBoundaries(arr_staging_area, staging_box, geom[lev],
+            [=] AMREX_GPU_DEVICE (amrex::Real x, amrex::Real y) noexcept
+            {
+                return dx*dy*GetFieldMultipole(coeff_array, x*scale, y*scale);
             }
+        );}
 
 
-
-
-            {HIPACE_PROFILE("BoundarySetDirichlet()");
-            SetDirichletBoundaries(arr_staging_area, staging_box, geom[lev],
-                [=] AMREX_GPU_DEVICE (amrex::Real x, amrex::Real y) noexcept
-                {
-                    return dx*dy*GetFieldMultipole(coeffs_ptr, x*scale, y*scale);
-                }
-            );}
-        }
-
     } else if (lev == 1) {
         constexpr int interp_order = 2;
 
diff --git a/src/fields/OpenBoundary.H b/src/fields/OpenBoundary.H
index 42d9a2d616..3fa355a84d 100644
--- a/src/fields/OpenBoundary.H
+++ b/src/fields/OpenBoundary.H
@@ -64,8 +64,8 @@ T GetMultipoleCoeffs (amrex::Real s_v, amrex::Real x, amrex::Real y)
     };
 }
 
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-amrex::Real GetFieldMultipole (amrex::Real* m_c, amrex::Real x_domain, amrex::Real y_domain)
+template<class T> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+amrex::Real GetFieldMultipole (T m_c, amrex::Real x_domain, amrex::Real y_domain)
 {
     using namespace amrex::literals;
     amrex::Real radius_2 = pow<2>(x_domain) + pow<2>(y_domain);

From a2936da52ac6cd32b351ade8b8857752765d2029 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 9 Feb 2022 00:35:10 +0100
Subject: [PATCH 35/52] clean up

---
 src/fields/Fields.cpp     | 158 ++++++++------------------------------
 src/fields/OpenBoundary.H | 143 ++++++++++++++++++++++------------
 2 files changed, 124 insertions(+), 177 deletions(-)

diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 4e35670489..84fb1cb2a5 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -521,131 +521,39 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
         const amrex::Real poff_y = GetPosOffset(1, geom[lev], staging_box);
         const amrex::Real dx = geom[lev].CellSize(0);
         const amrex::Real dy = geom[lev].CellSize(1);
-        const amrex::Real scale = 3._rt/std::sqrt(pow<2>(geom[lev].ProbLength(0)) +
-                                                  pow<2>(geom[lev].ProbLength(1)));
-
-        const amrex::Real radius = amrex::min(geom[lev].ProbLo(0), geom[lev].ProbHi(0),
-                                              geom[lev].ProbLo(1), geom[lev].ProbHi(1));
-        const amrex::Real cutoff_sq = pow<2>(0.95_rt * radius * scale) * 100;
-
-        amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real, amrex::Real, amrex::Real,
-                        amrex::Real> coeff_tuple{};
-        {HIPACE_PROFILE("Boundary::ParReduce()");
-        coeff_tuple =
-        amrex::ParReduce(amrex::TypeList<amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
-                                         amrex::ReduceOpSum>{},
-                         amrex::TypeList<amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real, amrex::Real, amrex::Real,
-                                         amrex::Real>{},
-                         staging_area, m_poisson_nguards,
-            [=] AMREX_GPU_DEVICE (int, int i, int j, int k) noexcept
-            {
-                const amrex::Real x = (i * dx + poff_x) * scale;
-                const amrex::Real y = (j * dy + poff_y) * scale;
-                amrex::Real s_v = arr_staging_area(i, j, k);
-                if (x*x + y*y > cutoff_sq) s_v = 0._rt;
-                return GetMultipoleCoeffs<amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real, amrex::Real, amrex::Real,
-                                                          amrex::Real>>(s_v, x, y);
-            }
-        );}
-
-        amrex::GpuArray<amrex::Real, 37> coeff_array{};
-
-        coeff_array[0] = amrex::get<0>(coeff_tuple);
-        coeff_array[1] = amrex::get<1>(coeff_tuple);
-        coeff_array[2] = amrex::get<2>(coeff_tuple);
-        coeff_array[3] = amrex::get<3>(coeff_tuple);
-        coeff_array[4] = amrex::get<4>(coeff_tuple);
-        coeff_array[5] = amrex::get<5>(coeff_tuple);
-        coeff_array[6] = amrex::get<6>(coeff_tuple);
-        coeff_array[7] = amrex::get<7>(coeff_tuple);
-        coeff_array[8] = amrex::get<8>(coeff_tuple);
-        coeff_array[9] = amrex::get<9>(coeff_tuple);
-        coeff_array[10] = amrex::get<10>(coeff_tuple);
-        coeff_array[11] = amrex::get<11>(coeff_tuple);
-        coeff_array[12] = amrex::get<12>(coeff_tuple);
-        coeff_array[13] = amrex::get<13>(coeff_tuple);
-        coeff_array[14] = amrex::get<14>(coeff_tuple);
-        coeff_array[15] = amrex::get<15>(coeff_tuple);
-        coeff_array[16] = amrex::get<16>(coeff_tuple);
-        coeff_array[17] = amrex::get<17>(coeff_tuple);
-        coeff_array[18] = amrex::get<18>(coeff_tuple);
-        coeff_array[19] = amrex::get<19>(coeff_tuple);
-        coeff_array[20] = amrex::get<20>(coeff_tuple);
-        coeff_array[21] = amrex::get<21>(coeff_tuple);
-        coeff_array[22] = amrex::get<22>(coeff_tuple);
-        coeff_array[23] = amrex::get<23>(coeff_tuple);
-        coeff_array[24] = amrex::get<24>(coeff_tuple);
-        coeff_array[25] = amrex::get<25>(coeff_tuple);
-        coeff_array[26] = amrex::get<26>(coeff_tuple);
-        coeff_array[27] = amrex::get<27>(coeff_tuple);
-        coeff_array[28] = amrex::get<28>(coeff_tuple);
-        coeff_array[29] = amrex::get<29>(coeff_tuple);
-        coeff_array[30] = amrex::get<30>(coeff_tuple);
-        coeff_array[31] = amrex::get<31>(coeff_tuple);
-        coeff_array[32] = amrex::get<32>(coeff_tuple);
-        coeff_array[33] = amrex::get<33>(coeff_tuple);
-        coeff_array[34] = amrex::get<34>(coeff_tuple);
-        coeff_array[35] = amrex::get<35>(coeff_tuple);
-        coeff_array[36] = amrex::get<36>(coeff_tuple);
-
-        std::cout << "Mcoeff:";
-        for (amrex::Real num : coeff_array) {
-            std::cout << " " << num;
+        const amrex::Real scale = 3._rt/std::sqrt(
+            pow<2>(geom[lev].ProbLength(0)) + pow<2>(geom[lev].ProbLength(1)));
+        const amrex::Real radius = amrex::min(
+            std::abs(geom[lev].ProbLo(0)), std::abs(geom[lev].ProbHi(0)),
+            std::abs(geom[lev].ProbLo(1)), std::abs(geom[lev].ProbHi(1)));
+        const amrex::Real cutoff_sq = pow<2>(0.95_rt * radius * scale);
+
+        MultipoleTuple coeff_tuple{};
+        {
+            HIPACE_PROFILE("Boundary::ParReduce()");
+            coeff_tuple =
+            amrex::ParReduce(MultipoleReduceOpList{}, MultipoleReduceTypeList{},
+                             staging_area, m_poisson_nguards,
+                [=] AMREX_GPU_DEVICE (int /*box_num*/, int i, int j, int k) noexcept
+                {
+                    const amrex::Real x = (i * dx + poff_x) * scale;
+                    const amrex::Real y = (j * dy + poff_y) * scale;
+                    amrex::Real s_v = arr_staging_area(i, j, k);
+                    if (x*x + y*y > cutoff_sq) s_v = 0._rt;
+                    return GetMultipoleCoeffs(s_v, x, y);
+                }
+            );
         }
-        std::cout << std::endl;
-
-        {HIPACE_PROFILE("Boundary::Dirichlet()");
-        SetDirichletBoundaries(arr_staging_area, staging_box, geom[lev],
-            [=] AMREX_GPU_DEVICE (amrex::Real x, amrex::Real y) noexcept
-            {
-                return dx*dy*GetFieldMultipole(coeff_array, x*scale, y*scale);
-            }
-        );}
 
+        {
+            HIPACE_PROFILE("Boundary::SetDirichlet()");
+            SetDirichletBoundaries(arr_staging_area, staging_box, geom[lev],
+                [=] AMREX_GPU_DEVICE (amrex::Real x, amrex::Real y) noexcept
+                {
+                    return dx*dy*GetFieldMultipole(coeff_tuple, x*scale, y*scale);
+                }
+            );
+        }
 
     } else if (lev == 1) {
         constexpr int interp_order = 2;
@@ -771,11 +679,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
         const amrex::Array4<amrex::Real> array_EypBx = f_EypBx.array(mfi);
         const amrex::Array4<amrex::Real const> array_Psi = f_Psi.array(mfi);
         // number of ghost cells where ExmBy and EypBx are calculated is 0 for now
-<<<<<<< HEAD
         const amrex::Box bx = mfi.growntilebox(m_exmby_eypbx_grow);
-=======
-        const amrex::Box bx = mfi.growntilebox(amrex::IntVect{0, 0, 0});
->>>>>>> development
         const amrex::Real dx_inv = 1._rt/(2._rt*geom[lev].CellSize(Direction::x));
         const amrex::Real dy_inv = 1._rt/(2._rt*geom[lev].CellSize(Direction::y));
 
diff --git a/src/fields/OpenBoundary.H b/src/fields/OpenBoundary.H
index 3fa355a84d..32ee084ddf 100644
--- a/src/fields/OpenBoundary.H
+++ b/src/fields/OpenBoundary.H
@@ -6,21 +6,64 @@
 
 template<unsigned int exp> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 amrex::Real pow (amrex::Real base) {
-    return pow<exp-1>(base) * base;
+    using namespace amrex::literals;
+    if constexpr (exp==0) {
+        return 1._rt;
+    } else if constexpr (exp==1) {
+        return base;
+    } else {
+        return pow<exp-1>(base) * base;
+    }
+    return 0._rt; //shut up compiler
 }
 
-template<> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-amrex::Real pow<0> (amrex::Real base) {
-    return 1;
-}
+using MultipoleTuple = amrex::GpuTuple<
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real>;
 
-template<> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-amrex::Real pow<1> (amrex::Real base) {
-    return base;
-}
+using MultipoleReduceOpList = amrex::TypeList<
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum,
+    amrex::ReduceOpSum>;
+
+using MultipoleReduceTypeList = amrex::TypeList<
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real, amrex::Real, amrex::Real,
+    amrex::Real>;
 
-template<class T> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-T GetMultipoleCoeffs (amrex::Real s_v, amrex::Real x, amrex::Real y)
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+MultipoleTuple GetMultipoleCoeffs (amrex::Real s_v, amrex::Real x, amrex::Real y)
 {
     using namespace amrex::literals;
     return {
@@ -64,51 +107,51 @@ T GetMultipoleCoeffs (amrex::Real s_v, amrex::Real x, amrex::Real y)
     };
 }
 
-template<class T> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-amrex::Real GetFieldMultipole (T m_c, amrex::Real x_domain, amrex::Real y_domain)
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+amrex::Real GetFieldMultipole (MultipoleTuple m_c, amrex::Real x_domain, amrex::Real y_domain)
 {
     using namespace amrex::literals;
     amrex::Real radius_2 = pow<2>(x_domain) + pow<2>(y_domain);
     amrex::Real x = x_domain / radius_2;
     amrex::Real y = y_domain / radius_2;
     return
-      m_c[0] * (std::log(radius_2))
-    + m_c[1] * (-2*x)
-    + m_c[2] * (-2*y)
-    + m_c[3] * (pow<2>(x) - pow<2>(y))
-    + m_c[4] * (-4*x*y)
-    + m_c[5] * (-2.0_rt/3.0_rt*pow<3>(x) + 2*x*pow<2>(y))
-    + m_c[6] * (2*pow<2>(x)*y - 2.0_rt/3.0_rt*pow<3>(y))
-    + m_c[7] * (-1.0_rt/2.0_rt*pow<4>(x) + 3*pow<2>(x)*pow<2>(y) - 1.0_rt/2.0_rt*pow<4>(y))
-    + m_c[8] * (8*pow<3>(x)*y - 8*x*pow<3>(y))
-    + m_c[9] * (-2.0_rt/5.0_rt*pow<5>(x) + 4*pow<3>(x)*pow<2>(y) - 2*x*pow<4>(y))
-    + m_c[10] * (-2*pow<4>(x)*y + 4*pow<2>(x)*pow<3>(y) - 2.0_rt/5.0_rt*pow<5>(y))
-    + m_c[11] * ((1.0_rt/3.0_rt)*pow<6>(x) - 5*pow<4>(x)*pow<2>(y) + 5*pow<2>(x)*pow<4>(y) - 1.0_rt/3.0_rt*pow<6>(y))
-    + m_c[12] * (-12*pow<5>(x)*y + 40*pow<3>(x)*pow<3>(y) - 12*x*pow<5>(y))
-    + m_c[13] * (-2.0_rt/7.0_rt*pow<7>(x) + 6*pow<5>(x)*pow<2>(y) - 10*pow<3>(x)*pow<4>(y) + 2*x*pow<6>(y))
-    + m_c[14] * (2*pow<6>(x)*y - 10*pow<4>(x)*pow<3>(y) + 6*pow<2>(x)*pow<5>(y) - 2.0_rt/7.0_rt*pow<7>(y))
-    + m_c[15] * (-1.0_rt/4.0_rt*pow<8>(x) + 7*pow<6>(x)*pow<2>(y) - 35.0_rt/2.0_rt*pow<4>(x)*pow<4>(y) + 7*pow<2>(x)*pow<6>(y) - 1.0_rt/4.0_rt*pow<8>(y))
-    + m_c[16] * (16*pow<7>(x)*y - 112*pow<5>(x)*pow<3>(y) + 112*pow<3>(x)*pow<5>(y) - 16*x*pow<7>(y))
-    + m_c[17] * (-2.0_rt/9.0_rt*pow<9>(x) + 8*pow<7>(x)*pow<2>(y) - 28*pow<5>(x)*pow<4>(y) + (56.0_rt/3.0_rt)*pow<3>(x)*pow<6>(y) - 2*x*pow<8>(y))
-    + m_c[18] * (-2*pow<8>(x)*y + (56.0_rt/3.0_rt)*pow<6>(x)*pow<3>(y) - 28*pow<4>(x)*pow<5>(y) + 8*pow<2>(x)*pow<7>(y) - 2.0_rt/9.0_rt*pow<9>(y))
-    + m_c[19] * ((1.0_rt/5.0_rt)*pow<10>(x) - 9*pow<8>(x)*pow<2>(y) + 42*pow<6>(x)*pow<4>(y) - 42*pow<4>(x)*pow<6>(y) + 9*pow<2>(x)*pow<8>(y) - 1.0_rt/5.0_rt*pow<10>(y))
-    + m_c[20] * (-20*pow<9>(x)*y + 240*pow<7>(x)*pow<3>(y) - 504*pow<5>(x)*pow<5>(y) + 240*pow<3>(x)*pow<7>(y) - 20*x*pow<9>(y))
-    + m_c[21] * (-2.0_rt/11.0_rt*pow<11>(x) + 10*pow<9>(x)*pow<2>(y) - 60*pow<7>(x)*pow<4>(y) + 84*pow<5>(x)*pow<6>(y) - 30*pow<3>(x)*pow<8>(y) + 2*x*pow<10>(y))
-    + m_c[22] * (2*pow<10>(x)*y - 30*pow<8>(x)*pow<3>(y) + 84*pow<6>(x)*pow<5>(y) - 60*pow<4>(x)*pow<7>(y) + 10*pow<2>(x)*pow<9>(y) - 2.0_rt/11.0_rt*pow<11>(y))
-    + m_c[23] * (-1.0_rt/6.0_rt*pow<12>(x) + 11*pow<10>(x)*pow<2>(y) - 165.0_rt/2.0_rt*pow<8>(x)*pow<4>(y) + 154*pow<6>(x)*pow<6>(y) - 165.0_rt/2.0_rt*pow<4>(x)*pow<8>(y) + 11*pow<2>(x)*pow<10>(y) - 1.0_rt/6.0_rt*pow<12>(y))
-    + m_c[24] * (24*pow<11>(x)*y - 440*pow<9>(x)*pow<3>(y) + 1584*pow<7>(x)*pow<5>(y) - 1584*pow<5>(x)*pow<7>(y) + 440*pow<3>(x)*pow<9>(y) - 24*x*pow<11>(y))
-    + m_c[25] * (-2.0_rt/13.0_rt*pow<13>(x) + 12*pow<11>(x)*pow<2>(y) - 110*pow<9>(x)*pow<4>(y) + 264*pow<7>(x)*pow<6>(y) - 198*pow<5>(x)*pow<8>(y) + 44*pow<3>(x)*pow<10>(y) - 2*x*pow<12>(y))
-    + m_c[26] * (-2*pow<12>(x)*y + 44*pow<10>(x)*pow<3>(y) - 198*pow<8>(x)*pow<5>(y) + 264*pow<6>(x)*pow<7>(y) - 110*pow<4>(x)*pow<9>(y) + 12*pow<2>(x)*pow<11>(y) - 2.0_rt/13.0_rt*pow<13>(y))
-    + m_c[27] * ((1.0_rt/7.0_rt)*pow<14>(x) - 13*pow<12>(x)*pow<2>(y) + 143*pow<10>(x)*pow<4>(y) - 429*pow<8>(x)*pow<6>(y) + 429*pow<6>(x)*pow<8>(y) - 143*pow<4>(x)*pow<10>(y) + 13*pow<2>(x)*pow<12>(y) - 1.0_rt/7.0_rt*pow<14>(y))
-    + m_c[28] * (-28*pow<13>(x)*y + 728*pow<11>(x)*pow<3>(y) - 4004*pow<9>(x)*pow<5>(y) + 6864*pow<7>(x)*pow<7>(y) - 4004*pow<5>(x)*pow<9>(y) + 728*pow<3>(x)*pow<11>(y) - 28*x*pow<13>(y))
-    + m_c[29] * (-2.0_rt/15.0_rt*pow<15>(x) + 14*pow<13>(x)*pow<2>(y) - 182*pow<11>(x)*pow<4>(y) + (2002.0_rt/3.0_rt)*pow<9>(x)*pow<6>(y) - 858*pow<7>(x)*pow<8>(y) + (2002.0_rt/5.0_rt)*pow<5>(x)*pow<10>(y) - 182.0_rt/3.0_rt*pow<3>(x)*pow<12>(y) + 2*x*pow<14>(y))
-    + m_c[30] * (2*pow<14>(x)*y - 182.0_rt/3.0_rt*pow<12>(x)*pow<3>(y) + (2002.0_rt/5.0_rt)*pow<10>(x)*pow<5>(y) - 858*pow<8>(x)*pow<7>(y) + (2002.0_rt/3.0_rt)*pow<6>(x)*pow<9>(y) - 182*pow<4>(x)*pow<11>(y) + 14*pow<2>(x)*pow<13>(y) - 2.0_rt/15.0_rt*pow<15>(y))
-    + m_c[31] * (-1.0_rt/8.0_rt*pow<16>(x) + 15*pow<14>(x)*pow<2>(y) - 455.0_rt/2.0_rt*pow<12>(x)*pow<4>(y) + 1001*pow<10>(x)*pow<6>(y) - 6435.0_rt/4.0_rt*pow<8>(x)*pow<8>(y) + 1001*pow<6>(x)*pow<10>(y) - 455.0_rt/2.0_rt*pow<4>(x)*pow<12>(y) + 15*pow<2>(x)*pow<14>(y) - 1.0_rt/8.0_rt*pow<16>(y))
-    + m_c[32] * (32*pow<15>(x)*y - 1120*pow<13>(x)*pow<3>(y) + 8736*pow<11>(x)*pow<5>(y) - 22880*pow<9>(x)*pow<7>(y) + 22880*pow<7>(x)*pow<9>(y) - 8736*pow<5>(x)*pow<11>(y) + 1120*pow<3>(x)*pow<13>(y) - 32*x*pow<15>(y))
-    + m_c[33] * (-2.0_rt/17.0_rt*pow<17>(x) + 16*pow<15>(x)*pow<2>(y) - 280*pow<13>(x)*pow<4>(y) + 1456*pow<11>(x)*pow<6>(y) - 2860*pow<9>(x)*pow<8>(y) + 2288*pow<7>(x)*pow<10>(y) - 728*pow<5>(x)*pow<12>(y) + 80*pow<3>(x)*pow<14>(y) - 2*x*pow<16>(y))
-    + m_c[34] * (-2*pow<16>(x)*y + 80*pow<14>(x)*pow<3>(y) - 728*pow<12>(x)*pow<5>(y) + 2288*pow<10>(x)*pow<7>(y) - 2860*pow<8>(x)*pow<9>(y) + 1456*pow<6>(x)*pow<11>(y) - 280*pow<4>(x)*pow<13>(y) + 16*pow<2>(x)*pow<15>(y) - 2.0_rt/17.0_rt*pow<17>(y))
-    + m_c[35] * ((1.0_rt/9.0_rt)*pow<18>(x) - 17*pow<16>(x)*pow<2>(y) + 340*pow<14>(x)*pow<4>(y) - 6188.0_rt/3.0_rt*pow<12>(x)*pow<6>(y) + 4862*pow<10>(x)*pow<8>(y) - 4862*pow<8>(x)*pow<10>(y) + (6188.0_rt/3.0_rt)*pow<6>(x)*pow<12>(y) - 340*pow<4>(x)*pow<14>(y) + 17*pow<2>(x)*pow<16>(y) - 1.0_rt/9.0_rt*pow<18>(y))
-    + m_c[36] * (-36*pow<17>(x)*y + 1632*pow<15>(x)*pow<3>(y) - 17136*pow<13>(x)*pow<5>(y) + 63648*pow<11>(x)*pow<7>(y) - 97240*pow<9>(x)*pow<9>(y) + 63648*pow<7>(x)*pow<11>(y) - 17136*pow<5>(x)*pow<13>(y) + 1632*pow<3>(x)*pow<15>(y) - 36*x*pow<17>(y))
+      amrex::get<0>(m_c) * (std::log(radius_2))
+    + amrex::get<1>(m_c) * (-2*x)
+    + amrex::get<2>(m_c) * (-2*y)
+    + amrex::get<3>(m_c) * (pow<2>(x) - pow<2>(y))
+    + amrex::get<4>(m_c) * (-4*x*y)
+    + amrex::get<5>(m_c) * (-2.0_rt/3.0_rt*pow<3>(x) + 2*x*pow<2>(y))
+    + amrex::get<6>(m_c) * (2*pow<2>(x)*y - 2.0_rt/3.0_rt*pow<3>(y))
+    + amrex::get<7>(m_c) * (-1.0_rt/2.0_rt*pow<4>(x) + 3*pow<2>(x)*pow<2>(y) - 1.0_rt/2.0_rt*pow<4>(y))
+    + amrex::get<8>(m_c) * (8*pow<3>(x)*y - 8*x*pow<3>(y))
+    + amrex::get<9>(m_c) * (-2.0_rt/5.0_rt*pow<5>(x) + 4*pow<3>(x)*pow<2>(y) - 2*x*pow<4>(y))
+    + amrex::get<10>(m_c) * (-2*pow<4>(x)*y + 4*pow<2>(x)*pow<3>(y) - 2.0_rt/5.0_rt*pow<5>(y))
+    + amrex::get<11>(m_c) * ((1.0_rt/3.0_rt)*pow<6>(x) - 5*pow<4>(x)*pow<2>(y) + 5*pow<2>(x)*pow<4>(y) - 1.0_rt/3.0_rt*pow<6>(y))
+    + amrex::get<12>(m_c) * (-12*pow<5>(x)*y + 40*pow<3>(x)*pow<3>(y) - 12*x*pow<5>(y))
+    + amrex::get<13>(m_c) * (-2.0_rt/7.0_rt*pow<7>(x) + 6*pow<5>(x)*pow<2>(y) - 10*pow<3>(x)*pow<4>(y) + 2*x*pow<6>(y))
+    + amrex::get<14>(m_c) * (2*pow<6>(x)*y - 10*pow<4>(x)*pow<3>(y) + 6*pow<2>(x)*pow<5>(y) - 2.0_rt/7.0_rt*pow<7>(y))
+    + amrex::get<15>(m_c) * (-1.0_rt/4.0_rt*pow<8>(x) + 7*pow<6>(x)*pow<2>(y) - 35.0_rt/2.0_rt*pow<4>(x)*pow<4>(y) + 7*pow<2>(x)*pow<6>(y) - 1.0_rt/4.0_rt*pow<8>(y))
+    + amrex::get<16>(m_c) * (16*pow<7>(x)*y - 112*pow<5>(x)*pow<3>(y) + 112*pow<3>(x)*pow<5>(y) - 16*x*pow<7>(y))
+    + amrex::get<17>(m_c) * (-2.0_rt/9.0_rt*pow<9>(x) + 8*pow<7>(x)*pow<2>(y) - 28*pow<5>(x)*pow<4>(y) + (56.0_rt/3.0_rt)*pow<3>(x)*pow<6>(y) - 2*x*pow<8>(y))
+    + amrex::get<18>(m_c) * (-2*pow<8>(x)*y + (56.0_rt/3.0_rt)*pow<6>(x)*pow<3>(y) - 28*pow<4>(x)*pow<5>(y) + 8*pow<2>(x)*pow<7>(y) - 2.0_rt/9.0_rt*pow<9>(y))
+    + amrex::get<19>(m_c) * ((1.0_rt/5.0_rt)*pow<10>(x) - 9*pow<8>(x)*pow<2>(y) + 42*pow<6>(x)*pow<4>(y) - 42*pow<4>(x)*pow<6>(y) + 9*pow<2>(x)*pow<8>(y) - 1.0_rt/5.0_rt*pow<10>(y))
+    + amrex::get<20>(m_c) * (-20*pow<9>(x)*y + 240*pow<7>(x)*pow<3>(y) - 504*pow<5>(x)*pow<5>(y) + 240*pow<3>(x)*pow<7>(y) - 20*x*pow<9>(y))
+    + amrex::get<21>(m_c) * (-2.0_rt/11.0_rt*pow<11>(x) + 10*pow<9>(x)*pow<2>(y) - 60*pow<7>(x)*pow<4>(y) + 84*pow<5>(x)*pow<6>(y) - 30*pow<3>(x)*pow<8>(y) + 2*x*pow<10>(y))
+    + amrex::get<22>(m_c) * (2*pow<10>(x)*y - 30*pow<8>(x)*pow<3>(y) + 84*pow<6>(x)*pow<5>(y) - 60*pow<4>(x)*pow<7>(y) + 10*pow<2>(x)*pow<9>(y) - 2.0_rt/11.0_rt*pow<11>(y))
+    + amrex::get<23>(m_c) * (-1.0_rt/6.0_rt*pow<12>(x) + 11*pow<10>(x)*pow<2>(y) - 165.0_rt/2.0_rt*pow<8>(x)*pow<4>(y) + 154*pow<6>(x)*pow<6>(y) - 165.0_rt/2.0_rt*pow<4>(x)*pow<8>(y) + 11*pow<2>(x)*pow<10>(y) - 1.0_rt/6.0_rt*pow<12>(y))
+    + amrex::get<24>(m_c) * (24*pow<11>(x)*y - 440*pow<9>(x)*pow<3>(y) + 1584*pow<7>(x)*pow<5>(y) - 1584*pow<5>(x)*pow<7>(y) + 440*pow<3>(x)*pow<9>(y) - 24*x*pow<11>(y))
+    + amrex::get<25>(m_c) * (-2.0_rt/13.0_rt*pow<13>(x) + 12*pow<11>(x)*pow<2>(y) - 110*pow<9>(x)*pow<4>(y) + 264*pow<7>(x)*pow<6>(y) - 198*pow<5>(x)*pow<8>(y) + 44*pow<3>(x)*pow<10>(y) - 2*x*pow<12>(y))
+    + amrex::get<26>(m_c) * (-2*pow<12>(x)*y + 44*pow<10>(x)*pow<3>(y) - 198*pow<8>(x)*pow<5>(y) + 264*pow<6>(x)*pow<7>(y) - 110*pow<4>(x)*pow<9>(y) + 12*pow<2>(x)*pow<11>(y) - 2.0_rt/13.0_rt*pow<13>(y))
+    + amrex::get<27>(m_c) * ((1.0_rt/7.0_rt)*pow<14>(x) - 13*pow<12>(x)*pow<2>(y) + 143*pow<10>(x)*pow<4>(y) - 429*pow<8>(x)*pow<6>(y) + 429*pow<6>(x)*pow<8>(y) - 143*pow<4>(x)*pow<10>(y) + 13*pow<2>(x)*pow<12>(y) - 1.0_rt/7.0_rt*pow<14>(y))
+    + amrex::get<28>(m_c) * (-28*pow<13>(x)*y + 728*pow<11>(x)*pow<3>(y) - 4004*pow<9>(x)*pow<5>(y) + 6864*pow<7>(x)*pow<7>(y) - 4004*pow<5>(x)*pow<9>(y) + 728*pow<3>(x)*pow<11>(y) - 28*x*pow<13>(y))
+    + amrex::get<29>(m_c) * (-2.0_rt/15.0_rt*pow<15>(x) + 14*pow<13>(x)*pow<2>(y) - 182*pow<11>(x)*pow<4>(y) + (2002.0_rt/3.0_rt)*pow<9>(x)*pow<6>(y) - 858*pow<7>(x)*pow<8>(y) + (2002.0_rt/5.0_rt)*pow<5>(x)*pow<10>(y) - 182.0_rt/3.0_rt*pow<3>(x)*pow<12>(y) + 2*x*pow<14>(y))
+    + amrex::get<30>(m_c) * (2*pow<14>(x)*y - 182.0_rt/3.0_rt*pow<12>(x)*pow<3>(y) + (2002.0_rt/5.0_rt)*pow<10>(x)*pow<5>(y) - 858*pow<8>(x)*pow<7>(y) + (2002.0_rt/3.0_rt)*pow<6>(x)*pow<9>(y) - 182*pow<4>(x)*pow<11>(y) + 14*pow<2>(x)*pow<13>(y) - 2.0_rt/15.0_rt*pow<15>(y))
+    + amrex::get<31>(m_c) * (-1.0_rt/8.0_rt*pow<16>(x) + 15*pow<14>(x)*pow<2>(y) - 455.0_rt/2.0_rt*pow<12>(x)*pow<4>(y) + 1001*pow<10>(x)*pow<6>(y) - 6435.0_rt/4.0_rt*pow<8>(x)*pow<8>(y) + 1001*pow<6>(x)*pow<10>(y) - 455.0_rt/2.0_rt*pow<4>(x)*pow<12>(y) + 15*pow<2>(x)*pow<14>(y) - 1.0_rt/8.0_rt*pow<16>(y))
+    + amrex::get<32>(m_c) * (32*pow<15>(x)*y - 1120*pow<13>(x)*pow<3>(y) + 8736*pow<11>(x)*pow<5>(y) - 22880*pow<9>(x)*pow<7>(y) + 22880*pow<7>(x)*pow<9>(y) - 8736*pow<5>(x)*pow<11>(y) + 1120*pow<3>(x)*pow<13>(y) - 32*x*pow<15>(y))
+    + amrex::get<33>(m_c) * (-2.0_rt/17.0_rt*pow<17>(x) + 16*pow<15>(x)*pow<2>(y) - 280*pow<13>(x)*pow<4>(y) + 1456*pow<11>(x)*pow<6>(y) - 2860*pow<9>(x)*pow<8>(y) + 2288*pow<7>(x)*pow<10>(y) - 728*pow<5>(x)*pow<12>(y) + 80*pow<3>(x)*pow<14>(y) - 2*x*pow<16>(y))
+    + amrex::get<34>(m_c) * (-2*pow<16>(x)*y + 80*pow<14>(x)*pow<3>(y) - 728*pow<12>(x)*pow<5>(y) + 2288*pow<10>(x)*pow<7>(y) - 2860*pow<8>(x)*pow<9>(y) + 1456*pow<6>(x)*pow<11>(y) - 280*pow<4>(x)*pow<13>(y) + 16*pow<2>(x)*pow<15>(y) - 2.0_rt/17.0_rt*pow<17>(y))
+    + amrex::get<35>(m_c) * ((1.0_rt/9.0_rt)*pow<18>(x) - 17*pow<16>(x)*pow<2>(y) + 340*pow<14>(x)*pow<4>(y) - 6188.0_rt/3.0_rt*pow<12>(x)*pow<6>(y) + 4862*pow<10>(x)*pow<8>(y) - 4862*pow<8>(x)*pow<10>(y) + (6188.0_rt/3.0_rt)*pow<6>(x)*pow<12>(y) - 340*pow<4>(x)*pow<14>(y) + 17*pow<2>(x)*pow<16>(y) - 1.0_rt/9.0_rt*pow<18>(y))
+    + amrex::get<36>(m_c) * (-36*pow<17>(x)*y + 1632*pow<15>(x)*pow<3>(y) - 17136*pow<13>(x)*pow<5>(y) + 63648*pow<11>(x)*pow<7>(y) - 97240*pow<9>(x)*pow<9>(y) + 63648*pow<7>(x)*pow<11>(y) - 17136*pow<5>(x)*pow<13>(y) + 1632*pow<3>(x)*pow<15>(y) - 36*x*pow<17>(y))
     ;
 }
 

From f2ee319d421e92c4bdb7bb42cc88abcc37f5a491 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Thu, 10 Feb 2022 07:55:15 +0100
Subject: [PATCH 36/52] add 1/(4 pi) factor to boundary multipole field

---
 src/fields/Fields.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 84fb1cb2a5..6f7f770d11 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -527,6 +527,7 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
             std::abs(geom[lev].ProbLo(0)), std::abs(geom[lev].ProbHi(0)),
             std::abs(geom[lev].ProbLo(1)), std::abs(geom[lev].ProbHi(1)));
         const amrex::Real cutoff_sq = pow<2>(0.95_rt * radius * scale);
+        const amrex::Real dxdy_div_4pi = dx*dy/(4._rt * MathConst::pi);
 
         MultipoleTuple coeff_tuple{};
         {
@@ -550,7 +551,7 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
             SetDirichletBoundaries(arr_staging_area, staging_box, geom[lev],
                 [=] AMREX_GPU_DEVICE (amrex::Real x, amrex::Real y) noexcept
                 {
-                    return dx*dy*GetFieldMultipole(coeff_tuple, x*scale, y*scale);
+                    return dxdy_div_4pi*GetFieldMultipole(coeff_tuple, x*scale, y*scale);
                 }
             );
         }

From 8f4e3f9bd3c24ef71b30cb1ab9033a38a76bc44b Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Tue, 22 Feb 2022 05:36:44 +0100
Subject: [PATCH 37/52] make boundary more stable

---
 src/Hipace.cpp        | 10 ++++-----
 src/fields/Fields.H   | 12 ++++++-----
 src/fields/Fields.cpp | 49 +++++++++++++++++++++----------------------
 3 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/src/Hipace.cpp b/src/Hipace.cpp
index 31175d1b24..401ab56d2a 100644
--- a/src/Hipace.cpp
+++ b/src/Hipace.cpp
@@ -555,7 +555,7 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                  ijz == ijx+4 && ijz_beam == ijx+5 && irho == ijx+6 );
             amrex::MultiFab j_slice(m_fields.getSlices(lev, WhichSlice::This),
                                     amrex::make_alias, Comps[WhichSlice::This]["jx"], 7);
-            if (!Fields::m_extended_solve) j_slice.FillBoundary(Geom(lev).periodicity());
+            if (!m_fields.m_extended_solve) j_slice.FillBoundary(Geom(lev).periodicity());
 
             m_fields.SolvePoissonExmByAndEypBx(Geom(), m_comm_xy, lev, islice);
 
@@ -565,7 +565,7 @@ Hipace::SolveOneSlice (int islice_coarse, const int ibox,
                                              WhichSlice::This);
             m_fields.AddBeamCurrents(lev, WhichSlice::This);
 
-            if (!Fields::m_extended_solve) j_slice.FillBoundary(Geom(lev).periodicity());
+            if (!m_fields.m_extended_solve) j_slice.FillBoundary(Geom(lev).periodicity());
 
             m_fields.SolvePoissonEz(Geom(), lev, islice);
             m_fields.SolvePoissonBz(Geom(), lev, islice);
@@ -857,7 +857,7 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
     /* Guess Bx and By */
     m_fields.InitialBfieldGuess(relative_Bfield_error, m_predcorr_B_error_tolerance, lev);
 
-    if (!Fields::m_extended_solve) {
+    if (!m_fields.m_extended_solve) {
         amrex::ParallelContext::push(m_comm_xy);
         // exchange ExmBy EypBx Ez Bx By Bz
         m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
@@ -923,7 +923,7 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
                                          ibox, m_do_beam_jx_jy_deposition, WhichSlice::Next);
         m_fields.AddBeamCurrents(lev, WhichSlice::Next);
 
-        if (!Fields::m_extended_solve) {
+        if (!m_fields.m_extended_solve) {
             amrex::ParallelContext::push(m_comm_xy);
             // need to exchange jx jy jx_beam jy_beam
             amrex::MultiFab j_slice_next(m_fields.getSlices(lev, WhichSlice::Next),
@@ -959,7 +959,7 @@ Hipace::PredictorCorrectorLoopToSolveBxBy (const int islice_local, const int lev
         jx_beam_next.setVal(0., m_fields.m_slices_nguards);
         jy_beam_next.setVal(0., m_fields.m_slices_nguards);
 
-        if (!Fields::m_extended_solve) {
+        if (!m_fields.m_extended_solve) {
             amrex::ParallelContext::push(m_comm_xy);
             // exchange Bx By
             m_fields.getSlices(lev, WhichSlice::This).FillBoundary(Geom(lev).periodicity());
diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 2b95ebba9a..091566442d 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -294,12 +294,8 @@ public:
     static amrex::IntVect m_slices_nguards;
     /** Number of guard cells for poisson solver MultiFab */
     static amrex::IntVect m_poisson_nguards;
-    /** Number of guard cells where ExmBy and EypBx are calculated */
-    static amrex::IntVect m_exmby_eypbx_grow;
     /** If the poisson solver should include the guard cells */
-    static bool m_extended_solve;
-    /** If lev_0 should be solved with open boundary conditions */
-    static bool m_open_boundary;
+    bool m_extended_solve = false;
 
 private:
     /** Vector over levels, array of 4 slices required to compute current slice */
@@ -310,6 +306,12 @@ private:
     amrex::Vector<amrex::FArrayBox> m_tmp_densities;
     /** Stores temporary values for z interpolation in Fields::Copy */
     amrex::Gpu::DeviceVector<amrex::Real> m_rel_z_vec;
+    /** Number of guard cells where ExmBy and EypBx are calculated */
+    amrex::IntVect m_exmby_eypbx_grow;
+    /** Number of guard cells where sources for poisson equation are included */
+    amrex::IntVect m_source_nguard;
+    /** If lev_0 should be solved with open boundary conditions */
+    bool m_open_boundary = false;
 };
 
 #endif
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 6f7f770d11..443019f4d2 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -11,9 +11,6 @@ using namespace amrex::literals;
 
 amrex::IntVect Fields::m_slices_nguards = {-1, -1, -1};
 amrex::IntVect Fields::m_poisson_nguards = {-1, -1, -1};
-amrex::IntVect Fields::m_exmby_eypbx_grow = {-1, -1, -1};
-bool Fields::m_extended_solve = false;
-bool Fields::m_open_boundary = false;
 
 Fields::Fields (Hipace const* a_hipace)
     : m_slices(a_hipace->maxLevel()+1)
@@ -39,6 +36,7 @@ Fields::AllocData (
         m_slices_nguards = {nguards_xy, nguards_xy, 0};
         m_poisson_nguards = m_slices_nguards;
         m_exmby_eypbx_grow = m_slices_nguards - amrex::IntVect{1, 1, 0};
+        m_source_nguard = -m_slices_nguards;
     } else {
         // Need 1 extra guard cell transversally for transverse derivative
         int nguards_xy = std::max(1, Hipace::m_depos_order_xy);
@@ -46,6 +44,7 @@ Fields::AllocData (
         // Poisson solver same size as domain, no ghost cells
         m_poisson_nguards = {0, 0, 0};
         m_exmby_eypbx_grow = {0, 0, 0};
+        m_source_nguard = {0, 0, 0};
     }
 
     for (int islice=0; islice<WhichSlice::N; islice++) {
@@ -97,19 +96,12 @@ struct derivative_inner {
     // captured variables for GPU
     amrex::Array4<amrex::Real const> array;
     amrex::Real dx_inv;
-    int box_lo;
-    int box_hi;
 
     // derivative of field in dir direction (x or y)
-    // the field is zero-extended such that this derivative can be accessed on the same box
     AMREX_GPU_DEVICE amrex::Real operator() (int i, int j, int k) const noexcept {
         constexpr bool is_x_dir = dir == Direction::x;
         constexpr bool is_y_dir = dir == Direction::y;
-        const int ij_along_dir = is_x_dir * i + is_y_dir * j;
-        const bool lo_guard = ij_along_dir != box_lo;
-        const bool hi_guard = ij_along_dir != box_hi;
-        return (array(i+is_x_dir*hi_guard,j+is_y_dir*hi_guard,k)*hi_guard
-               -array(i-is_x_dir*lo_guard,j-is_y_dir*lo_guard,k)*lo_guard) * dx_inv;
+        return (array(i+is_x_dir,j+is_y_dir,k) - array(i-is_x_dir,j-is_y_dir,k)) * dx_inv;
     }
 };
 
@@ -127,8 +119,7 @@ struct derivative_inner<Direction::z> {
     }
 };
 
-/** \brief derivative in x or y direction. Field is zero-extended by one cell such that this
- * derivative can be accessed on the same box as the field */
+/** \brief derivative in x or y direction */
 template<int dir>
 struct derivative {
     // use brace initialization as constructor
@@ -137,9 +128,7 @@ struct derivative {
 
     // use .array(mfi) like with amrex::MultiFab
     derivative_inner<dir> array (amrex::MFIter& mfi) const {
-        amrex::Box bx = f_view[mfi].box();
-        return derivative_inner<dir>{f_view.array(mfi),
-            1._rt/(2._rt*geom.CellSize(dir)), bx.smallEnd(dir), bx.bigEnd(dir)};
+        return derivative_inner<dir>{f_view.array(mfi), 1._rt/(2._rt*geom.CellSize(dir))};
     }
 };
 
@@ -293,10 +282,16 @@ LinCombination (const amrex::IntVect box_grow, amrex::MultiFab dst,
         const auto src_a_array = src_a.array(mfi);
         const auto src_b_array = src_b.array(mfi);
         const amrex::Box bx = mfi.growntilebox(box_grow);
-        amrex::ParallelFor(bx,
+        const int box_i_lo = bx.smallEnd(Direction::x);
+        const int box_j_lo = bx.smallEnd(Direction::y);
+        const int box_i_hi = bx.bigEnd(Direction::x);
+        const int box_j_hi = bx.bigEnd(Direction::y);
+        amrex::ParallelFor(mfi.growntilebox(),
             [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept
             {
-                dst_array(i,j,k) = factor_a * src_a_array(i,j,k) + factor_b * src_b_array(i,j,k);
+                const bool inside = box_i_lo<=i && i<=box_i_hi && box_j_lo<=j && j<=box_j_hi;
+                dst_array(i,j,k) =
+                    inside ? factor_a * src_a_array(i,j,k) + factor_b * src_b_array(i,j,k) : 0._rt;
             });
     }
 }
@@ -534,18 +529,22 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
             HIPACE_PROFILE("Boundary::ParReduce()");
             coeff_tuple =
             amrex::ParReduce(MultipoleReduceOpList{}, MultipoleReduceTypeList{},
-                             staging_area, m_poisson_nguards,
+                             staging_area, m_source_nguard,
                 [=] AMREX_GPU_DEVICE (int /*box_num*/, int i, int j, int k) noexcept
                 {
                     const amrex::Real x = (i * dx + poff_x) * scale;
                     const amrex::Real y = (j * dy + poff_y) * scale;
                     amrex::Real s_v = arr_staging_area(i, j, k);
-                    if (x*x + y*y > cutoff_sq) s_v = 0._rt;
+                    if (x*x + y*y > cutoff_sq) return MultipoleTuple{}; //zero
                     return GetMultipoleCoeffs(s_v, x, y);
                 }
             );
         }
 
+        if (component == "Ez" || component == "Bz") {
+            amrex::get<0>(coeff_tuple) = 0._rt;
+        }
+
         {
             HIPACE_PROFILE("Boundary::SetDirichlet()");
             SetDirichletBoundaries(arr_staging_area, staging_box, geom[lev],
@@ -651,7 +650,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
     InterpolateFromLev0toLev1(geom, lev, "rho", islice, m_poisson_nguards, -m_slices_nguards);
 
     // calculating the right-hand side 1/episilon0 * -(rho-Jz/c)
-    LinCombination(m_poisson_nguards, getStagingArea(lev),
+    LinCombination(m_source_nguard, getStagingArea(lev),
                    1._rt/(phys_const.c*phys_const.ep0), getField(lev, WhichSlice::This, "jz"),
                    -1._rt/(phys_const.ep0), getField(lev, WhichSlice::This, "rho"));
 
@@ -708,7 +707,7 @@ Fields::SolvePoissonEz (amrex::Vector<amrex::Geometry> const& geom, const int le
 
     // Right-Hand Side for Poisson equation: compute 1/(episilon0 *c0 )*(d_x(jx) + d_y(jy))
     // from the slice MF, and store in the staging area of poisson_solver
-    LinCombination(m_poisson_nguards, getStagingArea(lev),
+    LinCombination(m_source_nguard, getStagingArea(lev),
                    1._rt/(phys_const.ep0*phys_const.c),
                    derivative<Direction::x>{getField(lev, WhichSlice::This, "jx"), geom[lev]},
                    1._rt/(phys_const.ep0*phys_const.c),
@@ -732,7 +731,7 @@ Fields::SolvePoissonBx (amrex::MultiFab& Bx_iter, amrex::Vector<amrex::Geometry>
 
     // Right-Hand Side for Poisson equation: compute -mu_0*d_y(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    LinCombination(m_poisson_nguards, getStagingArea(lev),
+    LinCombination(m_source_nguard, getStagingArea(lev),
                    -phys_const.mu0,
                    derivative<Direction::y>{getField(lev, WhichSlice::This, "jz"), geom[lev]},
                    phys_const.mu0,
@@ -757,7 +756,7 @@ Fields::SolvePoissonBy (amrex::MultiFab& By_iter, amrex::Vector<amrex::Geometry>
 
     // Right-Hand Side for Poisson equation: compute mu_0*d_x(jz) from the slice MF,
     // and store in the staging area of poisson_solver
-    LinCombination(m_poisson_nguards, getStagingArea(lev),
+    LinCombination(m_source_nguard, getStagingArea(lev),
                    phys_const.mu0,
                    derivative<Direction::x>{getField(lev, WhichSlice::This, "jz"), geom[lev]},
                    -phys_const.mu0,
@@ -784,7 +783,7 @@ Fields::SolvePoissonBz (amrex::Vector<amrex::Geometry> const& geom, const int le
 
     // Right-Hand Side for Poisson equation: compute mu_0*(d_y(jx) - d_x(jy))
     // from the slice MF, and store in the staging area of m_poisson_solver
-    LinCombination(m_poisson_nguards, getStagingArea(lev),
+    LinCombination(m_source_nguard, getStagingArea(lev),
                    phys_const.mu0,
                    derivative<Direction::y>{getField(lev, WhichSlice::This, "jx"), geom[lev]},
                    -phys_const.mu0,

From f97af3544a175a16fce4a28af9b512caa8ad98cc Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Tue, 22 Feb 2022 07:59:33 +0100
Subject: [PATCH 38/52] add doc

---
 src/fields/Fields.H       |  2 +-
 src/fields/Fields.cpp     | 22 +++++++++++++++++++---
 src/fields/OpenBoundary.H | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/src/fields/Fields.H b/src/fields/Fields.H
index 091566442d..1ba3a176a7 100644
--- a/src/fields/Fields.H
+++ b/src/fields/Fields.H
@@ -307,7 +307,7 @@ private:
     /** Stores temporary values for z interpolation in Fields::Copy */
     amrex::Gpu::DeviceVector<amrex::Real> m_rel_z_vec;
     /** Number of guard cells where ExmBy and EypBx are calculated */
-    amrex::IntVect m_exmby_eypbx_grow;
+    amrex::IntVect m_exmby_eypbx_nguard;
     /** Number of guard cells where sources for poisson equation are included */
     amrex::IntVect m_source_nguard;
     /** If lev_0 should be solved with open boundary conditions */
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 443019f4d2..1967075b9f 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -34,8 +34,11 @@ Fields::AllocData (
         // Need 1 extra guard cell transversally for transverse derivative
         int nguards_xy = (Hipace::m_depos_order_xy + 1) / 2 + 1;
         m_slices_nguards = {nguards_xy, nguards_xy, 0};
+        // poisson solver same size as fields
         m_poisson_nguards = m_slices_nguards;
-        m_exmby_eypbx_grow = m_slices_nguards - amrex::IntVect{1, 1, 0};
+        // one cell less for transverse derivative
+        m_exmby_eypbx_nguard = m_slices_nguards - amrex::IntVect{1, 1, 0};
+        // cut off anything near edge of charge/current deposition
         m_source_nguard = -m_slices_nguards;
     } else {
         // Need 1 extra guard cell transversally for transverse derivative
@@ -43,7 +46,7 @@ Fields::AllocData (
         m_slices_nguards = {nguards_xy, nguards_xy, 0};
         // Poisson solver same size as domain, no ghost cells
         m_poisson_nguards = {0, 0, 0};
-        m_exmby_eypbx_grow = {0, 0, 0};
+        m_exmby_eypbx_nguard = {0, 0, 0};
         m_source_nguard = {0, 0, 0};
     }
 
@@ -505,8 +508,11 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
 {
     HIPACE_PROFILE("Fields::SetBoundaryCondition()");
     if (lev == 0 && m_open_boundary) {
+        // Coarsest level: use Taylor expansion of the Green's function
+        // to get Dirichlet boundary conditions
 
         amrex::MultiFab staging_area = getStagingArea(lev);
+        // Open Boundaries only work for lev0 with everything in one box
         amrex::FArrayBox& staging_area_fab = staging_area[0];
 
         const auto arr_staging_area = staging_area_fab.array();
@@ -516,11 +522,18 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
         const amrex::Real poff_y = GetPosOffset(1, geom[lev], staging_box);
         const amrex::Real dx = geom[lev].CellSize(0);
         const amrex::Real dy = geom[lev].CellSize(1);
+        // scale factor cancels out for all multipole coefficients except the 0th, for which it adds
+        // a constant term to the potential
         const amrex::Real scale = 3._rt/std::sqrt(
             pow<2>(geom[lev].ProbLength(0)) + pow<2>(geom[lev].ProbLength(1)));
         const amrex::Real radius = amrex::min(
             std::abs(geom[lev].ProbLo(0)), std::abs(geom[lev].ProbHi(0)),
             std::abs(geom[lev].ProbLo(1)), std::abs(geom[lev].ProbHi(1)));
+        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(radius > 0._rt, "The x=0, y=0 coordinate must be inside "
+            "the simulation box as it is used as the point of expansion for open boundaries");
+        // ignore everything outside of 95% the min radius as the Taylor expansion only converges
+        // outside of a circular patch containing the sources, i.e. the sources can't be further
+        // from the center than the closest boundary as it would be the case in the corners
         const amrex::Real cutoff_sq = pow<2>(0.95_rt * radius * scale);
         const amrex::Real dxdy_div_4pi = dx*dy/(4._rt * MathConst::pi);
 
@@ -542,6 +555,8 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
         }
 
         if (component == "Ez" || component == "Bz") {
+            // Because Ez and Bz only have transverse derivatives of currents as sources, the
+            // integral over the whole box is zero, meaning they have no physical monopole component
             amrex::get<0>(coeff_tuple) = 0._rt;
         }
 
@@ -556,6 +571,7 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
         }
 
     } else if (lev == 1) {
+        // Fine level: interpolate solution from coarser level to get Dirichlet boundary conditions
         constexpr int interp_order = 2;
 
         const amrex::Real ref_ratio_z = Hipace::GetRefRatio(lev)[2];
@@ -679,7 +695,7 @@ Fields::SolvePoissonExmByAndEypBx (amrex::Vector<amrex::Geometry> const& geom,
         const amrex::Array4<amrex::Real> array_EypBx = f_EypBx.array(mfi);
         const amrex::Array4<amrex::Real const> array_Psi = f_Psi.array(mfi);
         // number of ghost cells where ExmBy and EypBx are calculated is 0 for now
-        const amrex::Box bx = mfi.growntilebox(m_exmby_eypbx_grow);
+        const amrex::Box bx = mfi.growntilebox(m_exmby_eypbx_nguard);
         const amrex::Real dx_inv = 1._rt/(2._rt*geom[lev].CellSize(Direction::x));
         const amrex::Real dy_inv = 1._rt/(2._rt*geom[lev].CellSize(Direction::y));
 
diff --git a/src/fields/OpenBoundary.H b/src/fields/OpenBoundary.H
index 32ee084ddf..9a9a5e3c62 100644
--- a/src/fields/OpenBoundary.H
+++ b/src/fields/OpenBoundary.H
@@ -4,6 +4,9 @@
 #include <AMReX_AmrCore.H>
 #include <cmath>
 
+/** \brief calculate low integer powers base^exp
+ * \param[in] base base of power
+ */
 template<unsigned int exp> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 amrex::Real pow (amrex::Real base) {
     using namespace amrex::literals;
@@ -62,6 +65,32 @@ using MultipoleReduceTypeList = amrex::TypeList<
     amrex::Real, amrex::Real, amrex::Real,
     amrex::Real>;
 
+// To solve a poisson equation (d^2/dx^2 + d^2/dy^2)phi = source with open boundary conditions for
+// phi(x,y), the source field at (x',y') is integrated together with the Green's function
+// G(x,y,x',y') = 1/(2*pi) * ln(sqrt((x-x')^2 + (y-y')^2)) = 1/(4*pi) * ln((x-x')^2 + (y-y')^2)
+// over x' and y'. Doing this directly would be slow (O(n^2), n=nx*ny) so only the values at the
+// boundary are obtained in this way with the rest of phi being solved by a normal poisson solver
+// using the obtained values as Dirichlet boundary conditions. As this would still be O(n^(3/2))
+// the Green's function isn't used directly, instead it is approximated by an 18th order Taylor
+// expansion in x'=0 and y'=0. This reduces time complexity to O(n) because now the integral over
+// x', y' and sum over Taylor coefficients can be swapped such that the integral does not depend on
+// x, y only x', y'. In general, such a 2D Taylor expansion would have 2^o coefficients per order o,
+// here 524287 in total. Due to Schwarz's theorem this reduces to o+1 per order, 190 in total.
+// The amount of coefficients can be further reduced to just 2 per order (1 for 0th order), 37 in
+// total because (d^2/dx'^2 + d^2/dy'^2)G(x,y,x',y') = 0 for x, y outside of the region containing
+// sources. Thus d^2/dx'^2 G = - d^2/dy'^2 G, making the second derivatives proportional to each
+// other, only leaving coefficients with even and odd derivatives/powers of y separate. An important
+// note about the Taylor expansion is that it only converges if (x'^2 + y'^2) < (x^2 + y^2).
+// This means the sources in the corners have to be ignored as they are further away from
+// the center than some of the boundary.
+
+/** \brief get the multipole coefficients for open boundaries,
+ * these have to be integrated over all sources
+ *
+ * \param[in] s_v source value at this location
+ * \param[in] x (normalized) x coordinate
+ * \param[in] y (normalized) y coordinate
+ */
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 MultipoleTuple GetMultipoleCoeffs (amrex::Real s_v, amrex::Real x, amrex::Real y)
 {
@@ -107,11 +136,18 @@ MultipoleTuple GetMultipoleCoeffs (amrex::Real s_v, amrex::Real x, amrex::Real y
     };
 }
 
+/** \brief get the solution field using the multipole coefficients
+ *
+ * \param[in] m_c multipole coefficients
+ * \param[in] x_domain (normalized) x coordinate
+ * \param[in] y_domain (normalized) y coordinate
+ */
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 amrex::Real GetFieldMultipole (MultipoleTuple m_c, amrex::Real x_domain, amrex::Real y_domain)
 {
     using namespace amrex::literals;
     amrex::Real radius_2 = pow<2>(x_domain) + pow<2>(y_domain);
+    // the coordinate normalization cancels out here
     amrex::Real x = x_domain / radius_2;
     amrex::Real y = y_domain / radius_2;
     return

From ee3ecfea50e9e714453eaf675ff30d3a363ff46c Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Tue, 22 Feb 2022 10:28:31 +0100
Subject: [PATCH 39/52] test CI

---
 src/utils/Constants.cpp | 14 ++++++++++++++
 src/utils/Parser.H      | 12 ++----------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/utils/Constants.cpp b/src/utils/Constants.cpp
index 332a6792b0..3f8fb31cdd 100644
--- a/src/utils/Constants.cpp
+++ b/src/utils/Constants.cpp
@@ -6,3 +6,17 @@ PhysConst get_phys_const ()
     Hipace& hipace = Hipace::GetInstance();
     return hipace.get_phys_const ();
 }
+
+
+namespace Parser {
+    // Cache for evaluated constants
+    std::map<std::string, double> my_constants_cache{};
+
+    // Physical / Numerical Constants available to parsed expressions
+    std::map<std::string, double> hipace_constants
+        {
+            {"pi", MathConst::pi},
+            {"true", 1},
+            {"false", 0}
+        };
+}
diff --git a/src/utils/Parser.H b/src/utils/Parser.H
index 64079ead6d..f679548b79 100755
--- a/src/utils/Parser.H
+++ b/src/utils/Parser.H
@@ -21,16 +21,8 @@ queryWithParser (const amrex::ParmParse& pp, char const * const str, T& val);
 
 namespace Parser
 {
-    // Cache for evaluated constants
-    static std::map<std::string, double> my_constants_cache{};
-
-    // Physical / Numerical Constants available to parsed expressions
-    static std::map<std::string, double> hipace_constants
-        {
-            {"pi", MathConst::pi},
-            {"true", 1},
-            {"false", 0}
-        };
+    extern std::map<std::string, double> my_constants_cache;
+    extern std::map<std::string, double> hipace_constants;
 
     /** \brief add Physical constants to Parser constants
      *

From 8d7e99a591445d32fa19f7835472fdf3a84fbd08 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 23 Feb 2022 12:17:28 +0100
Subject: [PATCH 40/52] add more doc

---
 docs/source/run/parameters.rst | 11 +++++++++++
 src/fields/Fields.cpp          |  2 +-
 src/utils/Constants.cpp        | 14 --------------
 src/utils/Parser.H             | 12 ++++++++++--
 4 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/docs/source/run/parameters.rst b/docs/source/run/parameters.rst
index 4db3119feb..6d75236b56 100644
--- a/docs/source/run/parameters.rst
+++ b/docs/source/run/parameters.rst
@@ -146,6 +146,17 @@ Modeling ion motion is not yet supported by the explicit solver
     The small dst is quicker for simulations with :math:`\geq 511` transverse grid points.
     The default is set accordingly.
 
+* ``fields.extended_solve`` (`bool`) optional (default `0`)
+    Extends the area of the FFT Poisson solver to the ghost cells. This can reduce artefacts
+    originating from the boundary for long simulations.
+
+* ``fields.open_boundary`` (`bool`) optional (default `0`)
+    Uses a Taylor approximation of the Greens function to solve the Poisson equations with
+    open boundary conditions. It's Recommended to use this together with
+    `fields.extended_solve = true` and `geometry.is_periodic = false false false`. Not implemented
+    for the explicit Helmholtz solver.
+
+
 Predictor-corrector loop parameters
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 1967075b9f..367b22d10c 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -547,8 +547,8 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
                 {
                     const amrex::Real x = (i * dx + poff_x) * scale;
                     const amrex::Real y = (j * dy + poff_y) * scale;
-                    amrex::Real s_v = arr_staging_area(i, j, k);
                     if (x*x + y*y > cutoff_sq) return MultipoleTuple{}; //zero
+                    amrex::Real s_v = arr_staging_area(i, j, k);
                     return GetMultipoleCoeffs(s_v, x, y);
                 }
             );
diff --git a/src/utils/Constants.cpp b/src/utils/Constants.cpp
index 3f8fb31cdd..332a6792b0 100644
--- a/src/utils/Constants.cpp
+++ b/src/utils/Constants.cpp
@@ -6,17 +6,3 @@ PhysConst get_phys_const ()
     Hipace& hipace = Hipace::GetInstance();
     return hipace.get_phys_const ();
 }
-
-
-namespace Parser {
-    // Cache for evaluated constants
-    std::map<std::string, double> my_constants_cache{};
-
-    // Physical / Numerical Constants available to parsed expressions
-    std::map<std::string, double> hipace_constants
-        {
-            {"pi", MathConst::pi},
-            {"true", 1},
-            {"false", 0}
-        };
-}
diff --git a/src/utils/Parser.H b/src/utils/Parser.H
index f679548b79..64079ead6d 100755
--- a/src/utils/Parser.H
+++ b/src/utils/Parser.H
@@ -21,8 +21,16 @@ queryWithParser (const amrex::ParmParse& pp, char const * const str, T& val);
 
 namespace Parser
 {
-    extern std::map<std::string, double> my_constants_cache;
-    extern std::map<std::string, double> hipace_constants;
+    // Cache for evaluated constants
+    static std::map<std::string, double> my_constants_cache{};
+
+    // Physical / Numerical Constants available to parsed expressions
+    static std::map<std::string, double> hipace_constants
+        {
+            {"pi", MathConst::pi},
+            {"true", 1},
+            {"false", 0}
+        };
 
     /** \brief add Physical constants to Parser constants
      *

From ed39bb192aa8ebd266149b0d68e843ec77245718 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 23 Feb 2022 13:36:25 +0100
Subject: [PATCH 41/52] remove detailed profiling

---
 docs/source/run/parameters.rst |  2 +-
 src/fields/Fields.cpp          | 43 ++++++++++++++--------------------
 2 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/docs/source/run/parameters.rst b/docs/source/run/parameters.rst
index 6d75236b56..1127cf94fe 100644
--- a/docs/source/run/parameters.rst
+++ b/docs/source/run/parameters.rst
@@ -152,7 +152,7 @@ Modeling ion motion is not yet supported by the explicit solver
 
 * ``fields.open_boundary`` (`bool`) optional (default `0`)
     Uses a Taylor approximation of the Greens function to solve the Poisson equations with
-    open boundary conditions. It's Recommended to use this together with
+    open boundary conditions. It's recommended to use this together with
     `fields.extended_solve = true` and `geometry.is_periodic = false false false`. Not implemented
     for the explicit Helmholtz solver.
 
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index 367b22d10c..d32a900eed 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -537,22 +537,18 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
         const amrex::Real cutoff_sq = pow<2>(0.95_rt * radius * scale);
         const amrex::Real dxdy_div_4pi = dx*dy/(4._rt * MathConst::pi);
 
-        MultipoleTuple coeff_tuple{};
-        {
-            HIPACE_PROFILE("Boundary::ParReduce()");
-            coeff_tuple =
-            amrex::ParReduce(MultipoleReduceOpList{}, MultipoleReduceTypeList{},
-                             staging_area, m_source_nguard,
-                [=] AMREX_GPU_DEVICE (int /*box_num*/, int i, int j, int k) noexcept
-                {
-                    const amrex::Real x = (i * dx + poff_x) * scale;
-                    const amrex::Real y = (j * dy + poff_y) * scale;
-                    if (x*x + y*y > cutoff_sq) return MultipoleTuple{}; //zero
-                    amrex::Real s_v = arr_staging_area(i, j, k);
-                    return GetMultipoleCoeffs(s_v, x, y);
-                }
-            );
-        }
+        MultipoleTuple coeff_tuple =
+        amrex::ParReduce(MultipoleReduceOpList{}, MultipoleReduceTypeList{},
+                         staging_area, m_source_nguard,
+            [=] AMREX_GPU_DEVICE (int /*box_num*/, int i, int j, int k) noexcept
+            {
+                const amrex::Real x = (i * dx + poff_x) * scale;
+                const amrex::Real y = (j * dy + poff_y) * scale;
+                if (x*x + y*y > cutoff_sq) return MultipoleTuple{}; //zero
+                amrex::Real s_v = arr_staging_area(i, j, k);
+                return GetMultipoleCoeffs(s_v, x, y);
+            }
+        );
 
         if (component == "Ez" || component == "Bz") {
             // Because Ez and Bz only have transverse derivatives of currents as sources, the
@@ -560,15 +556,12 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
             amrex::get<0>(coeff_tuple) = 0._rt;
         }
 
-        {
-            HIPACE_PROFILE("Boundary::SetDirichlet()");
-            SetDirichletBoundaries(arr_staging_area, staging_box, geom[lev],
-                [=] AMREX_GPU_DEVICE (amrex::Real x, amrex::Real y) noexcept
-                {
-                    return dxdy_div_4pi*GetFieldMultipole(coeff_tuple, x*scale, y*scale);
-                }
-            );
-        }
+        SetDirichletBoundaries(arr_staging_area, staging_box, geom[lev],
+            [=] AMREX_GPU_DEVICE (amrex::Real x, amrex::Real y) noexcept
+            {
+                return dxdy_div_4pi*GetFieldMultipole(coeff_tuple, x*scale, y*scale);
+            }
+        );
 
     } else if (lev == 1) {
         // Fine level: interpolate solution from coarser level to get Dirichlet boundary conditions

From e04d16425d85c956394abd9eda6eb8971e66efc0 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Wed, 23 Feb 2022 17:02:59 +0100
Subject: [PATCH 42/52] use old amrex to fix CI

---
 .github/workflows/linux.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index d82489c4cf..6b1466525a 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -39,7 +39,8 @@ jobs:
         cmake ..                                   \
             -DHiPACE_COMPUTE=OMP                   \
             -DCMAKE_INSTALL_PREFIX=/tmp/my-hipace  \
-            -DCMAKE_CXX_STANDARD=17
+            -DCMAKE_CXX_STANDARD=17                \
+            -DHiPACE_amrex_branch=22.01
         make -j 2 VERBOSE=ON
         export OMP_NUM_THREADS=2
         ctest --output-on-failure

From 99c9ec509f2e2d8f63a262656ca007a9fc2c355a Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Tue, 8 Mar 2022 22:07:22 +0100
Subject: [PATCH 43/52] add some suggestions

---
 docs/source/run/parameters.rst | 6 +++---
 src/fields/Fields.cpp          | 5 +++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/docs/source/run/parameters.rst b/docs/source/run/parameters.rst
index 1127cf94fe..a9284c5d15 100644
--- a/docs/source/run/parameters.rst
+++ b/docs/source/run/parameters.rst
@@ -147,14 +147,14 @@ Modeling ion motion is not yet supported by the explicit solver
     The default is set accordingly.
 
 * ``fields.extended_solve`` (`bool`) optional (default `0`)
-    Extends the area of the FFT Poisson solver to the ghost cells. This can reduce artefacts
+    Extends the area of the FFT Poisson solver to the ghost cells. This can reduce artifacts
     originating from the boundary for long simulations.
 
 * ``fields.open_boundary`` (`bool`) optional (default `0`)
     Uses a Taylor approximation of the Greens function to solve the Poisson equations with
     open boundary conditions. It's recommended to use this together with
-    `fields.extended_solve = true` and `geometry.is_periodic = false false false`. Not implemented
-    for the explicit Helmholtz solver.
+    ``fields.extended_solve = true`` and ``geometry.is_periodic = false false false``.
+    Not implemented for the explicit Helmholtz solver.
 
 
 Predictor-corrector loop parameters
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index d32a900eed..380cc84221 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -512,7 +512,8 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
         // to get Dirichlet boundary conditions
 
         amrex::MultiFab staging_area = getStagingArea(lev);
-        // Open Boundaries only work for lev0 with everything in one box
+        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(staging_area.size() == 1,
+            "Open Boundaries only work for lev0 with everything in one box");
         amrex::FArrayBox& staging_area_fab = staging_area[0];
 
         const auto arr_staging_area = staging_area_fab.array();
@@ -523,7 +524,7 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
         const amrex::Real dx = geom[lev].CellSize(0);
         const amrex::Real dy = geom[lev].CellSize(1);
         // scale factor cancels out for all multipole coefficients except the 0th, for wich it adds
-        // a constant therm to the potential
+        // a constant term to the potential
         const amrex::Real scale = 3._rt/std::sqrt(
             pow<2>(geom[lev].ProbLength(0)) + pow<2>(geom[lev].ProbLength(1)));
         const amrex::Real radius = amrex::min(

From d54741500e3ddc7da425f0b88e5718789d2b4ec3 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Tue, 15 Mar 2022 12:11:05 +0100
Subject: [PATCH 44/52] add code generator

---
 src/utils/MakeOpenBoundary.py | 156 ++++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 src/utils/MakeOpenBoundary.py

diff --git a/src/utils/MakeOpenBoundary.py b/src/utils/MakeOpenBoundary.py
new file mode 100644
index 0000000000..326f6e024c
--- /dev/null
+++ b/src/utils/MakeOpenBoundary.py
@@ -0,0 +1,156 @@
+# code generator for nth order taylor expansion of the green's function used in OpenBoundary.H
+order = 18
+
+import numpy as np
+import sympy as sp
+from sympy.printing import cxx
+from sympy.printing import cxxcode
+from sympy.printing import ccode
+from typing import Any, Dict, Set, Tuple
+from functools import wraps
+from sympy.core import Add, Expr, Mul, Pow, S, sympify, Float
+from sympy.core.basic import Basic
+from sympy.core.compatibility import default_sort_key
+from sympy.core.function import Lambda
+from sympy.core.mul import _keep_coeff
+from sympy.core.symbol import Symbol
+from sympy.printing.str import StrPrinter
+from sympy.printing.precedence import precedence
+from sympy.codegen.ast import (
+    Assignment, Pointer, Variable, Declaration, Type,
+    real, complex_, integer, bool_, float32, float64, float80,
+    complex64, complex128, intc, value_const, pointer_const,
+    int8, int16, int32, int64, uint8, uint16, uint32, uint64, untyped,
+    none
+)
+
+from sympy import init_printing
+from sympy.codegen.ast import real
+init_printing()
+
+v_xfnorm = sp.Symbol('xf')
+v_yfnorm = sp.Symbol('yf')
+
+v_xsnorm = sp.Symbol('xs')
+v_ysnorm = sp.Symbol('ys')
+
+v_cx = sp.Symbol('x')
+v_cy = sp.Symbol('y')
+
+v_sval = sp.Symbol('s_v')
+v_radius_2 = sp.Symbol('radius_2')
+
+class MyCxxPrinter(sp.printing.cxx.CXX11CodePrinter):
+    type_literal_suffixes = {
+        float32: 'F',
+        float64: '_rt',
+        float80: 'L'
+    }
+
+    def _print_Pow(self, expr):
+        b, e = expr.as_base_exp()
+        return "pow<"+self._print(e)+">("+self._print(b)+")"
+
+def printcxxpow(expr):
+    return MyCxxPrinter().doprint(expr)
+
+
+print("Generating Code up to", order, "order (exponential time complexity)")
+
+# Green's function
+f_exact = sp.log((v_xfnorm-v_xsnorm)**2 + (v_yfnorm-v_ysnorm)**2)
+print("Green's Function:",f_exact,'\n')
+
+#https://stackoverflow.com/questions/22857162/multivariate-taylor-approximation-in-sympy
+def Taylor_polynomial_sympy(function_expression, variable_list, evaluation_point, degree):
+    """
+    Mathematical formulation reference:
+    https://math.libretexts.org/Bookshelves/Calculus/Supplemental_Modules_(Calculus)/Multivariable_Calculus/3%3A_Topics_in_Partial_Derivatives/Taylor__Polynomials_of_Functions_of_Two_Variables
+    :param function_expression: Sympy expression of the function
+    :param variable_list: list. All variables to be approximated (to be "Taylorized")
+    :param evaluation_point: list. Coordinates, where the function will be expressed
+    :param degree: int. Total degree of the Taylor polynomial
+    :return: Returns a Sympy expression of the Taylor series up to a given degree, of a given multivariate expression, approximated as a multivariate polynomial evaluated at the evaluation_point
+    """
+    from sympy import factorial, Matrix, prod
+    import itertools
+
+    n_var = len(variable_list)
+    point_coordinates = [(i, j) for i, j in (zip(variable_list, evaluation_point))]  # list of tuples with variables and their evaluation_point coordinates, to later perform substitution
+
+    deriv_orders = list(itertools.product(range(degree + 1), repeat=n_var))  # list with exponentials of the partial derivatives
+    deriv_orders = [deriv_orders[i] for i in range(len(deriv_orders)) if sum(deriv_orders[i]) <= degree]  # Discarding some higher-order terms
+    n_terms = len(deriv_orders)
+    deriv_orders_as_input = [list(sum(list(zip(variable_list, deriv_orders[i])), ())) for i in range(n_terms)]  # Individual degree of each partial derivative, of each term
+
+    polynomial = 0
+    for i in range(n_terms):
+        partial_derivatives_at_point = function_expression.diff(*deriv_orders_as_input[i]).subs(point_coordinates)  # e.g. df/(dx*dy**2)
+        denominator = prod([factorial(j) for j in deriv_orders[i]])  # e.g. (1! * 2!)
+        distances_powered = prod([(Matrix(variable_list) - Matrix(evaluation_point))[j] ** deriv_orders[i][j] for j in range(n_var)])  # e.g. (x-x0)*(y-y0)**2
+        polynomial += partial_derivatives_at_point / denominator * distances_powered
+    return polynomial
+
+
+print("Calculating Taylor expansion...")
+f_approx = Taylor_polynomial_sympy(f_exact, (v_xsnorm,v_ysnorm), (0,0), order)
+
+
+
+def get_order(fn):
+    seper = sp.separatevars(fn,(v_xsnorm,v_ysnorm),dict=True)
+    res = (seper[v_xsnorm] * seper[v_ysnorm]).subs(v_xsnorm,2).subs(v_ysnorm,2.0001)
+    return res
+
+print("Sorting...")
+f_approx_sum = list(f_approx.args)
+f_approx_sum.sort(key=get_order)
+
+f_approx_all = [sp.separatevars(fn,(v_xsnorm,v_ysnorm),dict=True) for fn in f_approx_sum]
+
+print("Simplifying...")
+for i in range(len(f_approx_all)):
+    f_approx_all[i][v_xsnorm] *= f_approx_all[i][v_ysnorm]
+    f_approx_all[i][v_xsnorm] = sp.simplify(f_approx_all[i][v_xsnorm])
+    f_approx_all[i]["coeff"] = sp.simplify(f_approx_all[i]["coeff"])
+
+print("Reducing...")
+for i in range(len(f_approx_all)):
+    j = i
+    while j<len(f_approx_all):
+        if i!=j and i<len(f_approx_all) and j<len(f_approx_all):
+            c = f_approx_all[j]["coeff"] / f_approx_all[i]["coeff"]
+            if c.is_constant():
+                c = sp.simplify(c)
+                if np.abs(c)>1:
+                    f_approx_all[i][v_xsnorm] = f_approx_all[i][v_xsnorm] + c*f_approx_all[j][v_xsnorm]
+                    f_approx_all.remove(f_approx_all[j])
+                else:
+                    f_approx_all[i][v_xsnorm] = f_approx_all[i][v_xsnorm] / c + f_approx_all[j][v_xsnorm]
+                    f_approx_all[i]["coeff"] *= c
+                    f_approx_all.remove(f_approx_all[j])
+            else:
+                j+=1
+        else:
+            j+=1
+
+
+print("Simplifying...\n")
+for i in range(len(f_approx_all)):
+    f_approx_all[i][v_xsnorm] = sp.simplify(f_approx_all[i][v_xsnorm])
+    f_approx_all[i]["coeff"] = sp.simplify(f_approx_all[i]["coeff"])
+
+i=0
+for seper in f_approx_all:
+    print("    "+printcxxpow( seper[v_xsnorm] \
+        .subs( v_xsnorm, v_cx).subs( v_ysnorm, v_cy).expand()*v_sval )+",")
+    i+=1
+
+print('\n')
+
+i=0
+for seper in f_approx_all:
+    print("    + amrex::get<"+str(i)+">(m_c) * ("+printcxxpow(seper["coeff"] \
+        .subs(v_xfnorm, v_cx/(v_cx*v_cx+v_cy*v_cy)).subs(v_yfnorm, v_cy/(v_cx*v_cx+v_cy*v_cy)) \
+        .simplify().subs(sp.log(1/(v_cx*v_cx + v_cy*v_cy)), sp.log(v_radius_2)).expand())+")")
+    i+=1

From 84b5613301008e9c8d517fdba0eaf978fa232e76 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Fri, 25 Mar 2022 18:01:12 +0100
Subject: [PATCH 45/52] allow for offset fixed ppc beam

---
 src/particles/BeamParticleContainer.H       |  1 +
 src/particles/BeamParticleContainer.cpp     |  4 +++-
 src/particles/BeamParticleContainerInit.cpp | 16 ++++++++++++----
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/particles/BeamParticleContainer.H b/src/particles/BeamParticleContainer.H
index 80231a9633..446ec60b16 100644
--- a/src/particles/BeamParticleContainer.H
+++ b/src/particles/BeamParticleContainer.H
@@ -55,6 +55,7 @@ public:
         const amrex::Real     a_zmin,
         const amrex::Real     a_zmax,
         const amrex::Real     a_radius,
+        const amrex::Array<amrex::Real, 3> a_position_mean,
         const amrex::Real     a_min_density,
         const amrex::Vector<int>& random_ppc);
 
diff --git a/src/particles/BeamParticleContainer.cpp b/src/particles/BeamParticleContainer.cpp
index 49f980568f..17b47bb200 100644
--- a/src/particles/BeamParticleContainer.cpp
+++ b/src/particles/BeamParticleContainer.cpp
@@ -74,13 +74,15 @@ BeamParticleContainer::InitData (const amrex::Geometry& geom)
         getWithParser(pp, "zmin", m_zmin);
         getWithParser(pp, "zmax", m_zmax);
         getWithParser(pp, "radius", m_radius);
+        amrex::Array<amrex::Real, AMREX_SPACEDIM> position_mean{0., 0., 0.};
+        queryWithParser(pp, "position_mean", position_mean);
         queryWithParser(pp, "min_density", m_min_density);
         amrex::Vector<int> random_ppc {false, false, false};
         queryWithParser(pp, "random_ppc", random_ppc);
         const GetInitialDensity get_density(m_name);
         const GetInitialMomentum get_momentum(m_name);
         InitBeamFixedPPC(m_ppc, get_density, get_momentum, geom, m_zmin,
-                         m_zmax, m_radius, m_min_density, random_ppc);
+                         m_zmax, m_radius, position_mean, m_min_density, random_ppc);
 
     } else if (m_injection_type == "fixed_weight") {
 
diff --git a/src/particles/BeamParticleContainerInit.cpp b/src/particles/BeamParticleContainerInit.cpp
index 9eaaf76cf6..55bdf8c189 100644
--- a/src/particles/BeamParticleContainerInit.cpp
+++ b/src/particles/BeamParticleContainerInit.cpp
@@ -72,6 +72,7 @@ InitBeamFixedPPC (const amrex::IntVect& a_num_particles_per_cell,
                   const amrex::Real a_zmin,
                   const amrex::Real a_zmax,
                   const amrex::Real a_radius,
+                  const amrex::Array<amrex::Real, 3> a_position_mean,
                   const amrex::Real a_min_density,
                   const amrex::Vector<int>& random_ppc)
 {
@@ -105,6 +106,9 @@ InitBeamFixedPPC (const amrex::IntVect& a_num_particles_per_cell,
     const amrex::Real scale_fac = Hipace::m_normalized_units ?
         1./num_ppc*cr[0]*cr[1]*cr[2] : dx[0]*dx[1]*dx[2]/num_ppc;
 
+    const amrex::Real x_mean = a_position_mean[0];
+    const amrex::Real y_mean = a_position_mean[1];
+
     // First: loop over all cells, and count the particles effectively injected.
     amrex::Box domain_box = a_geom.Domain();
     domain_box.coarsen(cr);
@@ -137,7 +141,8 @@ InitBeamFixedPPC (const amrex::IntVect& a_num_particles_per_cell,
                     // If particles are evenly spaced, discard particles
                     // individually if they are out of bounds
                     if (z >= a_zmax || z < a_zmin ||
-                        (x*x+y*y) > a_radius*a_radius) continue;
+                        ((x-x_mean)*(x-x_mean)+(y-y_mean)*(y-y_mean)) > a_radius*a_radius)
+                        continue;
                 } else {
                     // If particles are randomly spaced, discard particles
                     // if the cell is outside the domain
@@ -145,7 +150,8 @@ InitBeamFixedPPC (const amrex::IntVect& a_num_particles_per_cell,
                     amrex::Real yc = plo[1]+j*dx[1];
                     amrex::Real zc = plo[2]+k*dx[2];
                     if (zc >= a_zmax || zc < a_zmin ||
-                        (xc*xc+yc*yc) > a_radius*a_radius) continue;
+                        ((xc-x_mean)*(xc-x_mean)+(yc-y_mean)*(yc-y_mean)) > a_radius*a_radius)
+                        continue;
                 }
 
                 const amrex::Real density = get_density(x, y, z);
@@ -218,7 +224,8 @@ InitBeamFixedPPC (const amrex::IntVect& a_num_particles_per_cell,
                     // If particles are evenly spaced, discard particles
                     // individually if they are out of bounds
                     if (z >= a_zmax || z < a_zmin ||
-                        (x*x+y*y) > a_radius*a_radius) continue;
+                        ((x-x_mean)*(x-x_mean)+(y-y_mean)*(y-y_mean)) > a_radius*a_radius)
+                        continue;
                 } else {
                     // If particles are randomly spaced, discard particles
                     // if the cell is outside the domain
@@ -226,7 +233,8 @@ InitBeamFixedPPC (const amrex::IntVect& a_num_particles_per_cell,
                     amrex::Real yc = plo[1]+j*dx[1];
                     amrex::Real zc = plo[2]+k*dx[2];
                     if (zc >= a_zmax || zc < a_zmin ||
-                        (xc*xc+yc*yc) > a_radius*a_radius) continue;
+                        ((xc-x_mean)*(xc-x_mean)+(yc-y_mean)*(yc-y_mean)) > a_radius*a_radius)
+                        continue;
                 }
 
                 const amrex::Real density = get_density(x, y, z);

From d6932f213710d4669569b087ddbd7ca4dbe8c743 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sat, 26 Mar 2022 15:09:58 +0100
Subject: [PATCH 46/52] Update enforcePeriodic call (change in AMReX)

---
 src/particles/pusher/GetAndSetPosition.H | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/particles/pusher/GetAndSetPosition.H b/src/particles/pusher/GetAndSetPosition.H
index 8883affd0b..041a3e75ee 100644
--- a/src/particles/pusher/GetAndSetPosition.H
+++ b/src/particles/pusher/GetAndSetPosition.H
@@ -185,7 +185,9 @@ struct EnforceBC
     {
         using namespace amrex::literals;
 
-        const bool shifted = enforcePeriodic(m_structs[ip], m_plo, m_phi, m_periodicity);
+        // TODO: The second m_phi should be amrex::Geometry RoundoffHiArray(),
+        // however there is no Geometry object to get this.
+        const bool shifted = enforcePeriodic(m_structs[ip], m_plo, m_phi, m_phi, m_periodicity);
         const bool invalid = (shifted && !m_is_per[0]);
         if (invalid) {
             m_weights[ip] = 0.0_rt;

From 1aa044b6e74597a7633c6631845af1507719dec4 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sat, 26 Mar 2022 19:53:58 +0100
Subject: [PATCH 47/52] add CI test

---
 CMakeLists.txt                                |   6 +
 .../beam_in_vacuum/analysis_open_boundary.py  | 179 ++++++++++++++++++
 src/fields/Fields.cpp                         |   9 +-
 src/fields/OpenBoundary.H                     |   7 +
 src/utils/MakeOpenBoundary.py                 |   9 +
 ...n_vacuum_open_boundary.normalized.1Rank.sh |  47 +++++
 ...vacuum_open_boundary.normalized.1Rank.json |  30 +++
 tests/checksum/reset_all_benchmarks.sh        |  12 ++
 8 files changed, 298 insertions(+), 1 deletion(-)
 create mode 100644 examples/beam_in_vacuum/analysis_open_boundary.py
 create mode 100644 tests/beam_in_vacuum_open_boundary.normalized.1Rank.sh
 create mode 100644 tests/checksum/benchmarks_json/beam_in_vacuum_open_boundary.normalized.1Rank.json

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e1e77c488a..8769fbcd46 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -375,6 +375,12 @@ if(BUILD_TESTING)
                  WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}
         )
 
+        add_test(NAME beam_in_vacuum_open_boundary.normalized.1Rank
+                 COMMAND ${HiPACE_SOURCE_DIR}/tests/beam_in_vacuum_open_boundary.normalized.1Rank.sh
+                         $<TARGET_FILE:HiPACE> ${HiPACE_SOURCE_DIR}
+                 WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}
+        )
+
     endif()
 endif()
 
diff --git a/examples/beam_in_vacuum/analysis_open_boundary.py b/examples/beam_in_vacuum/analysis_open_boundary.py
new file mode 100644
index 0000000000..81da300f15
--- /dev/null
+++ b/examples/beam_in_vacuum/analysis_open_boundary.py
@@ -0,0 +1,179 @@
+#! /usr/bin/env python3
+
+# Copyright 2022
+#
+# This file is part of HiPACE++.
+#
+# Authors: AlexanderSinn
+# License: BSD-3-Clause-LBNL
+
+
+# This script compares Bx, By, Ex, Ey and jz of an off-axis can beam with theory, optionally
+# plots simulation and theory on the same plot, and asserts that the differences are small.
+#
+# To use it, run the simulation with the overrides from
+# tests/beam_in_vacuum_open_boundary.normalized.1Rank.sh and execute this script with
+# > ../../build/bin/hipace inputs_normalized <overrides>
+# > python analysis_open_boundary.py --normalized-units
+
+import numpy as np
+import scipy.constants as scc
+import argparse
+import sys
+from openpmd_viewer import OpenPMDTimeSeries
+import matplotlib
+import matplotlib.pyplot as plt
+
+parser = argparse.ArgumentParser(description='Script to analyze the correctness of the beam in vacuum')
+parser.add_argument('--normalized-units',
+                    dest='norm_units',
+                    action='store_true',
+                    default=False,
+                    help='Run the analysis in normalized units')
+parser.add_argument('--do-plot',
+                    dest='do_plot',
+                    action='store_true',
+                    default=False,
+                    help='Plot figures and save them to file')
+parser.add_argument('--output-dir',
+                    dest='output_dir',
+                    default='diags/hdf5',
+                    help='Path to the directory containing output files')
+args = parser.parse_args()
+
+ts = OpenPMDTimeSeries(args.output_dir)
+
+if args.norm_units:
+    c = 1.
+    jz0 = -1.
+    rho0 = -1.
+    mu_0 = 1.
+    eps_0 = 1.
+    R = 1.
+else:
+    # Density of the can beam
+    dens = 2.8239587008591567e23 # at this density, 1/kp = 10um, allowing for an easy comparison with normalized units
+    # Current density and charge density of the beam, and the physical constants used by the theory
+    jz0 = - scc.e * scc.c * dens
+    rho0 = - scc.e * dens
+    c = scc.c
+    mu_0 = scc.mu_0
+    eps_0 = scc.epsilon_0
+    # Radius of the can beam
+    R = 10.e-6
+
+x_beam_mid = 2
+y_beam_mid = -1
+x_domain_len = 8
+y_domain_len = 8
+
+# Load HiPACE++ data for Bx, By, jz and rho, and reconstruct Ex and Ey from ExmBy and EypBx
+Bx_sim, Bx_meta = ts.get_field(field='Bx', iteration=0, slice_across=['x','z'], slice_relative_position=[2*x_beam_mid/x_domain_len,0])
+By_sim, By_meta = ts.get_field(field='By', iteration=0, slice_across=['y','z'], slice_relative_position=[2*y_beam_mid/y_domain_len,0])
+jz_sim = ts.get_field(field='jz_beam', iteration=0, slice_across=['y','z'], slice_relative_position=[2*y_beam_mid/y_domain_len,0])[0]
+rho_sim = ts.get_field(field='rho', iteration=0, slice_across=['y','z'], slice_relative_position=[2*y_beam_mid/y_domain_len,0])[0]
+Ex_sim = ts.get_field(field='ExmBy', iteration=0, slice_across=['y','z'], slice_relative_position=[2*y_beam_mid/y_domain_len,0])[0] + c*By_sim
+Ey_sim = ts.get_field(field='EypBx', iteration=0, slice_across=['x','z'], slice_relative_position=[2*x_beam_mid/x_domain_len,0])[0] - c*Bx_sim
+y = Bx_meta.y
+x = By_meta.x
+
+By_th = mu_0 * jz0 * (x-x_beam_mid) / 2.  # linear profile; overwritten below where |x-x_beam_mid| >= R
+By_th[abs(x-x_beam_mid)>=R] = mu_0 * jz0 * R**2/(2*(x[abs(x-x_beam_mid)>=R]-x_beam_mid))
+Ex_th = rho0 / eps_0 * (x-x_beam_mid) / 2.
+Ex_th[abs(x-x_beam_mid)>=R] = rho0 / eps_0 * R**2/(2*(x[abs(x-x_beam_mid)>=R]-x_beam_mid))
+# Same along y. Each masked assignment uses one mask (>=R) on both sides so the lengths always match.
+Bx_th = -mu_0 * jz0 * (y-y_beam_mid) / 2.
+Bx_th[abs(y-y_beam_mid)>=R] = -mu_0 * jz0 * R**2/(2*(y[abs(y-y_beam_mid)>=R]-y_beam_mid))
+Ey_th = rho0 / eps_0 * (y-y_beam_mid) / 2.
+Ey_th[abs(y-y_beam_mid)>=R] = rho0 / eps_0 * R**2/(2*(y[abs(y-y_beam_mid)>=R]-y_beam_mid))
+
+jz_th = np.ones_like(x) * jz0
+jz_th[abs(x-x_beam_mid)>=R] = 0.
+rho_th = np.ones_like(x) * rho0
+rho_th[abs(x-x_beam_mid)>=R] = 0.
+
+# Plot simulation result and theory
+if args.do_plot:
+    matplotlib.rcParams.update({'font.size': 14})
+    plt.figure(figsize=(12,4))
+
+    if not args.norm_units:
+        plt.subplot(131)
+        plt.plot(1.e6*y, Bx_sim, '+-', label='HiPACE++')
+        plt.plot(1.e6*y, Bx_th, 'k--', label='theory')
+        plt.grid()
+        plt.legend()
+        plt.xlim(-50., 50.)
+        plt.xlabel('y (um)')
+        plt.ylabel('Bx (T)')
+
+        plt.subplot(132)
+        plt.plot(1.e6*x, By_sim, '+-', label='HiPACE++')
+        plt.plot(1.e6*x, By_th, 'k--', label='theory')
+        plt.grid()
+        plt.legend()
+        plt.xlim(-50., 50.)
+        plt.xlabel('x (um)')
+        plt.ylabel('By (T)')
+
+        plt.subplot(133)
+        plt.plot(1.e6*x, jz_sim, '+-', label='HiPACE++')
+        plt.plot(1.e6*x, jz_th, 'k--', label='theory')
+        plt.grid()
+        plt.legend()
+        plt.xlim(-50., 50.)
+        plt.xlabel('x (um)')
+        plt.ylabel('jz (A/m2)')
+    else:
+        plt.subplot(131)
+        plt.plot(y, Bx_sim, '+-', label='HiPACE++')
+        plt.plot(y, Bx_th, 'k--', label='theory')
+        plt.grid()
+        plt.legend()
+        plt.xlim(-5., 5.)
+        plt.xlabel('kp y')
+        plt.ylabel('c Bx / E0')
+
+        plt.subplot(132)
+        plt.plot(x, By_sim, '+-', label='HiPACE++')
+        plt.plot(x, By_th, 'k--', label='theory')
+        plt.grid()
+        plt.legend()
+        plt.xlim(-5., 5.)
+        plt.xlabel('kp x')
+        plt.ylabel('c By / E0')
+
+        plt.subplot(133)
+        plt.plot(x, jz_sim, '+-', label='HiPACE++')
+        plt.plot(x, jz_th, 'k--', label='theory')
+        plt.grid()
+        plt.legend()
+        plt.xlim(-5., 5.)
+        plt.xlabel('kp x')
+        plt.ylabel('jz /IA')
+
+    plt.tight_layout()
+
+    plt.savefig("beam_in_vacuum.png", bbox_inches="tight")
+
+# Assert that the simulation result is close enough to theory
+error_jz = np.sum((jz_sim-jz_th)**2) / np.sum((jz_th)**2)
+print("total relative error jz: " + str(error_jz) + " (tolerance = 0.1)")
+
+error_Bx = np.sum((Bx_sim-Bx_th)**2) / np.sum((Bx_th)**2)
+print("total relative error Bx: " + str(error_Bx) + " (tolerance = 0.005)")
+
+error_By = np.sum((By_sim-By_th)**2) / np.sum((By_th)**2)
+print("total relative error By: " + str(error_By) + " (tolerance = 0.015)")
+
+error_Ex = np.sum((Ex_sim-Ex_th)**2) / np.sum((Ex_th)**2)
+print("total relative error Ex: " + str(error_Ex) + " (tolerance = 0.015)")
+
+error_Ey = np.sum((Ey_sim-Ey_th)**2) / np.sum((Ey_th)**2)
+print("total relative error Ey: " + str(error_Ey) + " (tolerance = 0.005)")
+
+assert(error_jz < .1)
+assert(error_Bx < .005)
+assert(error_By < .015)
+assert(error_Ex < .015)
+assert(error_Ey < .005)
diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp
index f3e13e1287..4fe76965bb 100644
--- a/src/fields/Fields.cpp
+++ b/src/fields/Fields.cpp
@@ -553,7 +553,14 @@ Fields::SetBoundaryCondition (amrex::Vector<amrex::Geometry> const& geom, const
             {
                 const amrex::Real x = (i * dx + poff_x) * scale;
                 const amrex::Real y = (j * dy + poff_y) * scale;
-                if (x*x + y*y > cutoff_sq) return MultipoleTuple{}; //zero
+                if (x*x + y*y > cutoff_sq)  {
+                    return MultipoleTuple{0._rt,
+                        0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt,
+                        0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt,
+                        0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt,
+                        0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt
+                    };
+                }
                 amrex::Real s_v = arr_staging_area(i, j, k);
                 return GetMultipoleCoeffs(s_v, x, y);
             }
diff --git a/src/fields/OpenBoundary.H b/src/fields/OpenBoundary.H
index 9a9a5e3c62..f6668bc3f6 100644
--- a/src/fields/OpenBoundary.H
+++ b/src/fields/OpenBoundary.H
@@ -1,3 +1,10 @@
+/* Copyright 2022
+ *
+ * This file is part of HiPACE++.
+ *
+ * Authors: AlexanderSinn
+ * License: BSD-3-Clause-LBNL
+ */
 #ifndef OPEN_BOUNDARY_H_
 #define OPEN_BOUNDARY_H_
 
diff --git a/src/utils/MakeOpenBoundary.py b/src/utils/MakeOpenBoundary.py
index 326f6e024c..6af28762b8 100644
--- a/src/utils/MakeOpenBoundary.py
+++ b/src/utils/MakeOpenBoundary.py
@@ -1,3 +1,12 @@
+#! /usr/bin/env python3
+
+# Copyright 2022
+#
+# This file is part of HiPACE++.
+#
+# Authors: AlexanderSinn
+# License: BSD-3-Clause-LBNL
+
 # code generator for nth order taylor expansion of the green's function used in OpenBoundary.H
 order = 18
 
diff --git a/tests/beam_in_vacuum_open_boundary.normalized.1Rank.sh b/tests/beam_in_vacuum_open_boundary.normalized.1Rank.sh
new file mode 100644
index 0000000000..124a83d1c1
--- /dev/null
+++ b/tests/beam_in_vacuum_open_boundary.normalized.1Rank.sh
@@ -0,0 +1,47 @@
+#! /usr/bin/env bash
+
+# Copyright 2022
+#
+# This file is part of HiPACE++.
+#
+# Authors: AlexanderSinn
+# License: BSD-3-Clause-LBNL
+
+
+# This file is part of the HiPACE++ test suite.
+# It runs a Hipace simulation for a can beam in vacuum with open boundary conditions,
+# and compares the result of the simulation to theory.
+
+# abort on first encountered error
+set -eu -o pipefail
+
+# Read input parameters
+HIPACE_EXECUTABLE=$1
+HIPACE_SOURCE_DIR=$2
+
+HIPACE_EXAMPLE_DIR=${HIPACE_SOURCE_DIR}/examples/beam_in_vacuum
+HIPACE_TEST_DIR=${HIPACE_SOURCE_DIR}/tests
+
+FILE_NAME=`basename "$0"`
+TEST_NAME="${FILE_NAME%.*}"
+
+# Run the simulation
+mpiexec -n 1 $HIPACE_EXECUTABLE $HIPACE_EXAMPLE_DIR/inputs_normalized \
+        plasmas.sort_bin_size = 8 \
+        hipace.depos_order_xy=0 \
+        geometry.is_periodic = false false false \
+        fields.extended_solve = true \
+        fields.open_boundary = true \
+        geometry.prob_lo     = -4.   -4.   -2.  \
+        geometry.prob_hi     =  4.    4.    2.  \
+        beam.position_mean = 2. -1. 0. \
+        hipace.file_prefix=$TEST_NAME
+
+# Compare the result with theory
+$HIPACE_EXAMPLE_DIR/analysis_open_boundary.py --normalized-units --output-dir=$TEST_NAME
+
+# Compare the results with checksum benchmark
+$HIPACE_TEST_DIR/checksum/checksumAPI.py \
+    --evaluate \
+    --file_name $TEST_NAME \
+    --test-name $TEST_NAME
diff --git a/tests/checksum/benchmarks_json/beam_in_vacuum_open_boundary.normalized.1Rank.json b/tests/checksum/benchmarks_json/beam_in_vacuum_open_boundary.normalized.1Rank.json
new file mode 100644
index 0000000000..40b88706ab
--- /dev/null
+++ b/tests/checksum/benchmarks_json/beam_in_vacuum_open_boundary.normalized.1Rank.json
@@ -0,0 +1,30 @@
+{
+  "beam": {
+    "charge": 308912.0,
+    "id": 47713466328,
+    "mass": 308912.0,
+    "ux": 0.0,
+    "uy": 0.0,
+    "uz": 308912000.0,
+    "w": 77228.0,
+    "x": 617824.0,
+    "y": 308912.0,
+    "z": 308912.0
+  },
+  "lev=0": {
+    "Bx": 150648.61843864,
+    "By": 151536.7415268,
+    "Bz": 0.0,
+    "ExmBy": 0.0,
+    "EypBx": 0.0,
+    "Ez": 0.0,
+    "Psi": 0.0,
+    "jx": 0.0,
+    "jx_beam": 0.0,
+    "jy": 0.0,
+    "jy_beam": 0.0,
+    "jz": 77227.961386029,
+    "jz_beam": 77227.961386029,
+    "rho": 0.0
+  }
+}
diff --git a/tests/checksum/reset_all_benchmarks.sh b/tests/checksum/reset_all_benchmarks.sh
index 23a246c45c..6eb426a700 100755
--- a/tests/checksum/reset_all_benchmarks.sh
+++ b/tests/checksum/reset_all_benchmarks.sh
@@ -341,3 +341,15 @@ then
                      --file_name ${build_dir}/bin/collisions.SI.1Rank \
                      --test-name collisions.SI.1Rank
 fi
+
+# beam_in_vacuum_open_boundary.normalized.1Rank
+if [[ $all_tests = true ]] || [[ $one_test_name = "beam_in_vacuum_open_boundary.normalized.1Rank" ]]
+then
+    cd $build_dir
+    ctest --output-on-failure -R beam_in_vacuum_open_boundary.normalized.1Rank \
+        || echo "ctest command failed, maybe just because checksums are different. Keep going"
+    cd $checksum_dir
+    ./checksumAPI.py --reset-benchmark \
+                     --file_name ${build_dir}/bin/beam_in_vacuum_open_boundary.normalized.1Rank \
+                     --test-name beam_in_vacuum_open_boundary.normalized.1Rank
+fi

From 8bb6429e0e81aec7016331e4e59910ee7679d59e Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sat, 26 Mar 2022 20:00:40 +0100
Subject: [PATCH 48/52] Test CI performance

---
 tests/checksum/backend/openpmd_backend.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/checksum/backend/openpmd_backend.py b/tests/checksum/backend/openpmd_backend.py
index b6fb600f15..da8f212686 100644
--- a/tests/checksum/backend/openpmd_backend.py
+++ b/tests/checksum/backend/openpmd_backend.py
@@ -17,6 +17,7 @@ def __init__(self, filename):
         ''' Constructor: store the dataset object
         '''
 
+        # test
         self.dataset = OpenPMDTimeSeries(filename)
 
     def fields_list(self):

From bd6964f7d95527e057c07d41c296af57168f3dd8 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sat, 26 Mar 2022 20:23:50 +0100
Subject: [PATCH 49/52] change openpmd-viewer backend in checksum to h5py

---
 tests/checksum/backend/openpmd_backend.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/checksum/backend/openpmd_backend.py b/tests/checksum/backend/openpmd_backend.py
index da8f212686..363f322192 100644
--- a/tests/checksum/backend/openpmd_backend.py
+++ b/tests/checksum/backend/openpmd_backend.py
@@ -17,8 +17,7 @@ def __init__(self, filename):
         ''' Constructor: store the dataset object
         '''
 
-        # test
-        self.dataset = OpenPMDTimeSeries(filename)
+        self.dataset = OpenPMDTimeSeries(filename, backend='h5py')
 
     def fields_list(self):
         ''' Return the list of fields defined on the grid

From 43b3fb609aab959cc9cbcebb59a4eacdc3780b6e Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sat, 26 Mar 2022 21:06:20 +0100
Subject: [PATCH 50/52] change permissions

---
 examples/beam_in_vacuum/analysis_open_boundary.py      | 4 ++--
 tests/beam_in_vacuum_open_boundary.normalized.1Rank.sh | 0
 2 files changed, 2 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 tests/beam_in_vacuum_open_boundary.normalized.1Rank.sh

diff --git a/examples/beam_in_vacuum/analysis_open_boundary.py b/examples/beam_in_vacuum/analysis_open_boundary.py
index 81da300f15..bfa5afae23 100644
--- a/examples/beam_in_vacuum/analysis_open_boundary.py
+++ b/examples/beam_in_vacuum/analysis_open_boundary.py
@@ -1,10 +1,10 @@
 #! /usr/bin/env python3
 
-# Copyright 2022
+# Copyright 2020-2022
 #
 # This file is part of HiPACE++.
 #
-# Authors: AlexanderSinn
+# Authors: AlexanderSinn, MaxThevenet, Severin Diederichs
 # License: BSD-3-Clause-LBNL
 
 
diff --git a/tests/beam_in_vacuum_open_boundary.normalized.1Rank.sh b/tests/beam_in_vacuum_open_boundary.normalized.1Rank.sh
old mode 100644
new mode 100755

From 02533a277b2619eaf5022100013f55e1749c0e89 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Sat, 26 Mar 2022 21:22:54 +0100
Subject: [PATCH 51/52] change permissions again

---
 examples/beam_in_vacuum/analysis_open_boundary.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 examples/beam_in_vacuum/analysis_open_boundary.py

diff --git a/examples/beam_in_vacuum/analysis_open_boundary.py b/examples/beam_in_vacuum/analysis_open_boundary.py
old mode 100644
new mode 100755

From 7033464599dd9a84c1ec6f48a01b5370eb572408 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <alexander.sinn@desy.de>
Date: Mon, 28 Mar 2022 11:56:16 +0200
Subject: [PATCH 52/52] add brackets

---
 src/particles/BeamParticleContainerInit.cpp | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/particles/BeamParticleContainerInit.cpp b/src/particles/BeamParticleContainerInit.cpp
index 55bdf8c189..547c01dcda 100644
--- a/src/particles/BeamParticleContainerInit.cpp
+++ b/src/particles/BeamParticleContainerInit.cpp
@@ -141,8 +141,9 @@ InitBeamFixedPPC (const amrex::IntVect& a_num_particles_per_cell,
                     // If particles are evenly spaced, discard particles
                     // individually if they are out of bounds
                     if (z >= a_zmax || z < a_zmin ||
-                        ((x-x_mean)*(x-x_mean)+(y-y_mean)*(y-y_mean)) > a_radius*a_radius)
-                        continue;
+                        ((x-x_mean)*(x-x_mean)+(y-y_mean)*(y-y_mean)) > a_radius*a_radius) {
+                            continue;
+                        }
                 } else {
                     // If particles are randomly spaced, discard particles
                     // if the cell is outside the domain
@@ -150,8 +151,9 @@ InitBeamFixedPPC (const amrex::IntVect& a_num_particles_per_cell,
                     amrex::Real yc = plo[1]+j*dx[1];
                     amrex::Real zc = plo[2]+k*dx[2];
                     if (zc >= a_zmax || zc < a_zmin ||
-                        ((xc-x_mean)*(xc-x_mean)+(yc-y_mean)*(yc-y_mean)) > a_radius*a_radius)
-                        continue;
+                        ((xc-x_mean)*(xc-x_mean)+(yc-y_mean)*(yc-y_mean)) > a_radius*a_radius) {
+                            continue;
+                        }
                 }
 
                 const amrex::Real density = get_density(x, y, z);
@@ -224,8 +226,9 @@ InitBeamFixedPPC (const amrex::IntVect& a_num_particles_per_cell,
                     // If particles are evenly spaced, discard particles
                     // individually if they are out of bounds
                     if (z >= a_zmax || z < a_zmin ||
-                        ((x-x_mean)*(x-x_mean)+(y-y_mean)*(y-y_mean)) > a_radius*a_radius)
-                        continue;
+                        ((x-x_mean)*(x-x_mean)+(y-y_mean)*(y-y_mean)) > a_radius*a_radius) {
+                            continue;
+                        }
                 } else {
                     // If particles are randomly spaced, discard particles
                     // if the cell is outside the domain
@@ -233,8 +236,9 @@ InitBeamFixedPPC (const amrex::IntVect& a_num_particles_per_cell,
                     amrex::Real yc = plo[1]+j*dx[1];
                     amrex::Real zc = plo[2]+k*dx[2];
                     if (zc >= a_zmax || zc < a_zmin ||
-                        ((xc-x_mean)*(xc-x_mean)+(yc-y_mean)*(yc-y_mean)) > a_radius*a_radius)
-                        continue;
+                        ((xc-x_mean)*(xc-x_mean)+(yc-y_mean)*(yc-y_mean)) > a_radius*a_radius) {
+                            continue;
+                        }
                 }
 
                 const amrex::Real density = get_density(x, y, z);