Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Base/Array4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ void make_Array4(py::module &m, std::string typestr)
// __array_function__ feature requires NumPy 1.16 or later.


// Nvidia GPUs: __cuda_array_interface__ v2
// Nvidia GPUs: __cuda_array_interface__ v3
// https://numba.readthedocs.io/en/latest/cuda/cuda_array_interface.html
.def_property_readonly("__cuda_array_interface__", [](Array4<T> const & a4) {
auto d = array_interface(a4);
Expand Down
68 changes: 59 additions & 9 deletions src/Base/PODVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,33 @@
namespace py = pybind11;
using namespace amrex;

namespace
{
    /** CPU: __array_interface__ v3
     *
     * Builds the NumPy array-interface dict that lets Python view the
     * vector's buffer without copying.
     * https://numpy.org/doc/stable/reference/arrays.interface.html
     */
    template <class T, class Allocator = std::allocator<T> >
    py::dict
    array_interface(PODVector<T, Allocator> const & podvector)
    {
        bool const is_read_only = false;  // buffer is exposed writable
        py::dict iface;
        // (pointer, read_only) pair per the interface spec
        iface["data"] = py::make_tuple(
            std::intptr_t(podvector.dataPtr()), is_read_only);
        iface["shape"] = py::make_tuple(podvector.size());  // 1D
        iface["strides"] = py::none();  // contiguous layout
        iface["typestr"] = py::format_descriptor<T>::format();
        iface["version"] = 3;
        return iface;
    }
}

template <class T, class Allocator = std::allocator<T> >
void make_PODVector(py::module &m, std::string typestr)
void make_PODVector(py::module &m, std::string typestr, std::string allocstr)
{
using PODVector_type=PODVector<T, Allocator>;
auto const podv_name = std::string("PODVector_").append(typestr);
using PODVector_type = PODVector<T, Allocator>;
auto const podv_name = std::string("PODVector_").append(typestr)
.append("_").append(allocstr);

py::class_<PODVector_type>(m, podv_name.c_str())
.def("__repr__",
Expand Down Expand Up @@ -60,12 +82,26 @@ void make_PODVector(py::module &m, std::string typestr)
// swap

.def_property_readonly("__array_interface__", [](PODVector_type const & podvector) {
auto d = py::dict();
bool const read_only = false;
d["data"] = py::make_tuple(std::intptr_t(podvector.dataPtr()), read_only);
d["shape"] = py::make_tuple(podvector.size());
d["strides"] = py::none();
d["typestr"] = py::format_descriptor<T>::format();
return array_interface(podvector);
})
.def_property_readonly("__cuda_array_interface__", [](PODVector_type const & podvector) {
// Nvidia GPUs: __cuda_array_interface__ v3
// https://numba.readthedocs.io/en/latest/cuda/cuda_array_interface.html
auto d = array_interface(podvector);

// data:
// Because the user of the interface may or may not be in the same context, the most common case is to use cuPointerGetAttribute with CU_POINTER_ATTRIBUTE_DEVICE_POINTER in the CUDA driver API (or the equivalent CUDA Runtime API) to retrieve a device pointer that is usable in the currently active context.
// TODO For zero-size arrays, use 0 here.

// None or integer
// An optional stream upon which synchronization must take place at the point of consumption, either by synchronizing on the stream or enqueuing operations on the data on the given stream. Integer values in this entry are as follows:
// 0: This is disallowed as it would be ambiguous between None and the default stream, and also between the legacy and per-thread default streams. Any use case where 0 might be given should either use None, 1, or 2 instead for clarity.
// 1: The legacy default stream.
// 2: The per-thread default stream.
// Any other integer: a cudaStream_t represented as a Python integer.
// When None, no synchronization is required.
d["stream"] = py::none();

d["version"] = 3;
return d;
})
Expand All @@ -75,6 +111,20 @@ void make_PODVector(py::module &m, std::string typestr)
;
}

template <class T>
void make_PODVector(py::module &m, std::string typestr)
{
    // Register one PODVector binding per allocator flavor;
    // allocator list mirrors Src/Base/AMReX_GpuContainers.H
    make_PODVector<T, std::allocator<T>>(m, typestr, "std");
    make_PODVector<T, amrex::ArenaAllocator<T>>(m, typestr, "arena");
    make_PODVector<T, amrex::PinnedArenaAllocator<T>>(m, typestr, "pinned");
#ifdef AMREX_USE_GPU
    // GPU builds additionally expose the device-side allocators
    make_PODVector<T, amrex::DeviceArenaAllocator<T>>(m, typestr, "device");
    make_PODVector<T, amrex::ManagedArenaAllocator<T>>(m, typestr, "managed");
    make_PODVector<T, amrex::AsyncArenaAllocator<T>>(m, typestr, "async");
#endif
}

void init_PODVector(py::module& m) {
make_PODVector<ParticleReal> (m, "real");
make_PODVector<int> (m, "int");
Expand Down
49 changes: 42 additions & 7 deletions src/Base/Vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,31 @@
namespace py = pybind11;
using namespace amrex;

namespace
{
    /** CPU: __array_interface__ v3
     *
     * Builds the NumPy array-interface dict that lets Python view the
     * vector's buffer without copying.
     * https://numpy.org/doc/stable/reference/arrays.interface.html
     */
    template <class T, class Allocator = std::allocator<T> >
    py::dict
    array_interface(Vector<T, Allocator> const & vector)
    {
        bool const is_read_only = false;  // buffer is exposed writable
        py::dict iface;
        // (pointer, read_only) pair per the interface spec
        iface["data"] = py::make_tuple(
            std::intptr_t(vector.dataPtr()), is_read_only);
        iface["shape"] = py::make_tuple(vector.size());  // 1D
        iface["strides"] = py::none();  // contiguous layout
        iface["typestr"] = py::format_descriptor<T>::format();
        iface["version"] = 3;
        return iface;
    }
}

template <class T, class Allocator = std::allocator<T> >
void make_Vector(py::module &m, std::string typestr)
{
using Vector_type=Vector<T, Allocator>;
using Vector_type = Vector<T, Allocator>;
auto const v_name = std::string("Vector_").append(typestr);

py::class_<Vector_type>(m, v_name.c_str())
Expand All @@ -47,15 +67,30 @@ void make_Vector(py::module &m, std::string typestr)
.def("size", &Vector_type::size)

.def_property_readonly("__array_interface__", [](Vector_type const & vector) {
auto d = py::dict();
bool const read_only = false;
d["data"] = py::make_tuple(std::intptr_t(vector.dataPtr()), read_only);
d["shape"] = py::make_tuple(vector.size());
d["strides"] = py::none();
d["typestr"] = py::format_descriptor<T>::format();
return array_interface(vector);
})
.def_property_readonly("__cuda_array_interface__", [](Vector_type const & vector) {
// Nvidia GPUs: __cuda_array_interface__ v3
// https://numba.readthedocs.io/en/latest/cuda/cuda_array_interface.html
auto d = array_interface(vector);

// data:
// Because the user of the interface may or may not be in the same context, the most common case is to use cuPointerGetAttribute with CU_POINTER_ATTRIBUTE_DEVICE_POINTER in the CUDA driver API (or the equivalent CUDA Runtime API) to retrieve a device pointer that is usable in the currently active context.
// TODO For zero-size arrays, use 0 here.

// None or integer
// An optional stream upon which synchronization must take place at the point of consumption, either by synchronizing on the stream or enqueuing operations on the data on the given stream. Integer values in this entry are as follows:
// 0: This is disallowed as it would be ambiguous between None and the default stream, and also between the legacy and per-thread default streams. Any use case where 0 might be given should either use None, 1, or 2 instead for clarity.
// 1: The legacy default stream.
// 2: The per-thread default stream.
// Any other integer: a cudaStream_t represented as a Python integer.
// When None, no synchronization is required.
d["stream"] = py::none();

d["version"] = 3;
return d;
})

// setter & getter
.def("__setitem__", [](Vector_type & vector, int const idx, T const value){ vector[idx] = value; })
.def("__getitem__", [](Vector_type & v, int const idx){ return v[idx]; })
Expand Down
122 changes: 88 additions & 34 deletions src/Particle/ArrayOfStructs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,63 @@
namespace py = pybind11;
using namespace amrex;

namespace
{
    /** CPU: __array_interface__ v3
     *
     * Describes the memory layout of one Particle<NReal, NInt> record so
     * NumPy can view the array-of-structs buffer as a structured array
     * without copying.
     *
     * https://numpy.org/doc/stable/reference/arrays.interface.html
     */
    template <int NReal, int NInt,
              template<class> class Allocator=DefaultAllocator>
    py::dict
    array_interface(ArrayOfStructs<NReal, NInt, Allocator> const & aos)
    {
        using ParticleType = Particle<NReal, NInt>;
        using RealType = typename ParticleType::RealType;

        auto d = py::dict();
        bool const read_only = false;  // buffer is exposed writable
        d["data"] = py::make_tuple(std::intptr_t(aos.dataPtr()), read_only);
        d["shape"] = py::make_tuple(aos.size());
        // 1D view over whole particle records: one stride = one struct
        d["strides"] = py::make_tuple(sizeof(ParticleType));
        // opaque ("void") record of the struct's size; fields come via "descr"
        d["typestr"] = "|V" + std::to_string(sizeof(ParticleType));

        // field-by-field description of the particle struct, in layout order
        py::list descr;
        descr.append(py::make_tuple("x", py::format_descriptor<RealType>::format()));
#if (AMREX_SPACEDIM >= 2)
        descr.append(py::make_tuple("y", py::format_descriptor<RealType>::format()));
#endif
#if (AMREX_SPACEDIM >= 3)
        descr.append(py::make_tuple("z", py::format_descriptor<RealType>::format()));
#endif
        // extra compile-time real components (loop is a no-op for NReal == 0,
        // so no guard is needed)
        for (int ii = 0; ii < NReal; ++ii) {
            descr.append(py::make_tuple("rdata_" + std::to_string(ii),
                                        py::format_descriptor<RealType>::format()));
        }
        // NOTE(review): exposed as a single 64bit "cpuid" word -- assumes the
        // particle's id/cpu data occupies exactly 8 bytes here; confirm
        // against the Particle<NReal, NInt> layout
        descr.append(py::make_tuple("cpuid", py::format_descriptor<uint64_t>::format()));
        // extra compile-time integer components (no-op for NInt == 0)
        for (int ii = 0; ii < NInt; ++ii) {
            descr.append(py::make_tuple("idata_" + std::to_string(ii),
                                        py::format_descriptor<int>::format()));
        }

        d["descr"] = descr;
        d["version"] = 3;
        return d;
    }
}

template <int NReal, int NInt,
template<class> class Allocator=DefaultAllocator>
void make_ArrayOfStructs(py::module &m)
void make_ArrayOfStructs(py::module &m, std::string allocstr)
{
using AOSType = ArrayOfStructs<NReal, NInt>;
using AOSType = ArrayOfStructs<NReal, NInt, Allocator>;
using ParticleType = Particle<NReal, NInt>;
using RealType = typename ParticleType::RealType;

auto const aos_name = std::string("ArrayOfStructs_").append(std::to_string(NReal) + "_" + std::to_string(NInt));
auto const aos_name = std::string("ArrayOfStructs_")
.append(std::to_string(NReal)).append("_")
.append(std::to_string(NInt)).append("_")
.append(allocstr);
py::class_<AOSType>(m, aos_name.c_str())
.def(py::init())
// TODO:
Expand All @@ -41,35 +88,29 @@ void make_ArrayOfStructs(py::module &m)
.def("push_back", &AOSType::push_back)
.def("pop_back", &AOSType::pop_back)
.def("back", py::overload_cast<>(&AOSType::back),"get back member. Problem!!!!! this is perfo")

// setter & getter
.def_property_readonly("__array_interface__", [](AOSType const & aos) {
auto d = py::dict();
bool const read_only = false;
d["data"] = py::make_tuple(std::intptr_t(aos.dataPtr()), read_only);
d["shape"] = py::make_tuple(aos.size());
d["strides"] = py::make_tuple(sizeof(ParticleType));
d["typestr"] = "|V" + std::to_string(sizeof(ParticleType));
py::list descr;
descr.append(py::make_tuple("x", py::format_descriptor<RealType>::format()));
#if (AMREX_SPACEDIM >= 2)
descr.append(py::make_tuple("y", py::format_descriptor<RealType>::format()));
#endif
#if (AMREX_SPACEDIM >= 3)
descr.append(py::make_tuple("z", py::format_descriptor<RealType>::format()));
#endif
if (NReal > 0) {
for(int ii=0; ii < NReal; ii++) {
descr.append(py::make_tuple("rdata_"+std::to_string(ii),py::format_descriptor<RealType>::format()));
}
}
descr.append(py::make_tuple("cpuid", py::format_descriptor<uint64_t>::format()) );
if (NInt > 0) {
for(int ii=0; ii < NInt; ++ii) {
descr.append(py::make_tuple("idata_"+std::to_string(ii),py::format_descriptor<int>::format()));
}
}
return array_interface(aos);
})
.def_property_readonly("__cuda_array_interface__", [](AOSType const & aos) {
// Nvidia GPUs: __cuda_array_interface__ v3
// https://numba.readthedocs.io/en/latest/cuda/cuda_array_interface.html
auto d = array_interface(aos);

// data:
// Because the user of the interface may or may not be in the same context, the most common case is to use cuPointerGetAttribute with CU_POINTER_ATTRIBUTE_DEVICE_POINTER in the CUDA driver API (or the equivalent CUDA Runtime API) to retrieve a device pointer that is usable in the currently active context.
// TODO For zero-size arrays, use 0 here.

// None or integer
// An optional stream upon which synchronization must take place at the point of consumption, either by synchronizing on the stream or enqueuing operations on the data on the given stream. Integer values in this entry are as follows:
// 0: This is disallowed as it would be ambiguous between None and the default stream, and also between the legacy and per-thread default streams. Any use case where 0 might be given should either use None, 1, or 2 instead for clarity.
// 1: The legacy default stream.
// 2: The per-thread default stream.
// Any other integer: a cudaStream_t represented as a Python integer.
// When None, no synchronization is required.
d["stream"] = py::none();

d["descr"] = descr;
d["version"] = 3;
return d;
})
Expand All @@ -79,9 +120,22 @@ void make_ArrayOfStructs(py::module &m)
;
}

template <int NReal, int NInt>
void make_ArrayOfStructs(py::module &m)
{
    // Register one ArrayOfStructs binding per allocator flavor;
    // allocator list mirrors Src/Base/AMReX_GpuContainers.H
    make_ArrayOfStructs<NReal, NInt, std::allocator>(m, "std");
    make_ArrayOfStructs<NReal, NInt, amrex::ArenaAllocator>(m, "arena");
    make_ArrayOfStructs<NReal, NInt, amrex::PinnedArenaAllocator>(m, "pinned");
#ifdef AMREX_USE_GPU
    // GPU builds additionally expose the device-side allocators
    make_ArrayOfStructs<NReal, NInt, amrex::DeviceArenaAllocator>(m, "device");
    make_ArrayOfStructs<NReal, NInt, amrex::ManagedArenaAllocator>(m, "managed");
    make_ArrayOfStructs<NReal, NInt, amrex::AsyncArenaAllocator>(m, "async");
#endif
}

void init_ArrayOfStructs(py::module& m) {
    // The pasted diff left both the old and new instantiation lists in this
    // body; registering the same <NReal, NInt> combination twice would make
    // pybind11 raise at import time. Keep only the post-change list.
    make_ArrayOfStructs<0, 0> (m); // WarpX 22.07, ImpactX 22.07, HiPACE++ 22.07
    make_ArrayOfStructs<1, 1> (m); // test in ParticleContainer
    make_ArrayOfStructs<2, 1> (m); // test
}
Loading