llvm
diff --git a/‎flang/docs/OpenMP-descriptor-management.md‎
Lines changed: 50 additions & 20 deletions b/‎flang/docs/OpenMP-descriptor-management.md‎
Lines changed: 50 additions & 20 deletions
diff --git a/‎flang/include/flang/Optimizer/CodeGen/CodeGenOpenMP.h‎
Lines changed: 2 additions & 3 deletions b/‎flang/include/flang/Optimizer/CodeGen/CodeGenOpenMP.h‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎flang/include/flang/Optimizer/Transforms/Passes.td‎
Lines changed: 3 additions & 2 deletions b/‎flang/include/flang/Optimizer/Transforms/Passes.td‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎flang/lib/Optimizer/CodeGen/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎flang/lib/Optimizer/CodeGen/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎flang/lib/Optimizer/Transforms/OMPDescriptorMapInfoGen.cpp‎
Lines changed: 24 additions & 17 deletions b/‎flang/lib/Optimizer/Transforms/OMPDescriptorMapInfoGen.cpp‎
Lines changed: 24 additions & 17 deletions
diff --git a/‎flang/test/Fir/convert-to-llvm-openmp-and-fir.fir‎
Lines changed: 19 additions & 0 deletions b/‎flang/test/Fir/convert-to-llvm-openmp-and-fir.fir‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎flang/test/Integration/OpenMP/map-types-and-sizes.f90‎
Lines changed: 1 addition & 1 deletion b/‎flang/test/Integration/OpenMP/map-types-and-sizes.f90‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎flang/test/Lower/OpenMP/allocatable-array-bounds.f90‎
Lines changed: 1 addition & 1 deletion b/‎flang/test/Lower/OpenMP/allocatable-array-bounds.f90‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎flang/test/Transforms/omp-descriptor-map-info-gen.fir‎
Lines changed: 27 additions & 0 deletions b/‎flang/test/Transforms/omp-descriptor-map-info-gen.fir‎
Lines changed: 27 additions & 0 deletions
@@ -8,17 +8,12 @@
 
 # OpenMP dialect: Fortran descriptor type mapping for offload
 
-The descriptor mapping for OpenMP currently works differently to the planned direction for OpenACC, however, 
-it is possible and would likely be ideal to align the method with OpenACC in the future. However, at least 
-currently the OpenMP specification is less descriptive and has less stringent rules around descriptor based
-types so does not require as complex a set of descriptor management rules (although, in certain cases 
-for the interim adopting OpenACC's rules where it makes sense could be useful).
-
 The initial method for mapping Fortran types tied to descriptors for OpenMP offloading is to treat these types 
 as a special case of OpenMP record type (C/C++ structure/class, Fortran derived type etc.) mapping as far as the 
 runtime is concerned. Where the box (descriptor information) is the holding container and the underlying 
 data pointer is contained within the container, and we must generate explicit maps for both the pointer member and
-the container. As an example, a small C++ program that is equivalent to the concept described:
+the container. As an example, a small C++ program that is equivalent to the concept described, with the 
+`mock_descriptor` class being representative of the class utilised for descriptors in Clang:
 
 ```C++
 struct mock_descriptor {
@@ -49,15 +44,15 @@ Currently, Flang will lower these descriptor types in the OpenMP lowering (lower
 to all other map types, generating an omp.MapInfoOp containing relevant information required for lowering
 the OpenMP dialect to LLVM-IR during the final stages of the MLIR lowering. However, after 
 the lowering to FIR/HLFIR has been performed an OpenMP dialect specific pass for Fortran, 
-OMPDescriptorMapInfoGenPass (Optimizer/OMPDescriptorMapInfoGen.cpp) will expand the 
-omp.MapInfoOp's containing descriptors (which currently will be a BoxType or BoxAddrOp) into multiple 
+`OMPDescriptorMapInfoGenPass` (Optimizer/OMPDescriptorMapInfoGen.cpp) will expand the 
+`omp.MapInfoOp`'s containing descriptors (which currently will be a `BoxType` or `BoxAddrOp`) into multiple 
 mappings, with one extra per pointer member in the descriptor that is supported on top of the original
 descriptor map operation. These pointers members are linked to the parent descriptor by adding them to 
 the member field of the original descriptor map operation, they are then inserted into the relevant map
-owning operation's (omp.TargetOp, omp.DataOp etc.) map operand list and in cases where the owning operation
-is IsolatedFromAbove, it also inserts them as BlockArgs to canonicalize the mappings and simplify lowering.
+owning operation's (`omp.TargetOp`, `omp.DataOp` etc.) map operand list and in cases where the owning operation
+is `IsolatedFromAbove`, it also inserts them as `BlockArgs` to canonicalize the mappings and simplify lowering.
 
-An example transformation by the OMPDescriptorMapInfoGenPass:
+An example transformation by the `OMPDescriptorMapInfoGenPass`:
 
 ```
 
@@ -83,13 +78,48 @@ omp.target map_entries(%13 -> %arg1, %14 -> %arg2, %15 -> %arg3 : !fir.llvm_ptr<
 
 In later stages of the compilation flow when the OpenMP dialect is being lowered to LLVM-IR these descriptor
 mappings are treated as if they were structure mappings with explicit member maps on the same directive as 
-their parent was mapped.
-
-This method is generic in the sense that the OpenMP diaelct doesn't need to understand that it is mapping a 
+their parent was mapped. 
+
+This implementation utilises the member field of the `map_info` operation to indicate that the pointer 
+descriptor elements which are contained in their own `map_info` operation are part of their respective 
+parent descriptor. This allows the descriptor containing the descriptor pointer member to be mapped
+as a composite entity during lowering, with the correct mappings being generated to tie them together,
+allowing the OpenMP runtime to map them correctly, attaching the pointer member to the parent
+structure so it can be accessed during execution. If we opt to not treat the descriptor as a single 
+entity we have issues with the member being correctly attached to the parent and being accessible,
+this can cause runtime segfaults on the device when we try to access the data through the parent. It
+may be possible to avoid this member mapping, treating them as individual entities, but treating a 
+composite mapping as an individual mapping could lead to problems such as the runtime taking 
+liberties with the mapping it usually wouldn't if it knew they were linked, we would also have to 
+be careful to maintain the correct order of mappings as we lower, if we misorder the maps, it'd be
+possible to overwrite already written data, e.g. if we write the descriptor data pointer first, and
+then the containing descriptor, we would overwrite the descriptor data pointer with the incorrect 
+address.
+
+This method is generic in the sense that the OpenMP dialect doesn't need to understand that it is mapping a 
 Fortran type containing a descriptor, it just thinks it's a record type from either Fortran or C++. However,
 it is a little rigid in how the descriptor mappings are handled as there is no specialisation or possibility
-to specialise the mappings for possible edge cases without poluting the dialect or lowering with further
-knowledge of Fortran and the FIR dialect. In the case that this kind of specialisation is required or 
-desired then the methodology described by OpenACC which utilises runtime functions to handle specialised mappings
-for dialects may be a more desirable approach to move towards. For the moment this method appears sufficient as 
-far as the OpenMP specification and current testing can show.
+to specialise the mappings for possible edge cases without polluting the dialect or lowering with further
+knowledge of Fortran and the FIR dialect.
+
+# OpenMP dialect differences from OpenACC dialect
+
+The descriptor mapping for OpenMP currently works differently to the planned direction for OpenACC, however, 
+it is possible and would likely be ideal to align the method with OpenACC in the future. 
+
+Currently the OpenMP specification is less descriptive and has less stringent rules around descriptor based
+types so does not require as complex a set of descriptor management rules as OpenACC (although, in certain 
+cases for the interim adopting OpenACC's rules where it makes sense could be useful). To handle the more 
+complex descriptor mapping rules OpenACC has opted to utilise a more runtime oriented approach, where 
+specialized runtime functions for handling descriptor mapping for OpenACC are created and these runtime 
+function handles are attached to a special OpenACC dialect operation. When this operation is lowered it
+will lower to the attached OpenACC descriptor mapping runtime function. This sounds like it will work
+(no implementation yet) similarly to some of the existing HLFIR operations which optionally lower to 
+Fortran runtime calls. 
+
+This methodology described by OpenACC which utilises runtime functions to handle specialised mappings allows
+more flexibility as a significant amount of the mapping logic can be moved into the runtime from the compiler.
+It also allows specialisation of the mapping for Fortran-specific types. This may be a desirable approach
+to take for OpenMP in the future, in particular if we find a need to specialise mapping further for
+descriptors or other Fortran types. However, for the moment the currently chosen implementation for OpenMP
+appears sufficient as far as the OpenMP specification and current testing can show.
@@ -1,5 +1,4 @@
-//=== Optimizer/CodeGen/CodeGenOpenMP.h - OpenMP code generation -*- C++
-//-*-===//
+//===------- Optimizer/CodeGen/CodeGenOpenMP.h - OpenMP codegen -*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -24,4 +23,4 @@ void populateOpenMPFIRToLLVMConversionPatterns(
 
 } // namespace fir
 
-#endif // FORTRAN_OPTIMIZER_CODEGEN_CODEGENOPENMP_H
+#endif // FORTRAN_OPTIMIZER_CODEGEN_CODEGENOPENMP_H
@@ -322,8 +322,9 @@ def OMPDescriptorMapInfoGenPass
     : Pass<"omp-descriptor-map-info-gen", "mlir::ModuleOp"> {
   let summary = "expands OpenMP MapInfo operations containing descriptors";
   let description = [{
-    Expands MapInfo operations containing descriptor types into multiple MapInfo's for each pointer element in 
-    the descriptor that requires explicit individual mapping by the OpenMP runtime.
+    Expands MapInfo operations containing descriptor types into multiple 
+    MapInfo's for each pointer element in the descriptor that requires 
+    explicit individual mapping by the OpenMP runtime.
   }];
   let constructor = "::fir::createOMPDescriptorMapInfoGenPass()";
   let dependentDialects = ["mlir::omp::OpenMPDialect"];
 
@@ -1,8 +1,8 @@
 add_flang_library(FIRCodeGen
-  CodeGenOpenMP.cpp
   BoxedProcedure.cpp
   CGOps.cpp
   CodeGen.cpp
+  CodeGenOpenMP.cpp
   PreCGRewrite.cpp
   TBAABuilder.cpp
   Target.cpp
 
@@ -27,7 +27,6 @@ class OMPDescriptorMapInfoGenPass
   void genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
                                fir::FirOpBuilder &builder,
                                mlir::Operation *target) {
-    llvm::SmallVector<mlir::Value> descriptorBaseAddrMembers;
     mlir::Location loc = builder.getUnknownLoc();
     mlir::Value descriptor = op.getVarPtr();
 
@@ -60,7 +59,8 @@ class OMPDescriptorMapInfoGenPass
     mlir::Value baseAddrAddr = builder.create<fir::BoxOffsetOp>(
         loc, descriptor, fir::BoxFieldAttr::base_addr);
 
-    descriptorBaseAddrMembers.push_back(builder.create<mlir::omp::MapInfoOp>(
+    // Member of the descriptor pointing at the allocated data
+    mlir::Value baseAddr = builder.create<mlir::omp::MapInfoOp>(
         loc, baseAddrAddr.getType(), baseAddrAddr,
         llvm::cast<mlir::omp::PointerLikeType>(
             fir::unwrapRefType(baseAddrAddr.getType()))
@@ -70,38 +70,36 @@ class OMPDescriptorMapInfoGenPass
                                op.getMapType().value()),
         builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
             mlir::omp::VariableCaptureKind::ByRef),
-        builder.getStringAttr("")));
+        builder.getStringAttr("") /*name*/);
 
     // TODO: map the addendum segment of the descriptor, similarly to the
     // above base address/data pointer member.
 
     op.getVarPtrMutable().assign(descriptor);
     op.setVarType(fir::unwrapRefType(descriptor.getType()));
-    op.getMembersMutable().append(descriptorBaseAddrMembers);
+    op.getMembersMutable().append(baseAddr);
     op.getBoundsMutable().assign(llvm::SmallVector<mlir::Value>{});
 
     if (auto mapClauseOwner =
             llvm::dyn_cast<mlir::omp::MapClauseOwningOpInterface>(target)) {
       llvm::SmallVector<mlir::Value> newMapOps;
-      for (size_t i = 0; i < mapClauseOwner.getMapOperands().size(); ++i) {
-        if (mapClauseOwner.getMapOperands()[i] == op) {
+      mlir::OperandRange mapOperandsArr = mapClauseOwner.getMapOperands();
+
+      for (size_t i = 0; i < mapOperandsArr.size(); ++i) {
+        if (mapOperandsArr[i] == op) {
           // Push new implicit maps generated for the descriptor.
-          newMapOps.push_back(descriptorBaseAddrMembers[0]);
+          newMapOps.push_back(baseAddr);
 
           // for TargetOp's which have IsolatedFromAbove we must align the
           // new additional map operand with an appropriate BlockArgument,
           // as the printing and later processing currently requires a 1:1
           // mapping of BlockArgs to MapInfoOp's at the same placement in
           // each array (BlockArgs and MapOperands).
-          if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(target)) {
-            targetOp.getRegion().insertArgument(
-                i, descriptorBaseAddrMembers[0].getType(), loc);
-          }
-
-          newMapOps.push_back(mapClauseOwner.getMapOperands()[i]);
-        } else {
-          newMapOps.push_back(mapClauseOwner.getMapOperands()[i]);
+          if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(target))
+            targetOp.getRegion().insertArgument(i, baseAddr.getType(), loc);
         }
+
+        newMapOps.push_back(mapOperandsArr[i]);
       }
 
       mapClauseOwner.getMapOperandsMutable().assign(newMapOps);
@@ -121,8 +119,17 @@ class OMPDescriptorMapInfoGenPass
           mlir::isa_and_present<fir::BoxAddrOp>(
               op.getVarPtr().getDefiningOp())) {
         builder.setInsertionPoint(op);
-        // Currently a MapInfoOp argument can only show up on a single target
-        // user so we can retrieve and use the first user.
+        // TODO: Currently only supports a single user for the MapInfoOp. This
+        // is fine for the moment, as the Fortran frontend generates a new
+        // MapInfoOp per Target operation. However, when/if we optimise or
+        // clean up the IR, it likely isn't too difficult to extend this
+        // function: it would require some modification to create a single
+        // new MapInfoOp per new MapInfoOp generated and share it across all
+        // users appropriately, making sure to only add a single member link
+        // per new generation for the original originating descriptor MapInfoOp.
+        assert(llvm::hasSingleElement(op->getUsers()) &&
+               "OMPDescriptorMapInfoGen currently only supports single users "
+               "of a MapInfoOp");
         genDescriptorMemberMaps(op, builder, *op->getUsers().begin());
       }
     });
 
@@ -893,3 +893,22 @@ func.func @omp_critical_() {
   }
   return
 }
+
+// -----
+
+// CHECK-LABEL:  llvm.func @omp_map_info_descriptor_type_conversion
+// CHECK-SAME:   %[[ARG_0:.*]]: !llvm.ptr)
+
+func.func @omp_map_info_descriptor_type_conversion(%arg0 : !fir.ref<!fir.box<!fir.heap<i32>>>) {
+  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ARG_0]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+  %0 = fir.box_offset %arg0 base_addr : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.llvm_ptr<!fir.ref<i32>>
+  // CHECK: %[[MEMBER_MAP:.*]] = omp.map_info var_ptr(%[[GEP]] : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+  %1 = omp.map_info var_ptr(%0 : !fir.llvm_ptr<!fir.ref<i32>>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr<!fir.ref<i32>> {name = ""}
+  // CHECK: %[[DESC_MAP:.*]] = omp.map_info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, delete) capture(ByRef) members(%[[MEMBER_MAP]] : !llvm.ptr) -> !llvm.ptr {name = ""}
+  %2 = omp.map_info var_ptr(%arg0 : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.box<!fir.heap<i32>>) map_clauses(always, delete) capture(ByRef) members(%1 : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.heap<i32>>> {name = ""}
+  // CHECK: omp.target_exit_data map_entries(%[[DESC_MAP]] : !llvm.ptr) 
+  omp.target_exit_data   map_entries(%2 : !fir.ref<!fir.box<!fir.heap<i32>>>)
+  return 
+}
+
+// -----
@@ -109,4 +109,4 @@ end subroutine mapType_char
 !CHECK: %[[SIZE_DIFF:.*]] = sub i64 %[[ALLOCA_GEP_INT]], %[[ALLOCA_INT]]
 !CHECK: %[[DIV:.*]] = sdiv exact i64 %[[SIZE_DIFF]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 !CHECK: %[[OFFLOAD_SIZE_ARR:.*]] = getelementptr inbounds [3 x i64], ptr %.offload_sizes, i32 0, i32 0
-!CHECK: store i64 %[[DIV]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
+!CHECK: store i64 %[[DIV]], ptr %[[OFFLOAD_SIZE_ARR]], align 8
@@ -114,4 +114,4 @@ subroutine call_assumed_shape_and_size_array
     allocate(arr_read_write(20))
     call assumed_size_array(arr_read_write(10:20))
     deallocate(arr_read_write)
-end subroutine call_assumed_shape_and_size_array
+end subroutine call_assumed_shape_and_size_array
@@ -0,0 +1,27 @@
+// RUN: fir-opt --omp-descriptor-map-info-gen %s | FileCheck %s
+
+module attributes {omp.is_target_device = false} {
+  func.func @test_descriptor_expansion_pass() {
+    %0 = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "test", uniq_name = "_QFEtest"}
+    %1 = fir.zero_bits !fir.heap<i32>
+    %2 = fir.embox %1 : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+    fir.store %2 to %0 : !fir.ref<!fir.box<!fir.heap<i32>>>
+    %3:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEtest"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+    %4 = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFEtest.alloc"}
+    %5 = fir.embox %4 : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+    fir.store %5 to %3#1 : !fir.ref<!fir.box<!fir.heap<i32>>>
+    %6 = omp.map_info var_ptr(%3#1 : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.box<!fir.heap<i32>>) map_clauses(tofrom) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<i32>>> {name = "test"}
+    omp.target map_entries(%6 -> %arg0 : !fir.ref<!fir.box<!fir.heap<i32>>>) {
+    ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<i32>>>):
+      omp.terminator
+    }
+    return 
+  }
+}
+
+// CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEtest"} : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> (!fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.heap<i32>>>)
+// CHECK: %[[BASE_ADDR_OFF:.*]] = fir.box_offset %[[DECLARE]]#1 base_addr : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.llvm_ptr<!fir.ref<i32>>
+// CHECK: %[[DESC_MEMBER_MAP:.*]] = omp.map_info var_ptr(%[[BASE_ADDR_OFF]] : !fir.llvm_ptr<!fir.ref<i32>>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr<!fir.ref<i32>> {name = ""}
+// CHECK: %[[DESC_PARENT_MAP:.*]] = omp.map_info var_ptr(%[[DECLARE]]#1 : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.box<!fir.heap<i32>>) map_clauses(tofrom) capture(ByRef) members(%[[DESC_MEMBER_MAP]] : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.heap<i32>>> {name = "test"}
+// CHECK: omp.target map_entries(%[[DESC_MEMBER_MAP]] -> %{{.*}}, %[[DESC_PARENT_MAP]] -> %{{.*}} : !fir.llvm_ptr<!fir.ref<i32>>, !fir.ref<!fir.box<!fir.heap<i32>>>) {
+// CHECK: ^bb0(%{{.*}}: !fir.llvm_ptr<!fir.ref<i32>>, %{{.*}}: !fir.ref<!fir.box<!fir.heap<i32>>>):