Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H

#include "llvm/ADT/SetVector.h"
#include "llvm/Frontend/Atomic/Atomic.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
Expand Down Expand Up @@ -2279,6 +2280,7 @@ class OpenMPIRBuilder {
PostOutlineCBTy PostOutlineCB;
BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
SmallVector<Value *, 2> ExcludeArgsFromAggregate;
SetVector<Value *> Inputs, Outputs;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. I know the other members don't have documentation but I think it would be good to add something because I don't think the use of these is immediately obvious.
  2. I don't think we can use Outputs. IIRC there's an assertion somewhere that there are no live out values. Better to remove it.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Added a comment about the use of Inputs
  2. We still need some definition of Outputs somewhere as the CodeExtractor's extractCodeRegion expects there to be a SetVector for both Inputs and Outputs. The API gives 2 options, one where you just pass the CEAC value, and another that includes the inputs and outputs. I am happy to exclude the Outputs from the OutlineInfo struct, but there will need to be a SetVector made before extracting the code region from OpenMPIRBuilder::finalize.


/// Collect all blocks in between EntryBB and ExitBB in both the given
/// vector and set.
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/Transforms/Utils/CodeExtractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ class CodeExtractorAnalysisCache {
///
/// \param CEAC - Cache to speed up operations for the CodeExtractor when
/// hoisting, and extracting lifetime values and assumes.
/// \param Inputs [out] - filled with values marked as inputs to the
/// \param Inputs [in/out] - filled with values marked as inputs to the
/// newly outlined function.
/// \param Outputs [out] - filled with values marked as outputs to the
/// newly outlined function.
Expand Down
170 changes: 102 additions & 68 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,18 +395,19 @@ Value *createFakeIntVal(IRBuilderBase &Builder,
OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
llvm::SmallVectorImpl<Instruction *> &ToBeDeleted,
OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
const Twine &Name = "", bool AsPtr = true) {
const Twine &Name = "", bool AsPtr = true, bool Is64Bit = false) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const Twine &Name = "", bool AsPtr = true, bool Is64Bit = false) {
const Twine &Name = "", bool AsPtr = true, IntegerType *IntTy) {
Builder.restoreIP(OuterAllocaIP);
IntTy = IntTy ? IntTy : Builder.getInt32Ty();

More flexible.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Builder.restoreIP(OuterAllocaIP);
IntegerType *IntTy = Is64Bit ? Builder.getInt64Ty() : Builder.getInt32Ty();
Instruction *FakeVal;
AllocaInst *FakeValAddr =
Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr");
Builder.CreateAlloca(IntTy, nullptr, Name + ".addr");
ToBeDeleted.push_back(FakeValAddr);

if (AsPtr) {
FakeVal = FakeValAddr;
} else {
FakeVal =
Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val");
Builder.CreateLoad(IntTy, FakeValAddr, Name + ".val");
ToBeDeleted.push_back(FakeVal);
}

Expand All @@ -415,10 +416,10 @@ Value *createFakeIntVal(IRBuilderBase &Builder,
Instruction *UseFakeVal;
if (AsPtr) {
UseFakeVal =
Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name + ".use");
Builder.CreateLoad(IntTy, FakeVal, Name + ".use");
} else {
UseFakeVal =
cast<BinaryOperator>(Builder.CreateAdd(FakeVal, Builder.getInt32(10)));
cast<BinaryOperator>(Builder.CreateAdd(FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could do this from IntTy with llvm::ConstantInt::get or IRBuilderBase::getIntN

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done with ConstantInt::get

}
ToBeDeleted.push_back(UseFakeVal);
return FakeVal;
Expand Down Expand Up @@ -751,7 +752,7 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
for (auto *V : OI.ExcludeArgsFromAggregate)
Extractor.excludeArgFromAggregate(V);

Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
Function *OutlinedFn = Extractor.extractCodeRegion(CEAC, OI.Inputs, OI.Outputs);

// Forward target-cpu, target-features attributes to the outlined function.
auto TargetCpuAttr = OuterFn->getFnAttribute("target-cpu");
Expand Down Expand Up @@ -1979,22 +1980,38 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
OI.ExitBB = TaskloopExitBB;

// Add the thread ID argument.
SmallVector<Instruction *, 4> ToBeDeleted;
SmallVector<Instruction *> ToBeDeleted;
// dummy instruction to be used as a fake argument
OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, "global.tid", false));
Value *FakeLB = createFakeIntVal(Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"lb", false, true);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"lb", false, true);
"lb", /*AsPtr=*/false, Builder.getInt64Ty());

It isn't obvious what the bool values mean without some extra help

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Value *FakeUB = createFakeIntVal(Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"ub", false, true);
Value *FakeStep = createFakeIntVal(Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"step", false, true);
/* For Taskloop, we want to force the bounds being the first 3 inputs in the aggregate struct*/
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: llvm style is to use C++ style comments

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

OI.Inputs.insert(FakeLB);
OI.Inputs.insert(FakeUB);
OI.Inputs.insert(FakeStep);

OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Tied,
TaskloopAllocaBB, CLI, Loc,
ToBeDeleted](Function &OutlinedFn) mutable {
ToBeDeleted, FakeLB, FakeUB, FakeStep](Function &OutlinedFn) mutable {
// Replace the Stale CI by appropriate RTL function call.
assert(OutlinedFn.hasOneUse() &&
"there must be a single user for the outlined function");
CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());

// HasShareds is true if any variables are captured in the outlined region,
// false otherwise.
bool HasShareds = StaleCI->arg_size() > 1;
/* Create the casting for the Bounds Values that can be used when outlining to replace the uses of the fakes with real values */
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: comment style

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

BasicBlock *CodeReplBB = StaleCI->getParent();
IRBuilderBase::InsertPoint CurrentIp = Builder.saveIP();
Builder.SetInsertPoint(CodeReplBB->getFirstInsertionPt());
Value *CastedLBVal = Builder.CreateIntCast(LBVal, Builder.getInt64Ty(), true, "lb64");
Value *CastedUBVal = Builder.CreateIntCast(UBVal, Builder.getInt64Ty(), true, "ub64");
Value *CastedStepVal = Builder.CreateIntCast(StepVal, Builder.getInt64Ty(), true, "step64");
Builder.restoreIP(CurrentIp);

Builder.SetInsertPoint(StaleCI);

// Gather the arguments for emitting the runtime call for
Expand All @@ -2015,20 +2032,17 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
Value *TaskSize = Builder.getInt64(
divideCeil(M.getDataLayout().getTypeSizeInBits(Taskloop), 8));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
divideCeil(M.getDataLayout().getTypeSizeInBits(Taskloop), 8));
divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
divideCeil(M.getDataLayout().getTypeSizeInBits(Taskloop), 8));
divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we are utilizing structArg to store the loop bound values, the struct __OMP_STRUCT_TYPE(Taskloop, kmp_task_info, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr, Int64, Int64, Int64) is no longer needed.

The required size for storing the loop bounds can be reserved in kmp_task_t by structArg itself.


Value *SharedsSize = Builder.getInt64(0);
if (HasShareds) {
AllocaInst *ArgStructAlloca =
dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
assert(ArgStructAlloca &&
"Unable to find the alloca instruction corresponding to arguments "
"for extracted function");
StructType *ArgStructType =
dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
assert(ArgStructType && "Unable to find struct type corresponding to "
"arguments for extracted function");
SharedsSize =
Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
}
AllocaInst *ArgStructAlloca =
dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
assert(ArgStructAlloca &&
"Unable to find the alloca instruction corresponding to arguments "
"for extracted function");
StructType *ArgStructType =
dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
assert(ArgStructType && "Unable to find struct type corresponding to "
"arguments for extracted function");
Value *SharedsSize =
Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));

// Emit the @__kmpc_omp_task_alloc runtime call
// The runtime call returns a pointer to an area where the task captured
Expand All @@ -2038,31 +2052,22 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
/*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
/*task_func=*/&OutlinedFn});

Value *Shareds = StaleCI->getArgOperand(1);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be moved above line 2035 to make that section clearer

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've moved this above the declaration for ArgStructAlloca so this can use the Shareds variable rather than calling the getArgOperand function

Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
SharedsSize);
// Get the pointer to loop lb, ub, step from task ptr
// and set up the lowerbound,upperbound and step values
llvm::Value *lb =
Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 5);
Value *LbVal_ext = Builder.CreateSExt(LBVal, Builder.getInt64Ty());
Builder.CreateStore(LbVal_ext, lb);

llvm::Value *ub =
Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 6);
Value *UbVal_ext = Builder.CreateSExt(UBVal, Builder.getInt64Ty());
Builder.CreateStore(UbVal_ext, ub);

llvm::Value *step =
Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 7);
Value *Step_ext = Builder.CreateSExt(StepVal, Builder.getInt64Ty());
Builder.CreateStore(Step_ext, step);
llvm::Value *loadstep = Builder.CreateLoad(Builder.getInt64Ty(), step);
llvm::Value *Lb = Builder.CreateStructGEP(ArgStructType, TaskShareds, 0);
Builder.CreateStore(CastedLBVal, Lb);
Copy link

@kaviya2510 kaviya2510 Dec 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Builder.CreateStore(CastedLBVal, Lb);
auto *Idx0 = Builder.getInt32(0);
llvm::Value *Lb = Builder.CreateGEP(ArgStructType, TaskShareds, {Idx0, Builder.getInt32(0)});

The values of lb, ub, and step are already populated in StructArg. You can access them directly and pass the pointers to the runtime call __kmpc_taskloop(...).


if (HasShareds) {
Value *Shareds = StaleCI->getArgOperand(1);
Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
SharedsSize);
}
llvm::Value *Ub = Builder.CreateStructGEP(ArgStructType, TaskShareds, 1);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
llvm::Value *Ub = Builder.CreateStructGEP(ArgStructType, TaskShareds, 1);
llvm::Value *Ub =Builder.CreateGEP(ArgStructType, TaskShareds, {Idx0, Builder.getInt32(1)});

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GEP to StructArg and get the upper bound value.

Builder.CreateStore(CastedUBVal, Ub);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here. Remove the store instruction.


llvm::Value *Step = Builder.CreateStructGEP(ArgStructType, TaskShareds, 2);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
llvm::Value *Step = Builder.CreateStructGEP(ArgStructType, TaskShareds, 2);
llvm::Value *Step =Builder.CreateGEP(ArgStructType, TaskShareds, {Idx0, Builder.getInt32(2)});

Builder.CreateStore(CastedStepVal, Step);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the store.

llvm::Value *Loadstep = Builder.CreateLoad(Builder.getInt64Ty(), Step);

// set up the arguments for emitting kmpc_taskloop runtime call
// setting default values for ifval, nogroup, sched, grainsize, task_dup
Expand All @@ -2074,8 +2079,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
// TODO: Handle the case when TaskDup pointer isn't empty
Value *TaskDup = Constant::getNullValue(Builder.getPtrTy());

Value *Args[] = {Ident, ThreadID, TaskData, IfVal, lb, ub,
loadstep, NoGroup, Sched, GrainSize, TaskDup};
Value *Args[] = {Ident, ThreadID, TaskData, IfVal, Lb, Ub,
Loadstep, NoGroup, Sched, GrainSize, TaskDup};

// taskloop runtime call
Function *TaskloopFn =
Expand All @@ -2091,32 +2096,58 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(

Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());

if (HasShareds) {
LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
OutlinedFn.getArg(1)->replaceUsesWithIf(
Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });
}
LoadInst *SharedsOutlined =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
OutlinedFn.getArg(1)->replaceUsesWithIf(
SharedsOutlined,
[SharedsOutlined](Use &U) { return U.getUser() != SharedsOutlined; });

Value *IV = CLI->getIndVar();
Type *IVTy = IV->getType();
Constant *One = ConstantInt::get(IVTy, 1);

Value *TaskLB = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop,
OutlinedFn.getArg(1), 5, "gep_lb");
Value *LoadTaskLB = Builder.CreateLoad(Builder.getInt64Ty(), TaskLB);
Value *LowerBound = Builder.CreateTrunc(LoadTaskLB, IVTy, "lb");

Value *TaskUB = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop,
OutlinedFn.getArg(1), 6, "gep_ub");
Value *LoadTaskUB = Builder.CreateLoad(Builder.getInt64Ty(), TaskUB);
Value *UpperBound = Builder.CreateTrunc(LoadTaskUB, IVTy, "ub");
Constant *One = ConstantInt::get(Builder.getInt64Ty(), 1);

// When outlining, CodeExtractor will create GEP's to the LowerBound and
// UpperBound. These GEP's can be reused for loading the tasks respective
// bounds.
Value *TaskLB = nullptr;
Value *TaskUB = nullptr;
Value *LoadTaskLB = nullptr;
Value *LoadTaskUB = nullptr;
for (Instruction &I : *TaskloopAllocaBB) {
if (I.getOpcode() == Instruction::GetElementPtr) {
GetElementPtrInst &Gep = cast<GetElementPtrInst>(I);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Gep.getOperand(2))) {
switch (CI->getZExtValue()) {
case 0:
TaskLB = &I;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would also be good to check that the value being indexed is the right one, not just the numeric value of the index.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a check to make sure the GEP instruction being inspected uses Shareds as its first operand.

break;
case 1:
TaskUB = &I;
break;
}
}
} else if (I.getOpcode() == Instruction::Load) {
LoadInst &Load = cast<LoadInst>(I);
if (Load.getPointerOperand() == TaskLB) {
assert(TaskLB != nullptr && "Expected value for TaskLB");
LoadTaskLB = &I;
} else if (Load.getPointerOperand() == TaskUB) {
assert(TaskUB != nullptr && "Expected value for TaskUB");
LoadTaskUB = &I;
}
}
}

Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());

Value *TripCountMinusOne = Builder.CreateSub(UpperBound, LowerBound);
assert(LoadTaskLB != nullptr && "Expected value for LoadTaskLB");
assert(LoadTaskUB != nullptr && "Expected value for LoadTaskUB");
Value *TripCountMinusOne = Builder.CreateSub(LoadTaskUB, LoadTaskLB);
Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One, "trip_cnt");
Value *CastedTripCount = Builder.CreateIntCast(TripCount, IVTy, true);
Value *CastedTaskLB = Builder.CreateIntCast(LoadTaskLB, IVTy, true);
// set the trip count in the CLI
CLI->setTripCount(TripCount);
CLI->setTripCount(CastedTripCount);

Builder.SetInsertPoint(CLI->getBody(),
CLI->getBody()->getFirstInsertionPt());
Expand All @@ -2127,12 +2158,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
if (Add->getOpcode() == llvm::Instruction::Add) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tom raised a concern that this add instruction pattern might also match other unrelated add instructions, and we discussed this in my PR: llvm#166903 (comment)

He suggested looking at the wsloop and distribute implementations for guidance on how this is handled there. I have not had a chance to dig into that yet. Could you please check this once?

if (llvm::isa<llvm::BinaryOperator>(Add->getOperand(0))) {
// update the starting index of the loop
Add->setOperand(1, LowerBound);
Add->setOperand(1, CastedTaskLB);
}
}
}
}

FakeLB->replaceAllUsesWith(CastedLBVal);
FakeUB->replaceAllUsesWith(CastedUBVal);
FakeStep->replaceAllUsesWith(CastedStepVal);
for (Instruction *I : llvm::reverse(ToBeDeleted)) {
I->eraseFromParent();
}
Expand Down