Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -12994,6 +12994,7 @@ class Sema final {

bool isKnownGoodSYCLDecl(const Decl *D);
void checkSYCLDeviceVarDecl(VarDecl *Var);
void copySYCLKernelAttrs(const CXXRecordDecl *KernelObj);
void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC);
void MarkDevice();

Expand Down
146 changes: 102 additions & 44 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,34 @@ static int64_t getIntExprValue(const Expr *E, ASTContext &Ctx) {
return E->getIntegerConstantExpr(Ctx)->getSExtValue();
}

// Collect function attributes related to SYCL
static void collectSYCLAttributes(Sema &S, FunctionDecl *FD,
llvm::SmallVector<Attr *, 4> &Attrs,
bool DirectlyCalled = true) {
if (!FD->hasAttrs())
return;

llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) {
return isa<IntelReqdSubGroupSizeAttr, ReqdWorkGroupSizeAttr,
SYCLIntelKernelArgsRestrictAttr, SYCLIntelNumSimdWorkItemsAttr,
SYCLIntelSchedulerTargetFmaxMhzAttr,
SYCLIntelMaxWorkGroupSizeAttr, SYCLIntelMaxGlobalWorkDimAttr,
SYCLIntelNoGlobalWorkOffsetAttr, SYCLSimdAttr>(A);
});

// Allow the kernel attribute "use_stall_enable_clusters" only on lambda
// functions and function objects called directly from a kernel.
// For all other cases, emit a warning and ignore.
if (auto *A = FD->getAttr<SYCLIntelUseStallEnableClustersAttr>()) {
if (DirectlyCalled) {
Attrs.push_back(A);
} else {
S.Diag(A->getLocation(), diag::warn_attribute_ignored) << A;
FD->dropAttr<SYCLIntelUseStallEnableClustersAttr>();
}
}
}

class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
// Used to keep track of the constexpr depth, so we know whether to skip
// diagnostics.
Expand Down Expand Up @@ -477,7 +505,7 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
// Returns the kernel body function found during traversal.
FunctionDecl *
CollectPossibleKernelAttributes(FunctionDecl *SYCLKernel,
llvm::SmallPtrSet<Attr *, 4> &Attrs) {
llvm::SmallVector<Attr *, 4> &Attrs) {
typedef std::pair<FunctionDecl *, FunctionDecl *> ChildParentPair;
llvm::SmallPtrSet<FunctionDecl *, 16> Visited;
llvm::SmallVector<ChildParentPair, 16> WorkList;
Expand Down Expand Up @@ -508,55 +536,23 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
"function can be called");
KernelBody = FD;
}

WorkList.pop_back();
if (!Visited.insert(FD).second)
continue; // We've already seen this Decl

if (auto *A = FD->getAttr<IntelReqdSubGroupSizeAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<ReqdWorkGroupSizeAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelKernelArgsRestrictAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelNumSimdWorkItemsAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelSchedulerTargetFmaxMhzAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelMaxGlobalWorkDimAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelNoGlobalWorkOffsetAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLSimdAttr>())
Attrs.insert(A);

// Allow the kernel attribute "use_stall_enable_clusters" only on lambda
// functions and function objects that are called directly from a kernel
// (i.e. the one passed to the single_task or parallel_for functions).
// For all other cases, emit a warning and ignore.
if (auto *A = FD->getAttr<SYCLIntelUseStallEnableClustersAttr>()) {
if (ParentFD == SYCLKernel) {
Attrs.insert(A);
} else {
SemaRef.Diag(A->getLocation(), diag::warn_attribute_ignored) << A;
FD->dropAttr<SYCLIntelUseStallEnableClustersAttr>();
}
}
// Gather all attributes of FD that are SYCL related.
// Some attributes are allowed only on lambda functions and function
// objects called directly from a kernel (i.e. the one passed to the
// single_task or parallel_for functions).
bool DirectlyCalled = (ParentFD == SYCLKernel);
collectSYCLAttributes(SemaRef, FD, Attrs, DirectlyCalled);

// Attribute "loop_fuse" can be applied explicitly on kernel function.
// Attribute should not be propagated from device functions to kernel.
if (auto *A = FD->getAttr<SYCLIntelLoopFuseAttr>()) {
if (ParentFD == SYCLKernel) {
Attrs.insert(A);
Attrs.push_back(A);
}
}

Expand Down Expand Up @@ -3149,6 +3145,62 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,
KernelFunc->setInvalidDecl();
}

// For a wrapped parallel_for, copy attributes from original
// kernel to wrapped kernel.
void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj) {
// Get the operator() function of the wrapper
CXXMethodDecl *OpParens = nullptr;
for (auto *MD : KernelObj->methods()) {
if (MD->getOverloadedOperator() == OO_Call) {
OpParens = MD;
break;
}
}
assert(OpParens && "invalid kernel object");

typedef std::pair<FunctionDecl *, FunctionDecl *> ChildParentPair;
llvm::SmallPtrSet<FunctionDecl *, 16> Visited;
llvm::SmallVector<ChildParentPair, 16> WorkList;
WorkList.push_back({OpParens, nullptr});
FunctionDecl *KernelBody = nullptr;

CallGraph SYCLCG;
SYCLCG.addToCallGraph(getASTContext().getTranslationUnitDecl());
while (!WorkList.empty()) {
FunctionDecl *FD = WorkList.back().first;
FunctionDecl *ParentFD = WorkList.back().second;

if ((ParentFD == OpParens) && isSYCLKernelBodyFunction(FD)) {
KernelBody = FD;
break;
}

WorkList.pop_back();
if (!Visited.insert(FD).second)
continue; // We've already seen this Decl

CallGraphNode *N = SYCLCG.getNode(FD);
if (!N)
continue;

for (const CallGraphNode *CI : *N) {
if (auto *Callee = dyn_cast<FunctionDecl>(CI->getDecl())) {
Callee = Callee->getMostRecentDecl();
if (!Visited.count(Callee))
WorkList.push_back({Callee, FD});
}
}
}

assert(KernelBody && "improper parallel_for wrap");
if (KernelBody) {
llvm::SmallVector<Attr *, 4> Attrs;
collectSYCLAttributes(*this, KernelBody, Attrs);
if (!Attrs.empty())
llvm::for_each(Attrs, [OpParens](Attr *A) { OpParens->addAttr(A); });
}
}

// Generates the OpenCL kernel using KernelCallerFunc (kernel caller
// function) defined is SYCL headers.
// Generated OpenCL kernel contains the body of the kernel caller function,
Expand Down Expand Up @@ -3181,14 +3233,20 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc,
if (KernelObj->isInvalidDecl())
return;

bool IsSIMDKernel = isESIMDKernelType(KernelObj);

// Calculate both names, since Integration headers need both.
std::string CalculatedName, StableName;
std::tie(CalculatedName, StableName) =
constructKernelName(*this, KernelCallerFunc, MC);
StringRef KernelName(getLangOpts().SYCLUnnamedLambda ? StableName
: CalculatedName);

// Attributes of a user-written SYCL kernel must be copied to the internally
// generated alternative kernel, identified by a known string in its name.
if (StableName.find("__pf_kernel_wrapper") != std::string::npos)
copySYCLKernelAttrs(KernelObj);

bool IsSIMDKernel = isESIMDKernelType(KernelObj);

SyclKernelDeclCreator kernel_decl(*this, KernelName, KernelObj->getLocation(),
KernelCallerFunc->isInlined(),
IsSIMDKernel);
Expand Down Expand Up @@ -3226,7 +3284,7 @@ void Sema::MarkDevice(void) {
Marker.CollectKernelSet(SYCLKernel, SYCLKernel, VisitedSet);

// Let's propagate attributes from device functions to a SYCL kernels
llvm::SmallPtrSet<Attr *, 4> Attrs;
llvm::SmallVector<Attr *, 4> Attrs;
// This function collects all kernel attributes which might be applied to
// a device functions, but need to be propagated down to callers, i.e.
// SYCL kernels
Expand Down
22 changes: 22 additions & 0 deletions sycl/test/kernel_param/attr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-is-device -emit-llvm %s -S -o %t.ll -I %sycl_include
// RUN: FileCheck %s --input-file %t.ll

// Check copying of parallel_for kernel attributes to wrapper kernel.

#include <CL/sycl.hpp>
using namespace cl::sycl;

int main() {
range<1> Size{10};
{
queue myQueue;
myQueue.submit([&](handler &cgh) {
cgh.parallel_for<class C>(Size, [=](item<1> ITEM)
[[intel::reqd_work_group_size(4)]]{});
});
}

return 0;
}

// CHECK: define {{.*}}spir_kernel void @{{.*}}__pf_kernel_wrapper{{.*}}reqd_work_group_size