Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ CODEGENOPT(NoImplicitFloat , 1, 0) ///< Set when -mno-implicit-float is enable
CODEGENOPT(NullPointerIsValid , 1, 0) ///< Assume Null pointer deference is defined.
CODEGENOPT(OpenCLCorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
CODEGENOPT(HIPCorrectlyRoundedDivSqrt, 1, 1) ///< -fno-hip-fp32-correctly-rounded-divide-sqrt
CODEGENOPT(SYCLFp32PrecSqrt, 1, 0) ///< -fsycl-fp32-prec-sqrt
CODEGENOPT(UniqueInternalLinkageNames, 1, 0) ///< Internal Linkage symbols get unique names.
CODEGENOPT(SplitMachineFunctions, 1, 0) ///< Split machine functions using profile information.

Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4732,6 +4732,9 @@ def fno_sycl_device_lib_EQ : CommaJoined<["-"], "fno-sycl-device-lib=">, Group<s
Values<"libc, libm-fp32, libm-fp64, all">, HelpText<"Control exclusion of "
"device libraries from device binary linkage. Valid arguments "
"are libc, libm-fp32, libm-fp64, all">;
def fsycl_fp32_prec_sqrt : Flag<["-"], "fsycl-fp32-prec-sqrt">, Group<sycl_Group>, Flags<[CC1Option]>,
HelpText<"SYCL only. Specify that single precision floating-point sqrt is correctly rounded.">,
MarshallingInfoFlag<CodeGenOpts<"SYCLFp32PrecSqrt">>;

//===----------------------------------------------------------------------===//
// FLangOption + CoreOption + NoXarchOption
Expand Down
3 changes: 2 additions & 1 deletion clang/include/clang/Driver/ToolChain.h
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,8 @@ class ToolChain {

/// Get paths of HIP device libraries.
virtual llvm::SmallVector<BitCodeLibraryInfo, 12>
getHIPDeviceLibs(const llvm::opt::ArgList &Args) const;
getHIPDeviceLibs(const llvm::opt::ArgList &Args,
const Action::OffloadKind DeviceOffloadingKind) const;

/// Return sanitizers which are available in this toolchain.
virtual SanitizerMask getSupportedSanitizers() const;
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1097,7 +1097,8 @@ void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}

llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
ToolChain::getHIPDeviceLibs(const ArgList &DriverArgs) const {
ToolChain::getHIPDeviceLibs(const ArgList &DriverArgs,
const Action::OffloadKind OffloadKind) const {
return {};
}

Expand Down
12 changes: 9 additions & 3 deletions clang/lib/Driver/ToolChains/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -895,9 +895,9 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
return false;
}

llvm::SmallVector<std::string, 12>
ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
const std::string &GPUArch) const {
llvm::SmallVector<std::string, 12> ROCMToolChain::getCommonDeviceLibNames(
const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
const Action::OffloadKind DeviceOffloadingKind) const {
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);

Expand All @@ -923,6 +923,12 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
bool CorrectSqrt = DriverArgs.hasFlag(
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);

if (DeviceOffloadingKind == Action::OFK_SYCL) {
// When using SYCL, sqrt is only correctly rounded if the flag is specified
CorrectSqrt = DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt);
}

bool Wave64 = isWave64(DriverArgs, Kind);

return RocmInstallation.getCommonBitcodeLibs(
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,8 @@ class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
// Returns a list of device library names shared by different languages
llvm::SmallVector<std::string, 12>
getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
const std::string &GPUArch) const;
const std::string &GPUArch,
const Action::OffloadKind DeviceOffloadingKind) const;
};

} // end namespace toolchains
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
// - write an opt pass that sets that on every function it sees and pipe
// the device-libs bitcode through that on the way to this llvm-link
SmallVector<std::string, 12> BCLibs =
AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str(),
Action::OFK_OpenMP);
llvm::for_each(BCLibs, [&](StringRef BCFile) {
CmdArgs.push_back(Args.MakeArgString(BCFile));
});
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,10 @@ void CudaToolChain::addClangTargetOptions(
if (DeviceOffloadingKind == Action::OFK_SYCL) {
toolchains::SYCLToolChain::AddSYCLIncludeArgs(getDriver(), DriverArgs,
CC1Args);

if (DriverArgs.hasArg(options::OPT_fsycl_fp32_prec_sqrt)) {
CC1Args.push_back("-fcuda-prec-sqrt");
}
}

auto NoLibSpirv = DriverArgs.hasArg(options::OPT_fno_sycl_libspirv,
Expand Down
18 changes: 11 additions & 7 deletions clang/lib/Driver/ToolChains/HIPAMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,12 @@ void HIPAMDToolChain::addClangTargetOptions(
CC1Args.push_back(DriverArgs.MakeArgString(LibSpirvFile));
}

llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](auto BCFile) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
});
llvm::for_each(
getHIPDeviceLibs(DriverArgs, DeviceOffloadingKind), [&](auto BCFile) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
});
}

llvm::opt::DerivedArgList *
Expand Down Expand Up @@ -355,7 +356,9 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
}

llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPAMDToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
HIPAMDToolChain::getHIPDeviceLibs(
const llvm::opt::ArgList &DriverArgs,
const Action::OffloadKind DeviceOffloadingKind) const {
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
if (DriverArgs.hasArg(options::OPT_nogpulib))
return {};
Expand Down Expand Up @@ -412,7 +415,8 @@ HIPAMDToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
BCLibs.push_back(RocmInstallation.getHIPPath());

// Add common device libraries like ocml etc.
for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str()))
for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str(),
DeviceOffloadingKind))
BCLibs.push_back(StringRef(N));

// Add instrument lib.
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/Driver/ToolChains/HIPAMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,9 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain {
llvm::opt::ArgStringList &CC1Args) const override;
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12> getHIPDeviceLibs(
const llvm::opt::ArgList &Args,
const Action::OffloadKind DeviceOffloadingKind) const override;

SanitizerMask getSupportedSanitizers() const override;

Expand Down
6 changes: 4 additions & 2 deletions clang/lib/Driver/ToolChains/HIPSPV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ void HIPSPVToolChain::addClangTargetOptions(
CC1Args.append(
{"-fvisibility", "hidden", "-fapply-global-visibility-to-externs"});

llvm::for_each(getHIPDeviceLibs(DriverArgs),
llvm::for_each(getHIPDeviceLibs(DriverArgs, DeviceOffloadingKind),
[&](const BitCodeLibraryInfo &BCFile) {
CC1Args.append({"-mlink-builtin-bitcode",
DriverArgs.MakeArgString(BCFile.Path)});
Expand Down Expand Up @@ -206,7 +206,9 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
}

llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPSPVToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
HIPSPVToolChain::getHIPDeviceLibs(
const llvm::opt::ArgList &DriverArgs,
const Action::OffloadKind DeviceOffloadingKind) const {
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs;
if (DriverArgs.hasArg(options::OPT_nogpulib))
return {};
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/HIPSPV.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain {
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override;
getHIPDeviceLibs(const llvm::opt::ArgList &Args,
const Action::OffloadKind DeviceOffloadingKind) const override;

SanitizerMask getSupportedSanitizers() const override;

Expand Down
24 changes: 24 additions & 0 deletions clang/test/Driver/sycl-amdgcn-sqrt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// REQUIRES: clang-driver
// REQUIRES: amdgpu-registered-target
// REQUIRES: !system-windows

// RUN: %clang -### \
// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa \
// RUN: -Xsycl-target-backend --offload-arch=gfx900 \
// RUN: -fsycl-fp32-prec-sqrt \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s

// CHECK-CORRECT: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_on.bc"

// RUN: %clang -### \
// RUN: -fsycl -fsycl-targets=amdgcn-amd-amdhsa \
// RUN: -Xsycl-target-backend --offload-arch=gfx900 \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-APPROX %s

// CHECK-APPROX: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_correctly_rounded_sqrt_off.bc"

void func(){};
17 changes: 17 additions & 0 deletions clang/test/Driver/sycl-no-prec-sqrt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// REQUIRES: clang-driver

// RUN: %clang -### -fsycl \
// RUN: -fsycl-fp32-prec-sqrt %s 2>&1 | FileCheck %s

// RUN: %clang -### -fsycl -fsycl-targets=spir64_gen \
// RUN: -fsycl-fp32-prec-sqrt %s 2>&1 | FileCheck %s
//
// RUN: %clang -### -fsycl -fsycl-targets=spir64_x86_64 \
// RUN: -fsycl-fp32-prec-sqrt %s 2>&1 | FileCheck %s
//
// RUN: %clang -### -fsycl -fsycl-targets=spir64_fpga \
// RUN: -fsycl-fp32-prec-sqrt %s 2>&1 | FileCheck %s

// CHECK: warning: argument unused during compilation: '-fsycl-fp32-prec-sqrt'

void func(){};
20 changes: 20 additions & 0 deletions clang/test/Driver/sycl-nvptx-sqrt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// REQUIRES: clang-driver
// REQUIRES: nvptx-registered-target
// REQUIRES: !system-windows

// RUN: %clang -### \
// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda \
// RUN: -fsycl-fp32-prec-sqrt \
// RUN: %s \
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-CORRECT %s

// CHECK-CORRECT: "-fcuda-prec-sqrt"

// RUN: %clang -### \
// RUN: -fsycl -fsycl-targets=nvptx64-nvidia-cuda \
// RUN: %s \
// RUN: 2>&1 | FileCheck --check-prefix=CHECK-APPROX %s

// CHECK-APPROX-NOT: "-fcuda-prec-sqrt"

void func(){};
2 changes: 1 addition & 1 deletion sycl/doc/GetStartedGuide.md
Original file line number Diff line number Diff line change
Expand Up @@ -832,7 +832,7 @@ which contains all the symbols required.
project and may cause compilation issues on some platforms
* `sycl::sqrt` is not correctly rounded by default as the SYCL specification
allows lower precision, when porting from CUDA it may be helpful to use
`-Xclang -fcuda-prec-sqrt` to use the correctly rounded square root, this is
`-fsycl-fp32-prec-sqrt` to use the correctly rounded square root, this is
significantly slower but matches the default precision used by `nvcc`, and
this `clang++` flag is equivalent to the `nvcc` `-prec-sqrt` flag, except that
it defaults to `false`.
Expand Down
8 changes: 8 additions & 0 deletions sycl/doc/UsersManual.md
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,14 @@ and not recommended to use in production environment.
options (e.g. -c, -E, -S) may interfere with the expected output set during
the host compilation. Doing so is considered undefined behavior.

**`-fsycl-fp32-prec-sqrt`**

Enable use of correctly rounded `sycl::sqrt` function as defined by IEE754.
Without this flag, the default precision requirement for `sycl::sqrt` is 3
ULP.

NOTE: This flag is currently only supported with the CUDA and HIP targets.

# Example: SYCL device code compilation

To invoke SYCL device compiler set `-fsycl-device-only` flag.
Expand Down