Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sycl/source/detail/kernel_program_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ KernelProgramCache::~KernelProgramCache() {
Plugin.call<PiApiKind::piKernelRelease>(Kern);
}
}
MKernelsPerProgramCache.erase(KernIt);
}

const detail::plugin &Plugin = MParentContext->getPlugin();
Expand Down
7 changes: 5 additions & 2 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1802,7 +1802,11 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage,
// Cache supports key with once device only, but here we have multiple
// devices a program is built for, so add the program to the cache for all
// other devices.
auto CacheOtherDevices = [ResProgram]() { return ResProgram; };
const detail::plugin &Plugin = ContextImpl->getPlugin();
auto CacheOtherDevices = [ResProgram, &Plugin]() {
Plugin.call<PiApiKind::piProgramRetain>(ResProgram);
return ResProgram;
};

// The program for device "0" is already added to the cache during the first
// call to getOrBuild, so starting with "1"
Expand All @@ -1820,7 +1824,6 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage,
// devive_image_impl shares ownership of PIProgram with, at least, program
// cache. The ref counter will be descremented in the destructor of
// device_image_impl
const detail::plugin &Plugin = ContextImpl->getPlugin();
Plugin.call<PiApiKind::piProgramRetain>(ResProgram);

DeviceImageImplPtr ExecImpl = std::make_shared<detail::device_image_impl>(
Expand Down
1 change: 1 addition & 0 deletions sycl/unittests/kernel-and-program/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
add_sycl_unittest(KernelAndProgramTests OBJECT
Cache.cpp
MultipleDevsCache.cpp
KernelRelease.cpp
KernelInfo.cpp
DeviceInfo.cpp
Expand Down
12 changes: 1 addition & 11 deletions sycl/unittests/kernel-and-program/Cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#define SYCL2020_DISABLE_DEPRECATION_WARNINGS

#include "CL/sycl/detail/pi.h"
#include "HelperKernelInfo.hpp"
#include "detail/context_impl.hpp"
#include "detail/kernel_program_cache.hpp"
#include "detail/program_impl.hpp"
Expand Down Expand Up @@ -37,17 +38,6 @@ class TestKernel2 {
__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {
struct MockKernelInfo {
static constexpr unsigned getNumParams() { return 0; }
static const kernel_param_desc_t &getParamDesc(int) {
static kernel_param_desc_t Dummy;
return Dummy;
}
static constexpr bool isESIMD() { return false; }
static constexpr bool callsThisItem() { return false; }
static constexpr bool callsAnyThisFreeFunction() { return false; }
};

template <> struct KernelInfo<TestKernel> : public MockKernelInfo {
static constexpr const char *getName() { return "TestKernel"; }
};
Expand Down
29 changes: 29 additions & 0 deletions sycl/unittests/kernel-and-program/HelperKernelInfo.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//==----------------------- HelperKernelInfo.hpp ---------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl.hpp>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {
struct MockKernelInfo {
static constexpr unsigned getNumParams() { return 0; }
static const kernel_param_desc_t &getParamDesc(int) {
static kernel_param_desc_t Dummy;
return Dummy;
}
static constexpr bool isESIMD() { return false; }
static constexpr bool callsThisItem() { return false; }
static constexpr bool callsAnyThisFreeFunction() { return false; }
};

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
214 changes: 214 additions & 0 deletions sycl/unittests/kernel-and-program/MultipleDevsCache.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
//==--- KPCache.cpp --- KernelProgramCache for multiple devices unit test --==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define SYCL2020_DISABLE_DEPRECATION_WARNINGS

#include "HelperKernelInfo.hpp"
#include "detail/context_impl.hpp"
#include "detail/kernel_bundle_impl.hpp"
#include "detail/kernel_program_cache.hpp"
#include <CL/sycl.hpp>
#include <helpers/CommonRedefinitions.hpp>
#include <helpers/PiImage.hpp>
#include <helpers/PiMock.hpp>

#include <gtest/gtest.h>

#include <iostream>

using namespace sycl;

class MultTestKernel {
public:
void operator()(cl::sycl::item<1>){};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be a const operator?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this operator

};

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {
template <> struct KernelInfo<MultTestKernel> : public MockKernelInfo {
static constexpr const char *getName() { return "MultTestKernel"; }
};
} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)

static sycl::unittest::PiImage generateDefaultImage() {
using namespace sycl::unittest;

PiPropertySet PropSet;

std::vector<unsigned char> Bin{0, 1, 2, 3, 4, 5}; // Random data

PiArray<PiOffloadEntry> Entries = makeEmptyKernels({"MultTestKernel"});

PiImage Img{PI_DEVICE_BINARY_TYPE_SPIRV, // Format
__SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec
"", // Compile options
"", // Link options
std::move(Bin),
std::move(Entries),
std::move(PropSet)};

return Img;
}

static sycl::unittest::PiImage Img = generateDefaultImage();
static sycl::unittest::PiImageArray<1> ImgArray{&Img};

static pi_result redefinedContextCreate(
const pi_context_properties *properties, pi_uint32 num_devices,
const pi_device *devices,
void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb,
void *user_data),
void *user_data, pi_context *ret_context) {
*ret_context = reinterpret_cast<pi_context>(123);
return PI_SUCCESS;
}

static pi_result redefinedContextRelease(pi_context context) {
return PI_SUCCESS;
}

static pi_result redefinedDevicesGet(pi_platform platform,
pi_device_type device_type,
pi_uint32 num_entries, pi_device *devices,
pi_uint32 *num_devices) {
if (num_devices) {
*num_devices = static_cast<pi_uint32>(2);
return PI_SUCCESS;
}

devices[0] = reinterpret_cast<pi_device>(1111);
devices[1] = reinterpret_cast<pi_device>(2222);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should only be performed if devices is not nil.

Copy link
Contributor

@cperkinsintel cperkinsintel Dec 15, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And not just not nullptr. If I'm not mistaken, the maximum possible device entries is bound by num_entries so the subscripts should check to make sure they are less than num_entries. Though, in the case of mocks and unit tests, it's less clear to me where this value for num_entries comes from and if it is a problem to blindly set devices[1].

Would this test fail if run with the SYCL_DEVICE_FILTER set to a single device? (And, do we even care?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added check for nullptr and for num_entries.
piDevicesGet first time is called with num_entries=0 and not null num_devices. Ather the call num_entries takes value of num_devices. With second call num_devices parameter passes with nullptr, so we can create num_entries devices. I rewrite num_devices to 2, so when redefinedDevicesGet is called a second time RT thinks that we have two devices.
This test emulates two devices, so we don't care about real count of devices.

return PI_SUCCESS;
}

static pi_result redefinedDeviceGetInfo(pi_device device,
pi_device_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret) {
if (param_name == PI_DEVICE_INFO_TYPE) {
auto *Result = reinterpret_cast<_pi_device_type *>(param_value);
*Result = PI_DEVICE_TYPE_GPU;
}
if (param_name == PI_DEVICE_INFO_COMPILER_AVAILABLE) {
auto *Result = reinterpret_cast<pi_bool *>(param_value);
*Result = true;
}
return PI_SUCCESS;
}

static pi_result redefinedDeviceRetain(pi_device device) { return PI_SUCCESS; }

static pi_result redefinedDeviceRelease(pi_device device) { return PI_SUCCESS; }

static pi_result redefinedQueueCreate(pi_context context, pi_device device,
pi_queue_properties properties,
pi_queue *queue) {
*queue = reinterpret_cast<pi_queue>(1234);
return PI_SUCCESS;
}

static pi_result redefinedQueueRelease(pi_queue command_queue) {
return PI_SUCCESS;
}

static size_t ProgramNum = 12345;
static pi_result redefinedProgramCreate(pi_context context, const void *il,
size_t length,
pi_program *res_program) {
size_t CurrentProgram = ProgramNum;
*res_program = reinterpret_cast<pi_program>(CurrentProgram);
++ProgramNum;
return PI_SUCCESS;
}

static int RetainCounter = 0;
static pi_result redefinedProgramRetain(pi_program program) {
++RetainCounter;
return PI_SUCCESS;
}

static int KernelReleaseCounter = 0;
static pi_result redefinedKernelRelease(pi_kernel kernel) {
++KernelReleaseCounter;
return PI_SUCCESS;
}

class MultipleDeviceCacheTest : public ::testing::Test {
public:
MultipleDeviceCacheTest() : Plt{default_selector()} {}

protected:
void SetUp() override {
if (Plt.is_host() || Plt.get_backend() != backend::opencl) {
return;
}

Mock = std::make_unique<unittest::PiMock>(Plt);

setupDefaultMockAPIs(*Mock);
Mock->redefine<detail::PiApiKind::piDevicesGet>(redefinedDevicesGet);
Mock->redefine<detail::PiApiKind::piDeviceGetInfo>(redefinedDeviceGetInfo);
Mock->redefine<detail::PiApiKind::piDeviceRetain>(redefinedDeviceRetain);
Mock->redefine<detail::PiApiKind::piDeviceRelease>(redefinedDeviceRelease);
Mock->redefine<detail::PiApiKind::piContextCreate>(redefinedContextCreate);
Mock->redefine<detail::PiApiKind::piContextRelease>(
redefinedContextRelease);
Mock->redefine<detail::PiApiKind::piQueueCreate>(redefinedQueueCreate);
Mock->redefine<detail::PiApiKind::piQueueRelease>(redefinedQueueRelease);
Mock->redefine<detail::PiApiKind::piProgramRetain>(redefinedProgramRetain);
Mock->redefine<detail::PiApiKind::piProgramCreate>(redefinedProgramCreate);
Mock->redefine<detail::PiApiKind::piKernelRelease>(redefinedKernelRelease);
}

protected:
std::unique_ptr<unittest::PiMock> Mock;
platform Plt;
};

// Test that program is retained for each device and each kernel is released
// once
TEST_F(MultipleDeviceCacheTest, ProgramRetain) {
if (Plt.is_host() || Plt.get_backend() != backend::opencl) {
return;
}
{
std::vector<sycl::device> Devices = Plt.get_devices(info::device_type::gpu);
sycl::context Context(Devices);
sycl::queue Queue(Context, Devices[0]);
assert(Devices.size() == 2);

auto Bundle = cl::sycl::get_kernel_bundle<sycl::bundle_state::input>(
Queue.get_context());

Queue.submit([&](cl::sycl::handler &cgh) {
cgh.parallel_for<MultTestKernel>(cl::sycl::nd_range<1>(10, 10),
MultTestKernel{});
});

auto BundleObject = cl::sycl::build(Bundle, Bundle.get_devices());
auto KernelID = cl::sycl::get_kernel_id<MultTestKernel>();
auto Kernel = BundleObject.get_kernel(KernelID);
auto BundleImpl = getSyclObjImpl(Bundle);
int NumRetains = BundleImpl->size() * 2;

EXPECT_EQ(RetainCounter, NumRetains)
<< "Expect " << NumRetains << " piProgramRetain calls";

auto CtxImpl = detail::getSyclObjImpl(Context);
detail::KernelProgramCache::KernelCacheT &KernelCache =
CtxImpl->getKernelProgramCache().acquireKernelsPerProgramCache().get();

EXPECT_EQ(KernelCache.size(), (size_t)2) << "Expect 2 kernels in cache";
}
// Cache is cleared here, check kernel release
EXPECT_EQ(KernelReleaseCounter, 3) << "Expect 3 piKernelRelease calls";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please, add comment describing how number 3 was retrieved. Why is it expected and not 2 or four?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And why not test to see if KernelReleaseCounter is equal to the RetainCounter?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added clarifying comments

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And why not test to see if KernelReleaseCounter is equal to the RetainCounter?

AFAIR, there's some mem-leak test which checks for retain-release parity.
Although, I'm not sure it checks for kernels.

}