diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc index 1fa6c9fe10d48..8a124ad549c69 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_bindless_images.asciidoc @@ -1335,51 +1335,63 @@ For the forms that take a USM pointer, the image memory must also have been allocated within the same context and device of the `queue`. The USM memory must be accessible on the queue's device. -The `ext_oneapi_copy` function variants that don't take offsets and extents may -fail in the following scenarios: +The `ext_oneapi_copy` function variants that do not take offsets and extents +must ensure that the following conditions are met: -1. The `Src` and `Dest` memory was not allocated on the same device and -context of the queue. +1. The `Src` and `Dest` memory was allocated on the same device and context. -2. The `Src` and `Dest` memory regions, where `Src` or `Dest` can be either -on the host or device, do not have the same memory capacity, where the capacity -is calculate from the `width`, `height`, `depth`, `channel_order`, and +2. The `Src` and `Dest` memory regions, where `Src` or `Dest` can be either +on the host or device, have the same memory capacity, where the capacity +is calculated from the `width`, `height`, `depth`, `channel_order`, and `channel_type` members of the `image_descriptor` parameter. -The `ext_oneapi_copy` function variants that do take offsets and extents may -fail in the following scenarios: +The `ext_oneapi_copy` function variants that do take offsets and extents must +ensure that the following conditions are met. If a condition names a specific +parameter, it is only applicable to the function variants that take that +parameter. -1. The `Src` and `Dest` memory was not allocated on the same device and -context of the queue. +1. The `Src` and `Dest` memory was allocated on the same device and context. -2. The image descriptor passed does not match the image descriptor used to -allocate the image on the device. +2. The image descriptors passed match the image descriptors used to allocate +the image's memory on the device. -3. the `CopyExtent` describes a memory region larger than that which was -allocated on either the host or the device. +3. The `CopyExtent` describes a memory region that is not larger than that which +was allocated on either the host or the device. -4. The `HostExtent` describes a memory region larger than that which was -allocated on the host. +4. The `HostExtent` describes a memory region that is not larger than that which +was allocated on the host. -5. The `SrcExtent` describes a memory region larger than that which was -allocated, where `Src` can be either the host or device. +5. The `SrcExtent` describes a memory region that is not larger than that which +was allocated, where `Src` can be either on the host or on the device. -6. The `DestExtent` describes a memory region larger than that which was -allocated, where `Dest` can be either the host or device. +6. The `DestExtent` describes a memory region that is not larger than that which +was allocated, where `Dest` can be either on the host or on the device. -7. If `SrcOffset + CopyExtent` moves the memory sub-region outside the bounds -of the memory described by `Src`, irrespective of whether `Src` is on the host -or the device. +7. The `DeviceRowPitch` adheres to the alignment requirements outlined in the +"Pitch alignment restrictions and queries" section. -8. If `DestOffset + CopyExtent` moves the memory sub-region outside the bounds -of the memory described by `Dest`, irrespective of whether `Dest` is on the -host or the device. +8. The `DeviceRowPitch` is greater than or equal to the width of the image on +the device. -9. The `DeviceRowPitch` does not adhere to the alignment requirements -outlined in section "Pitch alignment restrictions and queries" +9. For the relevant dimensions, `SrcOffset + CopyExtent` does not move the +memory sub-region outside the bounds of the memory described by `Src`, +irrespective of whether `Src` is on the host or the device. The relevant +dimensions are `x` for 1D images; `x` and `y` for 2D images; and `x`, `y`, and +`z` for 3D images. `x`, `y`, and `z` correspond to indices `0`, `1`, and `2` of +the `SrcOffset` and `CopyExtent` parameters, respectively. + +10. For the relevant dimensions, `DestOffset + CopyExtent` does not move the +memory sub-region outside the bounds of the memory described by `Dest`, +irrespective of whether `Dest` is on the host or the device. The relevant +dimensions are `x` for 1D images, `x` and `y` for 2D images, and `x`, `y`, and +`z` for 3D images. `x`, `y`, and `z` correspond to indices `0`, `1`, and `2` of +the `SrcOffset` and `CopyExtent` parameters, respectively. + +11. The `CopyExtent`'s' `x`, `y`, and `z` dimensions must not be `0`. They must +be greater than or equal to `1`. Even if the image is 1D or 2D, the remaining +non-relevant dimension's values must be set to `1` in the `CopyExtent` +parameter. -10. The value of `DeviceRowPitch` is smaller than the width of the image on -the device. If copying of an image fails, `ext_oneapi_copy` will throw a `sycl::exception` with error code `sycl::errc::invalid`, and relay an error message back to the diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 33f50af815a5b..346a396b464e6 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -98,6 +98,33 @@ void *getValueFromDynamicParameter( // Bindless image helpers +constexpr size_t get_channel_size( + const sycl::ext::oneapi::experimental::image_descriptor &Desc) { + switch (Desc.channel_type) { + case sycl::image_channel_type::fp16: + return sizeof(sycl::half); + case sycl::image_channel_type::fp32: + return sizeof(float); + case sycl::image_channel_type::snorm_int8: + case sycl::image_channel_type::unorm_int8: + case sycl::image_channel_type::signed_int8: + case sycl::image_channel_type::unsigned_int8: + return sizeof(uint8_t); + case sycl::image_channel_type::snorm_int16: + case sycl::image_channel_type::unorm_int16: + case sycl::image_channel_type::signed_int16: + case sycl::image_channel_type::unsigned_int16: + return sizeof(uint16_t); + case sycl::image_channel_type::signed_int32: + case sycl::image_channel_type::unsigned_int32: + return sizeof(uint32_t); + default: + throw sycl::exception(make_error_code(errc::invalid), + "Unsupported channel type"); + return 0; + } +} + // Fill image type and return depth or array_size static unsigned int fill_image_type(const ext::oneapi::experimental::image_descriptor &Desc, @@ -257,16 +284,8 @@ fill_copy_args(detail::handler_impl *impl, impl->MDstImageDesc.depth = DestExtent[2]; } - if (impl->MImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { - impl->MSrcImageDesc.rowPitch = 0; - impl->MDstImageDesc.rowPitch = DestPitch; - } else if (impl->MImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { - impl->MSrcImageDesc.rowPitch = SrcPitch; - impl->MDstImageDesc.rowPitch = 0; - } else { - impl->MSrcImageDesc.rowPitch = SrcPitch; - impl->MDstImageDesc.rowPitch = DestPitch; - } + impl->MSrcImageDesc.rowPitch = SrcPitch; + impl->MDstImageDesc.rowPitch = DestPitch; } static void @@ -279,9 +298,11 @@ fill_copy_args(detail::handler_impl *impl, sycl::range<3> DestExtent = {0, 0, 0}, sycl::range<3> CopyExtent = {0, 0, 0}) { - fill_copy_args(impl, Desc, Desc, ImageCopyFlags, 0 /*SrcPitch*/, - 0 /*DestPitch*/, SrcOffset, SrcExtent, DestOffset, DestExtent, - CopyExtent); + size_t SrcPitch = SrcExtent[0] * Desc.num_channels * get_channel_size(Desc); + size_t DestPitch = DestExtent[0] * Desc.num_channels * get_channel_size(Desc); + + fill_copy_args(impl, Desc, Desc, ImageCopyFlags, SrcPitch, DestPitch, + SrcOffset, SrcExtent, DestOffset, DestExtent, CopyExtent); } static void @@ -309,8 +330,13 @@ fill_copy_args(detail::handler_impl *impl, sycl::range<3> DestExtent = {0, 0, 0}, sycl::range<3> CopyExtent = {0, 0, 0}) { - fill_copy_args(impl, SrcImgDesc, DestImgDesc, ImageCopyFlags, 0 /*SrcPitch*/, - 0 /*DestPitch*/, SrcOffset, SrcExtent, DestOffset, DestExtent, + size_t SrcPitch = + SrcExtent[0] * SrcImgDesc.num_channels * get_channel_size(SrcImgDesc); + size_t DestPitch = + DestExtent[0] * DestImgDesc.num_channels * get_channel_size(DestImgDesc); + + fill_copy_args(impl, SrcImgDesc, DestImgDesc, ImageCopyFlags, SrcPitch, + DestPitch, SrcOffset, SrcExtent, DestOffset, DestExtent, CopyExtent); } @@ -1618,10 +1644,17 @@ void handler::ext_oneapi_copy( get_pointer_type(Dest, createSyclObjFromImpl(impl->get_context()))); - if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || - ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { - detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, DeviceRowPitch, + // Calculate host pitch, where host memory is always assumed to be tightly + // packed. + size_t HostRowPitch = + Desc.width * Desc.num_channels * detail::get_channel_size(Desc); + + if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { + detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, HostRowPitch, DeviceRowPitch); + } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { + detail::fill_copy_args(get_impl(), Desc, ImageCopyFlags, DeviceRowPitch, + HostRowPitch); } else { throw sycl::exception(make_error_code(errc::invalid), "Copy Error: This copy function only performs host " @@ -1650,14 +1683,19 @@ void handler::ext_oneapi_copy( get_pointer_type(Dest, createSyclObjFromImpl(impl->get_context()))); + // Calculate host pitch, where host memory is always assumed to be tightly + // packed. + size_t HostRowPitch = HostExtent[0] * DeviceImgDesc.num_channels * + detail::get_channel_size(DeviceImgDesc); + // Fill the host extent based on the type of copy. if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) { detail::fill_copy_args(get_impl(), DeviceImgDesc, ImageCopyFlags, - DeviceRowPitch, DeviceRowPitch, SrcOffset, - HostExtent, DestOffset, {0, 0, 0}, CopyExtent); + HostRowPitch, DeviceRowPitch, SrcOffset, HostExtent, + DestOffset, {0, 0, 0}, CopyExtent); } else if (ImageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) { detail::fill_copy_args(get_impl(), DeviceImgDesc, ImageCopyFlags, - DeviceRowPitch, DeviceRowPitch, SrcOffset, {0, 0, 0}, + DeviceRowPitch, HostRowPitch, SrcOffset, {0, 0, 0}, DestOffset, HostExtent, CopyExtent); } else { throw sycl::exception(make_error_code(errc::invalid), diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp b/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp similarity index 64% rename from sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp rename to sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp index 357d67d78de9b..ac93ae5db9e67 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/copies/copy_subregion_1D.cpp @@ -4,13 +4,14 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include -#include #include #include #include -// Uncomment to print additional test information +#include +#include + +// Uncomment to print additional test information. // #define VERBOSE_PRINT namespace syclexp = sycl::ext::oneapi::experimental; @@ -21,45 +22,71 @@ void copy_image_mem_handle_to_image_mem_handle( const std::vector &dataIn2, sycl::device dev, sycl::queue q, std::vector &out) { - // Check that output image is double size of input images + // Check that output image is double size of input images. assert(outDesc.width == dataInDesc.width * 2); syclexp::image_mem imgMemSrc1(dataInDesc, dev, q.get_context()); syclexp::image_mem imgMemSrc2(dataInDesc, dev, q.get_context()); syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); - // Copy input data to device - q.ext_oneapi_copy(dataIn1.data(), imgMemSrc1.get_handle(), dataInDesc); - q.ext_oneapi_copy(dataIn2.data(), imgMemSrc2.get_handle(), dataInDesc); + // Copy host input data to device. + // Extent to copy. + sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; - q.wait_and_throw(); + // Copy first half of dataIn1 to first quarter of imgMemSrc1. + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, {dataInDesc.width, 0, 0}, + imgMemSrc1.get_handle(), {0, 0, 0}, dataInDesc, copyExtent); - // Extent to copy - sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; + // Copy second half of dataIn1 to second quarter of imgMemSrc1. + q.ext_oneapi_copy(dataIn1.data(), {dataInDesc.width / 2, 0, 0}, + {dataInDesc.width, 0, 0}, imgMemSrc1.get_handle(), + {dataInDesc.width / 2, 0, 0}, dataInDesc, copyExtent); + + // Copy first half of dataIn2 to third quarter of imgMemSrc2. + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, {dataInDesc.width, 0, 0}, + imgMemSrc2.get_handle(), {0, 0, 0}, dataInDesc, copyExtent); - // Copy first half of imgMemSrcOne to first quarter of imgMemDst + // Copy second half of dataIn2 to fourth quarter of imgMemSrc2. + q.ext_oneapi_copy(dataIn2.data(), {dataInDesc.width / 2, 0, 0}, + {dataInDesc.width, 0, 0}, imgMemSrc2.get_handle(), + {dataInDesc.width / 2, 0, 0}, dataInDesc, copyExtent); + + q.wait_and_throw(); + + // Copy data from device to device. + // Copy first half of imgMemSrc1 to first quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc1.get_handle(), {0, 0, 0}, dataInDesc, imgMemDst.get_handle(), {0, 0, 0}, outDesc, copyExtent); - // Copy second half of imgMemSrcOne to second quarter of imgMemDst + // Copy second half of imgMemSrc1 to second quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc1.get_handle(), {dataInDesc.width / 2, 0, 0}, dataInDesc, imgMemDst.get_handle(), {outDesc.width / 4, 0, 0}, outDesc, copyExtent); - // Copy first half of imgMemSrcTwo to third quarter of imgMemDst + // Copy first half of imgMemSrc2 to third quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc2.get_handle(), {0, 0, 0}, dataInDesc, imgMemDst.get_handle(), {outDesc.width / 2, 0, 0}, outDesc, copyExtent); - // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst + // Copy second half of imgMemSrc2 to fourth quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc2.get_handle(), {dataInDesc.width / 2, 0, 0}, dataInDesc, imgMemDst.get_handle(), {(outDesc.width / 4) * 3, 0, 0}, outDesc, copyExtent); q.wait_and_throw(); - // Copy out data to host - q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), outDesc); + // Copy device data back to host. + // Extent to copy. + copyExtent = {outDesc.width / 2, 1, 1}; + + // Copy first half of imgMemDst to first half of out data. + q.ext_oneapi_copy(imgMemDst.get_handle(), {0, 0, 0}, outDesc, out.data(), + {0, 0, 0}, {outDesc.width / 2, 0, 0}, copyExtent); + + // Copy second half of imgMemDst to second half of out data. + q.ext_oneapi_copy(imgMemDst.get_handle(), {outDesc.width / 2, 0, 0}, outDesc, + out.data(), {outDesc.width / 2, 0, 0}, + {outDesc.width / 2, 0, 0}, copyExtent); q.wait_and_throw(); } @@ -71,48 +98,74 @@ void copy_image_mem_handle_to_usm(const syclexp::image_descriptor &dataInDesc, sycl::device dev, sycl::queue q, std::vector &out) { - // Check that output image is double size of input images + // Check that output image is double size of input images. assert(outDesc.width == dataInDesc.width * 2); syclexp::image_mem imgMemSrc1(dataInDesc, dev, q.get_context()); syclexp::image_mem imgMemSrc2(dataInDesc, dev, q.get_context()); - // Allocate 1D device USM memory. Pitch set to zero as it is a 1D image + // Allocate 1D device USM memory. Pitch set to zero as it is a 1D image. size_t pitch = 0; size_t elements = outDesc.width * outDesc.num_channels; void *imgMemDst = sycl::malloc_device(elements, q); - // Copy input data to device - q.ext_oneapi_copy(dataIn1.data(), imgMemSrc1.get_handle(), dataInDesc); - q.ext_oneapi_copy(dataIn2.data(), imgMemSrc2.get_handle(), dataInDesc); + // Copy host input data to device. + // Extent to copy. + sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; - q.wait_and_throw(); + // Copy first half of dataIn1 to first half of imgMemSrc1. + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, {dataInDesc.width, 0, 0}, + imgMemSrc1.get_handle(), {0, 0, 0}, dataInDesc, copyExtent); - // Extent to copy - sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; + // Copy second half of dataIn1 to second half of imgMemSrc1. + q.ext_oneapi_copy(dataIn1.data(), {dataInDesc.width / 2, 0, 0}, + {dataInDesc.width, 0, 0}, imgMemSrc1.get_handle(), + {dataInDesc.width / 2, 0, 0}, dataInDesc, copyExtent); + + // Copy first half of dataIn2 to first half of imgMemSrc2. + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, {dataInDesc.width, 0, 0}, + imgMemSrc2.get_handle(), {0, 0, 0}, dataInDesc, copyExtent); - // Copy first half of imgMemSrcOne to first quarter of imgMemDst + // Copy second half of dataIn2 to second half of imgMemSrc2. + q.ext_oneapi_copy(dataIn2.data(), {dataInDesc.width / 2, 0, 0}, + {dataInDesc.width, 0, 0}, imgMemSrc2.get_handle(), + {dataInDesc.width / 2, 0, 0}, dataInDesc, copyExtent); + + q.wait_and_throw(); + + // Copy data from device to device. + // Copy first half of imgMemSrc1 to first quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc1.get_handle(), {0, 0, 0}, dataInDesc, imgMemDst, {0, 0, 0}, outDesc, pitch, copyExtent); - // Copy second half of imgMemSrcOne to second quarter of imgMemDst + // Copy second half of imgMemSrc1 to second quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc1.get_handle(), {dataInDesc.width / 2, 0, 0}, dataInDesc, imgMemDst, {outDesc.width / 4, 0, 0}, outDesc, pitch, copyExtent); - // Copy first half of imgMemSrcTwo to third quarter of imgMemDst + // Copy first half of imgMemSrc2 to third quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc2.get_handle(), {0, 0, 0}, dataInDesc, imgMemDst, {outDesc.width / 2, 0, 0}, outDesc, pitch, copyExtent); - // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst + // Copy second half of imgMemSrc2 to fourth quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc2.get_handle(), {dataInDesc.width / 2, 0, 0}, dataInDesc, imgMemDst, {(outDesc.width / 4) * 3, 0, 0}, outDesc, pitch, copyExtent); q.wait_and_throw(); - // Copy out data to host - q.ext_oneapi_copy(imgMemDst, out.data(), outDesc, pitch); + // Copy device data back to host. + // Extent to copy. + copyExtent = {outDesc.width / 2, 1, 1}; + + // Copy first half of imgMemDst to first half of out data. + q.ext_oneapi_copy(imgMemDst, {0, 0, 0}, out.data(), {0, 0, 0}, outDesc, pitch, + {outDesc.width, 0, 0}, copyExtent); + + // Copy second half of imgMemDst to second half of out data. + q.ext_oneapi_copy(imgMemDst, {outDesc.width / 2, 0, 0}, out.data(), + {outDesc.width / 2, 0, 0}, outDesc, pitch, + {outDesc.width, 0, 0}, copyExtent); q.wait_and_throw(); @@ -126,7 +179,7 @@ void copy_usm_to_image_mem_handle(const syclexp::image_descriptor &dataInDesc, sycl::device dev, sycl::queue q, std::vector &out) { - // Check that output image is double size of input images + // Check that output image is double size of input images. assert(outDesc.width == dataInDesc.width * 2); size_t pitchSrc1 = 0; @@ -137,38 +190,66 @@ void copy_usm_to_image_mem_handle(const syclexp::image_descriptor &dataInDesc, syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); - // Copy input data to device - q.ext_oneapi_copy(dataIn1.data(), imgMemSrc1, dataInDesc, pitchSrc1); - q.ext_oneapi_copy(dataIn2.data(), imgMemSrc2, dataInDesc, pitchSrc2); + // Copy host input data to device. + // Extent to copy. + sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; - q.wait_and_throw(); + // Copy first half of dataIn1 to first half of imgMemSrc1. + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, imgMemSrc1, {0, 0, 0}, + dataInDesc, pitchSrc1, {dataInDesc.width, 0, 0}, + copyExtent); - // Extent to copy - sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; + // Copy second half of dataIn1 to second half of imgMemSrc1. + q.ext_oneapi_copy(dataIn1.data(), {dataInDesc.width / 2, 0, 0}, imgMemSrc1, + {dataInDesc.width / 2, 0, 0}, dataInDesc, pitchSrc1, + {dataInDesc.width, 0, 0}, copyExtent); + + // Copy first half of dataIn2 to first half of imgMemSrc2. + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, imgMemSrc2, {0, 0, 0}, + dataInDesc, pitchSrc2, {dataInDesc.width, 0, 0}, + copyExtent); + + // Copy second half of dataIn2 to second half of imgMemSrc2. + q.ext_oneapi_copy(dataIn2.data(), {dataInDesc.width / 2, 0, 0}, imgMemSrc2, + {dataInDesc.width / 2, 0, 0}, dataInDesc, pitchSrc2, + {dataInDesc.width, 0, 0}, copyExtent); - // Copy first half of imgMemSrcOne to first quarter of imgMemDst + q.wait_and_throw(); + + // Copy data from device to device. + // Copy first half of imgMemSrcOne to first quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc1, {0, 0, 0}, dataInDesc, pitchSrc1, imgMemDst.get_handle(), {0, 0, 0}, outDesc, copyExtent); - // Copy second half of imgMemSrcOne to second quarter of imgMemDst + // Copy second half of imgMemSrcOne to second quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc1, {dataInDesc.width / 2, 0, 0}, dataInDesc, pitchSrc1, imgMemDst.get_handle(), {outDesc.width / 4, 0, 0}, outDesc, copyExtent); - // Copy first half of imgMemSrcTwo to third quarter of imgMemDst + // Copy first half of imgMemSrcTwo to third quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc2, {0, 0, 0}, dataInDesc, pitchSrc2, imgMemDst.get_handle(), {outDesc.width / 2, 0, 0}, outDesc, copyExtent); - // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst + // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc2, {dataInDesc.width / 2, 0, 0}, dataInDesc, pitchSrc2, imgMemDst.get_handle(), {(outDesc.width / 4) * 3, 0, 0}, outDesc, copyExtent); q.wait_and_throw(); - // Copy out data to host - q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), outDesc); + // Copy device data back to host. + // Extent to copy. + copyExtent = {outDesc.width / 2, 1, 1}; + + // Copy first half of imgMemDst to first half of out data. + q.ext_oneapi_copy(imgMemDst.get_handle(), {0, 0, 0}, outDesc, out.data(), + {0, 0, 0}, {outDesc.width, 0, 0}, copyExtent); + + // Copy second half of imgMemDst to second half of out data. + q.ext_oneapi_copy(imgMemDst.get_handle(), {outDesc.width / 2, 0, 0}, outDesc, + out.data(), {outDesc.width / 2, 0, 0}, + {outDesc.width, 0, 0}, copyExtent); q.wait_and_throw(); @@ -182,7 +263,7 @@ void copy_usm_to_usm(const syclexp::image_descriptor &dataInDesc, const std::vector &dataIn2, sycl::device dev, sycl::queue q, std::vector &out) { - // Check that output image is double size of input images + // Check that output image is double size of input images. assert(outDesc.width == dataInDesc.width * 2); size_t pitchSrc1 = 0; @@ -191,43 +272,69 @@ void copy_usm_to_usm(const syclexp::image_descriptor &dataInDesc, void *imgMemSrc1 = sycl::malloc_device(elementsSrc, q); void *imgMemSrc2 = sycl::malloc_device(elementsSrc, q); - // syclexp::image_mem imgMemDst(outDesc, dev, q.get_context()); - size_t pitchDst = 0; size_t elementsDst = outDesc.width * outDesc.num_channels; void *imgMemDst = sycl::malloc_device(elementsDst, q); - // Copy input data to device - q.ext_oneapi_copy(dataIn1.data(), imgMemSrc1, dataInDesc, pitchSrc1); - q.ext_oneapi_copy(dataIn2.data(), imgMemSrc2, dataInDesc, pitchSrc2); + // Copy host input data to device. + // Extent to copy. + sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; - q.wait_and_throw(); + // Copy first half of dataIn1 to first half of imgMemSrc1. + q.ext_oneapi_copy(dataIn1.data(), {0, 0, 0}, imgMemSrc1, {0, 0, 0}, + dataInDesc, pitchSrc1, {dataInDesc.width, 0, 0}, + copyExtent); - // Extent to copy - sycl::range copyExtent = {dataInDesc.width / 2, 1, 1}; + // Copy second half of dataIn1 to second half of imgMemSrc1. + q.ext_oneapi_copy(dataIn1.data(), {dataInDesc.width / 2, 0, 0}, imgMemSrc1, + {dataInDesc.width / 2, 0, 0}, dataInDesc, pitchSrc1, + {dataInDesc.width, 0, 0}, copyExtent); - // Copy first half of imgMemSrcOne to first quarter of imgMemDst + // Copy first half of dataIn2 to first half of imgMemSrc2. + q.ext_oneapi_copy(dataIn2.data(), {0, 0, 0}, imgMemSrc2, {0, 0, 0}, + dataInDesc, pitchSrc2, {dataInDesc.width, 0, 0}, + copyExtent); + + // Copy second half of dataIn2 to second half of imgMemSrc2. + q.ext_oneapi_copy(dataIn2.data(), {dataInDesc.width / 2, 0, 0}, imgMemSrc2, + {dataInDesc.width / 2, 0, 0}, dataInDesc, pitchSrc2, + {dataInDesc.width, 0, 0}, copyExtent); + + q.wait_and_throw(); + + // Copy data from device to device. + // Copy first half of imgMemSrc1 to first quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc1, {0, 0, 0}, dataInDesc, pitchSrc1, imgMemDst, {0, 0, 0}, outDesc, pitchDst, copyExtent); - // Copy second half of imgMemSrcOne to second quarter of imgMemDst + // Copy second half of imgMemSrc1 to second quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc1, {dataInDesc.width / 2, 0, 0}, dataInDesc, pitchSrc1, imgMemDst, {outDesc.width / 4, 0, 0}, outDesc, pitchDst, copyExtent); - // Copy first half of imgMemSrcTwo to third quarter of imgMemDst + // Copy first half of imgMemSrc2 to third quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc2, {0, 0, 0}, dataInDesc, pitchSrc2, imgMemDst, {outDesc.width / 2, 0, 0}, outDesc, pitchDst, copyExtent); - // Copy second half of imgMemSrcTwo to fourth quarter of imgMemDst + // Copy second half of imgMemSrc2 to fourth quarter of imgMemDst. q.ext_oneapi_copy(imgMemSrc2, {dataInDesc.width / 2, 0, 0}, dataInDesc, pitchSrc2, imgMemDst, {(outDesc.width / 4) * 3, 0, 0}, outDesc, pitchDst, copyExtent); q.wait_and_throw(); - // Copy out data to host - q.ext_oneapi_copy(imgMemDst, out.data(), outDesc, pitchDst); + // Copy device data back to host. + // Extent to copy. + copyExtent = {outDesc.width / 2, 1, 1}; + + // Copy first half of imgMemDst to first half of out data. + q.ext_oneapi_copy(imgMemDst, {0, 0, 0}, out.data(), {0, 0, 0}, outDesc, + pitchDst, {outDesc.width, 0, 0}, copyExtent); + + // Copy second half of imgMemDst to second half of out data. + q.ext_oneapi_copy(imgMemDst, {outDesc.width / 2, 0, 0}, out.data(), + {outDesc.width / 2, 0, 0}, outDesc, pitchDst, + {outDesc.width, 0, 0}, copyExtent); q.wait_and_throw(); @@ -409,12 +516,11 @@ bool check_test(const std::vector &out, return validated; } -template +template bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<1> dims) { std::vector dataIn1(dims.size() / 2); std::vector dataIn2(dims.size() / 2); - std::vector out(dims.size()); + std::vector out(dims.size(), 0); std::vector expected(dims.size()); @@ -439,16 +545,22 @@ bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<1> dims) { validated = validated && check_test(out, expected); + std::fill(out.begin(), out.end(), 0); + copy_image_mem_handle_to_usm(dataInDesc, outDesc, dataIn1, dataIn2, dev, q, out); validated = validated && check_test(out, expected); + std::fill(out.begin(), out.end(), 0); + copy_usm_to_image_mem_handle(dataInDesc, outDesc, dataIn1, dataIn2, dev, q, out); validated = validated && check_test(out, expected); + std::fill(out.begin(), out.end(), 0); + copy_usm_to_usm(dataInDesc, outDesc, dataIn1, dataIn2, dev, q, out); validated = validated && check_test(out, expected); @@ -483,7 +595,5 @@ int main() { return 1; } - std::cout << "Tests passed\n"; - return 0; } diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp b/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp similarity index 61% rename from sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp rename to sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp index d13a9146b2784..6d61a2a2a259a 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/copies/copy_subregion_2D.cpp @@ -10,7 +10,7 @@ #include #include -// Uncomment to print additional test information +// Uncomment to print additional test information. // #define VERBOSE_PRINT namespace syclexp = sycl::ext::oneapi::experimental; @@ -21,15 +21,30 @@ void copy_image_mem_handle_to_image_mem_handle( syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); syclexp::image_mem imgMemDst(desc, dev, q.get_context()); - // Copy input data to device - q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + // Copy host input data to device. + // Extent to copy. + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + sycl::range hostExtent = {desc.width, desc.height, 0}; - q.wait_and_throw(); + // Copy four quarters of input data into device image memory. + q.ext_oneapi_copy(dataIn.data(), {0, 0, 0}, hostExtent, + imgMemSrc.get_handle(), {0, 0, 0}, desc, copyExtent); - // Extent to copy - sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, 0, 0}, hostExtent, + imgMemSrc.get_handle(), {desc.width / 2, 0, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(dataIn.data(), {0, desc.height / 2, 0}, hostExtent, + imgMemSrc.get_handle(), {0, desc.height / 2, 0}, desc, + copyExtent); - // Copy four quarters of square into output image + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, desc.height / 2, 0}, + hostExtent, imgMemSrc.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, copyExtent); + + q.wait_and_throw(); + + // Copy data from device to device, using four sub-region copies. q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, 0, 0}, desc, imgMemDst.get_handle(), {0, 0, 0}, desc, copyExtent); @@ -48,8 +63,21 @@ void copy_image_mem_handle_to_image_mem_handle( q.wait_and_throw(); - // Copy out data to host - q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), desc); + // Copy device data back to host. + // Copy four quarters of device imgMemDst data to host out. + q.ext_oneapi_copy(imgMemDst.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, hostExtent, copyExtent); + + q.ext_oneapi_copy(imgMemDst.get_handle(), {desc.width / 2, 0, 0}, desc, + out.data(), {desc.width / 2, 0, 0}, hostExtent, copyExtent); + + q.ext_oneapi_copy(imgMemDst.get_handle(), {0, desc.height / 2, 0}, desc, + out.data(), {0, desc.height / 2, 0}, hostExtent, + copyExtent); + + q.ext_oneapi_copy( + imgMemDst.get_handle(), {desc.width / 2, desc.height / 2, 0}, desc, + out.data(), {desc.width / 2, desc.height / 2, 0}, hostExtent, copyExtent); q.wait_and_throw(); } @@ -60,19 +88,33 @@ void copy_image_mem_handle_to_usm(const syclexp::image_descriptor &desc, std::vector &out) { syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); - // Allocate 2D device USM memory + // Allocate 2D device USM memory. size_t pitch = 0; void *imgMemDst = syclexp::pitched_alloc_device(&pitch, desc, q); - // Copy input data to device - q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + // Copy host input data to device. + // Extent to copy. + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; - q.wait_and_throw(); + // Copy four quarters of input data into device image memory. + q.ext_oneapi_copy(dataIn.data(), {0, 0, 0}, {desc.width, desc.height, 0}, + imgMemSrc.get_handle(), {0, 0, 0}, desc, copyExtent); - // Extent to copy - sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, 0, 0}, + {desc.width, desc.height, 0}, imgMemSrc.get_handle(), + {desc.width / 2, 0, 0}, desc, copyExtent); - // Copy four quarters of square into output image + q.ext_oneapi_copy(dataIn.data(), {0, desc.height / 2, 0}, + {desc.width, desc.height, 0}, imgMemSrc.get_handle(), + {0, desc.height / 2, 0}, desc, copyExtent); + + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, desc.height / 2, 0}, + {desc.width, desc.height, 0}, imgMemSrc.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, copyExtent); + + q.wait_and_throw(); + + // Copy data from device to device, using four sub-region copies. q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, 0, 0}, desc, imgMemDst, {0, 0, 0}, desc, pitch, copyExtent); @@ -89,8 +131,22 @@ void copy_image_mem_handle_to_usm(const syclexp::image_descriptor &desc, q.wait_and_throw(); - // Copy out data to host - q.ext_oneapi_copy(imgMemDst, out.data(), desc, pitch); + // Copy device data back to host. + // Copy four quarters of device imgMemDst data to host out. + q.ext_oneapi_copy(imgMemDst, {0, 0, 0}, out.data(), {0, 0, 0}, desc, pitch, + {desc.width, desc.height, 0}, copyExtent); + + q.ext_oneapi_copy(imgMemDst, {desc.width / 2, 0, 0}, out.data(), + {desc.width / 2, 0, 0}, desc, pitch, + {desc.width, desc.height, 0}, copyExtent); + + q.ext_oneapi_copy(imgMemDst, {0, desc.height / 2, 0}, out.data(), + {0, desc.height / 2, 0}, desc, pitch, + {desc.width, desc.height, 0}, copyExtent); + + q.ext_oneapi_copy(imgMemDst, {desc.width / 2, desc.height / 2, 0}, out.data(), + {desc.width / 2, desc.height / 2, 0}, desc, pitch, + {desc.width, desc.height, 0}, copyExtent); q.wait_and_throw(); @@ -101,21 +157,36 @@ void copy_usm_to_image_mem_handle(const syclexp::image_descriptor &desc, const std::vector &dataIn, sycl::device dev, sycl::queue q, std::vector &out) { - // Allocate 2D device USM memory + // Allocate 2D device USM memory. size_t pitch = 0; void *imgMemSrc = syclexp::pitched_alloc_device(&pitch, desc, q); syclexp::image_mem imgMemDst(desc, dev, q.get_context()); - // Copy input data to device - q.ext_oneapi_copy(dataIn.data(), imgMemSrc, desc, pitch); + // Copy host input data to device. + // Extent to copy. + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + sycl::range hostExtent = {desc.width, desc.height, 0}; - q.wait_and_throw(); + // Copy four quarters of input data into device image memory. + q.ext_oneapi_copy(dataIn.data(), {0, 0, 0}, imgMemSrc, {0, 0, 0}, desc, pitch, + hostExtent, copyExtent); - // Extent to copy - sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, 0, 0}, imgMemSrc, + {desc.width / 2, 0, 0}, desc, pitch, hostExtent, + copyExtent); - // Copy four quarters of square into output image + q.ext_oneapi_copy(dataIn.data(), {0, desc.height / 2, 0}, imgMemSrc, + {0, desc.height / 2, 0}, desc, pitch, hostExtent, + copyExtent); + + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, desc.height / 2, 0}, + imgMemSrc, {desc.width / 2, desc.height / 2, 0}, desc, + pitch, hostExtent, copyExtent); + + q.wait_and_throw(); + + // Copy data from device to device, using four sub-region copies. q.ext_oneapi_copy(imgMemSrc, {0, 0, 0}, desc, pitch, imgMemDst.get_handle(), {0, 0, 0}, desc, copyExtent); @@ -133,8 +204,23 @@ void copy_usm_to_image_mem_handle(const syclexp::image_descriptor &desc, q.wait_and_throw(); - // Copy out data to host - q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), desc); + // Copy device data back to host. + // Copy four quarters of device imgMemDst data to host out. + q.ext_oneapi_copy(imgMemDst.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, {desc.width, desc.height, 0}, copyExtent); + + q.ext_oneapi_copy(imgMemDst.get_handle(), {desc.width / 2, 0, 0}, desc, + out.data(), {desc.width / 2, 0, 0}, + {desc.width, desc.height, 0}, copyExtent); + + q.ext_oneapi_copy(imgMemDst.get_handle(), {0, desc.height / 2, 0}, desc, + out.data(), {0, desc.height / 2, 0}, + {desc.width, desc.height, 0}, copyExtent); + + q.ext_oneapi_copy(imgMemDst.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, out.data(), + {desc.width / 2, desc.height / 2, 0}, + {desc.width, desc.height, 0}, copyExtent); q.wait_and_throw(); @@ -144,22 +230,36 @@ void copy_usm_to_image_mem_handle(const syclexp::image_descriptor &desc, void copy_usm_to_usm(const syclexp::image_descriptor &desc, const std::vector &dataIn, sycl::device dev, sycl::queue q, std::vector &out) { - // Allocate 2D device USM memory + // Allocate 2D device USM memory. size_t pitchSrc = 0; void *imgMemSrc = syclexp::pitched_alloc_device(&pitchSrc, desc, q); size_t pitchDst = 0; void *imgMemDst = syclexp::pitched_alloc_device(&pitchDst, desc, q); - // Copy input data to device - q.ext_oneapi_copy(dataIn.data(), imgMemSrc, desc, pitchSrc); + // Copy host input data to device. + // Extent to copy. + sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + sycl::range hostExtent = {desc.width, desc.height, 0}; - q.wait_and_throw(); + q.ext_oneapi_copy(dataIn.data(), {0, 0, 0}, imgMemSrc, {0, 0, 0}, desc, + pitchSrc, hostExtent, copyExtent); - // Extent to copy - sycl::range copyExtent = {desc.width / 2, desc.height / 2, 1}; + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, 0, 0}, imgMemSrc, + {desc.width / 2, 0, 0}, desc, pitchSrc, hostExtent, + copyExtent); + + q.ext_oneapi_copy(dataIn.data(), {0, desc.height / 2, 0}, imgMemSrc, + {0, desc.height / 2, 0}, desc, pitchSrc, hostExtent, + copyExtent); - // Copy four quarters of square into output image + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, desc.height / 2, 0}, + imgMemSrc, {desc.width / 2, desc.height / 2, 0}, desc, + pitchSrc, hostExtent, copyExtent); + + q.wait_and_throw(); + + // Copy four quarters of square into output image. q.ext_oneapi_copy(imgMemSrc, {0, 0, 0}, desc, pitchSrc, imgMemDst, {0, 0, 0}, desc, pitchDst, copyExtent); @@ -177,8 +277,22 @@ void copy_usm_to_usm(const syclexp::image_descriptor &desc, q.wait_and_throw(); - // Copy out data to host - q.ext_oneapi_copy(imgMemDst, out.data(), desc, pitchDst); + // Copy device data back to host. + // Copy four quarters of device imgMemDst data to host out. + q.ext_oneapi_copy(imgMemDst, {0, 0, 0}, out.data(), {0, 0, 0}, desc, pitchDst, + hostExtent, copyExtent); + + q.ext_oneapi_copy(imgMemDst, {desc.width / 2, 0, 0}, out.data(), + {desc.width / 2, 0, 0}, desc, pitchDst, hostExtent, + copyExtent); + + q.ext_oneapi_copy(imgMemDst, {0, desc.height / 2, 0}, out.data(), + {0, desc.height / 2, 0}, desc, pitchDst, hostExtent, + copyExtent); + + q.ext_oneapi_copy(imgMemDst, {desc.width / 2, desc.height / 2, 0}, out.data(), + {desc.width / 2, desc.height / 2, 0}, desc, pitchDst, + hostExtent, copyExtent); q.wait_and_throw(); @@ -352,7 +466,7 @@ bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<2> dims) { std::vector expected(dims.size()); std::iota(expected.begin(), expected.end(), 0); - std::vector out(dims.size()); + std::vector out(dims.size(), 0); syclexp::image_descriptor desc = syclexp::image_descriptor(dims, channelNum, channelType); @@ -364,14 +478,20 @@ bool run_copy_test(sycl::device &dev, sycl::queue &q, sycl::range<2> dims) { validated = validated && check_test(out, expected); + std::fill(out.begin(), out.end(), 0); + copy_image_mem_handle_to_usm(desc, dataIn, dev, q, out); validated = validated && check_test(out, expected); + std::fill(out.begin(), out.end(), 0); + copy_usm_to_image_mem_handle(desc, dataIn, dev, q, out); validated = validated && check_test(out, expected); + std::fill(out.begin(), out.end(), 0); + copy_usm_to_usm(desc, dataIn, dev, q, out); validated = validated && check_test(out, expected); @@ -405,7 +525,5 @@ int main() { return 1; } - std::cout << "Tests passed\n"; - return 0; } diff --git a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_3D_subregion.cpp b/sycl/test-e2e/bindless_images/copies/copy_subregion_3D.cpp similarity index 61% rename from sycl/test-e2e/bindless_images/copies/device_to_device_copy_3D_subregion.cpp rename to sycl/test-e2e/bindless_images/copies/copy_subregion_3D.cpp index 9c0d34e423cb9..147435fb5806a 100644 --- a/sycl/test-e2e/bindless_images/copies/device_to_device_copy_3D_subregion.cpp +++ b/sycl/test-e2e/bindless_images/copies/copy_subregion_3D.cpp @@ -19,13 +19,45 @@ void copy_image_mem_handle_to_image_mem_handle( syclexp::image_mem imgMemSrc(desc, dev, q.get_context()); syclexp::image_mem imgMemDst(desc, dev, q.get_context()); - // Copy input data to device - q.ext_oneapi_copy(dataIn.data(), imgMemSrc.get_handle(), desc); + // Copy host input data to device. + // Extent to copy. + sycl::range copyExtent = {desc.width / 2, desc.height / 2, desc.depth / 2}; + sycl::range hostExtent = {desc.width, desc.height, desc.depth}; - q.wait_and_throw(); + // Copy eight quadrants of input data into device image memory. + q.ext_oneapi_copy(dataIn.data(), {0, 0, 0}, hostExtent, + imgMemSrc.get_handle(), {0, 0, 0}, desc, copyExtent); - // Extent to copy - sycl::range copyExtent = {desc.width / 2, desc.height / 2, desc.depth / 2}; + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, 0, 0}, hostExtent, + imgMemSrc.get_handle(), {desc.width / 2, 0, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(dataIn.data(), {0, desc.height / 2, 0}, hostExtent, + imgMemSrc.get_handle(), {0, desc.height / 2, 0}, desc, + copyExtent); + + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, desc.height / 2, 0}, + hostExtent, imgMemSrc.get_handle(), + {desc.width / 2, desc.height / 2, 0}, desc, copyExtent); + + q.ext_oneapi_copy(dataIn.data(), {0, 0, desc.depth / 2}, hostExtent, + imgMemSrc.get_handle(), {0, 0, desc.depth / 2}, desc, + copyExtent); + + q.ext_oneapi_copy(dataIn.data(), {desc.width / 2, 0, desc.depth / 2}, + hostExtent, imgMemSrc.get_handle(), + {desc.width / 2, 0, desc.depth / 2}, desc, copyExtent); + + q.ext_oneapi_copy(dataIn.data(), {0, desc.height / 2, desc.depth / 2}, + hostExtent, imgMemSrc.get_handle(), + {0, desc.height / 2, desc.depth / 2}, desc, copyExtent); + + q.ext_oneapi_copy( + dataIn.data(), {desc.width / 2, desc.height / 2, desc.depth / 2}, + hostExtent, imgMemSrc.get_handle(), + {desc.width / 2, desc.height / 2, desc.depth / 2}, desc, copyExtent); + + q.wait_and_throw(); // Copy eight quadrants of square into output image q.ext_oneapi_copy(imgMemSrc.get_handle(), {0, 0, 0}, desc, @@ -64,8 +96,37 @@ void copy_image_mem_handle_to_image_mem_handle( q.wait_and_throw(); - // Copy out data to host - q.ext_oneapi_copy(imgMemDst.get_handle(), out.data(), desc); + // Copy device data back to host. + // Copy four quarters of device imgMemDst data to host out. + q.ext_oneapi_copy(imgMemDst.get_handle(), {0, 0, 0}, desc, out.data(), + {0, 0, 0}, hostExtent, copyExtent); + + q.ext_oneapi_copy(imgMemDst.get_handle(), {desc.width / 2, 0, 0}, desc, + out.data(), {desc.width / 2, 0, 0}, hostExtent, copyExtent); + + q.ext_oneapi_copy(imgMemDst.get_handle(), {0, desc.height / 2, 0}, desc, + out.data(), {0, desc.height / 2, 0}, hostExtent, + copyExtent); + + q.ext_oneapi_copy( + imgMemDst.get_handle(), {desc.width / 2, desc.height / 2, 0}, desc, + out.data(), {desc.width / 2, desc.height / 2, 0}, hostExtent, copyExtent); + + q.ext_oneapi_copy(imgMemDst.get_handle(), {0, 0, desc.depth / 2}, desc, + out.data(), {0, 0, desc.depth / 2}, hostExtent, copyExtent); + + q.ext_oneapi_copy(imgMemDst.get_handle(), {desc.width / 2, 0, desc.depth / 2}, + desc, out.data(), {desc.width / 2, 0, desc.depth / 2}, + hostExtent, copyExtent); + + q.ext_oneapi_copy( + imgMemDst.get_handle(), {0, desc.height / 2, desc.depth / 2}, desc, + out.data(), {0, desc.height / 2, desc.depth / 2}, hostExtent, copyExtent); + + q.ext_oneapi_copy( + imgMemDst.get_handle(), {desc.width / 2, desc.height / 2, desc.depth / 2}, + desc, out.data(), {desc.width / 2, desc.height / 2, desc.depth / 2}, + hostExtent, copyExtent); q.wait_and_throw(); } @@ -160,7 +221,5 @@ int main() { return 1; } - std::cout << "Tests passed\n"; - return 0; } diff --git a/unified-runtime/source/adapters/cuda/image.cpp b/unified-runtime/source/adapters/cuda/image.cpp index 4d97b225cbb4d..4f2a83d0878c8 100644 --- a/unified-runtime/source/adapters/cuda/image.cpp +++ b/unified-runtime/source/adapters/cuda/image.cpp @@ -704,7 +704,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; if (pDstImageDesc->rowPitch == 0) { cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; @@ -725,7 +725,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.srcHost = pSrc; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; cpy_desc.srcHeight = pSrcImageDesc->height; cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; @@ -745,7 +745,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.srcHost = pSrc; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; cpy_desc.srcHeight = std::max(uint64_t{1}, pSrcImageDesc->height); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY; cpy_desc.dstArray = (CUarray)pDst; @@ -793,7 +793,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstY = pCopyRegion->dstOffset.y; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.dstHost = pDst; if (pSrcImageDesc->rowPitch == 0) { @@ -818,7 +818,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.dstHost = pDst; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = pDstImageDesc->height; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; @@ -838,7 +838,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcArray = as_CUArray(pSrc); cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST; cpy_desc.dstHost = pDst; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = std::max(uint64_t{1}, pDstImageDesc->height); cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height); diff --git a/unified-runtime/source/adapters/hip/image.cpp b/unified-runtime/source/adapters/hip/image.cpp index 4851b197d623c..0874df4c8ce85 100644 --- a/unified-runtime/source/adapters/hip/image.cpp +++ b/unified-runtime/source/adapters/hip/image.cpp @@ -704,7 +704,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcY = pCopyRegion->srcOffset.y; cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes; cpy_desc.dstY = pCopyRegion->dstOffset.y; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; if (pDstImageDesc->rowPitch == 0) { cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); @@ -727,7 +727,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = hipMemoryTypeHost; cpy_desc.srcHost = pSrc; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; cpy_desc.srcHeight = pSrcImageDesc->height; cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); @@ -749,7 +749,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.dstZ = pCopyRegion->dstOffset.z; cpy_desc.srcMemoryType = hipMemoryTypeHost; cpy_desc.srcHost = pSrc; - cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes; + cpy_desc.srcPitch = pSrcImageDesc->rowPitch; cpy_desc.srcHeight = std::max(MinCopyHeight, pSrcImageDesc->height); cpy_desc.dstMemoryType = hipMemoryTypeArray; cpy_desc.dstArray = static_cast(pDst); @@ -824,7 +824,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( } cpy_desc.dstMemoryType = hipMemoryTypeHost; cpy_desc.dstHost = pDst; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; UR_CHECK_ERROR(hipMemcpyParam2DAsync(&cpy_desc, Stream)); @@ -840,7 +840,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcArray = static_cast(const_cast(pSrc)); cpy_desc.dstMemoryType = hipMemoryTypeHost; cpy_desc.dstHost = pDst; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = pDstImageDesc->height; cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = pCopyRegion->copyExtent.height; @@ -863,7 +863,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.srcArray = static_cast(const_cast(pSrc)); cpy_desc.dstMemoryType = hipMemoryTypeHost; cpy_desc.dstHost = pDst; - cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes; + cpy_desc.dstPitch = pDstImageDesc->rowPitch; cpy_desc.dstHeight = std::max(MinCopyHeight, pDstImageDesc->height); cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width; cpy_desc.Height = diff --git a/unified-runtime/source/adapters/level_zero/image_common.cpp b/unified-runtime/source/adapters/level_zero/image_common.cpp index b34a59ad5edb6..9b76788b6a65e 100644 --- a/unified-runtime/source/adapters/level_zero/image_common.cpp +++ b/unified-runtime/source/adapters/level_zero/image_common.cpp @@ -779,8 +779,7 @@ ur_result_t bindlessImagesHandleCopyFlags( switch (imageCopyFlags) { case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE: { - uint32_t SrcRowPitch = - pSrcImageDesc->width * getPixelSizeBytes(pSrcImageFormat); + uint32_t SrcRowPitch = pSrcImageDesc->rowPitch; uint32_t SrcSlicePitch = SrcRowPitch * pSrcImageDesc->height; if (pDstImageDesc->rowPitch == 0) { // Copy to Non-USM memory @@ -824,8 +823,7 @@ ur_result_t bindlessImagesHandleCopyFlags( return UR_RESULT_SUCCESS; }; case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST: { - uint32_t DstRowPitch = - pDstImageDesc->width * getPixelSizeBytes(pDstImageFormat); + uint32_t DstRowPitch = pDstImageDesc->rowPitch; uint32_t DstSlicePitch = DstRowPitch * pDstImageDesc->height; if (pSrcImageDesc->rowPitch == 0) { // Copy from Non-USM memory to host