-
Notifications
You must be signed in to change notification settings - Fork 15.9k
Add test for vk_buffer from host memory #19254
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
b34ab0f
47b5f02
60ebeb2
fe438c6
40f23b3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1569,6 +1569,7 @@ struct ggml_vk_garbage_collector { | |
| static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx, vk_context subctx); | ||
| static void ggml_vk_load_shaders(vk_device& device); | ||
| static void ggml_pipeline_allocate_descriptor_sets(ggml_backend_vk_context * ctx); | ||
| static vk_buffer ggml_vk_buffer_from_host_ptr(vk_device & device, void * ptr, size_t size); | ||
|
|
||
| static bool vk_memory_logger_enabled = false; | ||
|
|
||
|
|
@@ -12095,6 +12096,89 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m, | |
| free(d); | ||
| free(d_chk); | ||
| } | ||
|
|
||
| static void ggml_vk_test_single_device_buffer_from_host_ptr(vk_device& device) { | ||
| static std::vector<vk_device> tested_devices; | ||
| if (std::find(tested_devices.begin(), tested_devices.end(), device) == tested_devices.end()) { | ||
| tested_devices.push_back(device); | ||
| } else{ | ||
| return; // Already tested this device, skip | ||
| } | ||
|
Comment on lines
+12101
to
+12106
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The existing tests just run once and abort, it's not intended to keep them active during runtime, so this test isn't necessary. |
||
|
|
||
| VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(" << device->name << ") - Single-device external host memory test"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The debug name is inconsistent with the function name, and the additional text is unusual. Was that to make it easier to spot in the debug log? |
||
|
|
||
| const size_t test_buffer_size = 1024 * 1024; // 1 MB | ||
| const size_t test_buffer_ne = test_buffer_size / sizeof(float); | ||
|
|
||
| // Get required alignment for external memory | ||
| const vk::DeviceSize min_alignment = device->physical_device.getProperties2<vk::PhysicalDeviceProperties2, vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>().get<vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>().minImportedHostPointerAlignment; | ||
|
|
||
| // Allocate aligned host memory using aligned_alloc (C11/POSIX) | ||
| // Note: aligned_alloc requires size to be a multiple of alignment | ||
| const size_t aligned_size = ((test_buffer_size + min_alignment - 1) / min_alignment) * min_alignment; | ||
|
|
||
| VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory: min_alignment = " << min_alignment << ", aligned_size = " << aligned_size); | ||
|
|
||
| // Initialize host memory, which we will use as input to the device | ||
| auto input_deleter = [](float* p) { free(p); }; | ||
| std::unique_ptr<float[], decltype(input_deleter)> input_host_memory( | ||
| static_cast<float*>(aligned_alloc(min_alignment, aligned_size)), | ||
| input_deleter | ||
| ); | ||
|
Comment on lines
+12124
to
+12127
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a little convoluted compared to just going However, to make it easier to understand what is going on, please check if the alloc succeeded and print an error message if not. Same for the other alloc below. |
||
| for (size_t i = 0; i < test_buffer_ne; i++) { | ||
| input_host_memory[i] = static_cast<float>(i); | ||
| } | ||
|
|
||
| // Initialize destination host memory, which we will use to verify results | ||
| auto result_deleter = [](float* p) { free(p); }; | ||
| std::unique_ptr<float[], decltype(result_deleter)> result_host_memory( | ||
| static_cast<float*>(aligned_alloc(min_alignment, aligned_size)), | ||
| result_deleter | ||
| ); | ||
| for (size_t i = 0; i < test_buffer_ne; i++) { | ||
| result_host_memory[i] = 0.0f; // Ensure we know the original contents | ||
| } | ||
|
|
||
| // Allocate an operation on-device destination buffer | ||
| vk_buffer device_dst_buffer = ggml_vk_create_buffer_device(device, test_buffer_size); | ||
|
|
||
| // Allocate an external memory buffer on device, importing the host memory | ||
| vk_buffer external_source_buffer = ggml_vk_buffer_from_host_ptr(device, input_host_memory.get(), test_buffer_size); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same thing as above, to make this easier to understand please check if this actually succeeded and print an error if not. In my test it returned a nullptr and this just crashed the copy_async functions below, so I needed a debugger to figure out which function failed. |
||
|
|
||
| // Allocate an external memory buffer on device for the result | ||
| vk_buffer external_result_buffer = ggml_vk_buffer_from_host_ptr(device, result_host_memory.get(), test_buffer_size); | ||
|
|
||
| vk_context subctx = ggml_vk_create_temporary_context(device->transfer_queue.cmd_pool); | ||
| ggml_vk_ctx_begin(device, subctx); | ||
|
|
||
| // Submit an operation which we can observe accessing data from the source and moving to dst. | ||
| // For simplicity, we use a copy operation here, but this could be any compute operation (SUM, MUL_MAT, etc.). | ||
| ggml_vk_buffer_copy_async(subctx, device_dst_buffer, 0, external_source_buffer, 0, test_buffer_size); | ||
|
|
||
| // Copy from on-device dst to host memory | ||
| // This is really intended to be a copy, as if we had done some compute on the device and now want to read back the results. | ||
| ggml_vk_buffer_copy_async(subctx, external_result_buffer, 0, device_dst_buffer, 0, test_buffer_size); | ||
|
|
||
| // Submit and wait for completion | ||
| ggml_vk_ctx_end(subctx); | ||
| vk::Fence fence = device->device.createFence({}); | ||
| VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): Submitting and waiting for device."); | ||
| ggml_vk_submit(subctx, fence); | ||
| VK_CHECK(device->device.waitForFences(fence, VK_TRUE, UINT64_MAX), "Failed to wait for fence"); | ||
| device->device.destroyFence(fence); | ||
|
|
||
| for (size_t i = 0; i < test_buffer_ne; i++) { | ||
| if (result_host_memory[i] != input_host_memory[i]) { | ||
| VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory(): External host memory test failed at index " << std::dec << i << ": expected " << input_host_memory[i] << ", got " << result_host_memory[i]); | ||
| GGML_ABORT("External host memory test failed"); | ||
| } | ||
| } | ||
|
|
||
| ggml_vk_destroy_buffer(device_dst_buffer); | ||
| ggml_vk_destroy_buffer(external_source_buffer); | ||
| ggml_vk_destroy_buffer(external_result_buffer); | ||
| VK_LOG_DEBUG("ggml_vk_test_single_device_external_memory() - Passed single-device, single-buffer external host memory test."); | ||
| } | ||
| #endif | ||
|
|
||
| static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx, vk_context subctx) { | ||
|
|
@@ -12184,6 +12268,14 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx, vk_contex | |
| } | ||
| } | ||
|
|
||
| for (size_t device_idx = 0; device_idx < GGML_VK_MAX_DEVICES; device_idx++) { | ||
| vk_device device = vk_instance.devices[device_idx]; | ||
| if (device == nullptr) { | ||
| continue; | ||
| } | ||
| ggml_vk_test_single_device_buffer_from_host_ptr(device); | ||
| } | ||
|
|
||
| GGML_ABORT("fatal error"); | ||
| #endif | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.