Skip to content

Commit 338fa1e

Browse files
riverlijunjie, peterchen-intel, sshlyapn
authored
[GPU] migrate pa/sdpa to new infra ocl_v2 (#31201)
### Details:
- migrate PA/SDPA primitive impl to ocl_v2 infra

### Tickets:
- [CVS-169326](https://jira.devtools.intel.com/browse/CVS-169326)

---------

Co-authored-by: Chen Peter <[email protected]>
Co-authored-by: Sergey Shlyapnikov <[email protected]>
1 parent 576b27d commit 338fa1e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+4955
-5158
lines changed

src/plugins/intel_gpu/src/graph/debug_helper.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,31 @@ NodeDebugHelper::~NodeDebugHelper() {
466466
dump_raw);
467467
}
468468
}
469+
for (size_t i = 0; i < m_inst.get_intermediates_memories().size(); i++) {
470+
std::string name = get_file_prefix() + "_intermediates_" + std::to_string(i);
471+
auto output_mem = m_inst.get_intermediates_memories()[i];
472+
if (output_mem == nullptr) {
473+
GPU_DEBUG_COUT << " intermediates_mem is nullptr. Nothing to dump." << std::endl;
474+
continue;
475+
}
469476

477+
auto& output_layout = output_mem->get_layout();
478+
if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary) {
479+
// Binary dump : raw
480+
auto filename = get_file_path_for_binary_dump(output_layout, name, config.get_dump_tensors_path());
481+
482+
mem_lock<char, mem_lock_type::read> lock(output_mem, m_stream);
483+
ov::util::save_binary(filename, lock.data(), output_mem->size());
484+
GPU_DEBUG_COUT << " Dump layer dst : " << layer_name << " to " << filename << std::endl;
485+
debug_str_for_bin_load += (filename + ",");
486+
} else {
487+
const bool dump_raw = config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::text_raw;
488+
GPU_DEBUG_COUT << " Dump " << (dump_raw ? "raw " : "") << name << std::endl;
489+
auto filename = config.get_dump_tensors_path() + get_name_for_dump(name) + ".txt";
490+
// Text dump
491+
log_memory_to_file(output_mem, output_layout, m_stream, filename, dump_raw);
492+
}
493+
}
470494
if (config.get_dump_tensors_format() == ov::intel_gpu::DumpFormat::binary && m_inst.is_input()) {
471495
debug_str_for_bin_load[debug_str_for_bin_load.size()-1] = '\"';
472496
GPU_DEBUG_COUT << debug_str_for_bin_load << std::endl;;

src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -213,13 +213,6 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code,
213213

214214
current_batch.has_microkernels |= kernel_string->has_microkernels;
215215

216-
// TODO: Technically, microkernels doesn't require specific headers, but we don't want to include
217-
// some headers to all batches as it may lead to compilation error on some driver versions.
218-
// Need to generalize work with headers to include only necessary parts
219-
if (current_batch.has_microkernels) {
220-
current_batch.source.insert(current_batch.source.begin(), current_batch.micro_headers.begin(), current_batch.micro_headers.end());
221-
}
222-
223216
current_batch.source.push_back(std::move(full_code));
224217
current_batch.kernels_counter++;
225218
}

src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp

Lines changed: 0 additions & 1177 deletions
This file was deleted.

src/plugins/intel_gpu/src/graph/impls/ocl/register.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ void register_implementations() {
3535
REGISTER_OCL(generate_proposals);
3636
REGISTER_OCL(grid_sample);
3737
REGISTER_OCL(kv_cache);
38-
REGISTER_OCL(paged_attention);
3938
REGISTER_OCL(lrn);
4039
REGISTER_OCL(multiclass_nms);
4140
REGISTER_OCL(multinomial);
@@ -81,7 +80,6 @@ void register_implementations() {
8180
REGISTER_OCL(eye);
8281
REGISTER_OCL(unique_count);
8382
REGISTER_OCL(unique_gather);
84-
REGISTER_OCL(scaled_dot_product_attention);
8583
REGISTER_OCL(search_sorted);
8684
REGISTER_OCL(STFT);
8785
REGISTER_OCL(ISTFT);

src/plugins/intel_gpu/src/graph/impls/ocl/register.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,7 @@
6262
#include "intel_gpu/primitives/non_zero.hpp"
6363
#include "intel_gpu/primitives/eye.hpp"
6464
#include "intel_gpu/primitives/unique.hpp"
65-
#include "intel_gpu/primitives/paged_attention.hpp"
6665
#include "intel_gpu/primitives/kv_cache.hpp"
67-
#include "intel_gpu/primitives/scaled_dot_product_attention.hpp"
6866

6967
namespace cldnn {
7068
namespace ocl {
@@ -106,7 +104,6 @@ REGISTER_OCL(gemm);
106104
REGISTER_OCL(generate_proposals);
107105
REGISTER_OCL(grid_sample);
108106
REGISTER_OCL(kv_cache);
109-
REGISTER_OCL(paged_attention);
110107
REGISTER_OCL(lrn);
111108
REGISTER_OCL(multiclass_nms);
112109
REGISTER_OCL(multinomial);
@@ -152,7 +149,6 @@ REGISTER_OCL(gather_nonzero);
152149
REGISTER_OCL(eye);
153150
REGISTER_OCL(unique_count);
154151
REGISTER_OCL(unique_gather);
155-
REGISTER_OCL(scaled_dot_product_attention);
156152
REGISTER_OCL(search_sorted);
157153
REGISTER_OCL(STFT);
158154
REGISTER_OCL(ISTFT);

0 commit comments

Comments
 (0)