1010
#include "program.hpp"

#include <cstring>

#ifdef SYCL_ENABLE_KERNEL_FUSION
#include <amd_comgr/amd_comgr.h>
15+ namespace {
16+ template <typename ReleaseType, ReleaseType Release, typename T>
17+ struct COMgrObjCleanUp {
18+ COMgrObjCleanUp (T Obj) : Obj{Obj} {}
19+ ~COMgrObjCleanUp () { Release (Obj); }
20+ T Obj;
21+ };
22+
23+ using COMgrDataTCleanUp =
24+ COMgrObjCleanUp<decltype (&amd_comgr_release_data), &amd_comgr_release_data,
25+ amd_comgr_data_t >;
26+ using COMgrDataSetTCleanUp =
27+ COMgrObjCleanUp<decltype (&amd_comgr_destroy_data_set),
28+ &amd_comgr_destroy_data_set, amd_comgr_data_set_t >;
29+ using COMgrActionInfoCleanUp =
30+ COMgrObjCleanUp<decltype (&amd_comgr_destroy_action_info),
31+ &amd_comgr_destroy_action_info, amd_comgr_action_info_t >;
32+
33+ void getCoMgrBuildLog (const amd_comgr_data_set_t BuildDataSet, char *BuildLog,
34+ size_t MaxLogSize) {
35+ size_t count = 0 ;
36+ amd_comgr_status_t status = amd_comgr_action_data_count (
37+ BuildDataSet, AMD_COMGR_DATA_KIND_LOG, &count);
38+
39+ if (status != AMD_COMGR_STATUS_SUCCESS || count == 0 ) {
40+ std::strcpy (BuildLog, " extracting build log failed (no log)." );
41+ return ;
42+ }
43+
44+ amd_comgr_data_t LogBinaryData;
45+
46+ if (amd_comgr_action_data_get_data (BuildDataSet, AMD_COMGR_DATA_KIND_LOG, 0 ,
47+ &LogBinaryData) !=
48+ AMD_COMGR_STATUS_SUCCESS) {
49+ std::strcpy (BuildLog, " extracting build log failed (no data)." );
50+ return ;
51+ }
52+ COMgrDataTCleanUp LogDataCleanup{LogBinaryData};
53+
54+ size_t binarySize = 0 ;
55+ if (amd_comgr_get_data (LogBinaryData, &binarySize, NULL ) !=
56+ AMD_COMGR_STATUS_SUCCESS) {
57+ std::strcpy (BuildLog, " extracting build log failed (no log size)." );
58+ return ;
59+ }
60+
61+ if (binarySize == 0 ) {
62+ std::strcpy (BuildLog, " no log." );
63+ return ;
64+ }
65+
66+ size_t bufSize = binarySize < MaxLogSize ? binarySize : MaxLogSize;
67+
68+ if (amd_comgr_get_data (LogBinaryData, &bufSize, BuildLog) !=
69+ AMD_COMGR_STATUS_SUCCESS) {
70+ std::strcpy (BuildLog, " extracting build log failed (cannot copy log)." );
71+ return ;
72+ }
73+ }
74+ } // namespace
75+ #endif
76+
1377ur_program_handle_t_::ur_program_handle_t_ (ur_context_handle_t Ctxt)
14- : Module{nullptr }, Binary{}, BinarySizeInBytes{0 }, RefCount{1 }, Context{
15- Ctxt} {
78+ : Module{nullptr }, Binary{}, BinarySizeInBytes{0 }, RefCount{1 },
79+ Context{ Ctxt} {
1680 urContextRetain (Context);
1781}
1882
1983ur_program_handle_t_::~ur_program_handle_t_ () { urContextRelease (Context); }
2084
85+ ur_result_t
86+ ur_program_handle_t_::setMetadata (const ur_program_metadata_t *Metadata,
87+ size_t Length) {
88+ for (size_t i = 0 ; i < Length; ++i) {
89+ const ur_program_metadata_t MetadataElement = Metadata[i];
90+ std::string MetadataElementName{MetadataElement.pName };
91+
92+ if (MetadataElementName ==
93+ __SYCL_UR_PROGRAM_METADATA_TAG_NEED_FINALIZATION) {
94+ assert (MetadataElement.type == UR_PROGRAM_METADATA_TYPE_UINT32);
95+ IsRelocatable = MetadataElement.value .data32 ;
96+ }
97+ }
98+ return UR_RESULT_SUCCESS;
99+ }
100+
21101ur_result_t ur_program_handle_t_::setBinary (const char *Source, size_t Length) {
22102 // Do not re-set program binary data which has already been set as that will
23103 // delete the old binary data.
@@ -28,7 +108,80 @@ ur_result_t ur_program_handle_t_::setBinary(const char *Source, size_t Length) {
28108 return UR_RESULT_SUCCESS;
29109}
30110
111+ ur_result_t ur_program_handle_t_::finalizeRelocatable () {
112+ #ifndef SYCL_ENABLE_KERNEL_FUSION
113+ assert (false && " Relocation only available with fusion" );
114+ return UR_RESULT_ERROR_UNKNOWN;
115+ #else
116+ assert (IsRelocatable && " Not a relocatable input" );
117+ amd_comgr_data_t ComgrData;
118+ amd_comgr_data_set_t RelocatableData;
119+ UR_CHECK_ERROR (amd_comgr_create_data_set (&RelocatableData));
120+ COMgrDataSetTCleanUp RelocatableDataCleanup{RelocatableData};
121+
122+ UR_CHECK_ERROR (
123+ amd_comgr_create_data (AMD_COMGR_DATA_KIND_RELOCATABLE, &ComgrData));
124+ // RAII for auto clean-up
125+ COMgrDataTCleanUp DataCleanup{ComgrData};
126+ UR_CHECK_ERROR (amd_comgr_set_data (ComgrData, BinarySizeInBytes, Binary));
127+ UR_CHECK_ERROR (amd_comgr_set_data_name (ComgrData, " jit_obj.o" ));
128+
129+ UR_CHECK_ERROR (amd_comgr_data_set_add (RelocatableData, ComgrData));
130+
131+ amd_comgr_action_info_t Action;
132+
133+ UR_CHECK_ERROR (amd_comgr_create_action_info (&Action));
134+ COMgrActionInfoCleanUp ActionCleanUp{Action};
135+
136+ std::string ISA = " amdgcn-amd-amdhsa--" ;
137+ hipDeviceProp_t Props;
138+ detail::ur::assertion (hipGetDeviceProperties (
139+ &Props, Context->getDevice ()->get ()) == hipSuccess);
140+ ISA += Props.gcnArchName ;
141+ UR_CHECK_ERROR (amd_comgr_action_info_set_isa_name (Action, ISA.data ()));
142+
143+ UR_CHECK_ERROR (amd_comgr_action_info_set_logging (Action, true ));
144+
145+ amd_comgr_data_set_t Output;
146+ UR_CHECK_ERROR (amd_comgr_create_data_set (&Output));
147+ COMgrDataSetTCleanUp OutputDataCleanup{Output};
148+
149+ if (amd_comgr_do_action (AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE,
150+ Action, RelocatableData,
151+ Output) != AMD_COMGR_STATUS_SUCCESS) {
152+ getCoMgrBuildLog (Output, ErrorLog, MAX_LOG_SIZE);
153+ return UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE;
154+ }
155+ amd_comgr_data_t binaryData;
156+
157+ UR_CHECK_ERROR (amd_comgr_action_data_get_data (
158+ Output, AMD_COMGR_DATA_KIND_EXECUTABLE, 0 , &binaryData));
159+ {
160+ COMgrDataTCleanUp binaryDataCleanUp{binaryData};
161+
162+ size_t binarySize = 0 ;
163+ UR_CHECK_ERROR (amd_comgr_get_data (binaryData, &binarySize, NULL ));
164+
165+ ExecutableCache.resize (binarySize);
166+
167+ UR_CHECK_ERROR (
168+ amd_comgr_get_data (binaryData, &binarySize, ExecutableCache.data ()));
169+ }
170+ Binary = ExecutableCache.data ();
171+ BinarySizeInBytes = ExecutableCache.size ();
172+ return UR_RESULT_SUCCESS;
173+ #endif
174+ }
175+
31176ur_result_t ur_program_handle_t_::buildProgram (const char *BuildOptions) {
177+ if (IsRelocatable) {
178+ if (finalizeRelocatable () != UR_RESULT_SUCCESS) {
179+ BuildStatus = UR_PROGRAM_BUILD_STATUS_ERROR;
180+ return UR_RESULT_ERROR_PROGRAM_BUILD_FAILURE;
181+ }
182+ IsRelocatable = false ;
183+ }
184+
32185 if (BuildOptions) {
33186 this ->BuildOptions = BuildOptions;
34187 }
@@ -246,7 +399,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
246399// / Note: Only supports one device
247400UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary (
248401 ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
249- const uint8_t *pBinary, const ur_program_properties_t *,
402+ const uint8_t *pBinary, const ur_program_properties_t *pProperties ,
250403 ur_program_handle_t *phProgram) {
251404 UR_ASSERT (pBinary != nullptr && size != 0 , UR_RESULT_ERROR_INVALID_BINARY);
252405 UR_ASSERT (hContext->getDevice ()->get () == hDevice->get (),
@@ -259,6 +412,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
259412
260413 // TODO: Set metadata here and use reqd_work_group_size information.
261414 // See urProgramCreateWithBinary in CUDA adapter.
415+ if (pProperties) {
416+ if (pProperties->count > 0 && pProperties->pMetadatas == nullptr ) {
417+ return UR_RESULT_ERROR_INVALID_NULL_POINTER;
418+ } else if (pProperties->count == 0 && pProperties->pMetadatas != nullptr ) {
419+ return UR_RESULT_ERROR_INVALID_SIZE;
420+ }
421+ Result =
422+ RetProgram->setMetadata (pProperties->pMetadatas , pProperties->count );
423+ }
424+ UR_ASSERT (Result == UR_RESULT_SUCCESS, Result);
262425
263426 auto pBinary_string = reinterpret_cast <const char *>(pBinary);
264427 if (size == 0 ) {
0 commit comments