Skip to content

Commit 042fc71

Browse files
bwelton and claude committed
[rocprofiler-sdk] Add ROCPROFILER_ONDEMAND_QUEUE env flag for on-demand queue control
Gate the on-demand queue creation/destruction behavior behind the `ROCPROFILER_ONDEMAND_QUEUE` environment variable.

When set to `1` or `true`, profile queues are created in `start_context` and destroyed in `stop_context`. When unset (the default), the original behavior is preserved: persistent queues are created during `AgentCache` initialization.

This allows controlled rollout and A/B testing of the on-demand queue optimization, which eliminates ~130 ms of TTFT overhead caused by persistent GPU queues in the MES runlist.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 5a8d276 commit 042fc71

File tree

3 files changed

+37
-15
lines changed

3 files changed

+37
-15
lines changed

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/device_counting.cpp

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -433,7 +433,11 @@ start_agent_ctx(const context::context* ctx)
433433
}
434434

435435
// On-demand: create the profile queue now (destroyed in stop_agent_ctx)
436-
agent->init_device_counting_service_queue(*hsa::get_core_table(), *hsa::get_amd_ext_table());
436+
if(hsa::use_ondemand_queue())
437+
{
438+
agent->init_device_counting_service_queue(*hsa::get_core_table(),
439+
*hsa::get_amd_ext_table());
440+
}
437441

438442
// But if we have an agent cache, we need a profile queue.
439443
if(!agent->profile_queue())
@@ -608,17 +612,22 @@ stop_agent_ctx(const context::context* ctx)
608612
}
609613

610614
// On-demand cleanup: destroy signals, reset packet, destroy queue
611-
if(callback_data.completion.handle != 0) {
612-
hsa::get_core_table()->hsa_signal_destroy_fn(callback_data.completion);
613-
callback_data.completion.handle = 0;
614-
}
615-
if(callback_data.start_signal.handle != 0) {
616-
hsa::get_core_table()->hsa_signal_destroy_fn(callback_data.start_signal);
617-
callback_data.start_signal.handle = 0;
615+
if(hsa::use_ondemand_queue())
616+
{
617+
if(callback_data.completion.handle != 0)
618+
{
619+
hsa::get_core_table()->hsa_signal_destroy_fn(callback_data.completion);
620+
callback_data.completion.handle = 0;
621+
}
622+
if(callback_data.start_signal.handle != 0)
623+
{
624+
hsa::get_core_table()->hsa_signal_destroy_fn(callback_data.start_signal);
625+
callback_data.start_signal.handle = 0;
626+
}
627+
callback_data.packet.reset();
628+
callback_data.queue = nullptr;
629+
agent->destroy_device_counting_service_queue();
618630
}
619-
callback_data.packet.reset();
620-
callback_data.queue = nullptr;
621-
agent->destroy_device_counting_service_queue();
622631
}
623632

624633
agent_ctx.status.exchange(rocprofiler::context::device_counting_service::state::DISABLED);

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/agent_cache.cpp

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
// THE SOFTWARE.
2222

2323
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
24+
#include "lib/common/environment.hpp"
2425
#include "lib/common/logging.hpp"
2526

2627
#include <fmt/core.h>
@@ -120,6 +121,13 @@ namespace rocprofiler
120121
{
121122
namespace hsa
122123
{
124+
bool
125+
use_ondemand_queue()
126+
{
127+
static bool value = rocprofiler::common::get_env("ROCPROFILER_ONDEMAND_QUEUE", false);
128+
return value;
129+
}
130+
123131
void
124132
AgentCache::init_device_counting_service_queue(const CoreApiTable& api,
125133
const AmdExtTable& ext) const
@@ -172,7 +180,8 @@ AgentCache::AgentCache(const rocprofiler_agent_t* rocp_agent,
172180
{
173181
init_cpu_pool(ext_table, *this);
174182
init_gpu_pool(ext_table, *this);
175-
// init_device_counting_service_queue(api, ext_table); // Deferred to start_context for on-demand queue
183+
// When on-demand queue mode is enabled, defer queue creation to start_context
184+
if(!use_ondemand_queue()) init_device_counting_service_queue(api, ext_table);
176185
} catch(std::runtime_error& e)
177186
{
178187
ROCP_WARNING << fmt::format(
@@ -182,7 +191,6 @@ AgentCache::AgentCache(const rocprofiler_agent_t* rocp_agent,
182191
}
183192
}
184193

185-
186194
void
187195
AgentCache::destroy_device_counting_service_queue() const
188196
{
@@ -192,8 +200,7 @@ AgentCache::destroy_device_counting_service_queue() const
192200
if(!m_profile_queue) return;
193201

194202
auto* api = hsa::get_core_table();
195-
if(api && api->hsa_queue_destroy_fn)
196-
api->hsa_queue_destroy_fn(m_profile_queue);
203+
if(api && api->hsa_queue_destroy_fn) api->hsa_queue_destroy_fn(m_profile_queue);
197204
m_profile_queue = nullptr;
198205
}
199206

projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/agent_cache.hpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,5 +107,11 @@ AgentCache::operator==(hsa_agent_t agent) const
107107
{
108108
return (agent.handle == m_hsa_agent.handle);
109109
}
110+
/// Returns true when ROCPROFILER_ONDEMAND_QUEUE=1|true is set.
111+
/// In on-demand mode, the profile queue is created in start_context
112+
/// and destroyed in stop_context instead of being persistent.
113+
bool
114+
use_ondemand_queue();
115+
110116
} // namespace hsa
111117
} // namespace rocprofiler

0 commit comments

Comments
 (0)