Skip to content

Commit 32d4899

Browse files
cj401-amd and Google-ML-Automation
authored and committed
PR #29769: [ROCm] upstream using rocprofiler-sdk (v3) for tracing AMD GPU events
Imported from GitHub PR #29769 @xla-rotation, would you please kindly help review this PR? We are phasing out development and support for roctracer/rocprofiler/rocprof/rocprofv2 in favor of rocprofiler-sdk (v3) in upcoming ROCm releases. rocprofiler-sdk (v3) also moves away from cupti. This PR integrates rocprofiler-sdk (v3) into XLA for profiling GPU events on AMD GPUs. - Integrate rocprofiler-sdk (v3) into XLA for improved profiling of GPU events, supporting both time-based and step-based profiling. - Add unit tests for rocm_collector and rocm_tracer. Copybara import of the project: -- 99fe3f6 by cj401-amd <[email protected]>: update with refactoring based on the PR comments -- 3f015cb by cj401-amd <[email protected]>: update with VLOG(2) and fix a bug, new line at the EoF -- 04e2b6b by cj401-amd <[email protected]>: update the macro -- 88fc01a by cj401-amd <[email protected]>: update header files -- e664bcb by cj401-amd <[email protected]>: update a: addressing comments, b: add kernel details, c: update rocprofiler-sdk path Merging this change closes #29769 FUTURE_COPYBARA_INTEGRATE_REVIEW=#29769 from ROCm:ci_cj-rocprof-v3-roctracer-v1 e664bcb PiperOrigin-RevId: 808483340
1 parent 4d09558 commit 32d4899

File tree

14 files changed

+1826
-2656
lines changed

14 files changed

+1826
-2656
lines changed

third_party/gpus/rocm/BUILD.tpl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,19 @@ cc_library(
349349
deps = [":rocm_config"],
350350
)
351351

352+
cc_library(
353+
name = "rocprofiler-sdk",
354+
srcs = glob(["%{rocm_root}/lib/librocprofiler-sdk*.so*"]),
355+
hdrs = glob(["%{rocm_root}/include/rocprofiler-sdk/**"]),
356+
include_prefix = "rocm",
357+
includes = [
358+
"%{rocm_root}/include/",
359+
],
360+
strip_include_prefix = "%{rocm_root}",
361+
visibility = ["//visibility:public"],
362+
deps = [":rocm_config"],
363+
)
364+
352365
cc_library(
353366
name = "rocsolver",
354367
srcs = glob(["%{rocm_root}/lib/librocsolver*.so*"]),

third_party/gpus/rocm_configure.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ def _find_libs(repository_ctx, rocm_config, miopen_path, rccl_path, bash_bin):
295295
("hipsparse", rocm_config.rocm_toolkit_path),
296296
("roctracer64", rocm_config.rocm_toolkit_path),
297297
("rocsolver", rocm_config.rocm_toolkit_path),
298+
("rocprofiler-sdk", rocm_config.rocm_toolkit_path),
298299
]
299300
]
300301
if int(rocm_config.rocm_version_number) >= 40500:

xla/backends/profiler/gpu/BUILD

Lines changed: 88 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ cc_library(
3838
deps = [
3939
":cupti_collector",
4040
":cupti_tracer",
41-
":cupti_tracer_options_utils",
4241
"//xla/tsl/platform:errors",
4342
"//xla/tsl/profiler/utils:time_utils",
4443
"//xla/tsl/util:env_var",
@@ -157,7 +156,9 @@ xla_test(
157156
":cupti_wrapper",
158157
":mock_cupti",
159158
"//xla/tsl/profiler/utils:time_utils",
159+
"@com_google_absl//absl/memory",
160160
"@com_google_googletest//:gtest_main",
161+
"@tsl//tsl/platform:test",
161162
],
162163
)
163164

@@ -223,8 +224,6 @@ cc_library(
223224
"//xla/tsl/profiler/backends/cpu:annotation_stack",
224225
"//xla/tsl/profiler/utils:lock_free_queue",
225226
"//xla/tsl/profiler/utils:per_thread",
226-
"//xla/tsl/profiler/utils:xplane_builder",
227-
"//xla/tsl/profiler/utils:xplane_schema",
228227
"@com_google_absl//absl/base:core_headers",
229228
"@com_google_absl//absl/cleanup",
230229
"@com_google_absl//absl/container:flat_hash_map",
@@ -260,7 +259,6 @@ cc_library(
260259
"cuda-only",
261260
"gpu",
262261
],
263-
visibility = ["//visibility:public"],
264262
deps = [
265263
":cupti_collector",
266264
":cupti_interface",
@@ -287,9 +285,9 @@ cc_library(
287285
"cuda-only",
288286
"gpu",
289287
],
290-
visibility = ["//visibility:public"],
291288
deps = [
292289
":cupti_collector",
290+
":cupti_interface",
293291
"@com_google_absl//absl/status",
294292
"@com_google_absl//absl/time",
295293
],
@@ -308,7 +306,6 @@ cc_library(
308306
"gpu",
309307
"manual", # This target requires CUDA 12.6+, therefore we only built it if it was requested via a dependency.
310308
],
311-
visibility = ["//visibility:public"],
312309
deps = [
313310
":cupti_collector",
314311
":cupti_interface",
@@ -360,11 +357,32 @@ cc_library(
360357
],
361358
)
362359

360+
cc_library(
361+
name = "rocm_tracer_utils",
362+
srcs = ["rocm_tracer_utils.cc"],
363+
hdrs = ["rocm_tracer_utils.h"],
364+
visibility = ["//visibility:public"],
365+
deps = [
366+
"//xla/tsl/profiler/backends/cpu:annotation_stack",
367+
"//xla/tsl/profiler/utils:math_utils",
368+
"//xla/tsl/profiler/utils:time_utils",
369+
"@com_google_absl//absl/container:flat_hash_map",
370+
"@com_google_absl//absl/container:flat_hash_set",
371+
"@com_google_absl//absl/container:node_hash_map",
372+
"@com_google_absl//absl/container:node_hash_set",
373+
"@com_google_absl//absl/strings:string_view",
374+
"@tsl//tsl/platform:env",
375+
"@tsl//tsl/platform:env_time",
376+
"@tsl//tsl/platform:errors",
377+
"@tsl//tsl/platform:logging",
378+
"@tsl//tsl/platform:macros",
379+
],
380+
)
381+
363382
cc_library(
364383
name = "rocm_collector",
365384
srcs = ["rocm_collector.cc"],
366385
hdrs = ["rocm_collector.h"],
367-
# copybara:uncomment compatible_with = ["//buildenv/target:non_prod"],
368386
tags = [
369387
"gpu",
370388
"rocm-only",
@@ -373,6 +391,7 @@ cc_library(
373391
"manual",
374392
]),
375393
deps = [
394+
":rocm_tracer_utils",
376395
"//xla/stream_executor/rocm:roctracer_wrapper",
377396
"//xla/tsl/profiler/backends/cpu:annotation_stack",
378397
"//xla/tsl/profiler/utils:parse_annotation",
@@ -388,6 +407,7 @@ cc_library(
388407
"@com_google_absl//absl/strings",
389408
"@com_google_absl//absl/strings:str_format",
390409
"@com_google_absl//absl/synchronization",
410+
"@local_config_rocm//rocm:rocprofiler-sdk",
391411
"@tsl//tsl/platform:abi",
392412
"@tsl//tsl/platform:env_time",
393413
"@tsl//tsl/platform:errors",
@@ -404,7 +424,6 @@ cc_library(
404424
name = "rocm_tracer",
405425
srcs = ["rocm_tracer.cc"],
406426
hdrs = ["rocm_tracer.h"],
407-
# copybara:uncomment compatible_with = ["//buildenv/target:non_prod"],
408427
tags = [
409428
"gpu",
410429
"rocm-only",
@@ -414,9 +433,14 @@ cc_library(
414433
]),
415434
deps = [
416435
":rocm_collector",
436+
":rocm_tracer_utils",
417437
"//xla/stream_executor/rocm:roctracer_wrapper",
418438
"//xla/tsl/profiler/backends/cpu:annotation_stack",
419439
"//xla/tsl/profiler/utils:time_utils",
440+
"//xla/tsl/profiler/utils:xplane_builder",
441+
"//xla/tsl/profiler/utils:xplane_schema",
442+
"//xla/tsl/profiler/utils:xplane_utils",
443+
"//xla/tsl/util:env_var",
420444
"@com_google_absl//absl/container:fixed_array",
421445
"@com_google_absl//absl/container:flat_hash_map",
422446
"@com_google_absl//absl/container:flat_hash_set",
@@ -425,6 +449,7 @@ cc_library(
425449
"@com_google_absl//absl/status",
426450
"@com_google_absl//absl/synchronization",
427451
"@local_config_rocm//rocm:rocm_headers",
452+
"@local_config_rocm//rocm:rocprofiler-sdk",
428453
"@tsl//tsl/platform:env",
429454
"@tsl//tsl/platform:errors",
430455
"@tsl//tsl/platform:logging",
@@ -433,6 +458,61 @@ cc_library(
433458
"@tsl//tsl/platform:status",
434459
"@tsl//tsl/platform:thread_annotations",
435460
"@tsl//tsl/platform:types",
461+
"@tsl//tsl/profiler/lib:profiler_factory",
462+
"@tsl//tsl/profiler/lib:profiler_interface",
463+
],
464+
)
465+
466+
xla_cc_test(
467+
name = "rocm_tracer_test",
468+
size = "small",
469+
srcs = ["rocm_tracer_test.cc"],
470+
tags = [
471+
"gpu",
472+
"rocm",
473+
"rocm-only",
474+
] + if_google([
475+
# Optional: only run internally if ROCm config is enabled
476+
"manual",
477+
]),
478+
deps = [
479+
":rocm_tracer",
480+
":rocm_tracer_utils",
481+
"//xla/tsl/profiler/utils:xplane_builder",
482+
"@com_google_absl//absl/container:flat_hash_map",
483+
"@com_google_googletest//:gtest_main",
484+
"@tsl//tsl/platform:status_matchers",
485+
"@tsl//tsl/platform:test",
486+
"@tsl//tsl/profiler/protobuf:xplane_proto_cc",
487+
],
488+
)
489+
490+
xla_cc_test(
491+
name = "rocm_collector_test",
492+
size = "small",
493+
srcs = ["rocm_collector_test.cc"],
494+
tags = [
495+
"gpu",
496+
"rocm",
497+
"rocm-only",
498+
] + if_google([
499+
"manual",
500+
]),
501+
deps = [
502+
# ":rocm_tracer",
503+
":rocm_collector",
504+
":rocm_tracer_utils",
505+
"//xla/tsl/profiler/utils:xplane_builder",
506+
"@com_google_absl//absl/container:flat_hash_map",
507+
"@com_google_googletest//:gtest_main",
508+
"@tsl//tsl/platform:env_time",
509+
"@tsl//tsl/platform:status_matchers",
510+
"@tsl//tsl/platform:test",
511+
"@tsl//tsl/profiler/protobuf:xplane_proto_cc",
512+
"@tsl//tsl/platform:env",
513+
"@tsl//tsl/platform:errors",
514+
"@tsl//tsl/platform:logging",
515+
"@tsl//tsl/platform:macros",
436516
],
437517
)
438518

@@ -707,7 +787,6 @@ xla_test(
707787
deps = [
708788
":cupti_collector",
709789
":cupti_error_manager",
710-
":cupti_pm_sampler_stub",
711790
":cupti_tracer",
712791
":cupti_utils",
713792
":cupti_wrapper",
@@ -718,26 +797,3 @@ xla_test(
718797
"@com_google_googletest//:gtest_main",
719798
],
720799
)
721-
722-
cc_library(
723-
name = "cupti_tracer_options_utils",
724-
srcs = ["cupti_tracer_options_utils.cc"],
725-
hdrs = ["cupti_tracer_options_utils.h"],
726-
# copybara:uncomment compatible_with = ["//buildenv/target:non_prod"],
727-
tags = [
728-
"cuda-only",
729-
"gpu",
730-
],
731-
visibility = ["//visibility:public"],
732-
deps = [
733-
":cupti_collector",
734-
":cupti_tracer",
735-
"//xla/tsl/platform:errors",
736-
"//xla/tsl/profiler/utils:profiler_options_util",
737-
"@com_google_absl//absl/container:flat_hash_set",
738-
"@com_google_absl//absl/status",
739-
"@com_google_absl//absl/strings",
740-
"@local_config_cuda//cuda:cuda_headers",
741-
"@tsl//tsl/profiler/protobuf:profiler_options_proto_cc",
742-
],
743-
)

0 commit comments

Comments
 (0)