Skip to content

Commit c6080d1

Browse files
committed
update with refactoring based on the PR comments
1 parent 1088d5a commit c6080d1

File tree

11 files changed

+1831
-2638
lines changed

11 files changed

+1831
-2638
lines changed

xla/backends/profiler/gpu/BUILD

Lines changed: 100 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
23
load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm_is_configured")
34
load("//xla:xla.default.bzl", "xla_cc_test")
@@ -37,7 +38,6 @@ cc_library(
3738
deps = [
3839
":cupti_collector",
3940
":cupti_tracer",
40-
":cupti_tracer_options_utils",
4141
"//xla/tsl/platform:errors",
4242
"//xla/tsl/profiler/utils:time_utils",
4343
"//xla/tsl/util:env_var",
@@ -156,7 +156,9 @@ xla_test(
156156
":cupti_wrapper",
157157
":mock_cupti",
158158
"//xla/tsl/profiler/utils:time_utils",
159+
"@com_google_absl//absl/memory",
159160
"@com_google_googletest//:gtest_main",
161+
"@tsl//tsl/platform:test",
160162
],
161163
)
162164

@@ -222,8 +224,6 @@ cc_library(
222224
"//xla/tsl/profiler/backends/cpu:annotation_stack",
223225
"//xla/tsl/profiler/utils:lock_free_queue",
224226
"//xla/tsl/profiler/utils:per_thread",
225-
"//xla/tsl/profiler/utils:xplane_builder",
226-
"//xla/tsl/profiler/utils:xplane_schema",
227227
"@com_google_absl//absl/base:core_headers",
228228
"@com_google_absl//absl/cleanup",
229229
"@com_google_absl//absl/container:flat_hash_map",
@@ -259,7 +259,6 @@ cc_library(
259259
"cuda-only",
260260
"gpu",
261261
],
262-
visibility = ["//visibility:public"],
263262
deps = [
264263
":cupti_collector",
265264
":cupti_interface",
@@ -286,9 +285,9 @@ cc_library(
286285
"cuda-only",
287286
"gpu",
288287
],
289-
visibility = ["//visibility:public"],
290288
deps = [
291289
":cupti_collector",
290+
":cupti_interface",
292291
"@com_google_absl//absl/status",
293292
"@com_google_absl//absl/time",
294293
],
@@ -307,7 +306,6 @@ cc_library(
307306
"gpu",
308307
"manual", # This target requires CUDA 12.6+, so it is built only when requested via a dependency.
309308
],
310-
visibility = ["//visibility:public"],
311309
deps = [
312310
":cupti_collector",
313311
":cupti_interface",
@@ -359,11 +357,39 @@ cc_library(
359357
],
360358
)
361359

360+
cc_library(
361+
name = "rocm_tracer_utils",
362+
srcs = ["rocm_tracer_utils.cc"],
363+
hdrs = ["rocm_tracer_utils.h"],
364+
deps = [
365+
"//xla/tsl/profiler/backends/cpu:annotation_stack",
366+
"//xla/tsl/profiler/utils:time_utils",
367+
"//xla/tsl/profiler/utils:math_utils",
368+
"@com_google_absl//absl/strings:string_view",
369+
"@com_google_absl//absl/container:flat_hash_map",
370+
"@com_google_absl//absl/container:flat_hash_set",
371+
"@com_google_absl//absl/container:node_hash_map",
372+
"@com_google_absl//absl/container:node_hash_set",
373+
"@tsl//tsl/platform:env_time",
374+
"@tsl//tsl/platform:env",
375+
"@tsl//tsl/platform:errors",
376+
"@tsl//tsl/platform:logging",
377+
"@tsl//tsl/platform:macros",
378+
],
379+
visibility = ["//visibility:public"],
380+
)
381+
362382
cc_library(
363383
name = "rocm_collector",
364384
srcs = ["rocm_collector.cc"],
365385
hdrs = ["rocm_collector.h"],
366386
# copybara:uncomment compatible_with = ["//buildenv/target:non_prod"],
387+
linkopts = select({
388+
"//conditions:default": [
389+
"-L/opt/rocm/lib", # search path for all ROCm shared objects
390+
"-lrocprofiler-sdk", # link against rocprofiler-sdk, which defines the symbols that are otherwise unresolved at link time
391+
],
392+
}),
367393
tags = [
368394
"gpu",
369395
"rocm-only",
@@ -372,6 +398,7 @@ cc_library(
372398
"manual",
373399
]),
374400
deps = [
401+
":rocm_tracer_utils",
375402
"//xla/stream_executor/rocm:roctracer_wrapper",
376403
"//xla/tsl/profiler/backends/cpu:annotation_stack",
377404
"//xla/tsl/profiler/utils:parse_annotation",
@@ -404,6 +431,12 @@ cc_library(
404431
srcs = ["rocm_tracer.cc"],
405432
hdrs = ["rocm_tracer.h"],
406433
# copybara:uncomment compatible_with = ["//buildenv/target:non_prod"],
434+
linkopts = select({
435+
"//conditions:default": [
436+
"-L/opt/rocm/lib", # search path for all ROCm shared objects
437+
"-lrocprofiler-sdk", # link against rocprofiler-sdk, which defines the symbols that are otherwise unresolved at link time
438+
],
439+
}),
407440
tags = [
408441
"gpu",
409442
"rocm-only",
@@ -412,10 +445,15 @@ cc_library(
412445
"manual",
413446
]),
414447
deps = [
448+
":rocm_tracer_utils",
415449
":rocm_collector",
416450
"//xla/stream_executor/rocm:roctracer_wrapper",
417451
"//xla/tsl/profiler/backends/cpu:annotation_stack",
418452
"//xla/tsl/profiler/utils:time_utils",
453+
"//xla/tsl/profiler/utils:xplane_builder",
454+
"//xla/tsl/profiler/utils:xplane_schema",
455+
"//xla/tsl/profiler/utils:xplane_utils",
456+
"//xla/tsl/util:env_var",
419457
"@com_google_absl//absl/container:fixed_array",
420458
"@com_google_absl//absl/container:flat_hash_map",
421459
"@com_google_absl//absl/container:flat_hash_set",
@@ -432,9 +470,64 @@ cc_library(
432470
"@tsl//tsl/platform:status",
433471
"@tsl//tsl/platform:thread_annotations",
434472
"@tsl//tsl/platform:types",
473+
"@tsl//tsl/profiler/lib:profiler_factory",
474+
"@tsl//tsl/profiler/lib:profiler_interface",
435475
],
436476
)
437477

478+
xla_cc_test(
479+
name = "rocm_tracer_test",
480+
size = "small",
481+
srcs = ["rocm_tracer_test.cc"],
482+
tags = [
483+
"gpu",
484+
"rocm",
485+
"rocm-only",
486+
] + if_google([
487+
# Optional: only run internally if ROCm config is enabled
488+
"manual",
489+
]),
490+
deps = [
491+
":rocm_tracer",
492+
":rocm_tracer_utils",
493+
"//xla/tsl/profiler/utils:xplane_builder",
494+
"@com_google_absl//absl/container:flat_hash_map",
495+
"@com_google_googletest//:gtest_main",
496+
"@tsl//tsl/platform:status_matchers",
497+
"@tsl//tsl/platform:test",
498+
"@tsl//tsl/profiler/protobuf:xplane_proto_cc",
499+
],
500+
)
501+
502+
xla_cc_test(
503+
name = "rocm_collector_test",
504+
size = "small",
505+
srcs = ["rocm_collector_test.cc"],
506+
tags = [
507+
"gpu",
508+
"rocm",
509+
"rocm-only",
510+
] + if_google([
511+
"manual",
512+
]),
513+
deps = [
514+
# ":rocm_tracer",
515+
":rocm_collector",
516+
":rocm_tracer_utils",
517+
"//xla/tsl/profiler/utils:xplane_builder",
518+
"@com_google_absl//absl/container:flat_hash_map",
519+
"@com_google_googletest//:gtest_main",
520+
"@tsl//tsl/platform:env_time",
521+
"@tsl//tsl/platform:status_matchers",
522+
"@tsl//tsl/platform:test",
523+
"@tsl//tsl/profiler/protobuf:xplane_proto_cc",
524+
"@tsl//tsl/platform:env",
525+
"@tsl//tsl/platform:errors",
526+
"@tsl//tsl/platform:logging",
527+
"@tsl//tsl/platform:macros",
528+
],
529+
)
530+
438531
cc_library(
439532
name = "nvtx_utils",
440533
srcs = ["nvtx_utils.cc"],
@@ -655,7 +748,6 @@ xla_test(
655748
deps = [
656749
":cupti_collector",
657750
":cupti_error_manager",
658-
":cupti_pm_sampler_stub",
659751
":cupti_tracer",
660752
":cupti_utils",
661753
":cupti_wrapper",
@@ -665,27 +757,4 @@ xla_test(
665757
"@com_google_absl//absl/time",
666758
"@com_google_googletest//:gtest_main",
667759
],
668-
)
669-
670-
cc_library(
671-
name = "cupti_tracer_options_utils",
672-
srcs = ["cupti_tracer_options_utils.cc"],
673-
hdrs = ["cupti_tracer_options_utils.h"],
674-
# copybara:uncomment compatible_with = ["//buildenv/target:non_prod"],
675-
tags = [
676-
"cuda-only",
677-
"gpu",
678-
],
679-
visibility = ["//visibility:public"],
680-
deps = [
681-
":cupti_collector",
682-
":cupti_tracer",
683-
"//xla/tsl/platform:errors",
684-
"//xla/tsl/profiler/utils:profiler_options_util",
685-
"@com_google_absl//absl/container:flat_hash_set",
686-
"@com_google_absl//absl/status",
687-
"@com_google_absl//absl/strings",
688-
"@local_config_cuda//cuda:cuda_headers",
689-
"@tsl//tsl/profiler/protobuf:profiler_options_proto_cc",
690-
],
691-
)
760+
)

0 commit comments

Comments
 (0)