Skip to content

Commit e9eff75

Browse files
committed
NA OFI: add counters for monitoring tx/rx/rma/cq counts
Monitor mr and addr counts
NA: add NA_Diag_dump_counters() routine to dump counters if HG_LOG_LEVEL>=min_debug is set
HG Core: clean up counters
HG util: add ability to remove counters
1 parent f3050ff commit e9eff75

File tree

12 files changed

+411
-89
lines changed

12 files changed

+411
-89
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ endif()
192192
# Enable diagnostics counters separately from debug.
193193
#------------------------------------------------------------------------------
194194
option(MERCURY_ENABLE_COUNTERS "Enable diagnostics counters." OFF)
195-
if(MERCURY_ENABLE_COUNTERS)
195+
if(MERCURY_ENABLE_COUNTERS AND NOT WIN32)
196196
set(HG_HAS_DIAG 1)
197197
else()
198198
set(HG_HAS_DIAG 0)

src/mercury.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1178,6 +1178,7 @@ HG_Diag_dump_counters(void)
11781178
#ifndef _WIN32
11791179
hg_log_dump_counters(&HG_LOG_OUTLET(hg_diag));
11801180
#endif
1181+
NA_Diag_dump_counters();
11811182
}
11821183

11831184
/*---------------------------------------------------------------------------*/

src/mercury_core.c

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ struct hg_core_private_class {
185185
struct hg_core_map rpc_map; /* RPC Map */
186186
struct hg_core_more_data_cb more_data_cb; /* More data callbacks */
187187
na_tag_t request_max_tag; /* Max value for tag */
188-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
188+
#ifdef HG_HAS_DIAG
189189
struct hg_core_counters counters; /* Diag counters */
190190
#endif
191191
hg_atomic_int32_t n_contexts; /* Total number of contexts */
@@ -369,7 +369,7 @@ struct hg_core_private_handle {
369369
uint8_t cookie; /* Cookie */
370370
bool multi_recv_copy; /* Copy on multi-recv */
371371
bool reuse; /* Re-use handle once ref_count is 0 */
372-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
372+
#ifdef HG_HAS_DIAG
373373
bool active;
374374
#endif
375375
};
@@ -405,7 +405,7 @@ hg_core_op_type_to_string(enum hg_core_op_type op_type);
405405
/**
406406
* Init counters.
407407
*/
408-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
408+
#ifdef HG_HAS_DIAG
409409
static void
410410
hg_core_counters_init(struct hg_core_counters *hg_core_counters);
411411
#endif
@@ -447,7 +447,7 @@ hg_core_finalize(struct hg_core_private_class *hg_core_class);
447447
/**
448448
* Get counters.
449449
*/
450-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
450+
#ifdef HG_HAS_DIAG
451451
static void
452452
hg_core_class_get_counters(const struct hg_core_counters *counters,
453453
struct hg_diag_counters *diag_counters);
@@ -1091,7 +1091,7 @@ hg_core_op_type_to_string(enum hg_core_op_type op_type)
10911091
#endif
10921092

10931093
/*---------------------------------------------------------------------------*/
1094-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
1094+
#ifdef HG_HAS_DIAG
10951095
static void
10961096
hg_core_counters_init(struct hg_core_counters *hg_core_counters)
10971097
{
@@ -1326,7 +1326,7 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version,
13261326
hg_core_class->init_info.listen = na_listen;
13271327

13281328
/* Stats / counters */
1329-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
1329+
#ifdef HG_HAS_DIAG
13301330
hg_core_counters_init(&hg_core_class->counters);
13311331
#endif
13321332

@@ -1522,7 +1522,7 @@ hg_core_finalize(struct hg_core_private_class *hg_core_class)
15221522
}
15231523

15241524
/*---------------------------------------------------------------------------*/
1525-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
1525+
#ifdef HG_HAS_DIAG
15261526
static void
15271527
hg_core_class_get_counters(const struct hg_core_counters *counters,
15281528
struct hg_diag_counters *diag_counters)
@@ -3446,7 +3446,7 @@ hg_core_destroy(struct hg_core_private_handle *hg_core_handle)
34463446
return HG_SUCCESS; /* Cannot free yet */
34473447
}
34483448

3449-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
3449+
#ifdef HG_HAS_DIAG
34503450
if (hg_core_handle->active) {
34513451
hg_atomic_decr64(HG_CORE_HANDLE_CLASS(hg_core_handle)
34523452
->counters.rpc_req_recv_active_count);
@@ -4049,7 +4049,7 @@ hg_core_forward(struct hg_core_private_handle *hg_core_handle,
40494049
hg_core_handle->request_callback = callback;
40504050
hg_core_handle->request_arg = arg;
40514051

4052-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
4052+
#ifdef HG_HAS_DIAG
40534053
/* Increment counter */
40544054
hg_atomic_incr64(
40554055
HG_CORE_HANDLE_CLASS(hg_core_handle)->counters.rpc_req_sent_count);
@@ -4264,7 +4264,7 @@ hg_core_respond(struct hg_core_private_handle *hg_core_handle,
42644264
hg_core_handle->response_callback = callback;
42654265
hg_core_handle->response_arg = arg;
42664266

4267-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
4267+
#ifdef HG_HAS_DIAG
42684268
/* Increment counter */
42694269
hg_atomic_incr64(
42704270
HG_CORE_HANDLE_CLASS(hg_core_handle)->counters.rpc_resp_sent_count);
@@ -4500,7 +4500,7 @@ hg_core_recv_input_cb(const struct na_cb_info *callback_info)
45004500
hg_thread_spin_lock(&hg_core_handle_pool->pending_list.lock);
45014501
LIST_REMOVE(hg_core_handle, pending);
45024502
hg_thread_spin_unlock(&hg_core_handle_pool->pending_list.lock);
4503-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
4503+
#ifdef HG_HAS_DIAG
45044504
/* Increment counter */
45054505
hg_atomic_incr64(HG_CORE_HANDLE_CLASS(hg_core_handle)
45064506
->counters.rpc_req_recv_active_count);
@@ -4609,7 +4609,7 @@ hg_core_multi_recv_input_cb(const struct na_cb_info *callback_info)
46094609
ret = hg_core_handle_pool_get(context->handle_pool, &hg_core_handle);
46104610
HG_CHECK_SUBSYS_HG_ERROR(
46114611
rpc, error, ret, "Could not get handle from pool");
4612-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
4612+
#ifdef HG_HAS_DIAG
46134613
/* Increment counter */
46144614
hg_atomic_incr64(HG_CORE_HANDLE_CLASS(hg_core_handle)
46154615
->counters.rpc_req_recv_active_count);
@@ -4666,7 +4666,7 @@ hg_core_multi_recv_input_cb(const struct na_cb_info *callback_info)
46664666
"Copying multi-recv payload of size %zu for handle (%p)",
46674667
hg_core_handle->core_handle.in_buf_used,
46684668
(void *) hg_core_handle);
4669-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
4669+
#ifdef HG_HAS_DIAG
46704670
/* Increment counter */
46714671
hg_atomic_incr64(HG_CORE_CONTEXT_CLASS(context)
46724672
->counters.rpc_multi_recv_copy_count);
@@ -4764,7 +4764,7 @@ hg_core_process_input(struct hg_core_private_handle *hg_core_handle)
47644764
uint32_t flags = (uint32_t) hg_atomic_get32(&hg_core_handle->flags);
47654765
hg_return_t ret;
47664766

4767-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
4767+
#ifdef HG_HAS_DIAG
47684768
/* Increment counter */
47694769
hg_atomic_incr64(hg_core_class->counters.rpc_req_recv_count);
47704770
#endif
@@ -4813,7 +4813,7 @@ hg_core_process_input(struct hg_core_private_handle *hg_core_handle)
48134813
"Handle (%p) expected_count incr to %" PRId32,
48144814
(void *) hg_core_handle, expected_count);
48154815

4816-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
4816+
#ifdef HG_HAS_DIAG
48174817
/* Increment counter */
48184818
hg_atomic_incr64(hg_core_class->counters.rpc_req_extra_count);
48194819
#endif
@@ -4937,7 +4937,7 @@ hg_core_process_output(struct hg_core_private_handle *hg_core_handle)
49374937
uint32_t flags = (uint32_t) hg_atomic_get32(&hg_core_handle->flags);
49384938
hg_return_t ret;
49394939

4940-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
4940+
#ifdef HG_HAS_DIAG
49414941
/* Increment counter */
49424942
hg_atomic_incr64(hg_core_class->counters.rpc_resp_recv_count);
49434943
#endif
@@ -4981,7 +4981,7 @@ hg_core_process_output(struct hg_core_private_handle *hg_core_handle)
49814981
"Handle (%p) expected_count incr to %" PRId32,
49824982
(void *) hg_core_handle, expected_count);
49834983

4984-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
4984+
#ifdef HG_HAS_DIAG
49854985
/* Increment counter */
49864986
hg_atomic_incr64(hg_core_class->counters.rpc_resp_extra_count);
49874987
#endif
@@ -5320,7 +5320,7 @@ hg_core_completion_add(struct hg_core_context *core_context,
53205320
struct hg_core_completion_queue *backfill_queue = &context->backfill_queue;
53215321
int rc;
53225322

5323-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
5323+
#ifdef HG_HAS_DIAG
53245324
/* Increment counter */
53255325
if (hg_completion_entry->op_type == HG_BULK)
53265326
hg_atomic_incr64(HG_CORE_CONTEXT_CLASS(context)->counters.bulk_count);
@@ -6213,7 +6213,7 @@ hg_return_t
62136213
HG_Core_class_get_counters(const hg_core_class_t *hg_core_class,
62146214
struct hg_diag_counters *diag_counters)
62156215
{
6216-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
6216+
#ifdef HG_HAS_DIAG
62176217
const struct hg_core_private_class *private_class =
62186218
(const struct hg_core_private_class *) hg_core_class;
62196219
#endif
@@ -6223,7 +6223,7 @@ HG_Core_class_get_counters(const hg_core_class_t *hg_core_class,
62236223
HG_INVALID_ARG, "NULL HG core class");
62246224
HG_CHECK_SUBSYS_ERROR(cls, diag_counters == NULL, error, ret,
62256225
HG_INVALID_ARG, "NULL pointer to diag_counters");
6226-
#if defined(HG_HAS_DIAG) && !defined(_WIN32)
6226+
#ifdef HG_HAS_DIAG
62276227
hg_core_class_get_counters(&private_class->counters, diag_counters);
62286228
#else
62296229
HG_LOG_SUBSYS_ERROR(cls, "Counters not supported in current build, please "

src/na/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ set(NA_BUILD_INCLUDE_DEPENDENCIES
5959
#------------------------------------------------------------------------------
6060
# Internal dependencies
6161
#------------------------------------------------------------------------------
62+
# Diagnostics counters
63+
if(HG_HAS_DIAG)
64+
set(NA_HAS_DIAG 1)
65+
endif()
66+
6267
# Multi progress
6368
if(NOT HG_ALLOW_MULTI_PROGRESS)
6469
option(NA_ALLOW_MULTI_PROGRESS "Allow concurrent progress on single context." ON)

src/na/na.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,15 @@ NA_Set_log_level(const char *level)
10101010
hg_log_set_subsys_level(NA_SUBSYS_NAME_STRING, hg_log_name_to_level(level));
10111011
}
10121012

1013+
/*---------------------------------------------------------------------------*/
1014+
void
1015+
NA_Diag_dump_counters(void)
1016+
{
1017+
#ifndef _WIN32
1018+
hg_log_dump_counters(&HG_LOG_OUTLET(NA_SUBSYS_NAME));
1019+
#endif
1020+
}
1021+
10131022
/*---------------------------------------------------------------------------*/
10141023
na_context_t *
10151024
NA_Context_create(na_class_t *na_class)

src/na/na.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,12 @@ NA_Has_opt_feature(
153153
NA_PUBLIC void
154154
NA_Set_log_level(const char *level);
155155

156+
/**
157+
* Dump diagnostic counters into the existing log stream.
158+
*/
159+
NA_PUBLIC void
160+
NA_Diag_dump_counters(void);
161+
156162
/**
157163
* Return the name of the NA class.
158164
*

src/na/na_config.h.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585

8686
/* Build Options */
8787
#cmakedefine NA_HAS_DEBUG
88+
#cmakedefine NA_HAS_DIAG
8889
#cmakedefine NA_HAS_MULTI_PROGRESS
8990

9091
/* HWLOC */

0 commit comments

Comments
 (0)