Skip to content

Commit 34b3477

Browse files
committed
in_podman_metrics: fix multiple cgroup v2 issues
Fix five bugs in the podman_metrics input plugin:

1. CPU counter division: cgroup v2 cpu.stat reports usage in microseconds, not nanoseconds like cgroup v1 cpuacct. Use the correct divisor (1e6) when converting to seconds.

2. RSS memory key: cgroup v2 memory.stat does not have a "rss" field. The equivalent metric is "anon" (anonymous memory). Add V2_STAT_KEY_RSS and use it in the v2 collection path.

3. memory.max "max" keyword: cgroup v2 uses the literal string "max" in memory.max when the memory limit is unlimited. read_from_file() fails to parse this with fscanf("%lu"), causing spurious warnings. Add read_from_sysfs_or_max() helper that returns 0 for "max" (unlimited).

4. PID alt path typo: V2_SYSFS_FILE_PIDS_ALT was set to "containers/cgroup.procs" (plural) but the actual cgroup v2 subdirectory is "container/cgroup.procs" (singular). This caused PID lookup to fail for all containers, which in turn prevented all network metrics from being collected.

5. Image name NULL safety: when parsing container metadata JSON, strstr() for the closing quote of the image name field can return NULL if the metadata is malformed or truncated. The result was used directly in pointer arithmetic and strncpy(), causing undefined behaviour and potential crashes. Add a NULL guard that falls back to image="unknown" when parsing fails.

Fixes: #7769
Signed-off-by: stondo <stondo@gmail.com>
1 parent 63ed88e commit 34b3477

3 files changed

Lines changed: 77 additions & 11 deletions

File tree

plugins/in_podman_metrics/podman_metrics.c

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -113,10 +113,15 @@ static int collect_container_data(struct flb_in_metrics *ctx)
113113
metadata_token_start = strstr(metadata, JSON_SUBFIELD_IMAGE_NAME);
114114
if (metadata_token_start) {
115115
metadata_token_stop = strstr(metadata_token_start + JSON_SUBFIELD_SIZE_IMAGE_NAME+1, "\\\"");
116-
metadata_token_size = metadata_token_stop - metadata_token_start - JSON_SUBFIELD_SIZE_IMAGE_NAME;
117-
118-
strncpy(image_name, metadata_token_start+JSON_SUBFIELD_SIZE_IMAGE_NAME, metadata_token_size);
119-
image_name[metadata_token_size] = '\0';
116+
if (metadata_token_stop) {
117+
metadata_token_size = metadata_token_stop - metadata_token_start - JSON_SUBFIELD_SIZE_IMAGE_NAME;
118+
strncpy(image_name, metadata_token_start+JSON_SUBFIELD_SIZE_IMAGE_NAME, metadata_token_size);
119+
image_name[metadata_token_size] = '\0';
120+
}
121+
else {
122+
strncpy(image_name, "unknown", IMAGE_NAME_SIZE - 1);
123+
image_name[7] = '\0';
124+
}
120125

121126
flb_plg_trace(ctx->ins, "Found image name %s", image_name);
122127
add_container_to_list(ctx, id, name, image_name);
@@ -225,10 +230,19 @@ static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **count
225230
return -1;
226231
}
227232

228-
if (strcmp(metric_name, COUNTER_CPU) == 0 || strcmp(metric_name, COUNTER_CPU_USER) == 0) {
229-
fvalue = fvalue / 1000000000;
230-
flb_plg_trace(ctx->ins, "Converting %s from nanoseconds to seconds (%lu -> %lu)", metric_name, value, fvalue);
231-
233+
if (strcmp(metric_name, COUNTER_CPU) == 0 ||
234+
strcmp(metric_name, COUNTER_CPU_USER) == 0) {
235+
if (ctx->cgroup_version == CGROUP_V2) {
236+
/* cgroup v2 cpu.stat reports in microseconds */
237+
fvalue = fvalue / 1000000;
238+
}
239+
else {
240+
/* cgroup v1 cpuacct reports in nanoseconds */
241+
fvalue = fvalue / 1000000000;
242+
}
243+
flb_plg_trace(ctx->ins,
244+
"Converting %s to seconds (%lu -> %lu)",
245+
metric_name, value, fvalue);
232246
}
233247

234248
labels = (char *[]){id, name, image_name, interface};

plugins/in_podman_metrics/podman_metrics_config.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@
8383

8484
/* Key names in .stat files */
8585
#define STAT_KEY_RSS "rss"
86+
#define V2_STAT_KEY_RSS "anon"
8687
#define STAT_KEY_CPU "usage_usec"
8788
#define STAT_KEY_CPU_USER "user_usec"
8889

@@ -106,7 +107,7 @@
106107
#define V2_SYSFS_FILE_MEMORY_LIMIT "memory.max"
107108
#define V2_SYSFS_FILE_CPU_STAT "cpu.stat"
108109
#define V2_SYSFS_FILE_PIDS "cgroup.procs"
109-
#define V2_SYSFS_FILE_PIDS_ALT "containers/cgroup.procs"
110+
#define V2_SYSFS_FILE_PIDS_ALT "container/cgroup.procs"
110111

111112
/* Values used to construct counters/gauges names and descriptions */
112113
#define COUNTER_PREFIX "container"

plugins/in_podman_metrics/podman_metrics_data.c

Lines changed: 53 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -342,6 +342,54 @@ int fill_counters_with_sysfs_data_v1(struct flb_in_metrics *ctx)
342342
return 0;
343343
}
344344

345+
/*
346+
* Read uint64_t value from sysfs file, with special handling for the "max"
347+
* keyword used by cgroup v2 to indicate an unlimited resource.
348+
* Returns 0 when file contains "max".
349+
*/
350+
static uint64_t read_from_sysfs_or_max(struct flb_in_metrics *ctx,
351+
flb_sds_t dir,
352+
flb_sds_t name)
353+
{
354+
char path[SYSFS_FILE_PATH_SIZE];
355+
char buf[32];
356+
uint64_t value = UINT64_MAX;
357+
FILE *fp;
358+
int c;
359+
360+
if (dir == NULL) {
361+
return value;
362+
}
363+
364+
snprintf(path, sizeof(path), "%s/%s", dir, name);
365+
366+
fp = fopen(path, "r");
367+
if (!fp) {
368+
flb_plg_warn(ctx->ins, "Failed to read %s", path);
369+
return value;
370+
}
371+
372+
if (fgets(buf, sizeof(buf), fp) != NULL) {
373+
/* cgroup v2 uses "max" to indicate unlimited */
374+
if (strncmp(buf, "max", 3) == 0) {
375+
flb_plg_debug(ctx->ins, "%s: max (unlimited)", path);
376+
fclose(fp);
377+
return 0;
378+
}
379+
c = sscanf(buf, "%lu", &value);
380+
if (c != 1) {
381+
flb_plg_warn(ctx->ins,
382+
"Failed to read a number from %s", path);
383+
fclose(fp);
384+
return UINT64_MAX;
385+
}
386+
}
387+
388+
fclose(fp);
389+
flb_plg_debug(ctx->ins, "%s: %lu", path, value);
390+
return value;
391+
}
392+
345393
/*
346394
* Iterate over previously created container list. For each entry, generate its
347395
* path in sysfs system directory. From this path, grab data about container metrics
@@ -363,8 +411,11 @@ int fill_counters_with_sysfs_data_v2(struct flb_in_metrics *ctx)
363411

364412
cnt->memory_usage = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_MEMORY, NULL);
365413
cnt->memory_max_usage = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_MAX_MEMORY, NULL);
366-
cnt->rss = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_MEMORY_STAT, STAT_KEY_RSS);
367-
cnt->memory_limit = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_MEMORY_LIMIT, NULL);
414+
cnt->rss = get_data_from_sysfs(ctx, path,
415+
V2_SYSFS_FILE_MEMORY_STAT,
416+
V2_STAT_KEY_RSS);
417+
cnt->memory_limit = read_from_sysfs_or_max(ctx, path,
418+
V2_SYSFS_FILE_MEMORY_LIMIT);
368419
cnt->cpu_user = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_CPU_STAT, STAT_KEY_CPU_USER);
369420
cnt->cpu = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_CPU_STAT, STAT_KEY_CPU);
370421
pid = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_PIDS, NULL);

0 commit comments

Comments
 (0)