Skip to content

Commit ffd3d18

Browse files
kim-phillips-armacmel
authored and committed
perf tools: Add ARM Statistical Profiling Extensions (SPE) support
'perf record' and 'perf report --dump-raw-trace' supported in this release. Example usage: # perf record -e arm_spe/ts_enable=1,pa_enable=1/ dd if=/dev/zero of=/dev/null count=10000 # perf report --dump-raw-trace Note that the perf.data file is portable, so the report can be run on another architecture host if necessary. Output will contain raw SPE data and its textual representation, such as: 0x5c8 [0x30]: PERF_RECORD_AUXTRACE size: 0x200000 offset: 0 ref: 0x1891ad0e idx: 1 tid: 2227 cpu: 1 . . ... ARM SPE data: size 2097152 bytes . 00000000: 49 00 LD . 00000002: b2 c0 3b 29 0f 00 00 ff ff VA 0xffff00000f293bc0 . 0000000b: b3 c0 eb 24 fb 00 00 00 80 PA 0xfb24ebc0 ns=1 . 00000014: 9a 00 00 LAT 0 XLAT . 00000017: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000019: b0 00 c4 15 08 00 00 ff ff PC 0xff00000815c400 el3 ns=1 . 00000022: 98 00 00 LAT 0 TOT . 00000025: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000002e: 49 00 LD . 00000030: b2 80 3c 29 0f 00 00 ff ff VA 0xffff00000f293c80 . 00000039: b3 80 ec 24 fb 00 00 00 80 PA 0xfb24ec80 ns=1 . 00000042: 9a 00 00 LAT 0 XLAT . 00000045: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000047: b0 f4 11 16 08 00 00 ff ff PC 0xff0000081611f4 el3 ns=1 . 00000050: 98 00 00 LAT 0 TOT . 00000053: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000005c: 48 00 INSN-OTHER . 0000005e: 42 02 EV RETIRED . 00000060: b0 2c ef 7f 08 00 00 ff ff PC 0xff0000087fef2c el3 ns=1 . 00000069: 98 00 00 LAT 0 TOT . 0000006c: 71 d1 6f 21 2c 09 00 00 00 TS 39395094481 ... Other release notes: - applies to acme's perf/{core,urgent} branches, likely elsewhere - Report is self-contained within the tool. Record requires enabling the kernel SPE driver by setting CONFIG_ARM_SPE_PMU. 
- The intel-bts implementation was used as a starting point; its min/default/max buffer sizes and power of 2 pages granularity need to be revisited for ARM SPE - Recording across multiple SPE clusters/domains not supported - Snapshot support (record -S), and conversion to native perf events (e.g., via 'perf inject --itrace'), are also not supported - Technically both cs-etm and spe can be used simultaneously, however disabled for simplicity in this release Signed-off-by: Kim Phillips <[email protected]> Reviewed-by: Dongjiu Geng <[email protected]> Acked-by: Adrian Hunter <[email protected]> Cc: Alexander Shishkin <[email protected]> Cc: Andi Kleen <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: [email protected] Cc: Marc Zyngier <[email protected]> Cc: Mark Rutland <[email protected]> Cc: Mathieu Poirier <[email protected]> Cc: Pawel Moll <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Rob Herring <[email protected]> Cc: Suzuki Poulouse <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Wang Nan <[email protected]> Cc: Will Deacon <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent d777f8d commit ffd3d18

11 files changed

Lines changed: 1077 additions & 7 deletions

File tree

tools/perf/arch/arm/util/auxtrace.c

Lines changed: 71 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,29 +22,94 @@
2222
#include "../../util/evlist.h"
2323
#include "../../util/pmu.h"
2424
#include "cs-etm.h"
25+
#include "arm-spe.h"
26+
27+
static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
28+
{
29+
struct perf_pmu **arm_spe_pmus = NULL;
30+
int ret, i, nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
31+
/* arm_spe_xxxxxxxxx\0 */
32+
char arm_spe_pmu_name[sizeof(ARM_SPE_PMU_NAME) + 10];
33+
34+
arm_spe_pmus = zalloc(sizeof(struct perf_pmu *) * nr_cpus);
35+
if (!arm_spe_pmus) {
36+
pr_err("spes alloc failed\n");
37+
*err = -ENOMEM;
38+
return NULL;
39+
}
40+
41+
for (i = 0; i < nr_cpus; i++) {
42+
ret = sprintf(arm_spe_pmu_name, "%s%d", ARM_SPE_PMU_NAME, i);
43+
if (ret < 0) {
44+
pr_err("sprintf failed\n");
45+
*err = -ENOMEM;
46+
return NULL;
47+
}
48+
49+
arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name);
50+
if (arm_spe_pmus[*nr_spes]) {
51+
pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n",
52+
__func__, __LINE__, *nr_spes,
53+
arm_spe_pmus[*nr_spes]->type,
54+
arm_spe_pmus[*nr_spes]->name);
55+
(*nr_spes)++;
56+
}
57+
}
58+
59+
return arm_spe_pmus;
60+
}
2561

2662
struct auxtrace_record
2763
*auxtrace_record__init(struct perf_evlist *evlist, int *err)
2864
{
2965
struct perf_pmu *cs_etm_pmu;
3066
struct perf_evsel *evsel;
3167
bool found_etm = false;
68+
bool found_spe = false;
69+
static struct perf_pmu **arm_spe_pmus = NULL;
70+
static int nr_spes = 0;
71+
int i;
72+
73+
if (!evlist)
74+
return NULL;
3275

3376
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
3477

35-
if (evlist) {
36-
evlist__for_each_entry(evlist, evsel) {
37-
if (cs_etm_pmu &&
38-
evsel->attr.type == cs_etm_pmu->type)
39-
found_etm = true;
78+
if (!arm_spe_pmus)
79+
arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
80+
81+
evlist__for_each_entry(evlist, evsel) {
82+
if (cs_etm_pmu &&
83+
evsel->attr.type == cs_etm_pmu->type)
84+
found_etm = true;
85+
86+
if (!nr_spes)
87+
continue;
88+
89+
for (i = 0; i < nr_spes; i++) {
90+
if (evsel->attr.type == arm_spe_pmus[i]->type) {
91+
found_spe = true;
92+
break;
93+
}
4094
}
4195
}
4296

97+
if (found_etm && found_spe) {
98+
pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
99+
*err = -EOPNOTSUPP;
100+
return NULL;
101+
}
102+
43103
if (found_etm)
44104
return cs_etm_record_init(err);
45105

106+
#if defined(__aarch64__)
107+
if (found_spe)
108+
return arm_spe_recording_init(err, arm_spe_pmus[i]);
109+
#endif
110+
46111
/*
47-
* Clear 'err' even if we haven't found a cs_etm event - that way perf
112+
* Clear 'err' even if we haven't found an event - that way perf
48113
* record can still be used even if tracers aren't present. The NULL
49114
* return value will take care of telling the infrastructure HW tracing
50115
* isn't available.

tools/perf/arch/arm/util/pmu.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <linux/perf_event.h>
2121

2222
#include "cs-etm.h"
23+
#include "arm-spe.h"
2324
#include "../../util/pmu.h"
2425

2526
struct perf_event_attr
@@ -30,7 +31,12 @@ struct perf_event_attr
3031
/* add ETM default config here */
3132
pmu->selectable = true;
3233
pmu->set_drv_config = cs_etm_set_drv_config;
34+
#if defined(__aarch64__)
35+
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
36+
return arm_spe_pmu_default_config(pmu);
37+
#endif
3338
}
39+
3440
#endif
3541
return NULL;
3642
}

tools/perf/arch/arm64/util/Build

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
55

66
libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
77
../../arm/util/auxtrace.o \
8-
../../arm/util/cs-etm.o
8+
../../arm/util/cs-etm.o \
9+
arm-spe.o
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Arm Statistical Profiling Extensions (SPE) support
4+
* Copyright (c) 2017-2018, Arm Ltd.
5+
*/
6+
7+
#include <linux/kernel.h>
8+
#include <linux/types.h>
9+
#include <linux/bitops.h>
10+
#include <linux/log2.h>
11+
#include <time.h>
12+
13+
#include "../../util/cpumap.h"
14+
#include "../../util/evsel.h"
15+
#include "../../util/evlist.h"
16+
#include "../../util/session.h"
17+
#include "../../util/util.h"
18+
#include "../../util/pmu.h"
19+
#include "../../util/debug.h"
20+
#include "../../util/auxtrace.h"
21+
#include "../../util/arm-spe.h"
22+
23+
/* Byte-count helpers: KiB(n) is n * 1024 bytes, MiB(n) is n * 1024 * 1024. */
#define KiB(x) ((x) * 1024)
#define MiB(x) (KiB(KiB(x)))
25+
26+
struct arm_spe_recording {
27+
struct auxtrace_record itr;
28+
struct perf_pmu *arm_spe_pmu;
29+
struct perf_evlist *evlist;
30+
};
31+
32+
static size_t
33+
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
34+
struct perf_evlist *evlist __maybe_unused)
35+
{
36+
return ARM_SPE_AUXTRACE_PRIV_SIZE;
37+
}
38+
39+
static int arm_spe_info_fill(struct auxtrace_record *itr,
40+
struct perf_session *session,
41+
struct auxtrace_info_event *auxtrace_info,
42+
size_t priv_size)
43+
{
44+
struct arm_spe_recording *sper =
45+
container_of(itr, struct arm_spe_recording, itr);
46+
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
47+
48+
if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
49+
return -EINVAL;
50+
51+
if (!session->evlist->nr_mmaps)
52+
return -EINVAL;
53+
54+
auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
55+
auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
56+
57+
return 0;
58+
}
59+
60+
static int arm_spe_recording_options(struct auxtrace_record *itr,
61+
struct perf_evlist *evlist,
62+
struct record_opts *opts)
63+
{
64+
struct arm_spe_recording *sper =
65+
container_of(itr, struct arm_spe_recording, itr);
66+
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
67+
struct perf_evsel *evsel, *arm_spe_evsel = NULL;
68+
bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
69+
struct perf_evsel *tracking_evsel;
70+
int err;
71+
72+
sper->evlist = evlist;
73+
74+
evlist__for_each_entry(evlist, evsel) {
75+
if (evsel->attr.type == arm_spe_pmu->type) {
76+
if (arm_spe_evsel) {
77+
pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
78+
return -EINVAL;
79+
}
80+
evsel->attr.freq = 0;
81+
evsel->attr.sample_period = 1;
82+
arm_spe_evsel = evsel;
83+
opts->full_auxtrace = true;
84+
}
85+
}
86+
87+
if (!opts->full_auxtrace)
88+
return 0;
89+
90+
/* We are in full trace mode but '-m,xyz' wasn't specified */
91+
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
92+
if (privileged) {
93+
opts->auxtrace_mmap_pages = MiB(4) / page_size;
94+
} else {
95+
opts->auxtrace_mmap_pages = KiB(128) / page_size;
96+
if (opts->mmap_pages == UINT_MAX)
97+
opts->mmap_pages = KiB(256) / page_size;
98+
}
99+
}
100+
101+
/* Validate auxtrace_mmap_pages */
102+
if (opts->auxtrace_mmap_pages) {
103+
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
104+
size_t min_sz = KiB(8);
105+
106+
if (sz < min_sz || !is_power_of_2(sz)) {
107+
pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
108+
min_sz / 1024);
109+
return -EINVAL;
110+
}
111+
}
112+
113+
114+
/*
115+
* To obtain the auxtrace buffer file descriptor, the auxtrace event
116+
* must come first.
117+
*/
118+
perf_evlist__to_front(evlist, arm_spe_evsel);
119+
120+
perf_evsel__set_sample_bit(arm_spe_evsel, CPU);
121+
perf_evsel__set_sample_bit(arm_spe_evsel, TIME);
122+
perf_evsel__set_sample_bit(arm_spe_evsel, TID);
123+
124+
/* Add dummy event to keep tracking */
125+
err = parse_events(evlist, "dummy:u", NULL);
126+
if (err)
127+
return err;
128+
129+
tracking_evsel = perf_evlist__last(evlist);
130+
perf_evlist__set_tracking_event(evlist, tracking_evsel);
131+
132+
tracking_evsel->attr.freq = 0;
133+
tracking_evsel->attr.sample_period = 1;
134+
perf_evsel__set_sample_bit(tracking_evsel, TIME);
135+
perf_evsel__set_sample_bit(tracking_evsel, CPU);
136+
perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
137+
138+
return 0;
139+
}
140+
141+
/*
 * auxtrace_record::reference callback.
 *
 * Returns a reference value derived from the raw monotonic clock: the
 * seconds field XORed with the nanoseconds field. @itr is not used.
 */
static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
{
	/*
	 * Zero-initialised so that a failing clock_gettime() yields a
	 * well-defined 0 instead of reading indeterminate stack contents.
	 */
	struct timespec ts = { 0 };

	clock_gettime(CLOCK_MONOTONIC_RAW, &ts);

	return ts.tv_sec ^ ts.tv_nsec;
}
149+
150+
static void arm_spe_recording_free(struct auxtrace_record *itr)
151+
{
152+
struct arm_spe_recording *sper =
153+
container_of(itr, struct arm_spe_recording, itr);
154+
155+
free(sper);
156+
}
157+
158+
static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
159+
{
160+
struct arm_spe_recording *sper =
161+
container_of(itr, struct arm_spe_recording, itr);
162+
struct perf_evsel *evsel;
163+
164+
evlist__for_each_entry(sper->evlist, evsel) {
165+
if (evsel->attr.type == sper->arm_spe_pmu->type)
166+
return perf_evlist__enable_event_idx(sper->evlist,
167+
evsel, idx);
168+
}
169+
return -EINVAL;
170+
}
171+
172+
struct auxtrace_record *arm_spe_recording_init(int *err,
173+
struct perf_pmu *arm_spe_pmu)
174+
{
175+
struct arm_spe_recording *sper;
176+
177+
if (!arm_spe_pmu) {
178+
*err = -ENODEV;
179+
return NULL;
180+
}
181+
182+
sper = zalloc(sizeof(struct arm_spe_recording));
183+
if (!sper) {
184+
*err = -ENOMEM;
185+
return NULL;
186+
}
187+
188+
sper->arm_spe_pmu = arm_spe_pmu;
189+
sper->itr.recording_options = arm_spe_recording_options;
190+
sper->itr.info_priv_size = arm_spe_info_priv_size;
191+
sper->itr.info_fill = arm_spe_info_fill;
192+
sper->itr.free = arm_spe_recording_free;
193+
sper->itr.reference = arm_spe_reference;
194+
sper->itr.read_finish = arm_spe_read_finish;
195+
sper->itr.alignment = 0;
196+
197+
return &sper->itr;
198+
}
199+
200+
struct perf_event_attr
201+
*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
202+
{
203+
struct perf_event_attr *attr;
204+
205+
attr = zalloc(sizeof(struct perf_event_attr));
206+
if (!attr) {
207+
pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
208+
return NULL;
209+
}
210+
211+
/*
212+
* If kernel driver doesn't advertise a minimum,
213+
* use max allowable by PMSIDR_EL1.INTERVAL
214+
*/
215+
if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
216+
&attr->sample_period) != 1) {
217+
pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
218+
attr->sample_period = 4096;
219+
}
220+
221+
arm_spe_pmu->selectable = true;
222+
arm_spe_pmu->is_uncore = false;
223+
224+
return attr;
225+
}

tools/perf/util/Build

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ libperf-$(CONFIG_AUXTRACE) += auxtrace.o
8686
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
8787
libperf-$(CONFIG_AUXTRACE) += intel-pt.o
8888
libperf-$(CONFIG_AUXTRACE) += intel-bts.o
89+
libperf-$(CONFIG_AUXTRACE) += arm-spe.o
90+
libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
8991
libperf-y += parse-branch-options.o
9092
libperf-y += dump-insn.o
9193
libperf-y += parse-regs-options.o

0 commit comments

Comments
 (0)