summaryrefslogtreecommitdiff
path: root/tools/perf/builtin-c2c.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-14 16:31:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-14 16:31:23 -0700
commit1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa (patch)
treea391eed8ae206613b48e02e56e6ad5c4432d8767 /tools/perf/builtin-c2c.c
parent63bd30f249dcf0a7ce16967935cecee8feec24bb (diff)
parent0f66dfe7b91d2743cc71dfff37af503215b204ef (diff)
Merge tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim: "perf stat: - Support new 'cluster' aggregation mode for shared resources depending on the hardware configuration: $ sudo perf stat -a --per-cluster -e cycles,instructions sleep 1 Performance counter stats for 'system wide': S0-D0-CLS0 2 85,051,822 cycles S0-D0-CLS0 2 73,909,908 instructions # 0.87 insn per cycle S0-D0-CLS2 2 93,365,918 cycles S0-D0-CLS2 2 83,006,158 instructions # 0.89 insn per cycle S0-D0-CLS4 2 104,157,523 cycles S0-D0-CLS4 2 53,234,396 instructions # 0.51 insn per cycle S0-D0-CLS6 2 65,891,079 cycles S0-D0-CLS6 2 41,478,273 instructions # 0.63 insn per cycle 1.002407989 seconds time elapsed - Various fixes and cleanups for event metrics including NaN handling perf script: - Use libcapstone if available to disassemble the instructions. This enables 'perf script -F disasm' and 'perf script --insn-trace=disasm' (for Intel-PT): $ perf script -F event,ip,disasm cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffa9839d25 movq %rax, %r14 cycles:P: ffffffffa9cdcaf0 endbr64 cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffaa401f86 iretq cycles:P: ffffffffa99c4de5 movq 0x30(%rcx), %r8 cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffaa401f86 iretq cycles:P: ffffffffa9907983 movl 0x68(%rbx), %eax cycles:P: ffffffffa988d428 wrmsr - Expose sample ID / stream ID to python scripts perf test: - Add more perf test cases from Redhat internal test suites. This time it adds the base infra and a few perf probe tests. More to come. :) - Add 'perf test -p' for parallel execution and fix some issues found by the parallel test - Support symbol test to print symbols in given (active) module: $ perf test -F -v Symbols --dso /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko --- start --- Testing /lib/modules/6.5.13-1rodete2-amd64/kernel/fs/ext4/ext4.ko Overlapping symbols: 7a990-7a9a0 l __pfx_ext4_exit_fs 7a990-7a9a0 g __pfx_cleanup_module Overlapping symbols: 7a9a0-7aa1c l ext4_exit_fs 7a9a0-7aa1c g cleanup_module ... JSON metric updates: - A new round of Intel metric updates - Support Power11 PVR (compatible to Power10) - Fix cache latency events on Zen 4 to set SliceId properly Internal: - Fix reference counting for 'map' data structure, tireless work from Ian! - More memory optimization for struct thread and annotate histogram. Now, 'perf report' (TUI) and 'perf annotate' should be much lighter-weight in terms of memory footprint - Support cross-arch perf register access. Clean up the build configuration so that it can detect arch-register support at runtime. This can allow to parse register data in sample which was recorded in a different arch Others: - Sync task state in 'perf sched' to kernel using trace event fields. The task states have been changed so tools cannot assume a fixed encoding - Clean up 'perf mem' to generalize the arch-specific events - Add support for local and global variables to data type profiling. This would increase the success rate of type resolution with DWARF - Add short option -H for --hierarchy in 'perf report' and 'perf top'" * tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (154 commits) perf annotate: Add comments in the data structures perf annotate: Remove sym_hist.addr[] array perf annotate: Calculate instruction overhead using hashmap perf annotate: Add a hashmap for symbol histogram perf threads: Reduce table size from 256 to 8 perf threads: Switch from rbtree to hashmap perf threads: Move threads to its own files perf machine: Move machine's threads into its own abstraction perf machine: Move fprintf to for_each loop and a callback perf trace: Ignore thread hashing in summary perf report: Sort child tasks by tid perf vendor events amd: Fix Zen 4 cache latency events perf version: Display availability of OpenCSD support perf vendor events intel: Add umasks/occ_sel to PCU events. perf map: Fix map reference count issues libperf evlist: Avoid out-of-bounds access perf lock contention: Account contending locks too perf metrics: Fix segv for metrics with no events perf metrics: Fix metric matching perf pmu: Fix a potential memory leak in perf_pmu__lookup() ...
Diffstat (limited to 'tools/perf/builtin-c2c.c')
-rw-r--r--tools/perf/builtin-c2c.c45
1 files changed, 24 insertions, 21 deletions
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index f78eea9e2153..16b40f5d43db 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -3215,12 +3215,19 @@ static int parse_record_events(const struct option *opt,
const char *str, int unset __maybe_unused)
{
bool *event_set = (bool *) opt->value;
+ struct perf_pmu *pmu;
+
+ pmu = perf_mem_events_find_pmu();
+ if (!pmu) {
+ pr_err("failed: there is no PMU that supports perf c2c\n");
+ exit(-1);
+ }
if (!strcmp(str, "list")) {
- perf_mem_events__list();
+ perf_pmu__mem_events_list(pmu);
exit(0);
}
- if (perf_mem_events__parse(str))
+ if (perf_pmu__mem_events_parse(pmu, str))
exit(-1);
*event_set = true;
@@ -3238,13 +3245,13 @@ static const char * const *record_mem_usage = __usage_record;
static int perf_c2c__record(int argc, const char **argv)
{
- int rec_argc, i = 0, j, rec_tmp_nr = 0;
+ int rec_argc, i = 0, j;
const char **rec_argv;
- char **rec_tmp;
int ret;
bool all_user = false, all_kernel = false;
bool event_set = false;
struct perf_mem_event *e;
+ struct perf_pmu *pmu;
struct option options[] = {
OPT_CALLBACK('e', "event", &event_set, "event",
"event selector. Use 'perf c2c record -e list' to list available events",
@@ -3256,7 +3263,13 @@ static int perf_c2c__record(int argc, const char **argv)
OPT_END()
};
- if (perf_mem_events__init()) {
+ pmu = perf_mem_events_find_pmu();
+ if (!pmu) {
+ pr_err("failed: no PMU supports the memory events\n");
+ return -1;
+ }
+
+ if (perf_pmu__mem_events_init(pmu)) {
pr_err("failed: memory events not supported\n");
return -1;
}
@@ -3265,22 +3278,16 @@ static int perf_c2c__record(int argc, const char **argv)
PARSE_OPT_KEEP_UNKNOWN);
/* Max number of arguments multiplied by number of PMUs that can support them. */
- rec_argc = argc + 11 * perf_pmus__num_mem_pmus();
+ rec_argc = argc + 11 * (perf_pmu__mem_events_num_mem_pmus(pmu) + 1);
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (!rec_argv)
return -1;
- rec_tmp = calloc(rec_argc + 1, sizeof(char *));
- if (!rec_tmp) {
- free(rec_argv);
- return -1;
- }
-
rec_argv[i++] = "record";
if (!event_set) {
- e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE);
+ e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD_STORE);
/*
* The load and store operations are required, use the event
* PERF_MEM_EVENTS__LOAD_STORE if it is supported.
@@ -3289,15 +3296,15 @@ static int perf_c2c__record(int argc, const char **argv)
e->record = true;
rec_argv[i++] = "-W";
} else {
- e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+ e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD);
e->record = true;
- e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
+ e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__STORE);
e->record = true;
}
}
- e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+ e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD);
if (e->record)
rec_argv[i++] = "-W";
@@ -3305,7 +3312,7 @@ static int perf_c2c__record(int argc, const char **argv)
rec_argv[i++] = "--phys-data";
rec_argv[i++] = "--sample-cpu";
- ret = perf_mem_events__record_args(rec_argv, &i, rec_tmp, &rec_tmp_nr);
+ ret = perf_mem_events__record_args(rec_argv, &i);
if (ret)
goto out;
@@ -3332,10 +3339,6 @@ static int perf_c2c__record(int argc, const char **argv)
ret = cmd_record(i, rec_argv);
out:
- for (i = 0; i < rec_tmp_nr; i++)
- free(rec_tmp[i]);
-
- free(rec_tmp);
free(rec_argv);
return ret;
}