diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-14 16:31:23 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-14 16:31:23 -0700 |
| commit | 1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa (patch) | |
| tree | a391eed8ae206613b48e02e56e6ad5c4432d8767 /tools/perf/builtin-c2c.c | |
| parent | 63bd30f249dcf0a7ce16967935cecee8feec24bb (diff) | |
| parent | 0f66dfe7b91d2743cc71dfff37af503215b204ef (diff) | |
Merge tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim:
"perf stat:
- Support new 'cluster' aggregation mode for shared resources
depending on the hardware configuration:
$ sudo perf stat -a --per-cluster -e cycles,instructions sleep 1
Performance counter stats for 'system wide':
S0-D0-CLS0 2 85,051,822 cycles
S0-D0-CLS0 2 73,909,908 instructions # 0.87 insn per cycle
S0-D0-CLS2 2 93,365,918 cycles
S0-D0-CLS2 2 83,006,158 instructions # 0.89 insn per cycle
S0-D0-CLS4 2 104,157,523 cycles
S0-D0-CLS4 2 53,234,396 instructions # 0.51 insn per cycle
S0-D0-CLS6 2 65,891,079 cycles
S0-D0-CLS6 2 41,478,273 instructions # 0.63 insn per cycle
1.002407989 seconds time elapsed
- Various fixes and cleanups for event metrics including NaN handling
perf script:
- Use libcapstone if available to disassemble the instructions. This
enables 'perf script -F disasm' and 'perf script --insn-trace=disasm'
(for Intel-PT):
$ perf script -F event,ip,disasm
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffa9839d25 movq %rax, %r14
cycles:P: ffffffffa9cdcaf0 endbr64
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffaa401f86 iretq
cycles:P: ffffffffa99c4de5 movq 0x30(%rcx), %r8
cycles:P: ffffffffa988d428 wrmsr
cycles:P: ffffffffaa401f86 iretq
cycles:P: ffffffffa9907983 movl 0x68(%rbx), %eax
cycles:P: ffffffffa988d428 wrmsr
- Expose sample ID / stream ID to python scripts
perf test:
- Add more perf test cases from Red Hat internal test suites. This
time it adds the base infra and a few perf probe tests. More to
come. :)
- Add 'perf test -p' for parallel execution and fix some issues found
by the parallel test
- Support symbol test to print symbols in a given (active) module:
$ perf test -F -v Symbols --dso /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko
--- start ---
Testing /lib/modules/6.5.13-1rodete2-amd64/kernel/fs/ext4/ext4.ko
Overlapping symbols:
7a990-7a9a0 l __pfx_ext4_exit_fs
7a990-7a9a0 g __pfx_cleanup_module
Overlapping symbols:
7a9a0-7aa1c l ext4_exit_fs
7a9a0-7aa1c g cleanup_module
...
JSON metric updates:
- A new round of Intel metric updates
- Support Power11 PVR (compatible with Power10)
- Fix cache latency events on Zen 4 to set SliceId properly
Internal:
- Fix reference counting for 'map' data structure, tireless work from
Ian!
- More memory optimization for struct thread and annotate histogram.
Now, 'perf report' (TUI) and 'perf annotate' should be much
lighter-weight in terms of memory footprint
- Support cross-arch perf register access. Clean up the build
configuration so that it can detect arch-register support at
runtime. This allows parsing register data in samples that were
recorded on a different arch
Others:
- Sync task state in 'perf sched' to kernel using trace event fields.
The task states have been changed so tools cannot assume a fixed
encoding
- Clean up 'perf mem' to generalize the arch-specific events
- Add support for local and global variables to data type profiling.
This would increase the success rate of type resolution with DWARF
- Add short option -H for --hierarchy in 'perf report' and 'perf top'"
* tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (154 commits)
perf annotate: Add comments in the data structures
perf annotate: Remove sym_hist.addr[] array
perf annotate: Calculate instruction overhead using hashmap
perf annotate: Add a hashmap for symbol histogram
perf threads: Reduce table size from 256 to 8
perf threads: Switch from rbtree to hashmap
perf threads: Move threads to its own files
perf machine: Move machine's threads into its own abstraction
perf machine: Move fprintf to for_each loop and a callback
perf trace: Ignore thread hashing in summary
perf report: Sort child tasks by tid
perf vendor events amd: Fix Zen 4 cache latency events
perf version: Display availability of OpenCSD support
perf vendor events intel: Add umasks/occ_sel to PCU events.
perf map: Fix map reference count issues
libperf evlist: Avoid out-of-bounds access
perf lock contention: Account contending locks too
perf metrics: Fix segv for metrics with no events
perf metrics: Fix metric matching
perf pmu: Fix a potential memory leak in perf_pmu__lookup()
...
Diffstat (limited to 'tools/perf/builtin-c2c.c')
| -rw-r--r-- | tools/perf/builtin-c2c.c | 45 |
1 files changed, 24 insertions, 21 deletions
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f78eea9e2153..16b40f5d43db 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -3215,12 +3215,19 @@ static int parse_record_events(const struct option *opt, const char *str, int unset __maybe_unused) { bool *event_set = (bool *) opt->value; + struct perf_pmu *pmu; + + pmu = perf_mem_events_find_pmu(); + if (!pmu) { + pr_err("failed: there is no PMU that supports perf c2c\n"); + exit(-1); + } if (!strcmp(str, "list")) { - perf_mem_events__list(); + perf_pmu__mem_events_list(pmu); exit(0); } - if (perf_mem_events__parse(str)) + if (perf_pmu__mem_events_parse(pmu, str)) exit(-1); *event_set = true; @@ -3238,13 +3245,13 @@ static const char * const *record_mem_usage = __usage_record; static int perf_c2c__record(int argc, const char **argv) { - int rec_argc, i = 0, j, rec_tmp_nr = 0; + int rec_argc, i = 0, j; const char **rec_argv; - char **rec_tmp; int ret; bool all_user = false, all_kernel = false; bool event_set = false; struct perf_mem_event *e; + struct perf_pmu *pmu; struct option options[] = { OPT_CALLBACK('e', "event", &event_set, "event", "event selector. Use 'perf c2c record -e list' to list available events", @@ -3256,7 +3263,13 @@ static int perf_c2c__record(int argc, const char **argv) OPT_END() }; - if (perf_mem_events__init()) { + pmu = perf_mem_events_find_pmu(); + if (!pmu) { + pr_err("failed: no PMU supports the memory events\n"); + return -1; + } + + if (perf_pmu__mem_events_init(pmu)) { pr_err("failed: memory events not supported\n"); return -1; } @@ -3265,22 +3278,16 @@ static int perf_c2c__record(int argc, const char **argv) PARSE_OPT_KEEP_UNKNOWN); /* Max number of arguments multiplied by number of PMUs that can support them. 
*/ - rec_argc = argc + 11 * perf_pmus__num_mem_pmus(); + rec_argc = argc + 11 * (perf_pmu__mem_events_num_mem_pmus(pmu) + 1); rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; - rec_tmp = calloc(rec_argc + 1, sizeof(char *)); - if (!rec_tmp) { - free(rec_argv); - return -1; - } - rec_argv[i++] = "record"; if (!event_set) { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE); + e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD_STORE); /* * The load and store operations are required, use the event * PERF_MEM_EVENTS__LOAD_STORE if it is supported. @@ -3289,15 +3296,15 @@ static int perf_c2c__record(int argc, const char **argv) e->record = true; rec_argv[i++] = "-W"; } else { - e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD); e->record = true; - e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE); + e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__STORE); e->record = true; } } - e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); + e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD); if (e->record) rec_argv[i++] = "-W"; @@ -3305,7 +3312,7 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "--phys-data"; rec_argv[i++] = "--sample-cpu"; - ret = perf_mem_events__record_args(rec_argv, &i, rec_tmp, &rec_tmp_nr); + ret = perf_mem_events__record_args(rec_argv, &i); if (ret) goto out; @@ -3332,10 +3339,6 @@ static int perf_c2c__record(int argc, const char **argv) ret = cmd_record(i, rec_argv); out: - for (i = 0; i < rec_tmp_nr; i++) - free(rec_tmp[i]); - - free(rec_tmp); free(rec_argv); return ret; } |