summaryrefslogtreecommitdiff
path: root/tools/perf/util/machine.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-01-11 14:39:17 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-11 14:39:17 -0800
commit5cb52b5e1654f3f1ed9c32e34456d98559c85aa0 (patch)
tree737c73d6aef99a17f57c2974f1e2a142a5f1a377 /tools/perf/util/machine.c
parent24af98c4cf5f5e69266e270c7f3fb34b82ff6656 (diff)
parent3eb9ede23bdd96e9ba60e2b4d4d17a7c35d58448 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Kernel side changes: - Intel Knights Landing support. (Harish Chegondi) - Intel Broadwell-EP uncore PMU support. (Kan Liang) - Core code improvements. (Peter Zijlstra.) - Event filter, LBR and PEBS fixes. (Stephane Eranian) - Enable cycles:pp on Intel Atom. (Stephane Eranian) - Add cycles:ppp support for Skylake. (Andi Kleen) - Various x86 NMI overhead optimizations. (Andi Kleen) - Intel PT enhancements. (Takao Indoh) - AMD cache events fix. (Vince Weaver) Tons of tooling changes: - Show random perf tool tips in the 'perf report' bottom line (Namhyung Kim) - perf report now defaults to --group if the perf.data file has grouped events, try it with: # perf record -e '{cycles,instructions}' -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.093 MB perf.data (1247 samples) ] # perf report # Samples: 1K of event 'anon group { cycles, instructions }' # Event count (approx.): 1955219195 # # Overhead Command Shared Object Symbol 2.86% 0.22% swapper [kernel.kallsyms] [k] intel_idle 1.05% 0.33% firefox libxul.so [.] js::SetObjectElement 1.05% 0.00% kworker/0:3 [kernel.kallsyms] [k] gen6_ring_get_seqno 0.88% 0.17% chrome chrome [.] 0x0000000000ee27ab 0.65% 0.86% firefox libxul.so [.] js::ValueToId<(js::AllowGC)1> 0.64% 0.23% JS Helper libxul.so [.] js::SplayTree<js::jit::LiveRange*, js::jit::LiveRange>::splay 0.62% 1.27% firefox libxul.so [.] js::GetIterator 0.61% 1.74% firefox libxul.so [.] js::NativeSetProperty 0.61% 0.31% firefox libxul.so [.] js::SetPropertyByDefining - Introduce the 'perf stat record/report' workflow: Generate perf.data files from 'perf stat', to tap into the scripting capabilities perf has instead of defining a 'perf stat' specific scripting support to calculate event ratios, etc. Simple example: $ perf stat record -e cycles usleep 1 Performance counter stats for 'usleep 1': 1,134,996 cycles 0.000670644 seconds time elapsed $ perf stat report Performance counter stats for '/home/acme/bin/perf stat record -e cycles usleep 1': 1,134,996 cycles 0.000670644 seconds time elapsed $ It generates PERF_RECORD_ userspace records to store the details: $ perf report -D | grep PERF_RECORD 0xf0 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 27637 0x118 [0x12]: PERF_RECORD_CPU_MAP nr: 1 cpu: 65535 0x12a [0x40]: PERF_RECORD_STAT_CONFIG 0x16a [0x30]: PERF_RECORD_STAT -1 -1 0x19a [0x40]: PERF_RECORD_MMAP -1/0: [0xffffffff81000000(0x1f000000) @ 0xffffffff81000000]: x [kernel.kallsyms]_text 0x1da [0x18]: PERF_RECORD_STAT_ROUND [acme@ssdandy linux]$ An effort was made to make perf.data files generated like this to not generate cryptic messages when processed by older tools. The 'perf script' bits need rebasing, will go up later. - Make command line options always available, even when they depend on some feature being enabled, warning the user about use of such options (Wang Nan) - Support hw breakpoint events (mem:0xAddress) in the default output mode in 'perf script' (Wang Nan) - Fixes and improvements for supporting annotating ARM binaries, support ARM call and jump instructions, more work needed to have arch specific stuff separated into tools/perf/arch/*/annotate/ (Russell King) - Add initial 'perf config' command, for now just with a --list command to the contents of the configuration file in use and a basic man page describing its format, commands for doing edits and detailed documentation are being reviewed and proof-read. (Taeung Song) - Allows BPF scriptlets specify arguments to be fetched using DWARF info, using a prologue generated at compile/build time (He Kuang, Wang Nan) - Allow attaching BPF scriptlets to module symbols (Wang Nan) - Allow attaching BPF scriptlets to userspace code using uprobe (Wang Nan) - BPF programs now can specify 'perf probe' tunables via its section name, separating key=val values using semicolons (Wang Nan) Testing some of these new BPF features: Use case: get callchains when receiving SSL packets, filter then in the kernel, at arbitrary place. # cat ssl.bpf.c #define SEC(NAME) __attribute__((section(NAME), used)) struct pt_regs; SEC("func=__inet_lookup_established hnum") int func(struct pt_regs *ctx, int err, unsigned short port) { return err == 0 && port == 443; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; # # perf record -a -g -e ssl.bpf.c ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.787 MB perf.data (3 samples) ] # perf script | head -30 swapper 0 [000] 58783.268118: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb 8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux) 896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux) 8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux) 855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux) 8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux) 8572a8 process_backlog (/lib/modules/4.3.0+/build/vmlinux) 856b11 net_rx_action (/lib/modules/4.3.0+/build/vmlinux) 2a284b __do_softirq (/lib/modules/4.3.0+/build/vmlinux) 2a2ba3 irq_exit (/lib/modules/4.3.0+/build/vmlinux) 96b7a4 do_IRQ (/lib/modules/4.3.0+/build/vmlinux) 969807 ret_from_intr (/lib/modules/4.3.0+/build/vmlinux) 2dede5 cpu_startup_entry (/lib/modules/4.3.0+/build/vmlinux) 95d5bc rest_init (/lib/modules/4.3.0+/build/vmlinux) 1163ffa start_kernel ([kernel.vmlinux].init.text) 11634d7 x86_64_start_reservations ([kernel.vmlinux].init.text) 1163623 x86_64_start_kernel ([kernel.vmlinux].init.text) qemu-system-x86 9178 [003] 58785.792417: perf_bpf_probe:func: (ffffffff816a0f60) hnum=0x1bb 8a0f61 __inet_lookup_established (/lib/modules/4.3.0+/build/vmlinux) 896def ip_rcv_finish (/lib/modules/4.3.0+/build/vmlinux) 8976c2 ip_rcv (/lib/modules/4.3.0+/build/vmlinux) 855eba __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux) 8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux) 856660 netif_receive_skb_internal (/lib/modules/4.3.0+/build/vmlinux) 8566ec netif_receive_skb_sk (/lib/modules/4.3.0+/build/vmlinux) 430a br_handle_frame_finish ([bridge]) 48bc br_handle_frame ([bridge]) 855f44 __netif_receive_skb_core (/lib/modules/4.3.0+/build/vmlinux) 8565d8 __netif_receive_skb (/lib/modules/4.3.0+/build/vmlinux) # - Use 'perf probe' various options to list functions, see what variables can be collected at any given point, experiment first collecting without a filter, then filter, use it together with 'perf trace', 'perf top', with or without callchains, if it explodes, please tell us! - Introduce a new callchain mode: "folded", that will list per line representations of all callchains for a give histogram entry, facilitating 'perf report' output processing by other tools, such as Brendan Gregg's flamegraph tools (Namhyung Kim) E.g: # perf report | grep -v ^# | head 18.37% 0.00% swapper [kernel.kallsyms] [k] cpu_startup_entry | ---cpu_startup_entry | |--12.07%--start_secondary | --6.30%--rest_init start_kernel x86_64_start_reservations x86_64_start_kernel # Becomes, in "folded" mode: # perf report -g folded | grep -v ^# | head -5 18.37% 0.00% swapper [kernel.kallsyms] [k] cpu_startup_entry 12.07% cpu_startup_entry;start_secondary 6.30% cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel 16.90% 0.00% swapper [kernel.kallsyms] [k] call_cpuidle 11.23% call_cpuidle;cpu_startup_entry;start_secondary 5.67% call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel 16.90% 0.00% swapper [kernel.kallsyms] [k] cpuidle_enter 11.23% cpuidle_enter;call_cpuidle;cpu_startup_entry;start_secondary 5.67% cpuidle_enter;call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel 15.12% 0.00% swapper [kernel.kallsyms] [k] cpuidle_enter_state # The user can also select one of "count", "period" or "percent" as the first column. ... and lots of infrastructure enhancements, plus fixes and other changes, features I failed to list - see the shortlog and the git log for details" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (271 commits) perf evlist: Add --trace-fields option to show trace fields perf record: Store data mmaps for dwarf unwind perf libdw: Check for mmaps also in MAP__VARIABLE tree perf unwind: Check for mmaps also in MAP__VARIABLE tree perf unwind: Use find_map function in access_dso_mem perf evlist: Remove perf_evlist__(enable|disable)_event functions perf evlist: Make perf_evlist__open() open evsels with their cpus and threads (like perf record does) perf report: Show random usage tip on the help line perf hists: Export a couple of hist functions perf diff: Use perf_hpp__register_sort_field interface perf tools: Add overhead/overhead_children keys defaults via string perf tools: Remove list entry from struct sort_entry perf tools: Include all tools/lib directory for tags/cscope/TAGS targets perf script: Align event name properly perf tools: Add missing headers in perf's MANIFEST perf tools: Do not show trace command if it's not compiled in perf report: Change default to use event group view perf top: Decay periods in callchains tools lib: Move bitmap.[ch] from tools/perf/ to tools/{lib,include}/ tools lib: Sync tools/lib/find_bit.c with the kernel ...
Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r--tools/perf/util/machine.c74
1 files changed, 62 insertions, 12 deletions
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8b303ff20289..ad79297c76c8 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -25,6 +25,7 @@ static void dsos__init(struct dsos *dsos)
int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
{
+ memset(machine, 0, sizeof(*machine));
map_groups__init(&machine->kmaps, machine);
RB_CLEAR_NODE(&machine->rb_node);
dsos__init(&machine->dsos);
@@ -44,6 +45,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->comm_exec = false;
machine->kernel_start = 0;
+ memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps));
+
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
return -ENOMEM;
@@ -122,6 +125,7 @@ void machine__delete_threads(struct machine *machine)
void machine__exit(struct machine *machine)
{
+ machine__destroy_kernel_maps(machine);
map_groups__exit(&machine->kmaps);
dsos__exit(&machine->dsos);
machine__exit_vdso(machine);
@@ -348,13 +352,18 @@ static void machine__update_thread_pid(struct machine *machine,
}
th->mg = map_groups__get(leader->mg);
-
+out_put:
+ thread__put(leader);
return;
-
out_err:
pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
+ goto out_put;
}
+/*
+ * Caller must eventually drop thread->refcnt returned with a successfull
+ * lookup/new thread inserted.
+ */
static struct thread *____machine__findnew_thread(struct machine *machine,
pid_t pid, pid_t tid,
bool create)
@@ -372,7 +381,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (th != NULL) {
if (th->tid == tid) {
machine__update_thread_pid(machine, th, pid);
- return th;
+ return thread__get(th);
}
machine->last_match = NULL;
@@ -385,7 +394,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (th->tid == tid) {
machine->last_match = th;
machine__update_thread_pid(machine, th, pid);
- return th;
+ return thread__get(th);
}
if (tid < th->tid)
@@ -413,7 +422,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (thread__init_map_groups(th, machine)) {
rb_erase_init(&th->rb_node, &machine->threads);
RB_CLEAR_NODE(&th->rb_node);
- thread__delete(th);
+ thread__put(th);
return NULL;
}
/*
@@ -437,7 +446,7 @@ struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
struct thread *th;
pthread_rwlock_wrlock(&machine->threads_lock);
- th = thread__get(__machine__findnew_thread(machine, pid, tid));
+ th = __machine__findnew_thread(machine, pid, tid);
pthread_rwlock_unlock(&machine->threads_lock);
return th;
}
@@ -447,7 +456,7 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid,
{
struct thread *th;
pthread_rwlock_rdlock(&machine->threads_lock);
- th = thread__get(____machine__findnew_thread(machine, pid, tid, false));
+ th = ____machine__findnew_thread(machine, pid, tid, false);
pthread_rwlock_unlock(&machine->threads_lock);
return th;
}
@@ -560,11 +569,29 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
return 0;
}
+static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename)
+{
+ const char *dup_filename;
+
+ if (!filename || !dso || !dso->long_name)
+ return;
+ if (dso->long_name[0] != '[')
+ return;
+ if (!strchr(filename, '/'))
+ return;
+
+ dup_filename = strdup(filename);
+ if (!dup_filename)
+ return;
+
+ dso__set_long_name(dso, dup_filename, true);
+}
+
struct map *machine__findnew_module_map(struct machine *machine, u64 start,
const char *filename)
{
struct map *map = NULL;
- struct dso *dso;
+ struct dso *dso = NULL;
struct kmod_path m;
if (kmod_path__parse_name(&m, filename))
@@ -572,8 +599,15 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION,
m.name);
- if (map)
+ if (map) {
+ /*
+ * If the map's dso is an offline module, give dso__load()
+ * a chance to find the file path of that module by fixing
+ * long_name.
+ */
+ dso__adjust_kmod_long_name(map->dso, filename);
goto out;
+ }
dso = machine__findnew_module_dso(machine, &m, filename);
if (dso == NULL)
@@ -585,7 +619,11 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
map_groups__insert(&machine->kmaps, map);
+ /* Put the map here because map_groups__insert alread got it */
+ map__put(map);
out:
+ /* put the dso here, corresponding to machine__findnew_module_dso */
+ dso__put(dso);
free(m.name);
return map;
}
@@ -740,6 +778,9 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
enum map_type type;
u64 start = machine__get_running_kernel_start(machine, NULL);
+ /* In case of renewal the kernel map, destroy previous one */
+ machine__destroy_kernel_maps(machine);
+
for (type = 0; type < MAP__NR_TYPES; ++type) {
struct kmap *kmap;
struct map *map;
@@ -788,6 +829,7 @@ void machine__destroy_kernel_maps(struct machine *machine)
kmap->ref_reloc_sym = NULL;
}
+ map__put(machine->vmlinux_maps[type]);
machine->vmlinux_maps[type] = NULL;
}
}
@@ -1084,11 +1126,14 @@ int machine__create_kernel_maps(struct machine *machine)
struct dso *kernel = machine__get_kernel(machine);
const char *name;
u64 addr = machine__get_running_kernel_start(machine, &name);
- if (!addr)
+ int ret;
+
+ if (!addr || kernel == NULL)
return -1;
- if (kernel == NULL ||
- __machine__create_kernel_maps(machine, kernel) < 0)
+ ret = __machine__create_kernel_maps(machine, kernel);
+ dso__put(kernel);
+ if (ret < 0)
return -1;
if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
@@ -1609,6 +1654,8 @@ static int add_callchain_ip(struct thread *thread,
}
}
+ if (symbol_conf.hide_unresolved && al.sym == NULL)
+ return 0;
return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym);
}
@@ -1863,6 +1910,9 @@ check_calls:
static int unwind_entry(struct unwind_entry *entry, void *arg)
{
struct callchain_cursor *cursor = arg;
+
+ if (symbol_conf.hide_unresolved && entry->sym == NULL)
+ return 0;
return callchain_cursor_append(cursor, entry->ip,
entry->map, entry->sym);
}