Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                    22
-rw-r--r--  kernel/trace/blktrace.c                 36
-rw-r--r--  kernel/trace/bpf_trace.c                50
-rw-r--r--  kernel/trace/fgraph.c                   75
-rw-r--r--  kernel/trace/fprobe.c                  664
-rw-r--r--  kernel/trace/ftrace.c                  211
-rw-r--r--  kernel/trace/pid_list.c                  2
-rw-r--r--  kernel/trace/ring_buffer.c              69
-rw-r--r--  kernel/trace/trace.c                   276
-rw-r--r--  kernel/trace/trace.h                    16
-rw-r--r--  kernel/trace/trace_eprobe.c              5
-rw-r--r--  kernel/trace/trace_events.c            237
-rw-r--r--  kernel/trace/trace_fprobe.c            270
-rw-r--r--  kernel/trace/trace_functions.c           3
-rw-r--r--  kernel/trace/trace_functions_graph.c    47
-rw-r--r--  kernel/trace/trace_irqsoff.c            20
-rw-r--r--  kernel/trace/trace_kprobe.c              8
-rw-r--r--  kernel/trace/trace_output.c              6
-rw-r--r--  kernel/trace/trace_probe_tmpl.h          2
-rw-r--r--  kernel/trace/trace_sched_switch.c        2
-rw-r--r--  kernel/trace/trace_sched_wakeup.c       20
-rw-r--r--  kernel/trace/trace_selftest.c           11
-rw-r--r--  kernel/trace/trace_uprobe.c              6
23 files changed, 1226 insertions(+), 832 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 74c2b1d43bb9..d570b8b9c0a9 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -31,9 +31,14 @@ config HAVE_FUNCTION_GRAPH_TRACER
help
See Documentation/trace/ftrace-design.rst
-config HAVE_FUNCTION_GRAPH_RETVAL
+config HAVE_FUNCTION_GRAPH_FREGS
bool
+config HAVE_FTRACE_GRAPH_FUNC
+ bool
+ help
+ True if ftrace_graph_func() is defined.
+
config HAVE_DYNAMIC_FTRACE
bool
help
@@ -57,6 +62,12 @@ config HAVE_DYNAMIC_FTRACE_WITH_ARGS
This allows for use of ftrace_regs_get_argument() and
ftrace_regs_get_stack_pointer().
+config HAVE_FTRACE_REGS_HAVING_PT_REGS
+ bool
+ help
+ If this is set, ftrace_regs embeds pt_regs, so it can be converted to
+ pt_regs without allocating memory.
+
config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
bool
help
@@ -232,7 +243,7 @@ config FUNCTION_GRAPH_TRACER
config FUNCTION_GRAPH_RETVAL
bool "Kernel Function Graph Return Value"
- depends on HAVE_FUNCTION_GRAPH_RETVAL
+ depends on HAVE_FUNCTION_GRAPH_FREGS
depends on FUNCTION_GRAPH_TRACER
default n
help
@@ -296,10 +307,9 @@ config DYNAMIC_FTRACE_WITH_ARGS
config FPROBE
bool "Kernel Function Probe (fprobe)"
- depends on FUNCTION_TRACER
- depends on DYNAMIC_FTRACE_WITH_REGS
- depends on HAVE_RETHOOK
- select RETHOOK
+ depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC
+ depends on DYNAMIC_FTRACE_WITH_ARGS
+ select FUNCTION_GRAPH_TRACER
default n
help
This option enables kernel function probe (fprobe) based on ftrace.
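With this change fprobe is layered on the function-graph tracer instead of
rethook, and its handlers receive a struct ftrace_regs * rather than a
struct pt_regs *. As a minimal sketch (not part of this patch; the target
symbol and handler bodies are illustrative only), a module using the
reworked callback signatures would look like:

	#include <linux/fprobe.h>
	#include <linux/module.h>

	/* Entry handler: returning 0 lets the exit handler run too. */
	static int my_entry(struct fprobe *fp, unsigned long ip,
			    unsigned long parent_ip, struct ftrace_regs *fregs,
			    void *data)
	{
		pr_info("entry: ip=%lx arg0=%lx\n", ip,
			ftrace_regs_get_argument(fregs, 0));
		return 0;
	}

	static void my_exit(struct fprobe *fp, unsigned long ip,
			    unsigned long ret_ip, struct ftrace_regs *fregs,
			    void *data)
	{
		pr_info("exit: ret=%lx\n", ftrace_regs_get_return_value(fregs));
	}

	static struct fprobe my_fprobe = {
		.entry_handler	= my_entry,
		.exit_handler	= my_exit,
	};

	static int __init my_init(void)
	{
		/* "vfs_read" is only an example filter pattern. */
		return register_fprobe(&my_fprobe, "vfs_read", NULL);
	}

	static void __exit my_cleanup(void)
	{
		unregister_fprobe(&my_fprobe);
	}

	module_init(my_init);
	module_exit(my_cleanup);
	MODULE_LICENSE("GPL");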
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 8fd292d34d89..3679a6d18934 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -617,8 +617,9 @@ err:
return ret;
}
-static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
- struct block_device *bdev, char __user *arg)
+int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+ struct block_device *bdev,
+ char __user *arg)
{
struct blk_user_trace_setup buts;
int ret;
@@ -627,29 +628,18 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (ret)
return -EFAULT;
+ mutex_lock(&q->debugfs_mutex);
ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
+ mutex_unlock(&q->debugfs_mutex);
if (ret)
return ret;
if (copy_to_user(arg, &buts, sizeof(buts))) {
- __blk_trace_remove(q);
+ blk_trace_remove(q);
return -EFAULT;
}
return 0;
}
-
-int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
- struct block_device *bdev,
- char __user *arg)
-{
- int ret;
-
- mutex_lock(&q->debugfs_mutex);
- ret = __blk_trace_setup(q, name, dev, bdev, arg);
- mutex_unlock(&q->debugfs_mutex);
-
- return ret;
-}
EXPORT_SYMBOL_GPL(blk_trace_setup);
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
@@ -673,12 +663,14 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
.pid = cbuts.pid,
};
+ mutex_lock(&q->debugfs_mutex);
ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
+ mutex_unlock(&q->debugfs_mutex);
if (ret)
return ret;
if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
- __blk_trace_remove(q);
+ blk_trace_remove(q);
return -EFAULT;
}
@@ -732,12 +724,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
int ret, start = 0;
char b[BDEVNAME_SIZE];
- mutex_lock(&q->debugfs_mutex);
-
switch (cmd) {
case BLKTRACESETUP:
snprintf(b, sizeof(b), "%pg", bdev);
- ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
+ ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
break;
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
case BLKTRACESETUP32:
@@ -749,17 +739,15 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
start = 1;
fallthrough;
case BLKTRACESTOP:
- ret = __blk_trace_startstop(q, start);
+ ret = blk_trace_startstop(q, start);
break;
case BLKTRACETEARDOWN:
- ret = __blk_trace_remove(q);
+ ret = blk_trace_remove(q);
break;
default:
ret = -ENOTTY;
break;
}
-
- mutex_unlock(&q->debugfs_mutex);
return ret;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 949a3870946c..c462aca8b7e6 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -619,7 +619,8 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
- u64 flags, struct perf_sample_data *sd)
+ u64 flags, struct perf_raw_record *raw,
+ struct perf_sample_data *sd)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
@@ -644,6 +645,8 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
if (unlikely(event->oncpu != cpu))
return -EOPNOTSUPP;
+ perf_sample_save_raw_data(sd, event, raw);
+
return perf_event_output(event, sd, regs);
}
@@ -687,9 +690,8 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
}
perf_sample_data_init(sd, 0, 0);
- perf_sample_save_raw_data(sd, &raw);
- err = __bpf_perf_event_output(regs, map, flags, sd);
+ err = __bpf_perf_event_output(regs, map, flags, &raw, sd);
out:
this_cpu_dec(bpf_trace_nest_level);
preempt_enable();
@@ -748,9 +750,8 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
perf_fetch_caller_regs(regs);
perf_sample_data_init(sd, 0, 0);
- perf_sample_save_raw_data(sd, &raw);
- ret = __bpf_perf_event_output(regs, map, flags, sd);
+ ret = __bpf_perf_event_output(regs, map, flags, &raw, sd);
out:
this_cpu_dec(bpf_event_output_nest_level);
preempt_enable();
@@ -2250,6 +2251,9 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
goto unlock;
old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
+ if (!old_array)
+ goto put;
+
ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
if (ret < 0) {
bpf_prog_array_delete_safe(old_array, event->prog);
@@ -2258,6 +2262,14 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
bpf_prog_array_free_sleepable(old_array);
}
+put:
+ /*
+ * It could be that the bpf_prog is not sleepable (and will be freed
+ * via normal RCU), but is called from a point that supports sleepable
+ * programs and uses tasks-trace-RCU.
+ */
+ synchronize_rcu_tasks_trace();
+
bpf_prog_put(event->prog);
event->prog = NULL;
@@ -2573,6 +2585,20 @@ struct user_syms {
char *buf;
};
+#ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
+static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
+#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
+#else
+#define bpf_kprobe_multi_pt_regs_ptr() (NULL)
+#endif
+
+static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip)
+{
+ unsigned long ip = ftrace_get_symaddr(fentry_ip);
+
+ return ip ? : fentry_ip;
+}
+
static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
{
unsigned long __user usymbol;
@@ -2767,7 +2793,7 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
static int
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
- unsigned long entry_ip, struct pt_regs *regs,
+ unsigned long entry_ip, struct ftrace_regs *fregs,
bool is_return, void *data)
{
struct bpf_kprobe_multi_run_ctx run_ctx = {
@@ -2779,6 +2805,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
.entry_ip = entry_ip,
};
struct bpf_run_ctx *old_run_ctx;
+ struct pt_regs *regs;
int err;
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
@@ -2789,6 +2816,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
migrate_disable();
rcu_read_lock();
+ regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
@@ -2802,26 +2830,28 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
static int
kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *data)
{
struct bpf_kprobe_multi_link *link;
int err;
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
- err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, false, data);
+ err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
+ fregs, false, data);
return is_kprobe_session(link->link.prog) ? err : 0;
}
static void
kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *data)
{
struct bpf_kprobe_multi_link *link;
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
- kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, true, data);
+ kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
+ fregs, true, data);
}
static int symbols_cmp_r(const void *a, const void *b, const void *priv)
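The hunks above switch the kprobe-multi path from pt_regs to ftrace_regs.
On architectures without CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS, ftrace_regs
is only a partial register set, so a per-CPU pt_regs scratch buffer is passed
to ftrace_partial_regs(); where ftrace_regs already embeds pt_regs, NULL is
enough because the conversion needs no extra memory. The pattern in
isolation (a sketch; the my_* names are illustrative):

	#include <linux/ftrace.h>
	#include <linux/percpu.h>

	#ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
	/* Scratch pt_regs for archs where ftrace_regs is partial. */
	static DEFINE_PER_CPU(struct pt_regs, my_pt_regs);
	#define my_pt_regs_ptr()	this_cpu_ptr(&my_pt_regs)
	#else
	/* ftrace_regs embeds pt_regs; no scratch buffer is needed. */
	#define my_pt_regs_ptr()	(NULL)
	#endif

	static void my_handler(struct ftrace_regs *fregs)
	{
		struct pt_regs *regs;

		/*
		 * Like kprobe_multi_link_prog_run() above, this must run
		 * with preemption or migration disabled, since the scratch
		 * buffer is per-CPU.
		 */
		regs = ftrace_partial_regs(fregs, my_pt_regs_ptr());

		/* ... use regs as a (possibly partial) pt_regs view ... */
	}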
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 0bf78517b5d4..9e6b5a71555b 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -292,13 +292,15 @@ static inline unsigned long make_data_type_val(int idx, int size, int offset)
}
/* ftrace_graph_entry set to this to tell some archs to run function graph */
-static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops)
+static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops,
+ struct ftrace_regs *fregs)
{
return 0;
}
/* ftrace_graph_return set to this to tell some archs to run function graph */
-static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops)
+static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops,
+ struct ftrace_regs *fregs)
{
}
@@ -520,13 +522,15 @@ int __weak ftrace_disable_ftrace_graph_caller(void)
#endif
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
return 0;
}
static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
}
@@ -644,14 +648,20 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
#endif
/* If the caller does not use ftrace, call this function. */
-int function_graph_enter(unsigned long ret, unsigned long func,
- unsigned long frame_pointer, unsigned long *retp)
+int function_graph_enter_regs(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp,
+ struct ftrace_regs *fregs)
{
struct ftrace_graph_ent trace;
unsigned long bitmap = 0;
int offset;
+ int bit;
int i;
+ bit = ftrace_test_recursion_trylock(func, ret);
+ if (bit < 0)
+ return -EBUSY;
+
trace.func = func;
trace.depth = ++current->curr_ret_depth;
@@ -663,7 +673,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
if (static_branch_likely(&fgraph_do_direct)) {
int save_curr_ret_stack = current->curr_ret_stack;
- if (static_call(fgraph_func)(&trace, fgraph_direct_gops))
+ if (static_call(fgraph_func)(&trace, fgraph_direct_gops, fregs))
bitmap |= BIT(fgraph_direct_gops->idx);
else
/* Clear out any saved storage */
@@ -681,7 +691,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
save_curr_ret_stack = current->curr_ret_stack;
if (ftrace_ops_test(&gops->ops, func, NULL) &&
- gops->entryfunc(&trace, gops))
+ gops->entryfunc(&trace, gops, fregs))
bitmap |= BIT(i);
else
/* Clear out any saved storage */
@@ -697,12 +707,13 @@ int function_graph_enter(unsigned long ret, unsigned long func,
* flag, set that bit always.
*/
set_bitmap(current, offset, bitmap | BIT(0));
-
+ ftrace_test_recursion_unlock(bit);
return 0;
out_ret:
current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1;
out:
current->curr_ret_depth--;
+ ftrace_test_recursion_unlock(bit);
return -EBUSY;
}
@@ -792,15 +803,12 @@ static struct notifier_block ftrace_suspend_notifier = {
.notifier_call = ftrace_suspend_notifier_call,
};
-/* fgraph_ret_regs is not defined without CONFIG_FUNCTION_GRAPH_RETVAL */
-struct fgraph_ret_regs;
-
/*
* Send the trace to the ring-buffer.
* @return the original return address.
*/
-static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs,
- unsigned long frame_pointer)
+static inline unsigned long
+__ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointer)
{
struct ftrace_ret_stack *ret_stack;
struct ftrace_graph_ret trace;
@@ -819,8 +827,11 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
}
trace.rettime = trace_clock_local();
+ if (fregs)
+ ftrace_regs_set_instruction_pointer(fregs, ret);
+
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
- trace.retval = fgraph_ret_regs_return_value(ret_regs);
+ trace.retval = ftrace_regs_get_return_value(fregs);
#endif
bitmap = get_bitmap_bits(current, offset);
@@ -828,17 +839,17 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
#ifdef CONFIG_HAVE_STATIC_CALL
if (static_branch_likely(&fgraph_do_direct)) {
if (test_bit(fgraph_direct_gops->idx, &bitmap))
- static_call(fgraph_retfunc)(&trace, fgraph_direct_gops);
+ static_call(fgraph_retfunc)(&trace, fgraph_direct_gops, fregs);
} else
#endif
{
for_each_set_bit(i, &bitmap, sizeof(bitmap) * BITS_PER_BYTE) {
- struct fgraph_ops *gops = fgraph_array[i];
+ struct fgraph_ops *gops = READ_ONCE(fgraph_array[i]);
if (gops == &fgraph_stub)
continue;
- gops->retfunc(&trace, gops);
+ gops->retfunc(&trace, gops, fregs);
}
}
@@ -855,14 +866,14 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
}
/*
- * After all architecures have selected HAVE_FUNCTION_GRAPH_RETVAL, we can
- * leave only ftrace_return_to_handler(ret_regs).
+ * After all architectures have selected HAVE_FUNCTION_GRAPH_FREGS, we can
+ * leave only ftrace_return_to_handler(fregs).
*/
-#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL
-unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs)
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS
+unsigned long ftrace_return_to_handler(struct ftrace_regs *fregs)
{
- return __ftrace_return_to_handler(ret_regs,
- fgraph_ret_regs_frame_pointer(ret_regs));
+ return __ftrace_return_to_handler(fregs,
+ ftrace_regs_get_frame_pointer(fregs));
}
#else
unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
@@ -1010,7 +1021,8 @@ void ftrace_graph_sleep_time_control(bool enable)
* Simply points to ftrace_stub, but with the proper protocol.
* Defined by the linker script in linux/vmlinux.lds.h
*/
-void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops);
+void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops,
+ struct ftrace_regs *fregs);
/* The callbacks that hook a function */
trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
@@ -1174,7 +1186,8 @@ void ftrace_graph_exit_task(struct task_struct *t)
#ifdef CONFIG_DYNAMIC_FTRACE
static int fgraph_pid_func(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct trace_array *tr = gops->ops.private;
int pid;
@@ -1188,7 +1201,7 @@ static int fgraph_pid_func(struct ftrace_graph_ent *trace,
return 0;
}
- return gops->saved_func(trace, gops);
+ return gops->saved_func(trace, gops, fregs);
}
void fgraph_update_pid_func(void)
@@ -1215,7 +1228,7 @@ void fgraph_update_pid_func(void)
static int start_graph_tracing(void)
{
unsigned long **ret_stack_list;
- int ret;
+ int ret, cpu;
ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE,
sizeof(*ret_stack_list), GFP_KERNEL);
@@ -1223,6 +1236,12 @@ static int start_graph_tracing(void)
if (!ret_stack_list)
return -ENOMEM;
+ /* The ret_stack of each CPU's idle task is never freed once allocated */
+ for_each_online_cpu(cpu) {
+ if (!idle_task(cpu)->ret_stack)
+ ftrace_graph_init_idle_task(idle_task(cpu), cpu);
+ }
+
do {
ret = alloc_retstack_tasklist(ret_stack_list);
} while (ret == -EAGAIN);
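Every fgraph entry/return callback now takes a third struct ftrace_regs *
argument, which carries the function arguments on entry and the return value
on exit when the architecture captures them. A minimal sketch against the
new prototypes (callback bodies are illustrative; fregs may be NULL on paths
that do not supply registers, hence the checks):

	#include <linux/ftrace.h>
	#include <linux/printk.h>

	static int my_graph_entry(struct ftrace_graph_ent *trace,
				  struct fgraph_ops *gops,
				  struct ftrace_regs *fregs)
	{
		if (fregs)
			pr_info("enter %ps arg0=%lx\n", (void *)trace->func,
				ftrace_regs_get_argument(fregs, 0));
		return 1;	/* nonzero: also trace this function's return */
	}

	static void my_graph_return(struct ftrace_graph_ret *trace,
				    struct fgraph_ops *gops,
				    struct ftrace_regs *fregs)
	{
		if (fregs)
			pr_info("leave %ps ret=%lx\n", (void *)trace->func,
				ftrace_regs_get_return_value(fregs));
	}

	static struct fgraph_ops my_gops = {
		.entryfunc	= my_graph_entry,
		.retfunc	= my_graph_return,
	};

	/* Enable with register_ftrace_graph(&my_gops), disable with
	 * unregister_ftrace_graph(&my_gops). */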
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 9ff018245840..2560b312ad57 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -8,98 +8,224 @@
#include <linux/fprobe.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
-#include <linux/rethook.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/sort.h>
+#include <asm/fprobe.h>
+
#include "trace.h"
-struct fprobe_rethook_node {
- struct rethook_node node;
- unsigned long entry_ip;
- unsigned long entry_parent_ip;
- char data[];
-};
+#define FPROBE_IP_HASH_BITS 8
+#define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS)
-static inline void __fprobe_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
-{
- struct fprobe_rethook_node *fpr;
- struct rethook_node *rh = NULL;
- struct fprobe *fp;
- void *entry_data = NULL;
- int ret = 0;
+#define FPROBE_HASH_BITS 6
+#define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS)
- fp = container_of(ops, struct fprobe, ops);
+#define SIZE_IN_LONG(x) ((x + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2))
- if (fp->exit_handler) {
- rh = rethook_try_get(fp->rethook);
- if (!rh) {
- fp->nmissed++;
- return;
- }
- fpr = container_of(rh, struct fprobe_rethook_node, node);
- fpr->entry_ip = ip;
- fpr->entry_parent_ip = parent_ip;
- if (fp->entry_data_size)
- entry_data = fpr->data;
+/*
+ * fprobe_table: holds 'fprobe_hlist::hlist' for checking whether the fprobe
+ * still exists. The key is the address of the fprobe instance.
+ * fprobe_ip_table: holds 'fprobe_hlist::array[*]' for searching the fprobe
+ * instance related to the function address. The key is the ftrace IP
+ * address.
+ *
+ * When unregistering the fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp
+ * are set to NULL and deleted from both hash tables (by hlist_del_rcu).
+ * After an RCU grace period, the fprobe_hlist itself will be released.
+ *
+ * fprobe_table and fprobe_ip_table can be accessed from either:
+ * - normal hlist traversal and RCU add/del while 'fprobe_mutex' is held, or
+ * - RCU hlist traversal with preemption disabled.
+ */
+static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
+static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE];
+static DEFINE_MUTEX(fprobe_mutex);
+
+/*
+ * Find the first fprobe in the hlist. The hlist is iterated twice in the
+ * entry probe: once to compute the total required data size, and a second
+ * time to call back the user handlers.
+ * Thus the hlist in the fprobe_ip_table must be kept sorted, and a new probe
+ * needs to be added *before* the first existing fprobe for the same address.
+ */
+static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip)
+{
+ struct fprobe_hlist_node *node;
+ struct hlist_head *head;
+
+ head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
+ hlist_for_each_entry_rcu(node, head, hlist,
+ lockdep_is_held(&fprobe_mutex)) {
+ if (node->addr == ip)
+ return node;
}
+ return NULL;
+}
+NOKPROBE_SYMBOL(find_first_fprobe_node);
- if (fp->entry_handler)
- ret = fp->entry_handler(fp, ip, parent_ip, ftrace_get_regs(fregs), entry_data);
+/* Node insertion and deletion requires the fprobe_mutex */
+static void insert_fprobe_node(struct fprobe_hlist_node *node)
+{
+ unsigned long ip = node->addr;
+ struct fprobe_hlist_node *next;
+ struct hlist_head *head;
- /* If entry_handler returns !0, nmissed is not counted. */
- if (rh) {
- if (ret)
- rethook_recycle(rh);
- else
- rethook_hook(rh, ftrace_get_regs(fregs), true);
+ lockdep_assert_held(&fprobe_mutex);
+
+ next = find_first_fprobe_node(ip);
+ if (next) {
+ hlist_add_before_rcu(&node->hlist, &next->hlist);
+ return;
}
+ head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
+ hlist_add_head_rcu(&node->hlist, head);
}
-static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
+/* Return true if there are synonyms (other nodes with the same address) */
+static bool delete_fprobe_node(struct fprobe_hlist_node *node)
{
- struct fprobe *fp;
- int bit;
+ lockdep_assert_held(&fprobe_mutex);
- fp = container_of(ops, struct fprobe, ops);
- if (fprobe_disabled(fp))
- return;
+ WRITE_ONCE(node->fp, NULL);
+ hlist_del_rcu(&node->hlist);
+ return !!find_first_fprobe_node(node->addr);
+}
- /* recursion detection has to go before any traceable function and
- * all functions before this point should be marked as notrace
- */
- bit = ftrace_test_recursion_trylock(ip, parent_ip);
- if (bit < 0) {
- fp->nmissed++;
- return;
+/* Check existence of the fprobe */
+static bool is_fprobe_still_exist(struct fprobe *fp)
+{
+ struct hlist_head *head;
+ struct fprobe_hlist *fph;
+
+ head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
+ hlist_for_each_entry_rcu(fph, head, hlist,
+ lockdep_is_held(&fprobe_mutex)) {
+ if (fph->fp == fp)
+ return true;
}
- __fprobe_handler(ip, parent_ip, ops, fregs);
- ftrace_test_recursion_unlock(bit);
+ return false;
+}
+NOKPROBE_SYMBOL(is_fprobe_still_exist);
+
+static int add_fprobe_hash(struct fprobe *fp)
+{
+ struct fprobe_hlist *fph = fp->hlist_array;
+ struct hlist_head *head;
+
+ lockdep_assert_held(&fprobe_mutex);
+
+ if (WARN_ON_ONCE(!fph))
+ return -EINVAL;
+
+ if (is_fprobe_still_exist(fp))
+ return -EEXIST;
+
+ head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
+ hlist_add_head_rcu(&fp->hlist_array->hlist, head);
+ return 0;
+}
+
+static int del_fprobe_hash(struct fprobe *fp)
+{
+ struct fprobe_hlist *fph = fp->hlist_array;
+ lockdep_assert_held(&fprobe_mutex);
+
+ if (WARN_ON_ONCE(!fph))
+ return -EINVAL;
+
+ if (!is_fprobe_still_exist(fp))
+ return -ENOENT;
+
+ fph->fp = NULL;
+ hlist_del_rcu(&fph->hlist);
+ return 0;
}
-NOKPROBE_SYMBOL(fprobe_handler);
-static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
+#ifdef ARCH_DEFINE_ENCODE_FPROBE_HEADER
+
+/* The arch should encode fprobe_header info into one unsigned long */
+#define FPROBE_HEADER_SIZE_IN_LONG 1
+
+static inline bool write_fprobe_header(unsigned long *stack,
+ struct fprobe *fp, unsigned int size_words)
{
+ if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD ||
+ !arch_fprobe_header_encodable(fp)))
+ return false;
+
+ *stack = arch_encode_fprobe_header(fp, size_words);
+ return true;
+}
+
+static inline void read_fprobe_header(unsigned long *stack,
+ struct fprobe **fp, unsigned int *size_words)
+{
+ *fp = arch_decode_fprobe_header_fp(*stack);
+ *size_words = arch_decode_fprobe_header_size(*stack);
+}
+
+#else
+
+/* Generic fprobe_header */
+struct __fprobe_header {
struct fprobe *fp;
- int bit;
+ unsigned long size_words;
+} __packed;
- fp = container_of(ops, struct fprobe, ops);
- if (fprobe_disabled(fp))
- return;
+#define FPROBE_HEADER_SIZE_IN_LONG SIZE_IN_LONG(sizeof(struct __fprobe_header))
- /* recursion detection has to go before any traceable function and
- * all functions called before this point should be marked as notrace
- */
- bit = ftrace_test_recursion_trylock(ip, parent_ip);
- if (bit < 0) {
- fp->nmissed++;
- return;
- }
+static inline bool write_fprobe_header(unsigned long *stack,
+ struct fprobe *fp, unsigned int size_words)
+{
+ struct __fprobe_header *fph = (struct __fprobe_header *)stack;
+
+ if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD))
+ return false;
+
+ fph->fp = fp;
+ fph->size_words = size_words;
+ return true;
+}
+
+static inline void read_fprobe_header(unsigned long *stack,
+ struct fprobe **fp, unsigned int *size_words)
+{
+ struct __fprobe_header *fph = (struct __fprobe_header *)stack;
+
+ *fp = fph->fp;
+ *size_words = fph->size_words;
+}
+
+#endif
+
+/*
+ * fprobe shadow stack management:
+ * Since fprobe shares a single fgraph_ops, it needs to share the stack entry
+ * among the probes on the same function exit. Because a new probe can be
+ * registered while a target function is still returning, we cannot use the
+ * hash table to find the corresponding probes. Thus the probe address is
+ * stored on the shadow stack together with its entry data size.
+ */
+static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ struct fprobe *fp, struct ftrace_regs *fregs,
+ void *data)
+{
+ if (!fp->entry_handler)
+ return 0;
+
+ return fp->entry_handler(fp, ip, parent_ip, fregs, data);
+}
+static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
+ struct fprobe *fp, struct ftrace_regs *fregs,
+ void *data)
+{
+ int ret;
/*
* This user handler is shared with other kprobes and is not expected to be
* called recursively. So if any other kprobe handler is running, this will
@@ -108,44 +234,183 @@ static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
*/
if (unlikely(kprobe_running())) {
fp->nmissed++;
- goto recursion_unlock;
+ return 0;
}
kprobe_busy_begin();
- __fprobe_handler(ip, parent_ip, ops, fregs);
+ ret = __fprobe_handler(ip, parent_ip, fp, fregs, data);
kprobe_busy_end();
-
-recursion_unlock:
- ftrace_test_recursion_unlock(bit);
+ return ret;
}
-static void fprobe_exit_handler(struct rethook_node *rh, void *data,
- unsigned long ret_ip, struct pt_regs *regs)
+static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
- struct fprobe *fp = (struct fprobe *)data;
- struct fprobe_rethook_node *fpr;
- int bit;
+ struct fprobe_hlist_node *node, *first;
+ unsigned long *fgraph_data = NULL;
+ unsigned long func = trace->func;
+ unsigned long ret_ip;
+ int reserved_words;
+ struct fprobe *fp;
+ int used, ret;
- if (!fp || fprobe_disabled(fp))
- return;
+ if (WARN_ON_ONCE(!fregs))
+ return 0;
- fpr = container_of(rh, struct fprobe_rethook_node, node);
+ first = node = find_first_fprobe_node(func);
+ if (unlikely(!first))
+ return 0;
+
+ reserved_words = 0;
+ hlist_for_each_entry_from_rcu(node, hlist) {
+ if (node->addr != func)
+ break;
+ fp = READ_ONCE(node->fp);
+ if (!fp || !fp->exit_handler)
+ continue;
+ /*
+ * Since the fprobe can become enabled before the next loop runs, we
+ * ignore the fprobe's disabled flag in this loop.
+ */
+ reserved_words +=
+ FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
+ }
+ node = first;
+ if (reserved_words) {
+ fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
+ if (unlikely(!fgraph_data)) {
+ hlist_for_each_entry_from_rcu(node, hlist) {
+ if (node->addr != func)
+ break;
+ fp = READ_ONCE(node->fp);
+ if (fp && !fprobe_disabled(fp))
+ fp->nmissed++;
+ }
+ return 0;
+ }
+ }
/*
- * we need to assure no calls to traceable functions in-between the
- * end of fprobe_handler and the beginning of fprobe_exit_handler.
+ * TODO: recursion detection is already done by fgraph. Thus we need
+ * to add a callback there to increment the missed counter.
*/
- bit = ftrace_test_recursion_trylock(fpr->entry_ip, fpr->entry_parent_ip);
- if (bit < 0) {
- fp->nmissed++;
+ ret_ip = ftrace_regs_get_return_address(fregs);
+ used = 0;
+ hlist_for_each_entry_from_rcu(node, hlist) {
+ int data_size;
+ void *data;
+
+ if (node->addr != func)
+ break;
+ fp = READ_ONCE(node->fp);
+ if (!fp || fprobe_disabled(fp))
+ continue;
+
+ data_size = fp->entry_data_size;
+ if (data_size && fp->exit_handler)
+ data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
+ else
+ data = NULL;
+
+ if (fprobe_shared_with_kprobes(fp))
+ ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data);
+ else
+ ret = __fprobe_handler(func, ret_ip, fp, fregs, data);
+
+ /* If entry_handler returns nonzero, nmissed is not counted and exit_handler is skipped. */
+ if (!ret && fp->exit_handler) {
+ int size_words = SIZE_IN_LONG(data_size);
+
+ if (write_fprobe_header(&fgraph_data[used], fp, size_words))
+ used += FPROBE_HEADER_SIZE_IN_LONG + size_words;
+ }
+ }
+ if (used < reserved_words)
+ memset(fgraph_data + used, 0, reserved_words - used);
+
+ /* If any exit_handler is set, data must be used. */
+ return used != 0;
+}
+NOKPROBE_SYMBOL(fprobe_entry);
+
+static void fprobe_return(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
+{
+ unsigned long *fgraph_data = NULL;
+ unsigned long ret_ip;
+ struct fprobe *fp;
+ int size, curr;
+ int size_words;
+
+ fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size);
+ if (WARN_ON_ONCE(!fgraph_data))
return;
+ size_words = SIZE_IN_LONG(size);
+ ret_ip = ftrace_regs_get_instruction_pointer(fregs);
+
+ preempt_disable();
+
+ curr = 0;
+ while (size_words > curr) {
+ read_fprobe_header(&fgraph_data[curr], &fp, &size);
+ if (!fp)
+ break;
+ curr += FPROBE_HEADER_SIZE_IN_LONG;
+ if (is_fprobe_still_exist(fp) && !fprobe_disabled(fp)) {
+ if (WARN_ON_ONCE(curr + size > size_words))
+ break;
+ fp->exit_handler(fp, trace->func, ret_ip, fregs,
+ size ? fgraph_data + curr : NULL);
+ }
+ curr += size;
}
+ preempt_enable();
+}
+NOKPROBE_SYMBOL(fprobe_return);
+
+static struct fgraph_ops fprobe_graph_ops = {
+ .entryfunc = fprobe_entry,
+ .retfunc = fprobe_return,
+};
+static int fprobe_graph_active;
- fp->exit_handler(fp, fpr->entry_ip, ret_ip, regs,
- fp->entry_data_size ? (void *)fpr->data : NULL);
- ftrace_test_recursion_unlock(bit);
+/* Add @addrs to the ftrace filter and register fgraph if needed. */
+static int fprobe_graph_add_ips(unsigned long *addrs, int num)
+{
+ int ret;
+
+ lockdep_assert_held(&fprobe_mutex);
+
+ ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0);
+ if (ret)
+ return ret;
+
+ if (!fprobe_graph_active) {
+ ret = register_ftrace_graph(&fprobe_graph_ops);
+ if (WARN_ON_ONCE(ret)) {
+ ftrace_free_filter(&fprobe_graph_ops.ops);
+ return ret;
+ }
+ }
+ fprobe_graph_active++;
+ return 0;
+}
+
+/* Remove @addrs from the ftrace filter and unregister fgraph if possible. */
+static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
+{
+ lockdep_assert_held(&fprobe_mutex);
+
+ fprobe_graph_active--;
+ if (!fprobe_graph_active) {
+ /* Q: should we unregister it? */
+ unregister_ftrace_graph(&fprobe_graph_ops);
+ return;
+ }
+
+ ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
-NOKPROBE_SYMBOL(fprobe_exit_handler);
static int symbols_cmp(const void *a, const void *b)
{
@@ -175,53 +440,97 @@ static unsigned long *get_ftrace_locations(const char **syms, int num)
return ERR_PTR(-ENOENT);
}
-static void fprobe_init(struct fprobe *fp)
-{
- fp->nmissed = 0;
- if (fprobe_shared_with_kprobes(fp))
- fp->ops.func = fprobe_kprobe_handler;
- else
- fp->ops.func = fprobe_handler;
- fp->ops.flags |= FTRACE_OPS_FL_SAVE_REGS;
-}
+struct filter_match_data {
+ const char *filter;
+ const char *notfilter;
+ size_t index;
+ size_t size;
+ unsigned long *addrs;
+};
-static int fprobe_init_rethook(struct fprobe *fp, int num)
+static int filter_match_callback(void *data, const char *name, unsigned long addr)
{
- int size;
+ struct filter_match_data *match = data;
- if (!fp->exit_handler) {
- fp->rethook = NULL;
+ if (!glob_match(match->filter, name) ||
+ (match->notfilter && glob_match(match->notfilter, name)))
return 0;
- }
- /* Initialize rethook if needed */
- if (fp->nr_maxactive)
- num = fp->nr_maxactive;
- else
- num *= num_possible_cpus() * 2;
- if (num <= 0)
- return -EINVAL;
+ if (!ftrace_location(addr))
+ return 0;
+
+ if (match->addrs)
+ match->addrs[match->index] = addr;
- size = sizeof(struct fprobe_rethook_node) + fp->entry_data_size;
+ match->index++;
+ return match->index == match->size;
+}
- /* Initialize rethook */
- fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, size, num);
- if (IS_ERR(fp->rethook))
- return PTR_ERR(fp->rethook);
+/*
+ * Make IP list from the filter/no-filter glob patterns.
+ * Return the number of matched symbols, or -ENOENT.
+ */
+static int ip_list_from_filter(const char *filter, const char *notfilter,
+ unsigned long *addrs, size_t size)
+{
+ struct filter_match_data match = { .filter = filter, .notfilter = notfilter,
+ .index = 0, .size = size, .addrs = addrs};
+ int ret;
- return 0;
+ ret = kallsyms_on_each_symbol(filter_match_callback, &match);
+ if (ret < 0)
+ return ret;
+ ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
+ if (ret < 0)
+ return ret;
+
+ return match.index ?: -ENOENT;
}
static void fprobe_fail_cleanup(struct fprobe *fp)
{
- if (!IS_ERR_OR_NULL(fp->rethook)) {
- /* Don't need to cleanup rethook->handler because this is not used. */
- rethook_free(fp->rethook);
- fp->rethook = NULL;
+ kfree(fp->hlist_array);
+ fp->hlist_array = NULL;
+}
+
+/* Initialize the fprobe data structure. */
+static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
+{
+ struct fprobe_hlist *hlist_array;
+ unsigned long addr;
+ int size, i;
+
+ if (!fp || !addrs || num <= 0)
+ return -EINVAL;
+
+ size = ALIGN(fp->entry_data_size, sizeof(long));
+ if (size > MAX_FPROBE_DATA_SIZE)
+ return -E2BIG;
+ fp->entry_data_size = size;
+
+ hlist_array = kzalloc(struct_size(hlist_array, array, num), GFP_KERNEL);
+ if (!hlist_array)
+ return -ENOMEM;
+
+ fp->nmissed = 0;
+
+ hlist_array->size = num;
+ fp->hlist_array = hlist_array;
+ hlist_array->fp = fp;
+ for (i = 0; i < num; i++) {
+ hlist_array->array[i].fp = fp;
+ addr = ftrace_location(addrs[i]);
+ if (!addr) {
+ fprobe_fail_cleanup(fp);
+ return -ENOENT;
+ }
+ hlist_array->array[i].addr = addr;
}
- ftrace_free_filter(&fp->ops);
+ return 0;
}
+#define FPROBE_IPS_MAX INT_MAX
+
/**
* register_fprobe() - Register fprobe to ftrace by pattern.
* @fp: A fprobe data structure to be registered.
@@ -235,46 +544,24 @@ static void fprobe_fail_cleanup(struct fprobe *fp)
*/
int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
{
- struct ftrace_hash *hash;
- unsigned char *str;
- int ret, len;
+ unsigned long *addrs;
+ int ret;
if (!fp || !filter)
return -EINVAL;
- fprobe_init(fp);
-
- len = strlen(filter);
- str = kstrdup(filter, GFP_KERNEL);
- ret = ftrace_set_filter(&fp->ops, str, len, 0);
- kfree(str);
- if (ret)
+ ret = ip_list_from_filter(filter, notfilter, NULL, FPROBE_IPS_MAX);
+ if (ret < 0)
return ret;
- if (notfilter) {
- len = strlen(notfilter);
- str = kstrdup(notfilter, GFP_KERNEL);
- ret = ftrace_set_notrace(&fp->ops, str, len, 0);
- kfree(str);
- if (ret)
- goto out;
- }
-
- /* TODO:
- * correctly calculate the total number of filtered symbols
- * from both filter and notfilter.
- */
- hash = rcu_access_pointer(fp->ops.local_hash.filter_hash);
- if (WARN_ON_ONCE(!hash))
- goto out;
-
- ret = fprobe_init_rethook(fp, (int)hash->count);
- if (!ret)
- ret = register_ftrace_function(&fp->ops);
+ addrs = kcalloc(ret, sizeof(unsigned long), GFP_KERNEL);
+ if (!addrs)
+ return -ENOMEM;
+ ret = ip_list_from_filter(filter, notfilter, addrs, ret);
+ if (ret > 0)
+ ret = register_fprobe_ips(fp, addrs, ret);
-out:
- if (ret)
- fprobe_fail_cleanup(fp);
+ kfree(addrs);
return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe);
@@ -282,7 +569,7 @@ EXPORT_SYMBOL_GPL(register_fprobe);
/**
* register_fprobe_ips() - Register fprobe to ftrace by address.
* @fp: A fprobe data structure to be registered.
- * @addrs: An array of target ftrace location addresses.
+ * @addrs: An array of target function addresses.
* @num: The number of entries of @addrs.
*
* Register @fp to ftrace for enabling the probe on the address given by @addrs.
@@ -294,23 +581,27 @@ EXPORT_SYMBOL_GPL(register_fprobe);
*/
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
{
- int ret;
-
- if (!fp || !addrs || num <= 0)
- return -EINVAL;
-
- fprobe_init(fp);
+ struct fprobe_hlist *hlist_array;
+ int ret, i;
- ret = ftrace_set_filter_ips(&fp->ops, addrs, num, 0, 0);
+ ret = fprobe_init(fp, addrs, num);
if (ret)
return ret;
- ret = fprobe_init_rethook(fp, num);
- if (!ret)
- ret = register_ftrace_function(&fp->ops);
+ mutex_lock(&fprobe_mutex);
+
+ hlist_array = fp->hlist_array;
+ ret = fprobe_graph_add_ips(addrs, num);
+ if (!ret) {
+ add_fprobe_hash(fp);
+ for (i = 0; i < hlist_array->size; i++)
+ insert_fprobe_node(&hlist_array->array[i]);
+ }
+ mutex_unlock(&fprobe_mutex);
if (ret)
fprobe_fail_cleanup(fp);
+
return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_ips);
@@ -348,14 +639,13 @@ EXPORT_SYMBOL_GPL(register_fprobe_syms);
bool fprobe_is_registered(struct fprobe *fp)
{
- if (!fp || (fp->ops.saved_func != fprobe_handler &&
- fp->ops.saved_func != fprobe_kprobe_handler))
+ if (!fp || !fp->hlist_array)
return false;
return true;
}
/**
- * unregister_fprobe() - Unregister fprobe from ftrace
+ * unregister_fprobe() - Unregister fprobe.
* @fp: A fprobe data structure to be unregistered.
*
* Unregister fprobe (and remove ftrace hooks from the function entries).
@@ -364,23 +654,41 @@ bool fprobe_is_registered(struct fprobe *fp)
*/
int unregister_fprobe(struct fprobe *fp)
{
- int ret;
+ struct fprobe_hlist *hlist_array;
+ unsigned long *addrs = NULL;
+ int ret = 0, i, count;
- if (!fprobe_is_registered(fp))
- return -EINVAL;
+ mutex_lock(&fprobe_mutex);
+ if (!fp || !is_fprobe_still_exist(fp)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ hlist_array = fp->hlist_array;
+ addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL);
+ if (!addrs) {
+ ret = -ENOMEM; /* TODO: Fallback to one-by-one loop */
+ goto out;
+ }
- if (!IS_ERR_OR_NULL(fp->rethook))
- rethook_stop(fp->rethook);
+ /* Remove non-synonym ips from the table and hash */
+ count = 0;
+ for (i = 0; i < hlist_array->size; i++) {
+ if (!delete_fprobe_node(&hlist_array->array[i]))
+ addrs[count++] = hlist_array->array[i].addr;
+ }
+ del_fprobe_hash(fp);
- ret = unregister_ftrace_function(&fp->ops);
- if (ret < 0)
- return ret;
+ if (count)
+ fprobe_graph_remove_ips(addrs, count);
- if (!IS_ERR_OR_NULL(fp->rethook))
- rethook_free(fp->rethook);
+ kfree_rcu(hlist_array, rcu);
+ fp->hlist_array = NULL;
- ftrace_free_filter(&fp->ops);
+out:
+ mutex_unlock(&fprobe_mutex);
+ kfree(addrs);
return ret;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);
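To make the shadow-stack arithmetic above concrete, here is a worked
example, assuming a 64-bit build and the generic (non-arch-encoded) header
path; the byte counts are illustrative:

	/*
	 * entry_data_size = 12 bytes
	 *   SIZE_IN_LONG(12)             = (12 + 7) >> 3 = 2 words
	 * sizeof(struct __fprobe_header) = 16 bytes (fp + size_words)
	 *   FPROBE_HEADER_SIZE_IN_LONG   = SIZE_IN_LONG(16) = 2 words
	 *
	 * fprobe_entry() therefore reserves 2 + 2 = 4 words on the fgraph
	 * shadow stack for this probe, laid out as:
	 *
	 *   fgraph_data[used + 0..1]   header: { fp, size_words = 2 }
	 *   fgraph_data[used + 2..3]   entry data passed to the handlers
	 *
	 * fprobe_return() walks the same words back: read_fprobe_header()
	 * recovers (fp, size_words), and the entry data follows directly
	 * after the header.
	 */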
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9b17efb1a87d..b2955e504193 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -536,24 +536,21 @@ static int function_stat_show(struct seq_file *m, void *v)
{
struct ftrace_profile *rec = v;
char str[KSYM_SYMBOL_LEN];
- int ret = 0;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static struct trace_seq s;
unsigned long long avg;
unsigned long long stddev;
#endif
- mutex_lock(&ftrace_profile_lock);
+ guard(mutex)(&ftrace_profile_lock);
/* we raced with function_profile_reset() */
- if (unlikely(rec->counter == 0)) {
- ret = -EBUSY;
- goto out;
- }
+ if (unlikely(rec->counter == 0))
+ return -EBUSY;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
avg = div64_ul(rec->time, rec->counter);
if (tracing_thresh && (avg < tracing_thresh))
- goto out;
+ return 0;
#endif
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
@@ -590,10 +587,8 @@ static int function_stat_show(struct seq_file *m, void *v)
trace_print_seq(m, &s);
#endif
seq_putc(m, '\n');
-out:
- mutex_unlock(&ftrace_profile_lock);
- return ret;
+ return 0;
}
static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
@@ -789,27 +784,24 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
{
struct ftrace_profile_stat *stat;
struct ftrace_profile *rec;
- unsigned long flags;
if (!ftrace_profile_enabled)
return;
- local_irq_save(flags);
+ guard(preempt_notrace)();
stat = this_cpu_ptr(&ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
- goto out;
+ return;
rec = ftrace_find_profiled_func(stat, ip);
if (!rec) {
rec = ftrace_profile_alloc(stat, ip);
if (!rec)
- goto out;
+ return;
}
rec->counter++;
- out:
- local_irq_restore(flags);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -827,7 +819,8 @@ struct profile_fgraph_data {
};
static int profile_graph_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct profile_fgraph_data *profile_data;
@@ -849,26 +842,27 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace,
}
static void profile_graph_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct profile_fgraph_data *profile_data;
struct ftrace_profile_stat *stat;
unsigned long long calltime;
unsigned long long rettime = trace_clock_local();
struct ftrace_profile *rec;
- unsigned long flags;
int size;
- local_irq_save(flags);
+ guard(preempt_notrace)();
+
stat = this_cpu_ptr(&ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
- goto out;
+ return;
profile_data = fgraph_retrieve_data(gops->idx, &size);
/* If the calltime was zero'd ignore it */
if (!profile_data || !profile_data->calltime)
- goto out;
+ return;
calltime = rettime - profile_data->calltime;
@@ -896,22 +890,16 @@ static void profile_graph_return(struct ftrace_graph_ret *trace,
rec->time += calltime;
rec->time_squared += calltime * calltime;
}
-
- out:
- local_irq_restore(flags);
}
static struct fgraph_ops fprofiler_ops = {
- .ops = {
- .flags = FTRACE_OPS_FL_INITIALIZED,
- INIT_OPS_HASH(fprofiler_ops.ops)
- },
.entryfunc = &profile_graph_entry,
.retfunc = &profile_graph_return,
};
static int register_ftrace_profiler(void)
{
+ ftrace_ops_set_global_filter(&fprofiler_ops.ops);
return register_ftrace_graph(&fprofiler_ops);
}
@@ -922,12 +910,11 @@ static void unregister_ftrace_profiler(void)
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
.func = function_profile_call,
- .flags = FTRACE_OPS_FL_INITIALIZED,
- INIT_OPS_HASH(ftrace_profile_ops)
};
static int register_ftrace_profiler(void)
{
+ ftrace_ops_set_global_filter(&ftrace_profile_ops);
return register_ftrace_function(&ftrace_profile_ops);
}
@@ -950,20 +937,16 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
val = !!val;
- mutex_lock(&ftrace_profile_lock);
+ guard(mutex)(&ftrace_profile_lock);
if (ftrace_profile_enabled ^ val) {
if (val) {
ret = ftrace_profile_init();
- if (ret < 0) {
- cnt = ret;
- goto out;
- }
+ if (ret < 0)
+ return ret;
ret = register_ftrace_profiler();
- if (ret < 0) {
- cnt = ret;
- goto out;
- }
+ if (ret < 0)
+ return ret;
ftrace_profile_enabled = 1;
} else {
ftrace_profile_enabled = 0;
@@ -974,8 +957,6 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
unregister_ftrace_profiler();
}
}
- out:
- mutex_unlock(&ftrace_profile_lock);
*ppos += cnt;
@@ -1675,14 +1656,12 @@ unsigned long ftrace_location(unsigned long ip)
loc = ftrace_location_range(ip, ip);
if (!loc) {
if (!kallsyms_lookup_size_offset(ip, &size, &offset))
- goto out;
+ return 0;
/* map sym+0 to __fentry__ */
if (!offset)
loc = ftrace_location_range(ip, ip + size - 1);
}
-
-out:
return loc;
}
@@ -2077,7 +2056,7 @@ rollback:
continue;
if (rec == end)
- goto err_out;
+ return -EBUSY;
in_old = !!ftrace_lookup_ip(old_hash, rec->ip);
in_new = !!ftrace_lookup_ip(new_hash, rec->ip);
@@ -2090,7 +2069,6 @@ rollback:
rec->flags |= FTRACE_FL_IPMODIFY;
} while_for_each_ftrace_rec();
-err_out:
return -EBUSY;
}
@@ -4986,10 +4964,6 @@ static int cache_mod(struct trace_array *tr,
return ftrace_add_mod(tr, func, module, enable);
}
-static int
-ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
- int reset, int enable);
-
#ifdef CONFIG_MODULES
static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
char *mod, bool enable)
@@ -5619,20 +5593,15 @@ static DEFINE_MUTEX(ftrace_cmd_mutex);
__init int register_ftrace_command(struct ftrace_func_command *cmd)
{
struct ftrace_func_command *p;
- int ret = 0;
- mutex_lock(&ftrace_cmd_mutex);
+ guard(mutex)(&ftrace_cmd_mutex);
list_for_each_entry(p, &ftrace_commands, list) {
- if (strcmp(cmd->name, p->name) == 0) {
- ret = -EBUSY;
- goto out_unlock;
- }
+ if (strcmp(cmd->name, p->name) == 0)
+ return -EBUSY;
}
list_add(&cmd->list, &ftrace_commands);
- out_unlock:
- mutex_unlock(&ftrace_cmd_mutex);
- return ret;
+ return 0;
}
/*
@@ -5642,20 +5611,17 @@ __init int register_ftrace_command(struct ftrace_func_command *cmd)
__init int unregister_ftrace_command(struct ftrace_func_command *cmd)
{
struct ftrace_func_command *p, *n;
- int ret = -ENODEV;
- mutex_lock(&ftrace_cmd_mutex);
+ guard(mutex)(&ftrace_cmd_mutex);
+
list_for_each_entry_safe(p, n, &ftrace_commands, list) {
if (strcmp(cmd->name, p->name) == 0) {
- ret = 0;
list_del_init(&p->list);
- goto out_unlock;
+ return 0;
}
}
- out_unlock:
- mutex_unlock(&ftrace_cmd_mutex);
- return ret;
+ return -ENODEV;
}
static int ftrace_process_regex(struct ftrace_iterator *iter,
@@ -5665,7 +5631,7 @@ static int ftrace_process_regex(struct ftrace_iterator *iter,
struct trace_array *tr = iter->ops->private;
char *func, *command, *next = buff;
struct ftrace_func_command *p;
- int ret = -EINVAL;
+ int ret;
func = strsep(&next, ":");
@@ -5682,17 +5648,14 @@ static int ftrace_process_regex(struct ftrace_iterator *iter,
command = strsep(&next, ":");
- mutex_lock(&ftrace_cmd_mutex);
+ guard(mutex)(&ftrace_cmd_mutex);
+
list_for_each_entry(p, &ftrace_commands, list) {
- if (strcmp(p->name, command) == 0) {
- ret = p->func(tr, hash, func, command, next, enable);
- goto out_unlock;
- }
+ if (strcmp(p->name, command) == 0)
+ return p->func(tr, hash, func, command, next, enable);
}
- out_unlock:
- mutex_unlock(&ftrace_cmd_mutex);
- return ret;
+ return -EINVAL;
}
static ssize_t
@@ -5726,12 +5689,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
parser->idx, enable);
trace_parser_clear(parser);
if (ret < 0)
- goto out;
+ return ret;
}
- ret = read;
- out:
- return ret;
+ return read;
}
ssize_t
@@ -5792,7 +5753,7 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long *ips,
static int
ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
unsigned long *ips, unsigned int cnt,
- int remove, int reset, int enable)
+ int remove, int reset, int enable, char *mod)
{
struct ftrace_hash **orig_hash;
struct ftrace_hash *hash;
@@ -5818,7 +5779,15 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
goto out_regex_unlock;
}
- if (buf && !ftrace_match_records(hash, buf, len)) {
+ if (buf && !match_records(hash, buf, len, mod)) {
+ /* If this was for a module and nothing was enabled, flag it */
+ if (mod)
+ (*orig_hash)->flags |= FTRACE_HASH_FL_MOD;
+
+ /*
+ * Even if it is a mod, return an error to let the caller know
+ * that nothing was added.
+ */
ret = -EINVAL;
goto out_regex_unlock;
}
@@ -5843,7 +5812,7 @@ static int
ftrace_set_addr(struct ftrace_ops *ops, unsigned long *ips, unsigned int cnt,
int remove, int reset, int enable)
{
- return ftrace_set_hash(ops, NULL, 0, ips, cnt, remove, reset, enable);
+ return ftrace_set_hash(ops, NULL, 0, ips, cnt, remove, reset, enable, NULL);
}
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
@@ -6221,7 +6190,38 @@ static int
ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
int reset, int enable)
{
- return ftrace_set_hash(ops, buf, len, NULL, 0, 0, reset, enable);
+ char *mod = NULL, *func, *command, *next = buf;
+ char *tmp __free(kfree) = NULL;
+ struct trace_array *tr = ops->private;
+ int ret;
+
+ func = strsep(&next, ":");
+
+ /* This can also handle :mod: parsing */
+ if (next) {
+ if (!tr)
+ return -EINVAL;
+
+ command = strsep(&next, ":");
+ if (strcmp(command, "mod") != 0)
+ return -EINVAL;
+
+ mod = next;
+ len = command - func;
+ /* Save the original func as ftrace_set_hash() can modify it */
+ tmp = kstrdup(func, GFP_KERNEL);
+ }
+
+ ret = ftrace_set_hash(ops, func, len, NULL, 0, 0, reset, enable, mod);
+
+ if (tr && mod && ret < 0) {
+ /* Did tmp fail to allocate? */
+ if (!tmp)
+ return -ENOMEM;
+ ret = cache_mod(tr, tmp, mod, enable);
+ }
+
+ return ret;
}
/**
@@ -6385,6 +6385,14 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
ftrace_ops_init(ops);
+ /* The trace_array is needed for caching module function filters */
+ if (!ops->private) {
+ struct trace_array *tr = trace_get_global_array();
+
+ ops->private = tr;
+ ftrace_init_trace_array(tr);
+ }
+
while (buf) {
func = strsep(&buf, ",");
ftrace_set_regex(ops, func, strlen(func), 0, enable);
@@ -7818,9 +7826,14 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops)
void ftrace_init_trace_array(struct trace_array *tr)
{
+ if (tr->flags & TRACE_ARRAY_FL_MOD_INIT)
+ return;
+
INIT_LIST_HEAD(&tr->func_probes);
INIT_LIST_HEAD(&tr->mod_trace);
INIT_LIST_HEAD(&tr->mod_notrace);
+
+ tr->flags |= TRACE_ARRAY_FL_MOD_INIT;
}
#else
@@ -7849,7 +7862,8 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops)
__init void ftrace_init_global_array_ops(struct trace_array *tr)
{
tr->ops = &global_ops;
- tr->ops->private = tr;
+ if (!global_ops.private)
+ global_ops.private = tr;
ftrace_init_trace_array(tr);
init_array_fgraph_ops(tr, tr->ops);
}
@@ -8291,7 +8305,7 @@ pid_write(struct file *filp, const char __user *ubuf,
if (!cnt)
return 0;
- mutex_lock(&ftrace_lock);
+ guard(mutex)(&ftrace_lock);
switch (type) {
case TRACE_PIDS:
@@ -8307,14 +8321,13 @@ pid_write(struct file *filp, const char __user *ubuf,
lockdep_is_held(&ftrace_lock));
break;
default:
- ret = -EINVAL;
WARN_ON_ONCE(1);
- goto out;
+ return -EINVAL;
}
ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
if (ret < 0)
- goto out;
+ return ret;
switch (type) {
case TRACE_PIDS:
@@ -8343,11 +8356,8 @@ pid_write(struct file *filp, const char __user *ubuf,
ftrace_update_pid_func();
ftrace_startup_all(0);
- out:
- mutex_unlock(&ftrace_lock);
- if (ret > 0)
- *ppos += ret;
+ *ppos += ret;
return ret;
}
@@ -8750,17 +8760,17 @@ static int
ftrace_enable_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- int ret = -ENODEV;
+ int ret;
- mutex_lock(&ftrace_lock);
+ guard(mutex)(&ftrace_lock);
if (unlikely(ftrace_disabled))
- goto out;
+ return -ENODEV;
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
- goto out;
+ return ret;
if (ftrace_enabled) {
@@ -8774,8 +8784,7 @@ ftrace_enable_sysctl(const struct ctl_table *table, int write,
} else {
if (is_permanent_ops_registered()) {
ftrace_enabled = true;
- ret = -EBUSY;
- goto out;
+ return -EBUSY;
}
/* stopping ftrace calls (just send to ftrace_stub) */
@@ -8785,9 +8794,7 @@ ftrace_enable_sysctl(const struct ctl_table *table, int write,
}
last_ftrace_enabled = !!ftrace_enabled;
- out:
- mutex_unlock(&ftrace_lock);
- return ret;
+ return 0;
}
static struct ctl_table ftrace_sysctls[] = {
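Several ftrace.c hunks above convert open-coded mutex_lock()/mutex_unlock()
pairs with goto-based unwinding to guard(mutex)(...) from <linux/cleanup.h>,
which releases the lock automatically whenever the scope is left. The
transformation in miniature (an illustrative function, not from this patch):

	#include <linux/cleanup.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(my_lock);
	static int my_state;

	/* Before: every early exit must jump to the unlock label. */
	static int my_set_old(int val)
	{
		int ret = 0;

		mutex_lock(&my_lock);
		if (val < 0) {
			ret = -EINVAL;
			goto out;
		}
		my_state = val;
	out:
		mutex_unlock(&my_lock);
		return ret;
	}

	/* After: guard() unlocks on any return path. */
	static int my_set_new(int val)
	{
		guard(mutex)(&my_lock);

		if (val < 0)
			return -EINVAL;
		my_state = val;
		return 0;
	}

The same scope-based idiom replaces local_irq_save()/restore() in the
profiler callbacks via guard(preempt_notrace)().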
diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c
index 4966e6bbdf6f..c62b9b3cfb3d 100644
--- a/kernel/trace/pid_list.c
+++ b/kernel/trace/pid_list.c
@@ -414,7 +414,7 @@ struct trace_pid_list *trace_pid_list_alloc(void)
int i;
/* According to linux/thread.h, pids can be no bigger than 30 bits */
- WARN_ON_ONCE(pid_max > (1 << 30));
+ WARN_ON_ONCE(init_pid_ns.pid_max > (1 << 30));
pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL);
if (!pid_list)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7e257e855dd1..6d61ff78926b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4682,40 +4682,22 @@ int ring_buffer_write(struct trace_buffer *buffer,
}
EXPORT_SYMBOL_GPL(ring_buffer_write);
-static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+/*
+ * The total number of entries in the ring buffer is the running counter
+ * of entries written into the ring buffer, minus the sum of the entries
+ * read from the ring buffer and the number of entries that were
+ * overwritten.
+ */
+static inline unsigned long
+rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
{
- struct buffer_page *reader = cpu_buffer->reader_page;
- struct buffer_page *head = rb_set_head_page(cpu_buffer);
- struct buffer_page *commit = cpu_buffer->commit_page;
-
- /* In case of error, head will be NULL */
- if (unlikely(!head))
- return true;
-
- /* Reader should exhaust content in reader page */
- if (reader->read != rb_page_size(reader))
- return false;
-
- /*
- * If writers are committing on the reader page, knowing all
- * committed content has been read, the ring buffer is empty.
- */
- if (commit == reader)
- return true;
-
- /*
- * If writers are committing on a page other than reader page
- * and head page, there should always be content to read.
- */
- if (commit != head)
- return false;
+ return local_read(&cpu_buffer->entries) -
+ (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
+}
- /*
- * Writers are committing on the head page, we just need
- * to care about there're committed data, and the reader will
- * swap reader page with head page when it is to read data.
- */
- return rb_page_commit(commit) == 0;
+static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return !rb_num_of_entries(cpu_buffer);
}
/**
@@ -4861,19 +4843,6 @@ void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu)
}
EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
-/*
- * The total entries in the ring buffer is the running counter
- * of entries entered into the ring buffer, minus the sum of
- * the entries read from the ring buffer and the number of
- * entries that were overwritten.
- */
-static inline unsigned long
-rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
-{
- return local_read(&cpu_buffer->entries) -
- (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
-}
-
/**
* ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
* @buffer: The ring buffer
@@ -7019,7 +6988,11 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
lockdep_assert_held(&cpu_buffer->mapping_lock);
nr_subbufs = cpu_buffer->nr_pages + 1; /* + reader-subbuf */
- nr_pages = ((nr_subbufs + 1) << subbuf_order) - pgoff; /* + meta-page */
+ nr_pages = ((nr_subbufs + 1) << subbuf_order); /* + meta-page */
+ if (nr_pages <= pgoff)
+ return -EINVAL;
+
+ nr_pages -= pgoff;
nr_vma_pages = vma_pages(vma);
if (!nr_vma_pages || nr_vma_pages > nr_pages)
@@ -7055,7 +7028,7 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
}
while (p < nr_pages) {
- struct page *page = virt_to_page((void *)cpu_buffer->subbuf_ids[s]);
+ struct page *page;
int off = 0;
if (WARN_ON_ONCE(s >= nr_subbufs)) {
@@ -7063,6 +7036,8 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
goto out;
}
+ page = virt_to_page((void *)cpu_buffer->subbuf_ids[s]);
+
for (; off < (1 << (subbuf_order)); off++, page++) {
if (p >= nr_pages)
break;
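With this change the per-CPU emptiness test reduces to the entry-count
identity documented above. A worked example with illustrative numbers:

	/*
	 * entries (total written)            = 1000
	 * overrun (overwritten before read)  =  200
	 * read (consumed by the reader)      =  800
	 *
	 * rb_num_of_entries() = 1000 - (200 + 800) = 0  =>  buffer empty
	 *
	 * Every written event has either been overwritten or read, so
	 * rb_per_cpu_empty() becomes "no unread entries remain", replacing
	 * the old reader/commit/head page-state reasoning.
	 */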
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index be62f0ea1814..2542ec398b5d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3611,17 +3611,12 @@ char *trace_iter_expand_format(struct trace_iterator *iter)
}
/* Returns true if the string is safe to dereference from an event */
-static bool trace_safe_str(struct trace_iterator *iter, const char *str,
- bool star, int len)
+static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
unsigned long addr = (unsigned long)str;
struct trace_event *trace_event;
struct trace_event_call *event;
- /* Ignore strings with no length */
- if (star && !len)
- return true;
-
/* OK if part of the event data */
if ((addr >= (unsigned long)iter->ent) &&
(addr < (unsigned long)iter->ent + iter->ent_size))
@@ -3661,181 +3656,69 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str,
return false;
}
-static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
-
-static int test_can_verify_check(const char *fmt, ...)
-{
- char buf[16];
- va_list ap;
- int ret;
-
- /*
- * The verifier is dependent on vsnprintf() modifies the va_list
- * passed to it, where it is sent as a reference. Some architectures
- * (like x86_32) passes it by value, which means that vsnprintf()
- * does not modify the va_list passed to it, and the verifier
- * would then need to be able to understand all the values that
- * vsnprintf can use. If it is passed by value, then the verifier
- * is disabled.
- */
- va_start(ap, fmt);
- vsnprintf(buf, 16, "%d", ap);
- ret = va_arg(ap, int);
- va_end(ap);
-
- return ret;
-}
-
-static void test_can_verify(void)
-{
- if (!test_can_verify_check("%d %d", 0, 1)) {
- pr_info("trace event string verifier disabled\n");
- static_branch_inc(&trace_no_verify);
- }
-}
-
/**
- * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
+ * ignore_event - Check dereferenced fields while writing to the seq buffer
* @iter: The iterator that holds the seq buffer and the event being printed
- * @fmt: The format used to print the event
- * @ap: The va_list holding the data to print from @fmt.
*
- * This writes the data into the @iter->seq buffer using the data from
- * @fmt and @ap. If the format has a %s, then the source of the string
- * is examined to make sure it is safe to print, otherwise it will
- * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
- * pointer.
+ * At boot up, test_event_printk() will flag any event that dereferences
+ * a string with "%s" that does not exist in the ring buffer. It may still
+ * be valid, as the string may point to a static string in the kernel
+ * rodata that never gets freed. But if the string pointer is pointing
+ * to something that was allocated, there's a chance that it can be freed
+ * by the time the user reads the trace. This would cause a bad memory
+ * access by the kernel and possibly crash the system.
+ *
+ * This function will check if the event has any fields flagged as needing
+ * to be checked at runtime and perform those checks.
+ *
+ * If it is found that a field is unsafe, it will write into the @iter->seq
+ * a message stating what was found to be unsafe.
+ *
+ * @return: true if the event is unsafe and should be ignored,
+ * false otherwise.
*/
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap)
+bool ignore_event(struct trace_iterator *iter)
{
- long text_delta = 0;
- long data_delta = 0;
- const char *p = fmt;
- const char *str;
- bool good;
- int i, j;
+ struct ftrace_event_field *field;
+ struct trace_event *trace_event;
+ struct trace_event_call *event;
+ struct list_head *head;
+ struct trace_seq *seq;
+ const void *ptr;
- if (WARN_ON_ONCE(!fmt))
- return;
+ trace_event = ftrace_find_event(iter->ent->type);
- if (static_branch_unlikely(&trace_no_verify))
- goto print;
+ seq = &iter->seq;
- /*
- * When the kernel is booted with the tp_printk command line
- * parameter, trace events go directly through to printk().
- * It also is checked by this function, but it does not
- * have an associated trace_array (tr) for it.
- */
- if (iter->tr) {
- text_delta = iter->tr->text_delta;
- data_delta = iter->tr->data_delta;
+ if (!trace_event) {
+ trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
+ return true;
}
- /* Don't bother checking when doing a ftrace_dump() */
- if (iter->fmt == static_fmt_buf)
- goto print;
-
- while (*p) {
- bool star = false;
- int len = 0;
-
- j = 0;
-
- /*
- * We only care about %s and variants
- * as well as %p[sS] if delta is non-zero
- */
- for (i = 0; p[i]; i++) {
- if (i + 1 >= iter->fmt_size) {
- /*
- * If we can't expand the copy buffer,
- * just print it.
- */
- if (!trace_iter_expand_format(iter))
- goto print;
- }
-
- if (p[i] == '\\' && p[i+1]) {
- i++;
- continue;
- }
- if (p[i] == '%') {
- /* Need to test cases like %08.*s */
- for (j = 1; p[i+j]; j++) {
- if (isdigit(p[i+j]) ||
- p[i+j] == '.')
- continue;
- if (p[i+j] == '*') {
- star = true;
- continue;
- }
- break;
- }
- if (p[i+j] == 's')
- break;
-
- if (text_delta && p[i+1] == 'p' &&
- ((p[i+2] == 's' || p[i+2] == 'S')))
- break;
-
- star = false;
- }
- j = 0;
- }
- /* If no %s found then just print normally */
- if (!p[i])
- break;
-
- /* Copy up to the %s, and print that */
- strncpy(iter->fmt, p, i);
- iter->fmt[i] = '\0';
- trace_seq_vprintf(&iter->seq, iter->fmt, ap);
+ event = container_of(trace_event, struct trace_event_call, event);
+ if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
+ return false;
- /* Add delta to %pS pointers */
- if (p[i+1] == 'p') {
- unsigned long addr;
- char fmt[4];
+ head = trace_get_fields(event);
+ if (!head) {
+ trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
+ trace_event_name(event));
+ return true;
+ }
- fmt[0] = '%';
- fmt[1] = 'p';
- fmt[2] = p[i+2]; /* Either %ps or %pS */
- fmt[3] = '\0';
+ /* Offsets are from the iter->ent that points to the raw event */
+ ptr = iter->ent;
- addr = va_arg(ap, unsigned long);
- addr += text_delta;
- trace_seq_printf(&iter->seq, fmt, (void *)addr);
+ list_for_each_entry(field, head, link) {
+ const char *str;
+ bool good;
- p += i + 3;
+ if (!field->needs_test)
continue;
- }
- /*
- * If iter->seq is full, the above call no longer guarantees
- * that ap is in sync with fmt processing, and further calls
- * to va_arg() can return wrong positional arguments.
- *
- * Ensure that ap is no longer used in this case.
- */
- if (iter->seq.full) {
- p = "";
- break;
- }
+ str = *(const char **)(ptr + field->offset);
- if (star)
- len = va_arg(ap, int);
-
- /* The ap now points to the string data of the %s */
- str = va_arg(ap, const char *);
-
- good = trace_safe_str(iter, str, star, len);
-
- /* Could be from the last boot */
- if (data_delta && !good) {
- str += data_delta;
- good = trace_safe_str(iter, str, star, len);
- }
+ good = trace_safe_str(iter, str);
/*
* If you hit this warning, it is likely that the
@@ -3846,44 +3729,14 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
* instead. See samples/trace_events/trace-events-sample.h
* for reference.
*/
- if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
- fmt, seq_buf_str(&iter->seq.seq))) {
- int ret;
-
- /* Try to safely read the string */
- if (star) {
- if (len + 1 > iter->fmt_size)
- len = iter->fmt_size - 1;
- if (len < 0)
- len = 0;
- ret = copy_from_kernel_nofault(iter->fmt, str, len);
- iter->fmt[len] = 0;
- star = false;
- } else {
- ret = strncpy_from_kernel_nofault(iter->fmt, str,
- iter->fmt_size);
- }
- if (ret < 0)
- trace_seq_printf(&iter->seq, "(0x%px)", str);
- else
- trace_seq_printf(&iter->seq, "(0x%px:%s)",
- str, iter->fmt);
- str = "[UNSAFE-MEMORY]";
- strcpy(iter->fmt, "%s");
- } else {
- strncpy(iter->fmt, p + i, j + 1);
- iter->fmt[j+1] = '\0';
+ if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
+ trace_event_name(event), field->name)) {
+ trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
+ trace_event_name(event), field->name);
+ return true;
}
- if (star)
- trace_seq_printf(&iter->seq, iter->fmt, len, str);
- else
- trace_seq_printf(&iter->seq, iter->fmt, str);
-
- p += i + j + 1;
}
- print:
- if (*p)
- trace_seq_vprintf(&iter->seq, p, ap);
+ return false;
}
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
@@ -4269,6 +4122,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
preempt_model_none() ? "server" :
preempt_model_voluntary() ? "desktop" :
preempt_model_full() ? "preempt" :
+ preempt_model_lazy() ? "lazy" :
preempt_model_rt() ? "preempt_rt" :
"unknown",
/* These are reserved for later use */
@@ -4353,6 +4207,15 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
if (event) {
if (tr->trace_flags & TRACE_ITER_FIELDS)
return print_event_fields(iter, event);
+ /*
+ * For TRACE_EVENT() events, the print_fmt is not
+ * safe to use if the array has delta offsets.
+ * Force printing via the fields.
+ */
+ if ((tr->text_delta || tr->data_delta) &&
+ event->type > __TRACE_LAST_TYPE)
+ return print_event_fields(iter, event);
+
return event->funcs->trace(iter, sym_flags, event);
}
@@ -5225,6 +5088,9 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
cpumask_var_t tracing_cpumask_new;
int err;
+ if (count == 0 || count > KMALLOC_MAX_SIZE)
+ return -EINVAL;
+
if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
return -ENOMEM;
@@ -10777,8 +10643,6 @@ __init static int tracer_alloc_buffers(void)
register_snapshot_cmd();
- test_can_verify();
-
return 0;
out_free_pipe_cpumask:
@@ -10797,6 +10661,14 @@ out:
return ret;
}
+#ifdef CONFIG_FUNCTION_TRACER
+/* Used to set module cached ftrace filtering at boot up */
+__init struct trace_array *trace_get_global_array(void)
+{
+ return &global_trace;
+}
+#endif
+
void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_MAX_TRACE
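The ignore_event()/needs_test machinery above targets one specific authoring mistake. A hedged sketch of the two forms, with hypothetical event names (the safe form is what samples/trace_events/trace-events-sample.h recommends):

/* Flagged pattern: "%s" on a raw pointer field. Only the pointer is
 * recorded, so the string may be freed before the trace is read. */
TRACE_EVENT(sample_unsafe,
	TP_PROTO(const char *name),
	TP_ARGS(name),
	TP_STRUCT__entry(
		__field(const char *, name)
	),
	TP_fast_assign(
		__entry->name = name;
	),
	TP_printk("%s", __entry->name)	/* gets needs_test / TEST_STR */
);

/* Safe pattern: __string() copies the characters into the event itself,
 * so the data lives in the ring buffer and needs no runtime check. */
TRACE_EVENT(sample_safe,
	TP_PROTO(const char *name),
	TP_ARGS(name),
	TP_STRUCT__entry(
		__string(name, name)
	),
	TP_fast_assign(
		__assign_str(name);
	),
	TP_printk("%s", __get_str(name))
);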
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 266740b4e121..04058a9889b7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -432,6 +432,7 @@ struct trace_array {
enum {
TRACE_ARRAY_FL_GLOBAL = BIT(0),
TRACE_ARRAY_FL_BOOT = BIT(1),
+ TRACE_ARRAY_FL_MOD_INIT = BIT(2),
};
extern struct list_head ftrace_trace_arrays;
@@ -667,9 +668,8 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
bool trace_is_tracepoint_string(const char *str);
const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap) __printf(2, 0);
char *trace_iter_expand_format(struct trace_iterator *iter);
+bool ignore_event(struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
@@ -694,8 +694,10 @@ void trace_latency_header(struct seq_file *m);
void trace_default_header(struct seq_file *m);
void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
-void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops);
-int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops);
+void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops,
+ struct ftrace_regs *fregs);
+int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
+ struct ftrace_regs *fregs);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
@@ -718,8 +720,6 @@ extern unsigned long tracing_thresh;
/* PID filtering */
-extern int pid_max;
-
bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
pid_t search_pid);
bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,
@@ -1115,6 +1115,7 @@ void ftrace_destroy_function_files(struct trace_array *tr);
int ftrace_allocate_ftrace_ops(struct trace_array *tr);
void ftrace_free_ftrace_ops(struct trace_array *tr);
void ftrace_init_global_array_ops(struct trace_array *tr);
+struct trace_array *trace_get_global_array(void);
void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
void ftrace_reset_array_ops(struct trace_array *tr);
void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer);
@@ -1413,7 +1414,8 @@ struct ftrace_event_field {
int filter_type;
int offset;
int size;
- int is_signed;
+ unsigned int is_signed:1;
+ unsigned int needs_test:1;
int len;
};
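The prototype change above ripples to every fgraph user. A minimal sketch of the new three-argument callbacks from a caller's perspective (callback names hypothetical; error handling elided):

static int my_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
		    struct ftrace_regs *fregs)
{
	/* fregs may be NULL unless DYNAMIC_FTRACE_WITH_ARGS is available */
	return 1;	/* non-zero: trace this function's return too */
}

static void my_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops,
		      struct ftrace_regs *fregs)
{
}

static struct fgraph_ops my_gops = {
	.entryfunc	= my_entry,
	.retfunc	= my_return,
};
/* registered/unregistered with register_ftrace_graph(&my_gops) */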
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index ebda68ee9abf..be8be0c1aaf0 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -963,6 +963,11 @@ static int __trace_eprobe_create(int argc, const char *argv[])
goto error;
}
ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
+ if (ret < 0) {
+ trace_probe_unregister_event_call(&ep->tp);
+ mutex_unlock(&event_mutex);
+ goto error;
+ }
mutex_unlock(&event_mutex);
return ret;
parse_error:
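The fix restores the publish-then-unwind ordering for dynamic events: anything made visible before dyn_event_add() fails has to be torn down under the same event_mutex. A condensed sketch of the intended sequence (context abbreviated from __trace_eprobe_create()):

	mutex_lock(&event_mutex);
	ret = trace_probe_register_event_call(&ep->tp);
	if (!ret) {
		ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
		if (ret < 0)
			/* undo the registration before dropping the lock */
			trace_probe_unregister_event_call(&ep->tp);
	}
	mutex_unlock(&event_mutex);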
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 77e68efbd43e..770e7ed91716 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -82,7 +82,7 @@ static int system_refcount_dec(struct event_subsystem *system)
}
static struct ftrace_event_field *
-__find_event_field(struct list_head *head, char *name)
+__find_event_field(struct list_head *head, const char *name)
{
struct ftrace_event_field *field;
@@ -114,7 +114,8 @@ trace_find_event_field(struct trace_event_call *call, char *name)
static int __trace_define_field(struct list_head *head, const char *type,
const char *name, int offset, int size,
- int is_signed, int filter_type, int len)
+ int is_signed, int filter_type, int len,
+ int need_test)
{
struct ftrace_event_field *field;
@@ -133,6 +134,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
field->offset = offset;
field->size = size;
field->is_signed = is_signed;
+ field->needs_test = need_test;
field->len = len;
list_add(&field->link, head);
@@ -151,13 +153,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, 0);
+ is_signed, filter_type, 0, 0);
}
EXPORT_SYMBOL_GPL(trace_define_field);
static int trace_define_field_ext(struct trace_event_call *call, const char *type,
const char *name, int offset, int size, int is_signed,
- int filter_type, int len)
+ int filter_type, int len, int need_test)
{
struct list_head *head;
@@ -166,13 +168,13 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, len);
+ is_signed, filter_type, len, need_test);
}
#define __generic_field(type, item, filter_type) \
ret = __trace_define_field(&ftrace_generic_fields, #type, \
#item, 0, 0, is_signed_type(type), \
- filter_type, 0); \
+ filter_type, 0, 0); \
if (ret) \
return ret;
@@ -181,7 +183,8 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
"common_" #item, \
offsetof(typeof(ent), item), \
sizeof(ent.item), \
- is_signed_type(type), FILTER_OTHER, 0); \
+ is_signed_type(type), FILTER_OTHER, \
+ 0, 0); \
if (ret) \
return ret;
@@ -244,19 +247,16 @@ int trace_event_get_offsets(struct trace_event_call *call)
return tail->offset + tail->size;
}
-/*
- * Check if the referenced field is an array and return true,
- * as arrays are OK to dereference.
- */
-static bool test_field(const char *fmt, struct trace_event_call *call)
+
+static struct trace_event_fields *find_event_field(const char *fmt,
+ struct trace_event_call *call)
{
struct trace_event_fields *field = call->class->fields_array;
- const char *array_descriptor;
const char *p = fmt;
int len;
if (!(len = str_has_prefix(fmt, "REC->")))
- return false;
+ return NULL;
fmt += len;
for (p = fmt; *p; p++) {
if (!isalnum(*p) && *p != '_')
@@ -265,16 +265,141 @@ static bool test_field(const char *fmt, struct trace_event_call *call)
len = p - fmt;
for (; field->type; field++) {
- if (strncmp(field->name, fmt, len) ||
- field->name[len])
+ if (strncmp(field->name, fmt, len) || field->name[len])
continue;
- array_descriptor = strchr(field->type, '[');
- /* This is an array and is OK to dereference. */
- return array_descriptor != NULL;
+
+ return field;
+ }
+ return NULL;
+}
+
+/*
+ * Check if the referenced field is an array and return true,
+ * as arrays are OK to dereference.
+ */
+static bool test_field(const char *fmt, struct trace_event_call *call)
+{
+ struct trace_event_fields *field;
+
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* This is an array and is OK to dereference. */
+ return strchr(field->type, '[') != NULL;
+}
+
+/* Look for a string within an argument */
+static bool find_print_string(const char *arg, const char *str, const char *end)
+{
+ const char *r;
+
+ r = strstr(arg, str);
+ return r && r < end;
+}
+
+/* Return true if the argument pointer is safe */
+static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
+{
+ const char *r, *e, *a;
+
+ e = fmt + len;
+
+ /* Find the REC-> in the argument */
+ r = strstr(fmt, "REC->");
+ if (r && r < e) {
+ /*
+ * Addresses of events on the buffer, or an array on the buffer is
+ * OK to dereference. There's ways to fool this, but
+ * this is to catch common mistakes, not malicious code.
+ */
+ a = strchr(fmt, '&');
+ if ((a && (a < r)) || test_field(r, call))
+ return true;
+ } else if (find_print_string(fmt, "__get_dynamic_array(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_sockaddr(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) {
+ return true;
}
return false;
}
+/* Return true if the string is safe */
+static bool process_string(const char *fmt, int len, struct trace_event_call *call)
+{
+ struct trace_event_fields *field;
+ const char *r, *e, *s;
+
+ e = fmt + len;
+
+ /*
+ * There are several helper functions that return strings.
+ * If the argument contains a function, then assume its field is valid.
+	 * The argument is considered to contain a function if an alphanumeric
+	 * character or '_' appears before a parenthesis.
+ */
+ s = fmt;
+ do {
+ r = strstr(s, "(");
+ if (!r || r >= e)
+ break;
+ for (int i = 1; r - i >= s; i++) {
+ char ch = *(r - i);
+ if (isspace(ch))
+ continue;
+ if (isalnum(ch) || ch == '_')
+ return true;
+ /* Anything else, this isn't a function */
+ break;
+ }
+		/* A function could be wrapped in parentheses; try the next one */
+ s = r + 1;
+ } while (s < e);
+
+ /*
+ * Check for arrays. If the argument has: foo[REC->val]
+ * then it is very likely that foo is an array of strings
+ * that are safe to use.
+ */
+ r = strstr(s, "[");
+ if (r && r < e) {
+ r = strstr(r, "REC->");
+ if (r && r < e)
+ return true;
+ }
+
+ /*
+	 * If there are any strings in the argument, consider this arg OK as it
+	 * could be: REC->field ? "foo" : "bar" and we don't want to get into
+ * verifying that logic here.
+ */
+ if (find_print_string(fmt, "\"", e))
+ return true;
+
+ /* Dereferenced strings are also valid like any other pointer */
+ if (process_pointer(fmt, len, call))
+ return true;
+
+ /* Make sure the field is found */
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* Test this field's string before printing the event */
+ call->flags |= TRACE_EVENT_FL_TEST_STR;
+ field->needs_test = 1;
+
+ return true;
+}
+
/*
* Examine the print fmt of the event looking for unsafe dereference
* pointers using %p* that could be recorded in the trace event and
@@ -284,13 +409,14 @@ static bool test_field(const char *fmt, struct trace_event_call *call)
static void test_event_printk(struct trace_event_call *call)
{
u64 dereference_flags = 0;
+ u64 string_flags = 0;
bool first = true;
- const char *fmt, *c, *r, *a;
+ const char *fmt;
int parens = 0;
char in_quote = 0;
int start_arg = 0;
int arg = 0;
- int i;
+ int i, e;
fmt = call->print_fmt;
@@ -374,8 +500,16 @@ static void test_event_printk(struct trace_event_call *call)
star = true;
continue;
}
- if ((fmt[i + j] == 's') && star)
- arg++;
+ if ((fmt[i + j] == 's')) {
+ if (star)
+ arg++;
+ if (WARN_ONCE(arg == 63,
+ "Too many args for event: %s",
+ trace_event_name(call)))
+ return;
+ dereference_flags |= 1ULL << arg;
+ string_flags |= 1ULL << arg;
+ }
break;
}
break;
@@ -403,42 +537,47 @@ static void test_event_printk(struct trace_event_call *call)
case ',':
if (in_quote || parens)
continue;
+ e = i;
i++;
while (isspace(fmt[i]))
i++;
- start_arg = i;
- if (!(dereference_flags & (1ULL << arg)))
- goto next_arg;
- /* Find the REC-> in the argument */
- c = strchr(fmt + i, ',');
- r = strstr(fmt + i, "REC->");
- if (r && (!c || r < c)) {
- /*
- * Addresses of events on the buffer,
- * or an array on the buffer is
- * OK to dereference.
- * There's ways to fool this, but
- * this is to catch common mistakes,
- * not malicious code.
- */
- a = strchr(fmt + i, '&');
- if ((a && (a < r)) || test_field(r, call))
+ /*
+ * If start_arg is zero, then this is the start of the
+ * first argument. The processing of the argument happens
+ * when the end of the argument is found, as it needs to
+			 * handle parentheses and such.
+ */
+ if (!start_arg) {
+ start_arg = i;
+ /* Balance out the i++ in the for loop */
+ i--;
+ continue;
+ }
+
+ if (dereference_flags & (1ULL << arg)) {
+ if (string_flags & (1ULL << arg)) {
+ if (process_string(fmt + start_arg, e - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ } else if (process_pointer(fmt + start_arg, e - start_arg, call))
dereference_flags &= ~(1ULL << arg);
- } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
- (!c || r < c)) {
- dereference_flags &= ~(1ULL << arg);
- } else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
- (!c || r < c)) {
- dereference_flags &= ~(1ULL << arg);
}
- next_arg:
- i--;
+ start_arg = i;
arg++;
+ /* Balance out the i++ in the for loop */
+ i--;
}
}
+ if (dereference_flags & (1ULL << arg)) {
+ if (string_flags & (1ULL << arg)) {
+ if (process_string(fmt + start_arg, i - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ } else if (process_pointer(fmt + start_arg, i - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ }
+
/*
* If you triggered the below warning, the trace event reported
* uses an unsafe dereference pointer %p*. As the data stored
@@ -2471,7 +2610,7 @@ event_define_fields(struct trace_event_call *call)
ret = trace_define_field_ext(call, field->type, field->name,
offset, field->size,
field->is_signed, field->filter_type,
- field->len);
+ field->len, field->needs_test);
if (WARN_ON_ONCE(ret)) {
pr_err("error code is %d\n", ret);
break;
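process_string()'s function-detection scan is self-contained enough to exercise in isolation; a user-space sketch of the same loop (helper name hypothetical):

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Mirrors the scan above: an argument "contains a function" if the first
 * non-space character before some '(' is alphanumeric or '_'. */
static bool looks_like_function(const char *arg)
{
	const char *s = arg, *e = arg + strlen(arg), *r;

	do {
		r = strstr(s, "(");
		if (!r || r >= e)
			break;
		for (int i = 1; r - i >= s; i++) {
			char ch = *(r - i);

			if (isspace(ch))
				continue;
			if (isalnum(ch) || ch == '_')
				return true;
			break;	/* anything else: not a function call */
		}
		s = r + 1;	/* maybe wrapped in parentheses, try the next one */
	} while (s < e);

	return false;
}

int main(void)
{
	printf("%d\n", looks_like_function("__get_str(name)"));		/* 1 */
	printf("%d\n", looks_like_function("(REC->flag ? \"a\" : \"b\")"));	/* 0 */
	return 0;
}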
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index c62d1629cffe..b8f3c4ba309b 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -134,7 +134,7 @@ static int
process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
void *dest, void *base)
{
- struct pt_regs *regs = rec;
+ struct ftrace_regs *fregs = rec;
unsigned long val;
int ret;
@@ -142,17 +142,17 @@ retry:
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_STACK:
- val = regs_get_kernel_stack_nth(regs, code->param);
+ val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
break;
case FETCH_OP_STACKP:
- val = kernel_stack_pointer(regs);
+ val = ftrace_regs_get_stack_pointer(fregs);
break;
case FETCH_OP_RETVAL:
- val = regs_return_value(regs);
+ val = ftrace_regs_get_return_value(fregs);
break;
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
case FETCH_OP_ARG:
- val = regs_get_kernel_argument(regs, code->param);
+ val = ftrace_regs_get_argument(fregs, code->param);
break;
case FETCH_OP_EDATA:
val = *(unsigned long *)((unsigned long)edata + code->offset);
@@ -175,7 +175,7 @@ NOKPROBE_SYMBOL(process_fetch_insn)
/* function entry handler */
static nokprobe_inline void
__fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- struct pt_regs *regs,
+ struct ftrace_regs *fregs,
struct trace_event_file *trace_file)
{
struct fentry_trace_entry_head *entry;
@@ -189,41 +189,71 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;
- dsize = __get_data_size(&tf->tp, regs, NULL);
+ dsize = __get_data_size(&tf->tp, fregs, NULL);
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
sizeof(*entry) + tf->tp.size + dsize);
if (!entry)
return;
- fbuffer.regs = regs;
+ fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
entry->ip = entry_ip;
- store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, NULL, sizeof(*entry), dsize);
trace_event_buffer_commit(&fbuffer);
}
static void
fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- struct pt_regs *regs)
+ struct ftrace_regs *fregs)
{
struct event_file_link *link;
trace_probe_for_each_link_rcu(link, &tf->tp)
- __fentry_trace_func(tf, entry_ip, regs, link->file);
+ __fentry_trace_func(tf, entry_ip, fregs, link->file);
}
NOKPROBE_SYMBOL(fentry_trace_func);
+static nokprobe_inline
+void store_fprobe_entry_data(void *edata, struct trace_probe *tp, struct ftrace_regs *fregs)
+{
+ struct probe_entry_arg *earg = tp->entry_arg;
+ unsigned long val = 0;
+ int i;
+
+ if (!earg)
+ return;
+
+ for (i = 0; i < earg->size; i++) {
+ struct fetch_insn *code = &earg->code[i];
+
+ switch (code->op) {
+ case FETCH_OP_ARG:
+ val = ftrace_regs_get_argument(fregs, code->param);
+ break;
+ case FETCH_OP_ST_EDATA:
+ *(unsigned long *)((unsigned long)edata + code->offset) = val;
+ break;
+ case FETCH_OP_END:
+ goto end;
+ default:
+ break;
+ }
+ }
+end:
+ return;
+}
+
/* function exit handler */
static int trace_fprobe_entry_handler(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *entry_data)
{
struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
if (tf->tp.entry_arg)
- store_trace_entry_data(entry_data, &tf->tp, regs);
+ store_fprobe_entry_data(entry_data, &tf->tp, fregs);
return 0;
}
@@ -231,7 +261,7 @@ NOKPROBE_SYMBOL(trace_fprobe_entry_handler)
static nokprobe_inline void
__fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *entry_data, struct trace_event_file *trace_file)
{
struct fexit_trace_entry_head *entry;
@@ -245,60 +275,63 @@ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;
- dsize = __get_data_size(&tf->tp, regs, entry_data);
+ dsize = __get_data_size(&tf->tp, fregs, entry_data);
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
sizeof(*entry) + tf->tp.size + dsize);
if (!entry)
return;
- fbuffer.regs = regs;
+ fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
entry->func = entry_ip;
entry->ret_ip = ret_ip;
- store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, entry_data, sizeof(*entry), dsize);
trace_event_buffer_commit(&fbuffer);
}
static void
fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs, void *entry_data)
+ unsigned long ret_ip, struct ftrace_regs *fregs, void *entry_data)
{
struct event_file_link *link;
trace_probe_for_each_link_rcu(link, &tf->tp)
- __fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data, link->file);
+ __fexit_trace_func(tf, entry_ip, ret_ip, fregs, entry_data, link->file);
}
NOKPROBE_SYMBOL(fexit_trace_func);
#ifdef CONFIG_PERF_EVENTS
static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
- struct pt_regs *regs)
+ struct ftrace_regs *fregs)
{
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
struct fentry_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
+ struct pt_regs *regs;
int rctx;
head = this_cpu_ptr(call->perf_events);
if (hlist_empty(head))
return 0;
- dsize = __get_data_size(&tf->tp, regs, NULL);
+ dsize = __get_data_size(&tf->tp, fregs, NULL);
__size = sizeof(*entry) + tf->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
- entry = perf_trace_buf_alloc(size, NULL, &rctx);
+ entry = perf_trace_buf_alloc(size, &regs, &rctx);
if (!entry)
return 0;
+ regs = ftrace_fill_perf_regs(fregs, regs);
+
entry->ip = entry_ip;
memset(&entry[1], 0, dsize);
- store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, NULL, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
return 0;
@@ -307,31 +340,34 @@ NOKPROBE_SYMBOL(fentry_perf_func);
static void
fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *entry_data)
{
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
struct fexit_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
+ struct pt_regs *regs;
int rctx;
head = this_cpu_ptr(call->perf_events);
if (hlist_empty(head))
return;
- dsize = __get_data_size(&tf->tp, regs, entry_data);
+ dsize = __get_data_size(&tf->tp, fregs, entry_data);
__size = sizeof(*entry) + tf->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
- entry = perf_trace_buf_alloc(size, NULL, &rctx);
+ entry = perf_trace_buf_alloc(size, &regs, &rctx);
if (!entry)
return;
+ regs = ftrace_fill_perf_regs(fregs, regs);
+
entry->func = entry_ip;
entry->ret_ip = ret_ip;
- store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, entry_data, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
}
@@ -339,33 +375,34 @@ NOKPROBE_SYMBOL(fexit_perf_func);
#endif /* CONFIG_PERF_EVENTS */
static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *entry_data)
{
struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
int ret = 0;
if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
- fentry_trace_func(tf, entry_ip, regs);
+ fentry_trace_func(tf, entry_ip, fregs);
+
#ifdef CONFIG_PERF_EVENTS
if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
- ret = fentry_perf_func(tf, entry_ip, regs);
+ ret = fentry_perf_func(tf, entry_ip, fregs);
#endif
return ret;
}
NOKPROBE_SYMBOL(fentry_dispatcher);
static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *entry_data)
{
struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
- fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data);
+ fexit_trace_func(tf, entry_ip, ret_ip, fregs, entry_data);
#ifdef CONFIG_PERF_EVENTS
if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
- fexit_perf_func(tf, entry_ip, ret_ip, regs, entry_data);
+ fexit_perf_func(tf, entry_ip, ret_ip, fregs, entry_data);
#endif
}
NOKPROBE_SYMBOL(fexit_dispatcher);
@@ -379,6 +416,9 @@ static void free_trace_fprobe(struct trace_fprobe *tf)
}
}
+/* Since alloc_trace_fprobe() can return an error pointer, check IS_ERR() too. */
+DEFINE_FREE(free_trace_fprobe, struct trace_fprobe *, if (!IS_ERR_OR_NULL(_T)) free_trace_fprobe(_T))
+
/*
* Allocate new trace_probe and initialize it (including fprobe).
*/
@@ -387,10 +427,9 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
const char *symbol,
struct tracepoint *tpoint,
struct module *mod,
- int maxactive,
int nargs, bool is_return)
{
- struct trace_fprobe *tf;
+ struct trace_fprobe *tf __free(free_trace_fprobe) = NULL;
int ret = -ENOMEM;
tf = kzalloc(struct_size(tf, tp.args, nargs), GFP_KERNEL);
@@ -399,7 +438,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
tf->symbol = kstrdup(symbol, GFP_KERNEL);
if (!tf->symbol)
- goto error;
+ return ERR_PTR(-ENOMEM);
if (is_return)
tf->fp.exit_handler = fexit_dispatcher;
@@ -408,17 +447,13 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
tf->tpoint = tpoint;
tf->mod = mod;
- tf->fp.nr_maxactive = maxactive;
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
if (ret < 0)
- goto error;
+ return ERR_PTR(ret);
dyn_event_init(&tf->devent, &trace_fprobe_ops);
- return tf;
-error:
- free_trace_fprobe(tf);
- return ERR_PTR(ret);
+ return_ptr(tf);
}
static struct trace_fprobe *find_trace_fprobe(const char *event,
@@ -845,14 +880,12 @@ static int register_trace_fprobe(struct trace_fprobe *tf)
struct trace_fprobe *old_tf;
int ret;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
old_tf = find_trace_fprobe(trace_probe_name(&tf->tp),
trace_probe_group_name(&tf->tp));
- if (old_tf) {
- ret = append_trace_fprobe(tf, old_tf);
- goto end;
- }
+ if (old_tf)
+ return append_trace_fprobe(tf, old_tf);
/* Register new event */
ret = register_fprobe_event(tf);
@@ -862,7 +895,7 @@ static int register_trace_fprobe(struct trace_fprobe *tf)
trace_probe_log_err(0, EVENT_EXIST);
} else
pr_warn("Failed to register probe event(%d)\n", ret);
- goto end;
+ return ret;
}
/* Register fprobe */
@@ -872,8 +905,6 @@ static int register_trace_fprobe(struct trace_fprobe *tf)
else
dyn_event_add(&tf->devent, trace_probe_event_call(&tf->tp));
-end:
- mutex_unlock(&event_mutex);
return ret;
}
@@ -1034,7 +1065,10 @@ static int parse_symbol_and_return(int argc, const char *argv[],
return 0;
}
-static int __trace_fprobe_create(int argc, const char *argv[])
+DEFINE_FREE(module_put, struct module *, if (_T) module_put(_T))
+
+static int trace_fprobe_create_internal(int argc, const char *argv[],
+ struct traceprobe_parse_context *ctx)
{
/*
* Argument syntax:
@@ -1060,24 +1094,20 @@ static int __trace_fprobe_create(int argc, const char *argv[])
* Type of args:
* FETCHARG:TYPE : use TYPE instead of unsigned long.
*/
- struct trace_fprobe *tf = NULL;
- int i, len, new_argc = 0, ret = 0;
+ struct trace_fprobe *tf __free(free_trace_fprobe) = NULL;
+ int i, new_argc = 0, ret = 0;
bool is_return = false;
- char *symbol = NULL;
+ char *symbol __free(kfree) = NULL;
const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
- const char **new_argv = NULL;
- int maxactive = 0;
+ const char **new_argv __free(kfree) = NULL;
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char sbuf[KSYM_NAME_LEN];
char abuf[MAX_BTF_ARGS_LEN];
- char *dbuf = NULL;
+ char *dbuf __free(kfree) = NULL;
bool is_tracepoint = false;
- struct module *tp_mod = NULL;
+ struct module *tp_mod __free(module_put) = NULL;
struct tracepoint *tpoint = NULL;
- struct traceprobe_parse_context ctx = {
- .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
- };
if ((argv[0][0] != 'f' && argv[0][0] != 't') || argc < 2)
return -ECANCELED;
@@ -1087,35 +1117,13 @@ static int __trace_fprobe_create(int argc, const char *argv[])
group = TRACEPOINT_EVENT_SYSTEM;
}
- trace_probe_log_init("trace_fprobe", argc, argv);
-
- event = strchr(&argv[0][1], ':');
- if (event)
- event++;
-
- if (isdigit(argv[0][1])) {
- if (event)
- len = event - &argv[0][1] - 1;
- else
- len = strlen(&argv[0][1]);
- if (len > MAX_EVENT_NAME_LEN - 1) {
- trace_probe_log_err(1, BAD_MAXACT);
- goto parse_error;
- }
- memcpy(buf, &argv[0][1], len);
- buf[len] = '\0';
- ret = kstrtouint(buf, 0, &maxactive);
- if (ret || !maxactive) {
+ if (argv[0][1] != '\0') {
+ if (argv[0][1] != ':') {
+ trace_probe_log_set_index(0);
trace_probe_log_err(1, BAD_MAXACT);
- goto parse_error;
- }
- /* fprobe rethook instances are iterated over via a list. The
- * maximum should stay reasonable.
- */
- if (maxactive > RETHOOK_MAXACTIVE_MAX) {
- trace_probe_log_err(1, MAXACT_TOO_BIG);
- goto parse_error;
+ return -EINVAL;
}
+ event = &argv[0][2];
}
trace_probe_log_set_index(1);
@@ -1123,20 +1131,14 @@ static int __trace_fprobe_create(int argc, const char *argv[])
/* a symbol(or tracepoint) must be specified */
ret = parse_symbol_and_return(argc, argv, &symbol, &is_return, is_tracepoint);
if (ret < 0)
- goto parse_error;
-
- if (!is_return && maxactive) {
- trace_probe_log_set_index(0);
- trace_probe_log_err(1, BAD_MAXACT_TYPE);
- goto parse_error;
- }
+ return -EINVAL;
trace_probe_log_set_index(0);
if (event) {
ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
- goto parse_error;
+ return -EINVAL;
}
if (!event) {
@@ -1152,67 +1154,62 @@ static int __trace_fprobe_create(int argc, const char *argv[])
}
if (is_return)
- ctx.flags |= TPARG_FL_RETURN;
+ ctx->flags |= TPARG_FL_RETURN;
else
- ctx.flags |= TPARG_FL_FENTRY;
+ ctx->flags |= TPARG_FL_FENTRY;
if (is_tracepoint) {
- ctx.flags |= TPARG_FL_TPOINT;
+ ctx->flags |= TPARG_FL_TPOINT;
tpoint = find_tracepoint(symbol, &tp_mod);
if (tpoint) {
- ctx.funcname = kallsyms_lookup(
+ ctx->funcname = kallsyms_lookup(
(unsigned long)tpoint->probestub,
NULL, NULL, NULL, sbuf);
} else if (IS_ENABLED(CONFIG_MODULES)) {
/* This *may* be loaded afterwards */
tpoint = TRACEPOINT_STUB;
- ctx.funcname = symbol;
+ ctx->funcname = symbol;
} else {
trace_probe_log_set_index(1);
trace_probe_log_err(0, NO_TRACEPOINT);
- goto parse_error;
+ return -EINVAL;
}
} else
- ctx.funcname = symbol;
+ ctx->funcname = symbol;
argc -= 2; argv += 2;
new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
- abuf, MAX_BTF_ARGS_LEN, &ctx);
- if (IS_ERR(new_argv)) {
- ret = PTR_ERR(new_argv);
- new_argv = NULL;
- goto out;
- }
+ abuf, MAX_BTF_ARGS_LEN, ctx);
+ if (IS_ERR(new_argv))
+ return PTR_ERR(new_argv);
if (new_argv) {
argc = new_argc;
argv = new_argv;
}
- if (argc > MAX_TRACE_ARGS) {
- ret = -E2BIG;
- goto out;
- }
+ if (argc > MAX_TRACE_ARGS)
+ return -E2BIG;
ret = traceprobe_expand_dentry_args(argc, argv, &dbuf);
if (ret)
- goto out;
+ return ret;
/* setup a probe */
tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
- maxactive, argc, is_return);
+ argc, is_return);
if (IS_ERR(tf)) {
ret = PTR_ERR(tf);
/* This must return -ENOMEM, else there is a bug */
WARN_ON_ONCE(ret != -ENOMEM);
- goto out; /* We know tf is not allocated */
+ return ret;
}
/* parse arguments */
for (i = 0; i < argc; i++) {
trace_probe_log_set_index(i + 2);
- ctx.offset = 0;
- ret = traceprobe_parse_probe_arg(&tf->tp, i, argv[i], &ctx);
+ ctx->offset = 0;
+ ret = traceprobe_parse_probe_arg(&tf->tp, i, argv[i], ctx);
if (ret)
- goto error; /* This can be -ENOMEM */
+ return ret; /* This can be -ENOMEM */
}
if (is_return && tf->tp.entry_arg) {
@@ -1223,7 +1220,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
ret = traceprobe_set_print_fmt(&tf->tp,
is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL);
if (ret < 0)
- goto error;
+ return ret;
ret = register_trace_fprobe(tf);
if (ret) {
@@ -1234,29 +1231,32 @@ static int __trace_fprobe_create(int argc, const char *argv[])
trace_probe_log_err(0, BAD_PROBE_ADDR);
else if (ret != -ENOMEM && ret != -EEXIST)
trace_probe_log_err(0, FAIL_REG_PROBE);
- goto error;
+ return -EINVAL;
}
-out:
- if (tp_mod)
- module_put(tp_mod);
+	/* 'tf' is now registered; set it to NULL so the __free() cleanup skips it. */
+ tf = NULL;
+
+ return 0;
+}
+
+static int trace_fprobe_create_cb(int argc, const char *argv[])
+{
+ struct traceprobe_parse_context ctx = {
+ .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
+ };
+ int ret;
+
+ trace_probe_log_init("trace_fprobe", argc, argv);
+ ret = trace_fprobe_create_internal(argc, argv, &ctx);
traceprobe_finish_parse(&ctx);
trace_probe_log_clear();
- kfree(new_argv);
- kfree(symbol);
- kfree(dbuf);
return ret;
-
-parse_error:
- ret = -EINVAL;
-error:
- free_trace_fprobe(tf);
- goto out;
}
static int trace_fprobe_create(const char *raw_command)
{
- return trace_probe_create(raw_command, __trace_fprobe_create);
+ return trace_probe_create(raw_command, trace_fprobe_create_cb);
}
static int trace_fprobe_release(struct dyn_event *ev)
@@ -1278,8 +1278,6 @@ static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev)
seq_putc(m, 't');
else
seq_putc(m, 'f');
- if (trace_fprobe_is_return(tf) && tf->fp.nr_maxactive)
- seq_printf(m, "%d", tf->fp.nr_maxactive);
seq_printf(m, ":%s/%s", trace_probe_group_name(&tf->tp),
trace_probe_name(&tf->tp));
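After the conversion every fprobe consumer sees ftrace_regs rather than pt_regs. A hedged sketch of the handler shape this file now implements (probe target and names hypothetical):

static int my_entry(struct fprobe *fp, unsigned long entry_ip,
		    unsigned long ret_ip, struct ftrace_regs *fregs,
		    void *entry_data)
{
	pr_info("enter %pS arg0=%lx\n", (void *)entry_ip,
		ftrace_regs_get_argument(fregs, 0));
	return 0;
}

static void my_exit(struct fprobe *fp, unsigned long entry_ip,
		    unsigned long ret_ip, struct ftrace_regs *fregs,
		    void *entry_data)
{
	pr_info("exit  %pS ret=%lx\n", (void *)entry_ip,
		ftrace_regs_get_return_value(fregs));
}

static struct fprobe my_fprobe = {
	.entry_handler	= my_entry,
	.exit_handler	= my_exit,
};
/* register_fprobe(&my_fprobe, "do_sys_open*", NULL); */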
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 74c353164ca1..d358c9935164 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -176,7 +176,8 @@ static void function_trace_start(struct trace_array *tr)
tracing_reset_online_cpus(&tr->array_buffer);
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/* fregs are guaranteed not to be NULL if HAVE_DYNAMIC_FTRACE_WITH_ARGS is set */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)
static __always_inline unsigned long
function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
{
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 5504b5e4e7b4..dc62eb93837a 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -175,16 +175,16 @@ struct fgraph_times {
};
int trace_graph_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
struct fgraph_times *ftimes;
- unsigned long flags;
unsigned int trace_ctx;
long disabled;
- int ret;
+ int ret = 0;
int cpu;
if (*task_var & TRACE_GRAPH_NOTRACE)
@@ -235,25 +235,21 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
if (tracing_thresh)
return 1;
- local_irq_save(flags);
+ preempt_disable_notrace();
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
- disabled = atomic_inc_return(&data->disabled);
- if (likely(disabled == 1)) {
- trace_ctx = tracing_gen_ctx_flags(flags);
- if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
- tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR))) {
+ disabled = atomic_read(&data->disabled);
+ if (likely(!disabled)) {
+ trace_ctx = tracing_gen_ctx();
+ if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
+ tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR)) {
unsigned long retaddr = ftrace_graph_top_ret_addr(current);
-
ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
- } else
+ } else {
ret = __trace_graph_entry(tr, trace, trace_ctx);
- } else {
- ret = 0;
+ }
}
-
- atomic_dec(&data->disabled);
- local_irq_restore(flags);
+ preempt_enable_notrace();
return ret;
}
@@ -314,13 +310,12 @@ static void handle_nosleeptime(struct ftrace_graph_ret *trace,
}
void trace_graph_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops, struct ftrace_regs *fregs)
{
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
struct fgraph_times *ftimes;
- unsigned long flags;
unsigned int trace_ctx;
long disabled;
int size;
@@ -341,20 +336,20 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
trace->calltime = ftimes->calltime;
- local_irq_save(flags);
+ preempt_disable_notrace();
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
- disabled = atomic_inc_return(&data->disabled);
- if (likely(disabled == 1)) {
- trace_ctx = tracing_gen_ctx_flags(flags);
+ disabled = atomic_read(&data->disabled);
+ if (likely(!disabled)) {
+ trace_ctx = tracing_gen_ctx();
__trace_graph_return(tr, trace, trace_ctx);
}
- atomic_dec(&data->disabled);
- local_irq_restore(flags);
+ preempt_enable_notrace();
}
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct fgraph_times *ftimes;
int size;
@@ -378,7 +373,7 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
(trace->rettime - ftimes->calltime < tracing_thresh))
return;
else
- trace_graph_return(trace, gops);
+ trace_graph_return(trace, gops, fregs);
}
static struct fgraph_ops funcgraph_ops = {
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index fce064e20570..08786c59d397 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -176,12 +176,14 @@ static int irqsoff_display_graph(struct trace_array *tr, int set)
}
static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
unsigned int trace_ctx;
+ u64 *calltime;
int ret;
if (ftrace_graph_ignore_func(gops, trace))
@@ -199,6 +201,12 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
if (!func_prolog_dec(tr, &data, &flags))
return 0;
+ calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
+ if (!calltime)
+ return 0;
+
+ *calltime = trace_clock_local();
+
trace_ctx = tracing_gen_ctx_flags(flags);
ret = __trace_graph_entry(tr, trace, trace_ctx);
atomic_dec(&data->disabled);
@@ -207,18 +215,26 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
}
static void irqsoff_graph_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
unsigned int trace_ctx;
+ u64 *calltime;
+ int size;
ftrace_graph_addr_finish(gops, trace);
if (!func_prolog_dec(tr, &data, &flags))
return;
+ calltime = fgraph_retrieve_data(gops->idx, &size);
+ if (!calltime)
+ return;
+ trace->calltime = *calltime;
+
trace_ctx = tracing_gen_ctx_flags(flags);
__trace_graph_return(tr, trace, trace_ctx);
atomic_dec(&data->disabled);
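The calltime handling added here is the generic fgraph shadow-stack pattern: reserve per-call storage on entry, retrieve it on return. In isolation (callback names hypothetical):

static int entry_cb(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
		    struct ftrace_regs *fregs)
{
	u64 *calltime;

	/* reserve storage tied to this call frame on the shadow stack */
	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
	if (!calltime)
		return 0;	/* shadow stack exhausted: skip this function */
	*calltime = trace_clock_local();
	return 1;
}

static void return_cb(struct ftrace_graph_ret *trace, struct fgraph_ops *gops,
		      struct ftrace_regs *fregs)
{
	u64 *calltime;
	int size;

	/* fetch what the matching entry callback stored */
	calltime = fgraph_retrieve_data(gops->idx, &size);
	if (calltime)
		trace->calltime = *calltime;
}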
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 263fac44d3ca..0642ea174849 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -725,7 +725,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
static struct notifier_block trace_kprobe_module_nb = {
.notifier_call = trace_kprobe_module_callback,
- .priority = 1 /* Invoked after kprobe module callback */
+ .priority = 2 /* Invoked after kprobe and jump_label module callback */
};
static int trace_kprobe_register_module_notifier(void)
{
@@ -940,8 +940,10 @@ static int __trace_kprobe_create(int argc, const char *argv[])
}
/* a symbol specified */
symbol = kstrdup(argv[1], GFP_KERNEL);
- if (!symbol)
- return -ENOMEM;
+ if (!symbol) {
+ ret = -ENOMEM;
+ goto error;
+ }
tmp = strchr(symbol, '%');
if (tmp) {
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index da748b7cbc4d..03d56f711ad1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -317,10 +317,14 @@ EXPORT_SYMBOL(trace_raw_output_prep);
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...)
{
+ struct trace_seq *s = &iter->seq;
va_list ap;
+ if (ignore_event(iter))
+ return;
+
va_start(ap, fmt);
- trace_check_vprintf(iter, trace_event_format(iter, fmt), ap);
+ trace_seq_vprintf(s, trace_event_format(iter, fmt), ap);
va_end(ap);
}
EXPORT_SYMBOL(trace_event_printf);
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index 2caf0d2afb32..f39b37fcdb3b 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -232,7 +232,7 @@ array:
/* Sum up total data length for dynamic arrays (strings) */
static nokprobe_inline int
-__get_data_size(struct trace_probe *tp, struct pt_regs *regs, void *edata)
+__get_data_size(struct trace_probe *tp, void *regs, void *edata)
{
struct probe_arg *arg;
int i, len, ret = 0;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 573b5d8e8a28..cb49f7279dc8 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -442,7 +442,7 @@ int trace_alloc_tgid_map(void)
if (tgid_map)
return 0;
- tgid_map_max = pid_max;
+ tgid_map_max = init_pid_ns.pid_max;
map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
GFP_KERNEL);
if (!map)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index d6c7f18daa15..f372252dc8bb 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -113,11 +113,13 @@ static int wakeup_display_graph(struct trace_array *tr, int set)
}
static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned int trace_ctx;
+ u64 *calltime;
int ret = 0;
if (ftrace_graph_ignore_func(gops, trace))
@@ -135,6 +137,12 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
return 0;
+ calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
+ if (!calltime)
+ return 0;
+
+ *calltime = trace_clock_local();
+
ret = __trace_graph_entry(tr, trace, trace_ctx);
atomic_dec(&data->disabled);
preempt_enable_notrace();
@@ -143,17 +151,25 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
}
static void wakeup_graph_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned int trace_ctx;
+ u64 *calltime;
+ int size;
ftrace_graph_addr_finish(gops, trace);
if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
return;
+ calltime = fgraph_retrieve_data(gops->idx, &size);
+ if (!calltime)
+ return;
+ trace->calltime = *calltime;
+
__trace_graph_return(tr, trace, trace_ctx);
atomic_dec(&data->disabled);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 38b5754790c9..d88c44f1dfa5 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -774,7 +774,8 @@ struct fgraph_fixture {
};
static __init int store_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops);
const char *type = fixture->store_type_name;
@@ -807,7 +808,8 @@ static __init int store_entry(struct ftrace_graph_ent *trace,
}
static __init void store_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops);
const char *type = fixture->store_type_name;
@@ -1025,7 +1027,8 @@ static unsigned int graph_hang_thresh;
/* Wrap the real function entry probe to avoid possible hanging */
static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
/* This is harmlessly racy, we want to approximately detect a hang */
if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) {
@@ -1039,7 +1042,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace,
return 0;
}
- return trace_graph_entry(trace, gops);
+ return trace_graph_entry(trace, gops, fregs);
}
static struct fgraph_ops fgraph_ops __initdata = {
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index fed382b7881b..4875e7f5de3d 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1402,9 +1402,13 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
#ifdef CONFIG_BPF_EVENTS
if (bpf_prog_array_valid(call)) {
+ const struct bpf_prog_array *array;
u32 ret;
- ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run);
+ rcu_read_lock_trace();
+ array = rcu_dereference_check(call->prog_array, rcu_read_lock_trace_held());
+ ret = bpf_prog_run_array_uprobe(array, regs, bpf_prog_run);
+ rcu_read_unlock_trace();
if (!ret)
return;
}