author    Jakub Kicinski <kuba@kernel.org>  2025-04-17 12:23:49 -0700
committer Jakub Kicinski <kuba@kernel.org>  2025-04-17 12:26:50 -0700
commit    240ce924d2718b8f6f622f2a9a9c219b9da736e8 (patch)
tree      a417bc3d31264e2c317614b25c4a7a6f688916b5 /kernel
parent    22ab6b9467c1822291a1175a0eb825b7ec057ef9 (diff)
parent    b5c6891b2c5b54bf58069966296917da46cda6f2 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR (net-6.15-rc3).

No conflicts.

Adjacent changes:

tools/net/ynl/pyynl/ynl_gen_c.py
  4d07bbf2d456 ("tools: ynl-gen: don't declare loop iterator in place")
  7e8ba0c7de2b ("tools: ynl: don't use genlmsghdr in classic netlink")

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/queue_stack_maps.c        |  35
-rw-r--r--  kernel/bpf/ringbuf.c                 |  17
-rw-r--r--  kernel/bpf/rqspinlock.c              |   2
-rw-r--r--  kernel/events/core.c                 |  70
-rw-r--r--  kernel/events/uprobes.c              |  15
-rw-r--r--  kernel/time/hrtimer.c                |   2
-rw-r--r--  kernel/time/tick-common.c            |  22
-rw-r--r--  kernel/trace/ftrace.c                | 314
-rw-r--r--  kernel/trace/rv/rv.c                 |   7
-rw-r--r--  kernel/trace/trace.c                 |   7
-rw-r--r--  kernel/trace/trace_events_synth.c    |   1
-rw-r--r--  kernel/trace/trace_functions_graph.c |  11
12 files changed, 272 insertions(+), 231 deletions(-)
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index d869f51ea93a..9a5f94371e50 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -9,13 +9,14 @@
#include <linux/slab.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
+#include <asm/rqspinlock.h>
#define QUEUE_STACK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
struct bpf_queue_stack {
struct bpf_map map;
- raw_spinlock_t lock;
+ rqspinlock_t lock;
u32 head, tail;
u32 size; /* max_entries + 1 */
@@ -78,7 +79,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
qs->size = size;
- raw_spin_lock_init(&qs->lock);
+ raw_res_spin_lock_init(&qs->lock);
return &qs->map;
}
@@ -98,12 +99,8 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
int err = 0;
void *ptr;
- if (in_nmi()) {
- if (!raw_spin_trylock_irqsave(&qs->lock, flags))
- return -EBUSY;
- } else {
- raw_spin_lock_irqsave(&qs->lock, flags);
- }
+ if (raw_res_spin_lock_irqsave(&qs->lock, flags))
+ return -EBUSY;
if (queue_stack_map_is_empty(qs)) {
memset(value, 0, qs->map.value_size);
@@ -120,7 +117,7 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
}
out:
- raw_spin_unlock_irqrestore(&qs->lock, flags);
+ raw_res_spin_unlock_irqrestore(&qs->lock, flags);
return err;
}
@@ -133,12 +130,8 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
void *ptr;
u32 index;
- if (in_nmi()) {
- if (!raw_spin_trylock_irqsave(&qs->lock, flags))
- return -EBUSY;
- } else {
- raw_spin_lock_irqsave(&qs->lock, flags);
- }
+ if (raw_res_spin_lock_irqsave(&qs->lock, flags))
+ return -EBUSY;
if (queue_stack_map_is_empty(qs)) {
memset(value, 0, qs->map.value_size);
@@ -157,7 +150,7 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
qs->head = index;
out:
- raw_spin_unlock_irqrestore(&qs->lock, flags);
+ raw_res_spin_unlock_irqrestore(&qs->lock, flags);
return err;
}
@@ -203,12 +196,8 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
if (flags & BPF_NOEXIST || flags > BPF_EXIST)
return -EINVAL;
- if (in_nmi()) {
- if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
- return -EBUSY;
- } else {
- raw_spin_lock_irqsave(&qs->lock, irq_flags);
- }
+ if (raw_res_spin_lock_irqsave(&qs->lock, irq_flags))
+ return -EBUSY;
if (queue_stack_map_is_full(qs)) {
if (!replace) {
@@ -227,7 +216,7 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
qs->head = 0;
out:
- raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+ raw_res_spin_unlock_irqrestore(&qs->lock, irq_flags);
return err;
}
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 1499d8caa9a3..719d73299397 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -11,6 +11,7 @@
#include <linux/kmemleak.h>
#include <uapi/linux/btf.h>
#include <linux/btf_ids.h>
+#include <asm/rqspinlock.h>
#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)
@@ -29,7 +30,7 @@ struct bpf_ringbuf {
u64 mask;
struct page **pages;
int nr_pages;
- raw_spinlock_t spinlock ____cacheline_aligned_in_smp;
+ rqspinlock_t spinlock ____cacheline_aligned_in_smp;
/* For user-space producer ring buffers, an atomic_t busy bit is used
* to synchronize access to the ring buffers in the kernel, rather than
* the spinlock that is used for kernel-producer ring buffers. This is
@@ -173,7 +174,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
if (!rb)
return NULL;
- raw_spin_lock_init(&rb->spinlock);
+ raw_res_spin_lock_init(&rb->spinlock);
atomic_set(&rb->busy, 0);
init_waitqueue_head(&rb->waitq);
init_irq_work(&rb->work, bpf_ringbuf_notify);
@@ -416,12 +417,8 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
cons_pos = smp_load_acquire(&rb->consumer_pos);
- if (in_nmi()) {
- if (!raw_spin_trylock_irqsave(&rb->spinlock, flags))
- return NULL;
- } else {
- raw_spin_lock_irqsave(&rb->spinlock, flags);
- }
+ if (raw_res_spin_lock_irqsave(&rb->spinlock, flags))
+ return NULL;
pend_pos = rb->pending_pos;
prod_pos = rb->producer_pos;
@@ -446,7 +443,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
*/
if (new_prod_pos - cons_pos > rb->mask ||
new_prod_pos - pend_pos > rb->mask) {
- raw_spin_unlock_irqrestore(&rb->spinlock, flags);
+ raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
return NULL;
}
@@ -458,7 +455,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
/* pairs with consumer's smp_load_acquire() */
smp_store_release(&rb->producer_pos, new_prod_pos);
- raw_spin_unlock_irqrestore(&rb->spinlock, flags);
+ raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
return (void *)hdr + BPF_RINGBUF_HDR_SZ;
}
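
The two conversions above (queue_stack_maps.c and ringbuf.c) follow the same shape: the old in_nmi()/trylock special case is replaced by raw_res_spin_lock_irqsave(), which reports failure itself instead of spinning forever. A minimal sketch of that calling pattern, using only the rqspinlock symbols visible in this diff; the struct and function names (demo_obj, demo_update) are hypothetical, and treating any non-zero return as -EBUSY is a simplification.

#include <linux/errno.h>
#include <asm/rqspinlock.h>

struct demo_obj {
        rqspinlock_t lock;      /* was raw_spinlock_t before the conversion;
                                 * raw_res_spin_lock_init() assumed at alloc time */
};

static int demo_update(struct demo_obj *obj)
{
        unsigned long flags;

        /*
         * Old pattern: in_nmi() ? raw_spin_trylock_irqsave() : raw_spin_lock_irqsave().
         * The resilient lock bails out on contention/deadlock by itself, so a
         * single call covers both the NMI and the regular path.
         */
        if (raw_res_spin_lock_irqsave(&obj->lock, flags))
                return -EBUSY;

        /* ... modify obj under the lock ... */

        raw_res_spin_unlock_irqrestore(&obj->lock, flags);
        return 0;
}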
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index b896c4a75a5c..338305c8852c 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -253,7 +253,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
})
#else
#define RES_CHECK_TIMEOUT(ts, ret, mask) \
- ({ (ret) = check_timeout(&(ts)); })
+ ({ (ret) = check_timeout((lock), (mask), &(ts)); })
#endif
/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 128db74e9eab..e93c19565914 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5518,30 +5518,6 @@ static bool exclusive_event_installable(struct perf_event *event,
static void perf_free_addr_filters(struct perf_event *event);
-static void perf_pending_task_sync(struct perf_event *event)
-{
- struct callback_head *head = &event->pending_task;
-
- if (!event->pending_work)
- return;
- /*
- * If the task is queued to the current task's queue, we
- * obviously can't wait for it to complete. Simply cancel it.
- */
- if (task_work_cancel(current, head)) {
- event->pending_work = 0;
- local_dec(&event->ctx->nr_no_switch_fast);
- return;
- }
-
- /*
- * All accesses related to the event are within the same RCU section in
- * perf_pending_task(). The RCU grace period before the event is freed
- * will make sure all those accesses are complete by then.
- */
- rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE);
-}
-
/* vs perf_event_alloc() error */
static void __free_event(struct perf_event *event)
{
@@ -5599,7 +5575,6 @@ static void _free_event(struct perf_event *event)
{
irq_work_sync(&event->pending_irq);
irq_work_sync(&event->pending_disable_irq);
- perf_pending_task_sync(event);
unaccount_event(event);
@@ -5692,10 +5667,17 @@ static void perf_remove_from_owner(struct perf_event *event)
static void put_event(struct perf_event *event)
{
+ struct perf_event *parent;
+
if (!atomic_long_dec_and_test(&event->refcount))
return;
+ parent = event->parent;
_free_event(event);
+
+ /* Matches the refcount bump in inherit_event() */
+ if (parent)
+ put_event(parent);
}
/*
@@ -5779,11 +5761,6 @@ again:
if (tmp == child) {
perf_remove_from_context(child, DETACH_GROUP);
list_move(&child->child_list, &free_list);
- /*
- * This matches the refcount bump in inherit_event();
- * this can't be the last reference.
- */
- put_event(event);
} else {
var = &ctx->refcount;
}
@@ -5809,7 +5786,8 @@ again:
void *var = &child->ctx->refcount;
list_del(&child->child_list);
- free_event(child);
+ /* Last reference unless ->pending_task work is pending */
+ put_event(child);
/*
* Wake any perf_event_free_task() waiting for this event to be
@@ -5820,7 +5798,11 @@ again:
}
no_ctx:
- put_event(event); /* Must be the 'last' reference */
+ /*
+ * Last reference unless ->pending_task work is pending on this event
+ * or any of its children.
+ */
+ put_event(event);
return 0;
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
@@ -7236,12 +7218,6 @@ static void perf_pending_task(struct callback_head *head)
int rctx;
/*
- * All accesses to the event must belong to the same implicit RCU read-side
- * critical section as the ->pending_work reset. See comment in
- * perf_pending_task_sync().
- */
- rcu_read_lock();
- /*
* If we 'fail' here, that's OK, it means recursion is already disabled
* and we won't recurse 'further'.
*/
@@ -7251,9 +7227,8 @@ static void perf_pending_task(struct callback_head *head)
event->pending_work = 0;
perf_sigtrap(event);
local_dec(&event->ctx->nr_no_switch_fast);
- rcuwait_wake_up(&event->pending_work_wait);
}
- rcu_read_unlock();
+ put_event(event);
if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
@@ -10248,6 +10223,7 @@ static int __perf_event_overflow(struct perf_event *event,
!task_work_add(current, &event->pending_task, notify_mode)) {
event->pending_work = pending_id;
local_inc(&event->ctx->nr_no_switch_fast);
+ WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
event->pending_addr = 0;
if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
@@ -12610,7 +12586,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
init_irq_work(&event->pending_irq, perf_pending_irq);
event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
init_task_work(&event->pending_task, perf_pending_task);
- rcuwait_init(&event->pending_work_wait);
mutex_init(&event->mmap_mutex);
raw_spin_lock_init(&event->addr_filters.lock);
@@ -13747,8 +13722,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
* Kick perf_poll() for is_event_hup();
*/
perf_event_wakeup(parent_event);
- free_event(event);
- put_event(parent_event);
+ put_event(event);
return;
}
@@ -13872,13 +13846,11 @@ static void perf_free_event(struct perf_event *event,
list_del_init(&event->child_list);
mutex_unlock(&parent->child_mutex);
- put_event(parent);
-
raw_spin_lock_irq(&ctx->lock);
perf_group_detach(event);
list_del_event(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
- free_event(event);
+ put_event(event);
}
/*
@@ -14016,6 +13988,9 @@ inherit_event(struct perf_event *parent_event,
if (IS_ERR(child_event))
return child_event;
+ get_ctx(child_ctx);
+ child_event->ctx = child_ctx;
+
pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event);
if (IS_ERR(pmu_ctx)) {
free_event(child_event);
@@ -14037,8 +14012,6 @@ inherit_event(struct perf_event *parent_event,
return NULL;
}
- get_ctx(child_ctx);
-
/*
* Make the child state follow the state of the parent event,
* not its attr.disabled bit. We hold the parent's mutex,
@@ -14059,7 +14032,6 @@ inherit_event(struct perf_event *parent_event,
local64_set(&hwc->period_left, sample_period);
}
- child_event->ctx = child_ctx;
child_event->overflow_handler = parent_event->overflow_handler;
child_event->overflow_handler_context
= parent_event->overflow_handler_context;
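
The core.c hunks above replace the rcuwait-based perf_pending_task_sync() with plain reference counting: whoever queues the ->pending_task work pins the event with a reference, and perf_pending_task() drops it through put_event(). A rough sketch of that lifetime rule under hypothetical names (pinned_obj, pinned_obj_put); it is not the perf code itself, only the queue-time get / callback-time put idea.

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/task_work.h>

struct pinned_obj {
        atomic_long_t refcount;
        struct callback_head pending_task;      /* init_task_work() done at alloc */
};

static void pinned_obj_put(struct pinned_obj *obj)
{
        if (atomic_long_dec_and_test(&obj->refcount))
                kfree(obj);                     /* last reference */
}

static void pinned_obj_work(struct callback_head *head)
{
        struct pinned_obj *obj = container_of(head, struct pinned_obj, pending_task);

        /* ... deferred work; obj is guaranteed to still be alive ... */

        pinned_obj_put(obj);                    /* drop the queue-time reference */
}

static void pinned_obj_queue(struct pinned_obj *obj)
{
        /* Pin the object for as long as the task_work may run. */
        if (WARN_ON_ONCE(!atomic_long_inc_not_zero(&obj->refcount)))
                return;

        if (task_work_add(current, &obj->pending_task, TWA_RESUME))
                pinned_obj_put(obj);            /* queueing failed, undo the pin */
}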
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 615b4e6d22c7..8d783b5882b6 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1956,6 +1956,9 @@ static void free_ret_instance(struct uprobe_task *utask,
* to-be-reused return instances for future uretprobes. If ri_timer()
* happens to be running right now, though, we fallback to safety and
* just perform RCU-delated freeing of ri.
+ * Admittedly, this is a rather simple use of seqcount, but it nicely
+ * abstracts away all the necessary memory barriers, so we use
+ * a well-supported kernel primitive here.
*/
if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) {
/* immediate reuse of ri without RCU GP is OK */
@@ -2016,12 +2019,20 @@ static void ri_timer(struct timer_list *timer)
/* RCU protects return_instance from freeing. */
guard(rcu)();
- write_seqcount_begin(&utask->ri_seqcount);
+ /*
+ * See free_ret_instance() for notes on seqcount use.
+ * We also employ raw API variants to avoid lockdep false-positive
+ * warning complaining about enabled preemption. The timer can only be
+ * invoked once for a uprobe_task. Therefore there can only be one
+ * writer. The reader does not require an even sequence count to make
+ * progress, so it is OK to remain preemptible on PREEMPT_RT.
+ */
+ raw_write_seqcount_begin(&utask->ri_seqcount);
for_each_ret_instance_rcu(ri, utask->return_instances)
hprobe_expire(&ri->hprobe, false);
- write_seqcount_end(&utask->ri_seqcount);
+ raw_write_seqcount_end(&utask->ri_seqcount);
}
static struct uprobe_task *alloc_utask(void)
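
The ri_timer() change above switches to the raw_ seqcount writer API because lockdep would otherwise complain about the preemptible (PREEMPT_RT) context, and there is only ever one writer per uprobe_task. A minimal sketch of the single-writer seqcount pattern with the classic retry-loop reader; the uprobes code uses raw_seqcount_try_begin() as a one-shot check instead, and the demo_* names here are hypothetical.

#include <linux/compiler.h>
#include <linux/seqlock.h>

static seqcount_t demo_seq = SEQCNT_ZERO(demo_seq);
static int demo_value;                          /* hypothetical protected data */

static void demo_write(int v)                   /* single writer, e.g. a timer callback */
{
        raw_write_seqcount_begin(&demo_seq);    /* raw_: skips lockdep preemption checks */
        WRITE_ONCE(demo_value, v);
        raw_write_seqcount_end(&demo_seq);
}

static int demo_read(void)
{
        unsigned int seq;
        int v;

        do {
                seq = read_seqcount_begin(&demo_seq);
                v = READ_ONCE(demo_value);
        } while (read_seqcount_retry(&demo_seq, seq));

        return v;
}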
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 517ee2590a29..30899a8cc52c 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -366,7 +366,7 @@ static const struct debug_obj_descr hrtimer_debug_descr;
static void *hrtimer_debug_hint(void *addr)
{
- return ((struct hrtimer *) addr)->function;
+ return ACCESS_PRIVATE((struct hrtimer *)addr, function);
}
/*
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index a47bcf71defc..9a3859443c04 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -509,6 +509,7 @@ void tick_resume(void)
#ifdef CONFIG_SUSPEND
static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
+static DEFINE_WAIT_OVERRIDE_MAP(tick_freeze_map, LD_WAIT_SLEEP);
static unsigned int tick_freeze_depth;
/**
@@ -528,9 +529,22 @@ void tick_freeze(void)
if (tick_freeze_depth == num_online_cpus()) {
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), true);
+ /*
+ * All other CPUs have their interrupts disabled and are
+ * suspended to idle. Other tasks have been frozen so there
+ * is no scheduling happening. This means that there is no
+ * concurrency in the system at this point. Therefore it is
+ * okay to acquire a sleeping lock on PREEMPT_RT, such as a
+ * spinlock, because the lock cannot be held by other CPUs
+ * or threads and acquiring it cannot block.
+ *
+ * Inform lockdep about the situation.
+ */
+ lock_map_acquire_try(&tick_freeze_map);
system_state = SYSTEM_SUSPEND;
sched_clock_suspend();
timekeeping_suspend();
+ lock_map_release(&tick_freeze_map);
} else {
tick_suspend_local();
}
@@ -552,8 +566,16 @@ void tick_unfreeze(void)
raw_spin_lock(&tick_freeze_lock);
if (tick_freeze_depth == num_online_cpus()) {
+ /*
+ * Similar to tick_freeze(). On resumption the first CPU may
+ * acquire uncontended sleeping locks while other CPUs block on
+ * tick_freeze_lock.
+ */
+ lock_map_acquire_try(&tick_freeze_map);
timekeeping_resume();
sched_clock_resume();
+ lock_map_release(&tick_freeze_map);
+
system_state = SYSTEM_RUNNING;
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), false);
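
The tick_freeze()/tick_unfreeze() hunks introduce a lockdep wait-override map so that taking sleeping locks from this interrupts-disabled but provably single-threaded region does not trigger false positives. A small sketch of the annotation shape, using only the symbols that appear in this diff (DEFINE_WAIT_OVERRIDE_MAP, lock_map_acquire_try, lock_map_release); the map and function names are hypothetical.

#include <linux/lockdep.h>

static DEFINE_WAIT_OVERRIDE_MAP(demo_override_map, LD_WAIT_SLEEP);

static void demo_single_threaded_section(void)
{
        /*
         * No other CPU or task can contend here, so sleeping locks taken by
         * the callees cannot actually block; tell lockdep to allow them.
         */
        lock_map_acquire_try(&demo_override_map);

        /* ... calls that may acquire sleeping locks on PREEMPT_RT ... */

        lock_map_release(&demo_override_map);
}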
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1a48aedb5255..a8a02868b435 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3256,6 +3256,31 @@ static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash,
}
/*
+ * Remove functions from @hash that are in @notrace_hash
+ */
+static void remove_hash(struct ftrace_hash *hash, struct ftrace_hash *notrace_hash)
+{
+ struct ftrace_func_entry *entry;
+ struct hlist_node *tmp;
+ int size;
+ int i;
+
+ /* If the notrace hash is empty, there's nothing to do */
+ if (ftrace_hash_empty(notrace_hash))
+ return;
+
+ size = 1 << hash->size_bits;
+ for (i = 0; i < size; i++) {
+ hlist_for_each_entry_safe(entry, tmp, &hash->buckets[i], hlist) {
+ if (!__ftrace_lookup_ip(notrace_hash, entry->ip))
+ continue;
+ remove_hash_entry(hash, entry);
+ kfree(entry);
+ }
+ }
+}
+
+/*
* Add to @hash only those that are in both @new_hash1 and @new_hash2
*
* The notrace_hash updates uses just the intersect_hash() function
@@ -3295,67 +3320,6 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has
return 0;
}
-/* Return a new hash that has a union of all @ops->filter_hash entries */
-static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
-{
- struct ftrace_hash *new_hash = NULL;
- struct ftrace_ops *subops;
- int size_bits;
- int ret;
-
- if (ops->func_hash->filter_hash)
- size_bits = ops->func_hash->filter_hash->size_bits;
- else
- size_bits = FTRACE_HASH_DEFAULT_BITS;
-
- list_for_each_entry(subops, &ops->subop_list, list) {
- ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits);
- if (ret < 0) {
- free_ftrace_hash(new_hash);
- return NULL;
- }
- /* Nothing more to do if new_hash is empty */
- if (ftrace_hash_empty(new_hash))
- break;
- }
- /* Can't return NULL as that means this failed */
- return new_hash ? : EMPTY_HASH;
-}
-
-/* Make @ops trace evenything except what all its subops do not trace */
-static struct ftrace_hash *intersect_hashes(struct ftrace_ops *ops)
-{
- struct ftrace_hash *new_hash = NULL;
- struct ftrace_ops *subops;
- int size_bits;
- int ret;
-
- list_for_each_entry(subops, &ops->subop_list, list) {
- struct ftrace_hash *next_hash;
-
- if (!new_hash) {
- size_bits = subops->func_hash->notrace_hash->size_bits;
- new_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->notrace_hash);
- if (!new_hash)
- return NULL;
- continue;
- }
- size_bits = new_hash->size_bits;
- next_hash = new_hash;
- new_hash = alloc_ftrace_hash(size_bits);
- ret = intersect_hash(&new_hash, next_hash, subops->func_hash->notrace_hash);
- free_ftrace_hash(next_hash);
- if (ret < 0) {
- free_ftrace_hash(new_hash);
- return NULL;
- }
- /* Nothing more to do if new_hash is empty */
- if (ftrace_hash_empty(new_hash))
- break;
- }
- return new_hash;
-}
-
static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B)
{
struct ftrace_func_entry *entry;
@@ -3427,6 +3391,93 @@ static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_
return 0;
}
+static int add_first_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash,
+ struct ftrace_ops_hash *func_hash)
+{
+ /* If the filter hash is not empty, simply remove the nohash from it */
+ if (!ftrace_hash_empty(func_hash->filter_hash)) {
+ *filter_hash = copy_hash(func_hash->filter_hash);
+ if (!*filter_hash)
+ return -ENOMEM;
+ remove_hash(*filter_hash, func_hash->notrace_hash);
+ *notrace_hash = EMPTY_HASH;
+
+ } else {
+ *notrace_hash = copy_hash(func_hash->notrace_hash);
+ if (!*notrace_hash)
+ return -ENOMEM;
+ *filter_hash = EMPTY_HASH;
+ }
+ return 0;
+}
+
+static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash,
+ struct ftrace_ops_hash *ops_hash, struct ftrace_ops_hash *subops_hash)
+{
+ int size_bits;
+ int ret;
+
+ /* If the subops trace all functions so must the main ops */
+ if (ftrace_hash_empty(ops_hash->filter_hash) ||
+ ftrace_hash_empty(subops_hash->filter_hash)) {
+ *filter_hash = EMPTY_HASH;
+ } else {
+ /*
+ * The main ops filter hash is not empty, so its
+ * notrace_hash had better be, as the notrace hash
+ * is only used for empty main filter hashes.
+ */
+ WARN_ON_ONCE(!ftrace_hash_empty(ops_hash->notrace_hash));
+
+ size_bits = max(ops_hash->filter_hash->size_bits,
+ subops_hash->filter_hash->size_bits);
+
+ /* Copy the subops hash */
+ *filter_hash = alloc_and_copy_ftrace_hash(size_bits, subops_hash->filter_hash);
+ if (!filter_hash)
+ return -ENOMEM;
+ /* Remove any notrace functions from the copy */
+ remove_hash(*filter_hash, subops_hash->notrace_hash);
+
+ ret = append_hash(filter_hash, ops_hash->filter_hash,
+ size_bits);
+ if (ret < 0) {
+ free_ftrace_hash(*filter_hash);
+ return ret;
+ }
+ }
+
+ /*
+ * Only process notrace hashes if the main filter hash is empty
+ * (tracing all functions), otherwise the filter hash will just
+ * remove the notrace hash functions, and the notrace hash is
+ * not needed.
+ */
+ if (ftrace_hash_empty(*filter_hash)) {
+ /*
+ * Intersect the notrace functions. That is, if two
+ * subops are not tracing a set of functions, the
+ * main ops will only not trace the functions that are
+ * in both subops, but has to trace the functions that
+ * are only notrace in one of the subops, for the other
+ * subops to be able to trace them.
+ */
+ size_bits = max(ops_hash->notrace_hash->size_bits,
+ subops_hash->notrace_hash->size_bits);
+ *notrace_hash = alloc_ftrace_hash(size_bits);
+ if (!*notrace_hash)
+ return -ENOMEM;
+
+ ret = intersect_hash(notrace_hash, ops_hash->notrace_hash,
+ subops_hash->notrace_hash);
+ if (ret < 0) {
+ free_ftrace_hash(*notrace_hash);
+ return ret;
+ }
+ }
+ return 0;
+}
+
/**
* ftrace_startup_subops - enable tracing for subops of an ops
* @ops: Manager ops (used to pick all the functions of its subops)
@@ -3443,7 +3494,6 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int
struct ftrace_hash *notrace_hash;
struct ftrace_hash *save_filter_hash;
struct ftrace_hash *save_notrace_hash;
- int size_bits;
int ret;
if (unlikely(ftrace_disabled))
@@ -3467,14 +3517,14 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int
/* For the first subops to ops just enable it normally */
if (list_empty(&ops->subop_list)) {
- /* Just use the subops hashes */
- filter_hash = copy_hash(subops->func_hash->filter_hash);
- notrace_hash = copy_hash(subops->func_hash->notrace_hash);
- if (!filter_hash || !notrace_hash) {
- free_ftrace_hash(filter_hash);
- free_ftrace_hash(notrace_hash);
- return -ENOMEM;
- }
+
+ /* The ops was empty, should have empty hashes */
+ WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->filter_hash));
+ WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->notrace_hash));
+
+ ret = add_first_hash(&filter_hash, &notrace_hash, subops->func_hash);
+ if (ret < 0)
+ return ret;
save_filter_hash = ops->func_hash->filter_hash;
save_notrace_hash = ops->func_hash->notrace_hash;
@@ -3500,48 +3550,16 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int
/*
* Here there's already something attached. Here are the rules:
- * o If either filter_hash is empty then the final stays empty
- * o Otherwise, the final is a superset of both hashes
- * o If either notrace_hash is empty then the final stays empty
- * o Otherwise, the final is an intersection between the hashes
+ * If the new subops and main ops filter hashes are not empty:
+ * o Make a copy of the subops filter hash
+ * o Remove all functions in the nohash from it.
+ * o Add in the main hash filter functions
+ * o Remove any of these functions from the main notrace hash
*/
- if (ftrace_hash_empty(ops->func_hash->filter_hash) ||
- ftrace_hash_empty(subops->func_hash->filter_hash)) {
- filter_hash = EMPTY_HASH;
- } else {
- size_bits = max(ops->func_hash->filter_hash->size_bits,
- subops->func_hash->filter_hash->size_bits);
- filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
- if (!filter_hash)
- return -ENOMEM;
- ret = append_hash(&filter_hash, subops->func_hash->filter_hash,
- size_bits);
- if (ret < 0) {
- free_ftrace_hash(filter_hash);
- return ret;
- }
- }
-
- if (ftrace_hash_empty(ops->func_hash->notrace_hash) ||
- ftrace_hash_empty(subops->func_hash->notrace_hash)) {
- notrace_hash = EMPTY_HASH;
- } else {
- size_bits = max(ops->func_hash->filter_hash->size_bits,
- subops->func_hash->filter_hash->size_bits);
- notrace_hash = alloc_ftrace_hash(size_bits);
- if (!notrace_hash) {
- free_ftrace_hash(filter_hash);
- return -ENOMEM;
- }
- ret = intersect_hash(&notrace_hash, ops->func_hash->filter_hash,
- subops->func_hash->filter_hash);
- if (ret < 0) {
- free_ftrace_hash(filter_hash);
- free_ftrace_hash(notrace_hash);
- return ret;
- }
- }
+ ret = add_next_hash(&filter_hash, &notrace_hash, ops->func_hash, subops->func_hash);
+ if (ret < 0)
+ return ret;
list_add(&subops->list, &ops->subop_list);
@@ -3557,6 +3575,42 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int
return ret;
}
+static int rebuild_hashes(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash,
+ struct ftrace_ops *ops)
+{
+ struct ftrace_ops_hash temp_hash;
+ struct ftrace_ops *subops;
+ bool first = true;
+ int ret;
+
+ temp_hash.filter_hash = EMPTY_HASH;
+ temp_hash.notrace_hash = EMPTY_HASH;
+
+ list_for_each_entry(subops, &ops->subop_list, list) {
+ *filter_hash = EMPTY_HASH;
+ *notrace_hash = EMPTY_HASH;
+
+ if (first) {
+ ret = add_first_hash(filter_hash, notrace_hash, subops->func_hash);
+ if (ret < 0)
+ return ret;
+ first = false;
+ } else {
+ ret = add_next_hash(filter_hash, notrace_hash,
+ &temp_hash, subops->func_hash);
+ if (ret < 0) {
+ free_ftrace_hash(temp_hash.filter_hash);
+ free_ftrace_hash(temp_hash.notrace_hash);
+ return ret;
+ }
+ }
+
+ temp_hash.filter_hash = *filter_hash;
+ temp_hash.notrace_hash = *notrace_hash;
+ }
+ return 0;
+}
+
/**
* ftrace_shutdown_subops - Remove a subops from a manager ops
* @ops: A manager ops to remove @subops from
@@ -3605,14 +3659,9 @@ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, in
}
/* Rebuild the hashes without subops */
- filter_hash = append_hashes(ops);
- notrace_hash = intersect_hashes(ops);
- if (!filter_hash || !notrace_hash) {
- free_ftrace_hash(filter_hash);
- free_ftrace_hash(notrace_hash);
- list_add(&subops->list, &ops->subop_list);
- return -ENOMEM;
- }
+ ret = rebuild_hashes(&filter_hash, &notrace_hash, ops);
+ if (ret < 0)
+ return ret;
ret = ftrace_update_ops(ops, filter_hash, notrace_hash);
if (ret < 0) {
@@ -3628,11 +3677,11 @@ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, in
static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops,
struct ftrace_hash **orig_subhash,
- struct ftrace_hash *hash,
- int enable)
+ struct ftrace_hash *hash)
{
struct ftrace_ops *ops = subops->managed;
- struct ftrace_hash **orig_hash;
+ struct ftrace_hash *notrace_hash;
+ struct ftrace_hash *filter_hash;
struct ftrace_hash *save_hash;
struct ftrace_hash *new_hash;
int ret;
@@ -3649,24 +3698,15 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops,
return -ENOMEM;
}
- /* Create a new_hash to hold the ops new functions */
- if (enable) {
- orig_hash = &ops->func_hash->filter_hash;
- new_hash = append_hashes(ops);
- } else {
- orig_hash = &ops->func_hash->notrace_hash;
- new_hash = intersect_hashes(ops);
- }
-
- /* Move the hash over to the new hash */
- ret = __ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable);
-
- free_ftrace_hash(new_hash);
+ ret = rebuild_hashes(&filter_hash, &notrace_hash, ops);
+ if (!ret)
+ ret = ftrace_update_ops(ops, filter_hash, notrace_hash);
if (ret) {
/* Put back the original hash */
- free_ftrace_hash_rcu(*orig_subhash);
+ new_hash = *orig_subhash;
*orig_subhash = save_hash;
+ free_ftrace_hash_rcu(new_hash);
} else {
free_ftrace_hash_rcu(save_hash);
}
@@ -4890,7 +4930,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops,
int enable)
{
if (ops->flags & FTRACE_OPS_FL_SUBOP)
- return ftrace_hash_move_and_update_subops(ops, orig_hash, hash, enable);
+ return ftrace_hash_move_and_update_subops(ops, orig_hash, hash);
/*
* If this ops is not enabled, it could be sharing its filters
@@ -4909,7 +4949,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops,
list_for_each_entry(subops, &op->subop_list, list) {
if ((subops->flags & FTRACE_OPS_FL_ENABLED) &&
subops->func_hash == ops->func_hash) {
- return ftrace_hash_move_and_update_subops(subops, orig_hash, hash, enable);
+ return ftrace_hash_move_and_update_subops(subops, orig_hash, hash);
}
}
} while_for_each_ftrace_op(op);
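
The ftrace rework above centralizes the rule spelled out in the add_next_hash() comments: a manager ops traces the union of what its subops filter (each minus that subops' own notrace entries), and falls back to a notrace hash only when every filter is empty, in which case the notrace hashes are intersected. A toy, userspace illustration of that set arithmetic with bitmasks standing in for ftrace hashes; it is not kernel code.

#include <stdio.h>

int main(void)
{
        /* Bit i set == "function i" is in the hash. */
        unsigned int a_filter  = 0x0f;  /* subops A traces fn 0-3 */
        unsigned int a_notrace = 0x30;  /* subops A skips  fn 4-5 */
        unsigned int b_filter  = 0x3c;  /* subops B traces fn 2-5 */
        unsigned int b_notrace = 0x20;  /* subops B skips  fn 5   */

        /*
         * Non-empty filters: each subops contributes (filter minus its own
         * notrace), and the manager ops takes the union of those sets.
         */
        unsigned int mgr_filter = (a_filter & ~a_notrace) | (b_filter & ~b_notrace);

        /*
         * If all filters were empty (trace everything), the manager instead
         * intersects the notrace hashes: a function is skipped only when
         * every subops skips it.
         */
        unsigned int mgr_notrace = a_notrace & b_notrace;

        printf("manager filter:  0x%02x\n", mgr_filter);        /* 0x1f */
        printf("manager notrace: 0x%02x\n", mgr_notrace);       /* 0x20 */
        return 0;
}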
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index 968c5c3b0246..e4077500a91d 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -225,7 +225,12 @@ bool rv_is_nested_monitor(struct rv_monitor_def *mdef)
*/
bool rv_is_container_monitor(struct rv_monitor_def *mdef)
{
- struct rv_monitor_def *next = list_next_entry(mdef, list);
+ struct rv_monitor_def *next;
+
+ if (list_is_last(&mdef->list, &rv_monitors_list))
+ return false;
+
+ next = list_next_entry(mdef, list);
return next->parent == mdef->monitor || !mdef->monitor->enable;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b581e388a9d9..8ddf6b17215c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9806,6 +9806,7 @@ static int instance_mkdir(const char *name)
return ret;
}
+#ifdef CONFIG_MMU
static u64 map_pages(unsigned long start, unsigned long size)
{
unsigned long vmap_start, vmap_end;
@@ -9828,6 +9829,12 @@ static u64 map_pages(unsigned long start, unsigned long size)
return (u64)vmap_start;
}
+#else
+static inline u64 map_pages(unsigned long start, unsigned long size)
+{
+ return 0;
+}
+#endif
/**
* trace_array_get_by_name - Create/Lookup a trace array, given its name.
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 969f48742d72..33cfbd4ed76d 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -370,7 +370,6 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
union trace_synth_field *data = &entry->fields[n_u64];
trace_seq_printf(s, print_fmt, se->fields[i]->name,
- STR_VAR_LEN_MAX,
(char *)entry + data->as_dynamic.offset,
i == se->n_fields - 1 ? "" : " ");
n_u64++;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 2f077d4158e5..0c357a89c58e 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -880,8 +880,6 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr
if (print_retval || print_retaddr)
trace_seq_puts(s, " /*");
- else
- trace_seq_putc(s, '\n');
} else {
print_retaddr = false;
trace_seq_printf(s, "} /* %ps", func);
@@ -899,7 +897,7 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr
}
if (!entry || print_retval || print_retaddr)
- trace_seq_puts(s, " */\n");
+ trace_seq_puts(s, " */");
}
#else
@@ -975,7 +973,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
} else
trace_seq_puts(s, "();");
}
- trace_seq_printf(s, "\n");
+ trace_seq_putc(s, '\n');
print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
cpu, iter->ent->pid, flags);
@@ -1313,10 +1311,11 @@ print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s,
* that if the funcgraph-tail option is enabled.
*/
if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL))
- trace_seq_puts(s, "}\n");
+ trace_seq_puts(s, "}");
else
- trace_seq_printf(s, "} /* %ps */\n", (void *)func);
+ trace_seq_printf(s, "} /* %ps */", (void *)func);
}
+ trace_seq_putc(s, '\n');
/* Overrun */
if (flags & TRACE_GRAPH_PRINT_OVERRUN)