author	Linus Torvalds <torvalds@linux-foundation.org>	2025-12-03 13:25:39 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-12-03 13:25:39 -0800
commit	02baaa67d9afc2e56c6e1ac6a1fb1f1dd2be366f (patch)
tree	13ae2fec8be92b2f774cfb3fd725c027740be3ac /include/trace
parent	8449d3252c2603a51ffc7c36cb5bd94874378b7d (diff)
parent	1dd6c84f1c544e552848a8968599220bd464e338 (diff)
Merge tag 'sched_ext-for-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext updates from Tejun Heo:

 - Improve recovery from misbehaving BPF schedulers. When a scheduler
   puts many tasks with varying affinity restrictions on a shared DSQ,
   CPUs scanning through tasks they cannot run can overwhelm the
   system, causing lockups. Bypass mode now uses per-CPU DSQs with a
   load balancer to avoid this, and hooks into the hardlockup detector
   to attempt recovery. Add scx_cpu0 example scheduler to demonstrate
   this scenario.

 - Add lockless peek operation for DSQs to reduce lock contention for
   schedulers that need to query queue state during load balancing.

 - Allow scx_bpf_reenqueue_local() to be called from anywhere in
   preparation for deprecating cpu_acquire/release() callbacks in favor
   of generic BPF hooks.

 - Prepare for hierarchical scheduler support: add
   scx_bpf_task_set_slice() and scx_bpf_task_set_dsq_vtime() kfuncs,
   make scx_bpf_dsq_insert*() return bool, and wrap kfunc args in
   structs for future aux__prog parameter.

 - Implement cgroup_set_idle() callback to notify BPF schedulers when a
   cgroup's idle state changes.

 - Fix migration tasks being incorrectly downgraded from
   stop_sched_class to rt_sched_class across sched_ext enable/disable.
   Applied late as the fix is low risk and the bug subtle but needs
   stable backporting.

 - Various fixes and cleanups including cgroup exit ordering,
   SCX_KICK_WAIT reliability, and backward compatibility improvements.

* tag 'sched_ext-for-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext: (44 commits)
  sched_ext: Fix incorrect sched_class settings for per-cpu migration tasks
  sched_ext: tools: Removing duplicate targets during non-cross compilation
  sched_ext: Use kvfree_rcu() to release per-cpu ksyncs object
  sched_ext: Pass locked CPU parameter to scx_hardlockup() and add docs
  sched_ext: Update comments replacing breather with aborting mechanism
  sched_ext: Implement load balancer for bypass mode
  sched_ext: Factor out abbreviated dispatch dequeue into dispatch_dequeue_locked()
  sched_ext: Factor out scx_dsq_list_node cursor initialization into INIT_DSQ_LIST_CURSOR
  sched_ext: Add scx_cpu0 example scheduler
  sched_ext: Hook up hardlockup detector
  sched_ext: Make handle_lockup() propagate scx_verror() result
  sched_ext: Refactor lockup handlers into handle_lockup()
  sched_ext: Make scx_exit() and scx_vexit() return bool
  sched_ext: Exit dispatch and move operations immediately when aborting
  sched_ext: Simplify breather mechanism with scx_aborting flag
  sched_ext: Use per-CPU DSQs instead of per-node global DSQs in bypass mode
  sched_ext: Refactor do_enqueue_task() local and global DSQ paths
  sched_ext: Use shorter slice in bypass mode
  sched_ext: Mark racy bitfields to prevent adding fields that can't tolerate races
  sched_ext: Minor cleanups to scx_task_iter
  ...
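A minimal sketch of how a BPF scheduler's enqueue path might exercise the hierarchical-prep changes above, written against the environment of the in-tree example schedulers (tools/sched_ext, common.bpf.h); the exact scx_bpf_task_set_slice() signature and the bool return of scx_bpf_dsq_insert() are inferred from the pull summary, not taken from this merge:

/*
 * Sketch only: enqueue path using the kfuncs described in the pull
 * summary.  scx_bpf_task_set_slice()'s signature and the bool return
 * of scx_bpf_dsq_insert() are assumptions based on the log above.
 */
void BPF_STRUCT_OPS(sketch_enqueue, struct task_struct *p, u64 enq_flags)
{
	/* Assumed kfunc: set the slice instead of writing p->scx.slice directly. */
	scx_bpf_task_set_slice(p, SCX_SLICE_DFL);

	/* Insertion can now report failure rather than being void. */
	if (!scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags))
		scx_bpf_error("dsq insert failed");
}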
Diffstat (limited to 'include/trace')
-rw-r--r--  include/trace/events/sched_ext.h  39
1 file changed, 39 insertions, 0 deletions
diff --git a/include/trace/events/sched_ext.h b/include/trace/events/sched_ext.h
index 50e4b712735a..d1bf5acd59c5 100644
--- a/include/trace/events/sched_ext.h
+++ b/include/trace/events/sched_ext.h
@@ -45,6 +45,45 @@ TRACE_EVENT(sched_ext_event,
)
);
+TRACE_EVENT(sched_ext_bypass_lb,
+
+ TP_PROTO(__u32 node, __u32 nr_cpus, __u32 nr_tasks, __u32 nr_balanced,
+ __u32 before_min, __u32 before_max,
+ __u32 after_min, __u32 after_max),
+
+ TP_ARGS(node, nr_cpus, nr_tasks, nr_balanced,
+ before_min, before_max, after_min, after_max),
+
+ TP_STRUCT__entry(
+ __field( __u32, node )
+ __field( __u32, nr_cpus )
+ __field( __u32, nr_tasks )
+ __field( __u32, nr_balanced )
+ __field( __u32, before_min )
+ __field( __u32, before_max )
+ __field( __u32, after_min )
+ __field( __u32, after_max )
+ ),
+
+ TP_fast_assign(
+ __entry->node = node;
+ __entry->nr_cpus = nr_cpus;
+ __entry->nr_tasks = nr_tasks;
+ __entry->nr_balanced = nr_balanced;
+ __entry->before_min = before_min;
+ __entry->before_max = before_max;
+ __entry->after_min = after_min;
+ __entry->after_max = after_max;
+ ),
+
+ TP_printk("node %u: nr_cpus=%u nr_tasks=%u nr_balanced=%u min=%u->%u max=%u->%u",
+ __entry->node, __entry->nr_cpus,
+ __entry->nr_tasks, __entry->nr_balanced,
+ __entry->before_min, __entry->after_min,
+ __entry->before_max, __entry->after_max
+ )
+);
+
#endif /* _TRACE_SCHED_EXT_H */
/* This part must be outside protection */
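For reference, the TRACE_EVENT() above makes the tracing core generate a kernel-side trace_sched_ext_bypass_lb() emitter. The call site below is a hypothetical sketch: only the argument order comes from TP_PROTO() in the diff, and the tracefs path assumes this header's TRACE_SYSTEM is sched_ext.

#include <linux/types.h>
#include <trace/events/sched_ext.h>

/* Hypothetical call site: argument order mirrors TP_PROTO() above. */
static void report_bypass_lb(__u32 node, __u32 nr_cpus, __u32 nr_tasks,
			     __u32 nr_balanced, __u32 before_min,
			     __u32 before_max, __u32 after_min,
			     __u32 after_max)
{
	trace_sched_ext_bypass_lb(node, nr_cpus, nr_tasks, nr_balanced,
				  before_min, before_max,
				  after_min, after_max);
}

Once enabled (e.g. by writing 1 to /sys/kernel/tracing/events/sched_ext/sched_ext_bypass_lb/enable, under the same TRACE_SYSTEM assumption), each balancing pass logs a line in the "node %u: nr_cpus=%u ..." format defined by TP_printk() above.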