summary refs log tree commit diff
path: root/kernel/sched/sched.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r-- kernel/sched/sched.h 271
1 files changed, 210 insertions, 61 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f9d0515db130..8590113e4a60 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -5,6 +5,7 @@
#ifndef _KERNEL_SCHED_SCHED_H
#define _KERNEL_SCHED_SCHED_H
+#include <linux/prandom.h>
#include <linux/sched/affinity.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/cpufreq.h>
@@ -20,7 +21,6 @@
#include <linux/sched/task_flags.h>
#include <linux/sched/task.h>
#include <linux/sched/topology.h>
-
#include <linux/atomic.h>
#include <linux/bitmap.h>
#include <linux/bug.h>
@@ -405,6 +405,7 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
* naturally throttled to once per period, avoiding high context switch
* workloads from spamming the hrtimer program/cancel paths.
*/
+extern void dl_server_update_idle(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_start(struct sched_dl_entity *dl_se);
extern void dl_server_stop(struct sched_dl_entity *dl_se);
@@ -412,8 +413,6 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
dl_server_pick_f pick_task);
extern void sched_init_dl_servers(void);
-extern void dl_server_update_idle_time(struct rq *rq,
- struct task_struct *p);
extern void fair_server_init(struct rq *rq);
extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq);
extern int dl_server_apply_params(struct sched_dl_entity *dl_se,
@@ -682,10 +681,10 @@ struct cfs_rq {
s64 avg_vruntime;
u64 avg_load;
- u64 min_vruntime;
+ u64 zero_vruntime;
#ifdef CONFIG_SCHED_CORE
unsigned int forceidle_seq;
- u64 min_vruntime_fi;
+ u64 zero_vruntime_fi;
#endif
struct rb_root_cached tasks_timeline;
@@ -780,7 +779,6 @@ enum scx_rq_flags {
*/
SCX_RQ_ONLINE = 1 << 0,
SCX_RQ_CAN_STOP_TICK = 1 << 1,
- SCX_RQ_BAL_PENDING = 1 << 2, /* balance hasn't run yet */
SCX_RQ_BAL_KEEP = 1 << 3, /* balance decided to keep current */
SCX_RQ_BYPASSING = 1 << 4,
SCX_RQ_CLK_VALID = 1 << 5, /* RQ clock is fresh and valid */
@@ -1120,6 +1118,8 @@ struct rq {
/* runqueue lock: */
raw_spinlock_t __lock;
+ /* Per class runqueue modification mask; bits in class order. */
+ unsigned int queue_mask;
unsigned int nr_running;
#ifdef CONFIG_NUMA_BALANCING
unsigned int nr_numa_running;
@@ -1349,6 +1349,12 @@ static inline bool is_migration_disabled(struct task_struct *p)
}
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+DECLARE_PER_CPU(struct rnd_state, sched_rnd_state);
+
+static inline u32 sched_rng(void) /* u32 from prandom_u32_state() on this CPU's sched_rnd_state */
+{ /* NOTE(review): this_cpu_ptr() presumably needs a non-preemptible caller -- confirm at call sites */
+ return prandom_u32_state(this_cpu_ptr(&sched_rnd_state));
+}
#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
#define this_rq() this_cpu_ptr(&runqueues)
@@ -1432,6 +1438,9 @@ static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
if (!sched_core_enabled(rq))
return true;
+ if (rq->core->core_cookie == p->core_cookie)
+ return true;
+
for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
if (!available_idle_cpu(cpu)) {
idle_core = false;
@@ -1443,7 +1452,7 @@ static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
* A CPU in an idle core is always the best choice for tasks with
* cookies.
*/
- return idle_core || rq->core->core_cookie == p->core_cookie;
+ return idle_core;
}
static inline bool sched_group_cookie_match(struct rq *rq,
@@ -1827,7 +1836,8 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(p->pi_lock)
__acquires(rq->lock);
-static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
+static inline void
+__task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
@@ -1839,8 +1849,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
__releases(rq->lock)
__releases(p->pi_lock)
{
- rq_unpin_lock(rq, rf);
- raw_spin_rq_unlock(rq);
+ __task_rq_unlock(rq, p, rf);
raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
}
@@ -1849,6 +1858,11 @@ DEFINE_LOCK_GUARD_1(task_rq_lock, struct task_struct,
task_rq_unlock(_T->rq, _T->lock, &_T->rf),
struct rq *rq; struct rq_flags rf)
+DEFINE_LOCK_GUARD_1(__task_rq_lock, struct task_struct, /* guard keyed on a task_struct pointer (_T->lock) */
+ _T->rq = __task_rq_lock(_T->lock, &_T->rf), /* acquire: remember the rq that __task_rq_lock() returned */
+ __task_rq_unlock(_T->rq, _T->lock, &_T->rf), /* release: pass the same rq, task and rf back */
+ struct rq *rq; struct rq_flags rf) /* extra state carried by the guard */
+
static inline void rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
@@ -2343,8 +2357,7 @@ extern const u32 sched_prio_to_wmult[40];
/*
* {de,en}queue flags:
*
- * DEQUEUE_SLEEP - task is no longer runnable
- * ENQUEUE_WAKEUP - task just became runnable
+ * SLEEP/WAKEUP - task is no-longer/just-became runnable
*
* SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
* are in a known state which allows modification. Such pairs
@@ -2357,34 +2370,46 @@ extern const u32 sched_prio_to_wmult[40];
*
* MIGRATING - p->on_rq == TASK_ON_RQ_MIGRATING (used for DEADLINE)
*
+ * DELAYED - de/re-queue a sched_delayed task
+ *
+ * CLASS - going to update p->sched_class; makes sched_change call the
+ * various switch methods.
+ *
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
* ENQUEUE_MIGRATED - the task was migrated during wakeup
* ENQUEUE_RQ_SELECTED - ->select_task_rq() was called
*
+ * XXX SAVE/RESTORE in combination with CLASS doesn't really make sense, but
+ * SCHED_DEADLINE seems to rely on this for now.
*/
-#define DEQUEUE_SLEEP 0x01 /* Matches ENQUEUE_WAKEUP */
-#define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
-#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
-#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
-#define DEQUEUE_SPECIAL 0x10
-#define DEQUEUE_MIGRATING 0x100 /* Matches ENQUEUE_MIGRATING */
-#define DEQUEUE_DELAYED 0x200 /* Matches ENQUEUE_DELAYED */
-#define DEQUEUE_THROTTLE 0x800
-
-#define ENQUEUE_WAKEUP 0x01
-#define ENQUEUE_RESTORE 0x02
-#define ENQUEUE_MOVE 0x04
-#define ENQUEUE_NOCLOCK 0x08
-
-#define ENQUEUE_HEAD 0x10
-#define ENQUEUE_REPLENISH 0x20
-#define ENQUEUE_MIGRATED 0x40
-#define ENQUEUE_INITIAL 0x80
-#define ENQUEUE_MIGRATING 0x100
-#define ENQUEUE_DELAYED 0x200
-#define ENQUEUE_RQ_SELECTED 0x400
+#define DEQUEUE_SLEEP 0x0001 /* Matches ENQUEUE_WAKEUP */
+#define DEQUEUE_SAVE 0x0002 /* Matches ENQUEUE_RESTORE */
+#define DEQUEUE_MOVE 0x0004 /* Matches ENQUEUE_MOVE */
+#define DEQUEUE_NOCLOCK 0x0008 /* Matches ENQUEUE_NOCLOCK */
+
+#define DEQUEUE_MIGRATING 0x0010 /* Matches ENQUEUE_MIGRATING */
+#define DEQUEUE_DELAYED 0x0020 /* Matches ENQUEUE_DELAYED */
+#define DEQUEUE_CLASS 0x0040 /* Matches ENQUEUE_CLASS */
+
+#define DEQUEUE_SPECIAL 0x00010000 /* dequeue-only; same bit value as ENQUEUE_HEAD */
+#define DEQUEUE_THROTTLE 0x00020000 /* dequeue-only; same bit value as ENQUEUE_REPLENISH */
+
+#define ENQUEUE_WAKEUP 0x0001 /* Matches DEQUEUE_SLEEP */
+#define ENQUEUE_RESTORE 0x0002 /* Matches DEQUEUE_SAVE */
+#define ENQUEUE_MOVE 0x0004 /* Matches DEQUEUE_MOVE */
+#define ENQUEUE_NOCLOCK 0x0008 /* Matches DEQUEUE_NOCLOCK */
+
+#define ENQUEUE_MIGRATING 0x0010 /* Matches DEQUEUE_MIGRATING */
+#define ENQUEUE_DELAYED 0x0020 /* Matches DEQUEUE_DELAYED */
+#define ENQUEUE_CLASS 0x0040 /* Matches DEQUEUE_CLASS */
+
+#define ENQUEUE_HEAD 0x00010000 /* enqueue-only from here down; values overlap the dequeue-only flags */
+#define ENQUEUE_REPLENISH 0x00020000
+#define ENQUEUE_MIGRATED 0x00040000
+#define ENQUEUE_INITIAL 0x00080000
+#define ENQUEUE_RQ_SELECTED 0x00100000
#define RETRY_TASK ((void *)-1UL)
@@ -2401,16 +2426,61 @@ struct sched_class {
#ifdef CONFIG_UCLAMP_TASK
int uclamp_enabled;
#endif
+ /*
+ * idle: 0
+ * ext: 1
+ * fair: 2
+ * rt: 4
+ * dl: 8
+ * stop: 16
+ */
+ unsigned int queue_mask;
+ /*
+ * move_queued_task/activate_task/enqueue_task: rq->lock
+ * ttwu_do_activate/activate_task/enqueue_task: rq->lock
+ * wake_up_new_task/activate_task/enqueue_task: task_rq_lock
+ * ttwu_runnable/enqueue_task: task_rq_lock
+ * proxy_task_current: rq->lock
+ * sched_change_end
+ */
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
+ /*
+ * move_queued_task/deactivate_task/dequeue_task: rq->lock
+ * __schedule/block_task/dequeue_task: rq->lock
+ * proxy_task_current: rq->lock
+ * wait_task_inactive: task_rq_lock
+ * sched_change_begin
+ */
bool (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
+
+ /*
+ * do_sched_yield: rq->lock
+ */
void (*yield_task) (struct rq *rq);
+ /*
+ * yield_to: rq->lock (double)
+ */
bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
+ /*
+ * move_queued_task: rq->lock
+ * __migrate_swap_task: rq->lock
+ * ttwu_do_activate: rq->lock
+ * ttwu_runnable: task_rq_lock
+ * wake_up_new_task: task_rq_lock
+ */
void (*wakeup_preempt)(struct rq *rq, struct task_struct *p, int flags);
+ /*
+ * schedule/pick_next_task/prev_balance: rq->lock
+ */
int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
- struct task_struct *(*pick_task)(struct rq *rq);
+
+ /*
+ * schedule/pick_next_task: rq->lock
+ */
+ struct task_struct *(*pick_task)(struct rq *rq, struct rq_flags *rf);
/*
* Optional! When implemented pick_next_task() should be equivalent to:
*
@@ -2420,55 +2490,123 @@ struct sched_class {
* set_next_task_first(next);
* }
*/
- struct task_struct *(*pick_next_task)(struct rq *rq, struct task_struct *prev);
+ struct task_struct *(*pick_next_task)(struct rq *rq, struct task_struct *prev,
+ struct rq_flags *rf);
+ /*
+ * sched_change:
+ * __schedule: rq->lock
+ */
void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct task_struct *next);
void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
+ /*
+ * select_task_rq: p->pi_lock
+ * sched_exec: p->pi_lock
+ */
int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
+ /*
+ * set_task_cpu: p->pi_lock || rq->lock (ttwu like)
+ */
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
+ /*
+ * ttwu_do_activate: rq->lock
+ * wake_up_new_task: task_rq_lock
+ */
void (*task_woken)(struct rq *this_rq, struct task_struct *task);
+ /*
+ * do_set_cpus_allowed: task_rq_lock + sched_change
+ */
void (*set_cpus_allowed)(struct task_struct *p, struct affinity_context *ctx);
+ /*
+ * sched_set_rq_{on,off}line: rq->lock
+ */
void (*rq_online)(struct rq *rq);
void (*rq_offline)(struct rq *rq);
+ /*
+ * push_cpu_stop: p->pi_lock && rq->lock
+ */
struct rq *(*find_lock_rq)(struct task_struct *p, struct rq *rq);
+ /*
+ * hrtick: rq->lock
+ * sched_tick: rq->lock
+ * sched_tick_remote: rq->lock
+ */
void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
+ /*
+ * sched_cgroup_fork: p->pi_lock
+ */
void (*task_fork)(struct task_struct *p);
+ /*
+ * finish_task_switch: no locks
+ */
void (*task_dead)(struct task_struct *p);
/*
- * The switched_from() call is allowed to drop rq->lock, therefore we
- * cannot assume the switched_from/switched_to pair is serialized by
- * rq->lock. They are however serialized by p->pi_lock.
+ * sched_change
+ */
+ void (*switching_from)(struct rq *this_rq, struct task_struct *task);
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task);
+ void (*switching_to) (struct rq *this_rq, struct task_struct *task);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task);
+ u64 (*get_prio) (struct rq *this_rq, struct task_struct *task);
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ u64 oldprio);
+
+ /*
+ * set_load_weight: task_rq_lock + sched_change
+ * __setscheduler_parms: task_rq_lock + sched_change
*/
- void (*switching_to) (struct rq *this_rq, struct task_struct *task);
- void (*switched_from)(struct rq *this_rq, struct task_struct *task);
- void (*switched_to) (struct rq *this_rq, struct task_struct *task);
void (*reweight_task)(struct rq *this_rq, struct task_struct *task,
const struct load_weight *lw);
- void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
- int oldprio);
+ /*
+ * sched_rr_get_interval: task_rq_lock
+ */
unsigned int (*get_rr_interval)(struct rq *rq,
struct task_struct *task);
+ /*
+ * task_sched_runtime: task_rq_lock
+ */
void (*update_curr)(struct rq *rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
+ /*
+ * sched_change_group: task_rq_lock + sched_change
+ */
void (*task_change_group)(struct task_struct *p);
#endif
#ifdef CONFIG_SCHED_CORE
+ /*
+ * pick_next_task: rq->lock
+ * try_steal_cookie: rq->lock (double)
+ */
int (*task_is_throttled)(struct task_struct *p, int cpu);
#endif
};
+/*
+ * Does not nest; only used around sched_class::pick_task() rq-lock-breaks.
+ */
+static inline void rq_modified_clear(struct rq *rq)
+{
+ rq->queue_mask = 0; /* drop every bit in the per-class runqueue modification mask */
+}
+
+static inline bool rq_modified_above(struct rq *rq, const struct sched_class * class) /* true iff rq->queue_mask has a bit above @class's own bit */
+{
+ unsigned int mask = class->queue_mask; /* per-class value as listed at sched_class::queue_mask: idle 0, then 1, 2, 4, 8, 16 */
+ return rq->queue_mask & ~((mask << 1) - 1); /* (mask << 1) - 1 covers @class's bit and all lower ones; for mask == 0 it wraps to all ones, so the result is always false */
+}
+
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
WARN_ON_ONCE(rq->donor != prev);
@@ -2580,8 +2718,9 @@ static inline bool sched_fair_runnable(struct rq *rq)
return rq->cfs.nr_queued > 0;
}
-extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
-extern struct task_struct *pick_task_idle(struct rq *rq);
+extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev,
+ struct rq_flags *rf);
+extern struct task_struct *pick_task_idle(struct rq *rq, struct rq_flags *rf);
#define SCA_CHECK 0x01
#define SCA_MIGRATE_DISABLE 0x02
@@ -2611,7 +2750,7 @@ static inline bool task_allowed_on_cpu(struct task_struct *p, int cpu)
static inline cpumask_t *alloc_user_cpus_ptr(int node)
{
/*
- * See do_set_cpus_allowed() above for the rcu_head usage.
+ * See set_cpus_allowed_force() above for the rcu_head usage.
*/
int size = max_t(int, cpumask_size(), sizeof(struct rcu_head));
@@ -3805,32 +3944,42 @@ extern void set_load_weight(struct task_struct *p, bool update_load);
extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
-extern void check_class_changing(struct rq *rq, struct task_struct *p,
- const struct sched_class *prev_class);
-extern void check_class_changed(struct rq *rq, struct task_struct *p,
- const struct sched_class *prev_class,
- int oldprio);
-
extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
-#ifdef CONFIG_SCHED_CLASS_EXT
/*
- * Used by SCX in the enable/disable paths to move tasks between sched_classes
- * and establish invariants.
+ * The 'sched_change' pattern is the safe, easy and slow way of changing a
+ * task's scheduling properties. It dequeues a task, such that the scheduler
+ * is fully unaware of it; at which point its properties can be modified;
+ * after which it is enqueued again.
+ *
+ * Typically this must be called while holding task_rq_lock, since most/all
+ * properties are serialized under those locks. There is currently one
+ * exception to this rule in sched/ext which only holds rq->lock.
+ */
+
+/*
+ * This structure is a temporary, used to preserve/convey the queueing state
+ * of the task between sched_change_begin() and sched_change_end(). Ensuring
+ * the task's queueing state is idempotent across the operation.
*/
-struct sched_enq_and_set_ctx {
+struct sched_change_ctx {
+ u64 prio;
struct task_struct *p;
- int queue_flags;
+ int flags;
bool queued;
bool running;
};
-void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
- struct sched_enq_and_set_ctx *ctx);
-void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx);
+struct sched_change_ctx *sched_change_begin(struct task_struct *p, unsigned int flags);
+void sched_change_end(struct sched_change_ctx *ctx);
-#endif /* CONFIG_SCHED_CLASS_EXT */
+DEFINE_CLASS(sched_change, struct sched_change_ctx *, /* class name and the type of the managed value _T */
+ sched_change_end(_T), /* exit expression: finishes the change for the ctx held in _T */
+ sched_change_begin(p, flags), /* init expression: its return value becomes _T */
+ struct task_struct *p, unsigned int flags) /* arguments accepted by the init expression */
+
+DEFINE_CLASS_IS_UNCONDITIONAL(sched_change)
#include "ext.h"