Diffstat (limited to 'kernel/sched/sched.h')
 -rw-r--r--  kernel/sched/sched.h | 289
 1 file changed, 43 insertions(+), 246 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4838dda75b10..bf227c27b889 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3540,286 +3540,83 @@ extern void sched_dynamic_update(int mode);
extern const char *preempt_modes[];
#ifdef CONFIG_SCHED_MM_CID
-
-#define SCHED_MM_CID_PERIOD_NS (100ULL * 1000000) /* 100ms */
-#define MM_CID_SCAN_DELAY 100 /* 100ms */
-
-extern raw_spinlock_t cid_lock;
-extern int use_cid_lock;
-
-extern void sched_mm_cid_migrate_from(struct task_struct *t);
-extern void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t);
-extern void task_tick_mm_cid(struct rq *rq, struct task_struct *curr);
-extern void init_sched_mm_cid(struct task_struct *t);
-
-static inline void __mm_cid_put(struct mm_struct *mm, int cid)
-{
- if (cid < 0)
- return;
- cpumask_clear_cpu(cid, mm_cidmask(mm));
-}
-
-/*
- * The per-mm/cpu cid can have the MM_CID_LAZY_PUT flag set or transition to
- * the MM_CID_UNSET state without holding the rq lock, but the rq lock needs to
- * be held to transition to other states.
- *
- * State transitions synchronized with cmpxchg or try_cmpxchg need to be
- * consistent across CPUs, which prevents use of this_cpu_cmpxchg.
- */
-static inline void mm_cid_put_lazy(struct task_struct *t)
+static inline void init_sched_mm_cid(struct task_struct *t)
{
struct mm_struct *mm = t->mm;
- struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
- int cid;
+ unsigned int max_cid;
- lockdep_assert_irqs_disabled();
- cid = __this_cpu_read(pcpu_cid->cid);
- if (!mm_cid_is_lazy_put(cid) ||
- !try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
+ if (!mm)
return;
- __mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
-}
-static inline int mm_cid_pcpu_unset(struct mm_struct *mm)
-{
- struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
- int cid, res;
-
- lockdep_assert_irqs_disabled();
- cid = __this_cpu_read(pcpu_cid->cid);
- for (;;) {
- if (mm_cid_is_unset(cid))
- return MM_CID_UNSET;
- /*
- * Attempt transition from valid or lazy-put to unset.
- */
- res = cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, cid, MM_CID_UNSET);
- if (res == cid)
- break;
- cid = res;
- }
- return cid;
+ /* Preset last_mm_cid */
+ max_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed), atomic_read(&mm->mm_users));
+ t->last_mm_cid = max_cid - 1;
}
-static inline void mm_cid_put(struct mm_struct *mm)
+static inline bool __mm_cid_get(struct task_struct *t, unsigned int cid, unsigned int max_cids)
{
- int cid;
+ struct mm_struct *mm = t->mm;
- lockdep_assert_irqs_disabled();
- cid = mm_cid_pcpu_unset(mm);
- if (cid == MM_CID_UNSET)
- return;
- __mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
+ if (cid >= max_cids)
+ return false;
+ if (cpumask_test_and_set_cpu(cid, mm_cidmask(mm)))
+ return false;
+ t->mm_cid = t->last_mm_cid = cid;
+ __this_cpu_write(mm->pcpu_cid->cid, cid);
+ return true;
}
-static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
+static inline bool mm_cid_get(struct task_struct *t)
{
- struct cpumask *cidmask = mm_cidmask(mm);
- struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
- int cid, max_nr_cid, allowed_max_nr_cid;
+ struct mm_struct *mm = t->mm;
+ unsigned int max_cids;
- /*
- * After shrinking the number of threads or reducing the number
- * of allowed cpus, reduce the value of max_nr_cid so expansion
- * of cid allocation will preserve cache locality if the number
- * of threads or allowed cpus increase again.
- */
- max_nr_cid = atomic_read(&mm->max_nr_cid);
- while ((allowed_max_nr_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed),
- atomic_read(&mm->mm_users))),
- max_nr_cid > allowed_max_nr_cid) {
- /* atomic_try_cmpxchg loads previous mm->max_nr_cid into max_nr_cid. */
- if (atomic_try_cmpxchg(&mm->max_nr_cid, &max_nr_cid, allowed_max_nr_cid)) {
- max_nr_cid = allowed_max_nr_cid;
- break;
- }
- }
- /* Try to re-use recent cid. This improves cache locality. */
- cid = __this_cpu_read(pcpu_cid->recent_cid);
- if (!mm_cid_is_unset(cid) && cid < max_nr_cid &&
- !cpumask_test_and_set_cpu(cid, cidmask))
- return cid;
- /*
- * Expand cid allocation if the maximum number of concurrency
- * IDs allocated (max_nr_cid) is below the number cpus allowed
- * and number of threads. Expanding cid allocation as much as
- * possible improves cache locality.
- */
- cid = max_nr_cid;
- while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) {
- /* atomic_try_cmpxchg loads previous mm->max_nr_cid into cid. */
- if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1))
- continue;
- if (!cpumask_test_and_set_cpu(cid, cidmask))
- return cid;
- }
- /*
- * Find the first available concurrency id.
- * Retry finding first zero bit if the mask is temporarily
- * filled. This only happens during concurrent remote-clear
- * which owns a cid without holding a rq lock.
- */
- for (;;) {
- cid = cpumask_first_zero(cidmask);
- if (cid < READ_ONCE(mm->nr_cpus_allowed))
- break;
- cpu_relax();
- }
- if (cpumask_test_and_set_cpu(cid, cidmask))
- return -1;
+ max_cids = min_t(int, READ_ONCE(mm->nr_cpus_allowed), atomic_read(&mm->mm_users));
- return cid;
-}
+ /* Try to reuse the last CID of this task */
+ if (__mm_cid_get(t, t->last_mm_cid, max_cids))
+ return true;
-/*
- * Save a snapshot of the current runqueue time of this cpu
- * with the per-cpu cid value, allowing to estimate how recently it was used.
- */
-static inline void mm_cid_snapshot_time(struct rq *rq, struct mm_struct *mm)
-{
- struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(rq));
+ /* Try to reuse the last CID of this mm on this CPU */
+ if (__mm_cid_get(t, __this_cpu_read(mm->pcpu_cid->cid), max_cids))
+ return true;
- lockdep_assert_rq_held(rq);
- WRITE_ONCE(pcpu_cid->time, rq->clock);
+ /* Try the first zero bit in the cidmask. */
+ return __mm_cid_get(t, cpumask_first_zero(mm_cidmask(mm)), max_cids);
}
-static inline int __mm_cid_get(struct rq *rq, struct task_struct *t,
- struct mm_struct *mm)
+static inline void mm_cid_select(struct task_struct *t)
{
- int cid;
-
/*
- * All allocations (even those using the cid_lock) are lock-free. If
- * use_cid_lock is set, hold the cid_lock to perform cid allocation to
- * guarantee forward progress.
+ * mm_cid_get() can fail when the maximum CID, which is determined
+ * by min(mm->nr_cpus_allowed, mm->mm_users) changes concurrently.
+ * That's a transient failure as there cannot be more tasks
+ * concurrently on a CPU (or about to be scheduled in) than that.
*/
- if (!READ_ONCE(use_cid_lock)) {
- cid = __mm_cid_try_get(t, mm);
- if (cid >= 0)
- goto end;
- raw_spin_lock(&cid_lock);
- } else {
- raw_spin_lock(&cid_lock);
- cid = __mm_cid_try_get(t, mm);
- if (cid >= 0)
- goto unlock;
- }
-
- /*
- * cid concurrently allocated. Retry while forcing following
- * allocations to use the cid_lock to ensure forward progress.
- */
- WRITE_ONCE(use_cid_lock, 1);
- /*
- * Set use_cid_lock before allocation. Only care about program order
- * because this is only required for forward progress.
- */
- barrier();
- /*
- * Retry until it succeeds. It is guaranteed to eventually succeed once
- * all newcoming allocations observe the use_cid_lock flag set.
- */
- do {
- cid = __mm_cid_try_get(t, mm);
- cpu_relax();
- } while (cid < 0);
- /*
- * Allocate before clearing use_cid_lock. Only care about
- * program order because this is for forward progress.
- */
- barrier();
- WRITE_ONCE(use_cid_lock, 0);
-unlock:
- raw_spin_unlock(&cid_lock);
-end:
- mm_cid_snapshot_time(rq, mm);
-
- return cid;
-}
-
-static inline int mm_cid_get(struct rq *rq, struct task_struct *t,
- struct mm_struct *mm)
-{
- struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
- int cid;
-
- lockdep_assert_rq_held(rq);
- cid = __this_cpu_read(pcpu_cid->cid);
- if (mm_cid_is_valid(cid)) {
- mm_cid_snapshot_time(rq, mm);
- return cid;
- }
- if (mm_cid_is_lazy_put(cid)) {
- if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
- __mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
+ for (;;) {
+ if (mm_cid_get(t))
+ break;
}
- cid = __mm_cid_get(rq, t, mm);
- __this_cpu_write(pcpu_cid->cid, cid);
- __this_cpu_write(pcpu_cid->recent_cid, cid);
-
- return cid;
}
-static inline void switch_mm_cid(struct rq *rq,
- struct task_struct *prev,
- struct task_struct *next)
+static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next)
{
- /*
- * Provide a memory barrier between rq->curr store and load of
- * {prev,next}->mm->pcpu_cid[cpu] on rq->curr->mm transition.
- *
- * Should be adapted if context_switch() is modified.
- */
- if (!next->mm) { // to kernel
- /*
- * user -> kernel transition does not guarantee a barrier, but
- * we can use the fact that it performs an atomic operation in
- * mmgrab().
- */
- if (prev->mm) // from user
- smp_mb__after_mmgrab();
- /*
- * kernel -> kernel transition does not change rq->curr->mm
- * state. It stays NULL.
- */
- } else { // to user
- /*
- * kernel -> user transition does not provide a barrier
- * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
- * Provide it here.
- */
- if (!prev->mm) { // from kernel
- smp_mb();
- } else { // from user
- /*
- * user->user transition relies on an implicit
- * memory barrier in switch_mm() when
- * current->mm changes. If the architecture
- * switch_mm() does not have an implicit memory
- * barrier, it is emitted here. If current->mm
- * is unchanged, no barrier is needed.
- */
- smp_mb__after_switch_mm();
- }
- }
if (prev->mm_cid_active) {
- mm_cid_snapshot_time(rq, prev->mm);
- mm_cid_put_lazy(prev);
- prev->mm_cid = -1;
+ if (prev->mm_cid != MM_CID_UNSET)
+ cpumask_clear_cpu(prev->mm_cid, mm_cidmask(prev->mm));
+ prev->mm_cid = MM_CID_UNSET;
}
+
if (next->mm_cid_active) {
- next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next, next->mm);
+ mm_cid_select(next);
rseq_sched_set_task_mm_cid(next, next->mm_cid);
}
}
#else /* !CONFIG_SCHED_MM_CID: */
-static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { }
-static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
-static inline void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) { }
-static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
static inline void init_sched_mm_cid(struct task_struct *t) { }
+static inline void mm_cid_select(struct task_struct *t) { }
+static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next) { }
#endif /* !CONFIG_SCHED_MM_CID */
extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
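
The new allocation path added above boils down to three ordered attempts with a retry loop on top: reuse the task's last CID, reuse the CID this mm last used on the current CPU, otherwise take the first free bit in the cidmask, and start over if min(mm->nr_cpus_allowed, mm->mm_users) changed concurrently. The user-space sketch below models only that control flow and is not kernel code: the cpumask, per-CPU storage and mm bookkeeping are replaced by a single atomic bitmask, and the names cid_map, cid_try_get(), cid_first_zero() and cid_select() are invented for this illustration.

/*
 * Simplified model of the new CID selection logic (illustration only,
 * not the kernel implementation).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_CIDS	64

static atomic_ulong cid_map;	/* stands in for mm_cidmask(mm) */

/* Claim @cid if it is in range and not already taken (cf. __mm_cid_get()). */
static bool cid_try_get(unsigned int cid, unsigned int max_cids)
{
	unsigned long bit;

	if (cid >= max_cids)
		return false;
	bit = 1UL << cid;
	/* fetch_or returns the old value: fail if the bit was already set */
	return !(atomic_fetch_or(&cid_map, bit) & bit);
}

/* Find the first zero bit, like cpumask_first_zero(). */
static unsigned int cid_first_zero(void)
{
	unsigned long map = atomic_load(&cid_map);
	unsigned int cid;

	for (cid = 0; cid < MAX_CIDS; cid++)
		if (!(map & (1UL << cid)))
			break;
	return cid;
}

/*
 * Mirrors mm_cid_select()/mm_cid_get(): prefer the task's last CID, then
 * the CID last used on this CPU, then the first free one, retrying on the
 * transient failure that occurs when max_cids changes concurrently.
 */
static unsigned int cid_select(unsigned int last_task_cid,
			       unsigned int last_cpu_cid,
			       unsigned int max_cids)
{
	unsigned int cid;

	for (;;) {
		if (cid_try_get(last_task_cid, max_cids))
			return last_task_cid;
		if (cid_try_get(last_cpu_cid, max_cids))
			return last_cpu_cid;
		cid = cid_first_zero();
		if (cid_try_get(cid, max_cids))
			return cid;
	}
}

int main(void)
{
	/* Pretend CIDs 0 and 1 are in use and the task last ran with CID 1. */
	atomic_fetch_or(&cid_map, 0x3UL);
	printf("selected CID %u\n", cid_select(1, 0, 8));	/* prints 2 */
	return 0;
}

As the comment in mm_cid_select() in the diff notes, the retry terminates in practice because there can never be more tasks running (or about to be scheduled in) concurrently than min(mm->nr_cpus_allowed, mm->mm_users), so a free CID below that bound eventually becomes available.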