 include/linux/rseq_types.h |  6
 kernel/fork.c              |  1
 kernel/sched/core.c        | 59
 kernel/sched/sched.h       |  3
 4 files changed, 50 insertions(+), 19 deletions(-)
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
index d7e8071b626a..0fab369999b6 100644
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -117,14 +117,20 @@ struct mm_cid_pcpu {
 /**
  * struct mm_mm_cid - Storage for per MM CID data
  * @pcpu:            Per CPU storage for CIDs associated to a CPU
+ * @max_cids:        The exclusive maximum CID value for allocation and convergence
  * @nr_cpus_allowed: The number of CPUs in the per MM allowed CPUs map. The map
  *                   is growth only.
+ * @users:           The number of tasks sharing this MM. Separate from mm::mm_users
+ *                   as that is modified by mmget()/mmput() by other entities which
+ *                   do not actually share the MM.
  * @lock:            Spinlock to protect all fields except @pcpu. It also protects
  *                   the MM cid cpumask and the MM cidmask bitmap.
  */
 struct mm_mm_cid {
         struct mm_cid_pcpu      __percpu *pcpu;
+        unsigned int            max_cids;
         unsigned int            nr_cpus_allowed;
+        unsigned int            users;
         raw_spinlock_t          lock;
 } ____cacheline_aligned_in_smp;
 #else /* CONFIG_SCHED_MM_CID */
diff --git a/kernel/fork.c b/kernel/fork.c
index 74bc7c9f1bb3..6c23219e1169 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2455,6 +2455,7 @@ bad_fork_cleanup_namespaces:
         exit_task_namespaces(p);
 bad_fork_cleanup_mm:
         if (p->mm) {
+                sched_mm_cid_exit(p);
                 mm_clear_owner(p->mm, p);
                 mmput(p->mm);
         }
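Before the scheduler changes below, it is worth pinning down the invariant the two new fields establish: @max_cids is kept at min(@nr_cpus_allowed, @users), so the CIDs handed out to user space stay within the smaller of the two constraints. The following stand-alone user-space model illustrates the bookkeeping; all names in it (mm_model, model_fork(), ...) are invented for illustration and are not kernel API:

/*
 * Stand-alone model of the @max_cids invariant; illustration only,
 * not kernel code. Mirrors how @users and @nr_cpus_allowed drive
 * max_cids = min(nr_cpus_allowed, users).
 */
#include <assert.h>
#include <stdio.h>

struct mm_model {
        unsigned int nr_cpus_allowed;   /* growth-only, like the MM cpumask weight */
        unsigned int users;             /* tasks actually sharing the MM */
        unsigned int max_cids;          /* exclusive upper bound for CIDs */
};

/* Analogous to mm_update_max_cids() in the patch */
static void model_update_max_cids(struct mm_model *mm)
{
        unsigned int a = mm->nr_cpus_allowed, u = mm->users;

        mm->max_cids = a < u ? a : u;
}

static void model_fork(struct mm_model *mm) { mm->users++; model_update_max_cids(mm); }
static void model_exit(struct mm_model *mm) { mm->users--; model_update_max_cids(mm); }

int main(void)
{
        struct mm_model mm = { .nr_cpus_allowed = 4 };

        model_fork(&mm);                /* single task */
        assert(mm.max_cids == 1);       /* a lone task always sees CID 0 */
        for (int i = 0; i < 7; i++)
                model_fork(&mm);        /* now 8 tasks on 4 CPUs */
        assert(mm.max_cids == 4);       /* bounded by the affinity weight */
        for (int i = 0; i < 6; i++)
                model_exit(&mm);        /* back to 2 tasks */
        assert(mm.max_cids == 2);       /* shrinks with the user count */
        printf("max_cids model OK\n");
        return 0;
}

With that bound in place, the kernel/sched/core.c changes below only have to refresh it from the three places where a constraint can change: fork(), exit() and affinity updates.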
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 34b6c31eca3a..f9295c42da22 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4485,7 +4485,6 @@ static void __sched_fork(u64 clone_flags, struct task_struct *p)
         init_numa_balancing(clone_flags, p);
         p->wake_entry.u_flags = CSD_TYPE_TTWU;
         p->migration_pending = NULL;
-        init_sched_mm_cid(p);
 }
 
 DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -10371,15 +10370,27 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
 
 #ifdef CONFIG_SCHED_MM_CID
 /*
- * When a task exits, the MM CID held by the task is not longer required as
- * the task cannot return to user space.
+ * Update the CID range properties when the constraints change. Invoked via
+ * fork(), exit() and affinity changes.
  */
+static void mm_update_max_cids(struct mm_struct *mm)
+{
+        struct mm_mm_cid *mc = &mm->mm_cid;
+        unsigned int max_cids;
+
+        lockdep_assert_held(&mm->mm_cid.lock);
+
+        /* Calculate the new maximum constraint */
+        max_cids = min(mc->nr_cpus_allowed, mc->users);
+        WRITE_ONCE(mc->max_cids, max_cids);
+}
+
 static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpumask *affmsk)
 {
         struct cpumask *mm_allowed;
         unsigned int weight;
 
-        if (!mm)
+        if (!mm || !READ_ONCE(mm->mm_cid.users))
                 return;
 
         /*
@@ -10389,9 +10400,30 @@ static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpumask *affmsk)
         guard(raw_spinlock)(&mm->mm_cid.lock);
         mm_allowed = mm_cpus_allowed(mm);
         weight = cpumask_weighted_or(mm_allowed, mm_allowed, affmsk);
+        if (weight == mm->mm_cid.nr_cpus_allowed)
+                return;
         WRITE_ONCE(mm->mm_cid.nr_cpus_allowed, weight);
+        mm_update_max_cids(mm);
+}
+
+void sched_mm_cid_fork(struct task_struct *t)
+{
+        struct mm_struct *mm = t->mm;
+
+        WARN_ON_ONCE(!mm || t->mm_cid.cid != MM_CID_UNSET);
+
+        guard(raw_spinlock)(&mm->mm_cid.lock);
+        t->mm_cid.active = 1;
+        mm->mm_cid.users++;
+        /* Preset last_cid for mm_cid_select() */
+        t->mm_cid.last_cid = READ_ONCE(mm->mm_cid.max_cids) - 1;
+        mm_update_max_cids(mm);
 }
 
+/*
+ * When a task exits, the MM CID held by the task is no longer required as
+ * the task cannot return to user space.
+ */
 void sched_mm_cid_exit(struct task_struct *t)
 {
         struct mm_struct *mm = t->mm;
@@ -10399,12 +10431,14 @@ void sched_mm_cid_exit(struct task_struct *t)
         if (!mm || !t->mm_cid.active)
                 return;
 
-        guard(preempt)();
+        guard(raw_spinlock)(&mm->mm_cid.lock);
         t->mm_cid.active = 0;
+        mm->mm_cid.users--;
         if (t->mm_cid.cid != MM_CID_UNSET) {
                 clear_bit(t->mm_cid.cid, mm_cidmask(mm));
                 t->mm_cid.cid = MM_CID_UNSET;
         }
+        mm_update_max_cids(mm);
 }
 
 /* Deactivate MM CID allocation across execve() */
@@ -10416,22 +10450,11 @@ void sched_mm_cid_before_execve(struct task_struct *t)
 /* Reactivate MM CID after successful execve() */
 void sched_mm_cid_after_execve(struct task_struct *t)
 {
-        struct mm_struct *mm = t->mm;
-
-        if (!mm)
-                return;
-
+        sched_mm_cid_fork(t);
         guard(preempt)();
-        t->mm_cid.active = 1;
         mm_cid_select(t);
 }
 
-void sched_mm_cid_fork(struct task_struct *t)
-{
-        WARN_ON_ONCE(!t->mm || t->mm_cid.cid != MM_CID_UNSET);
-        t->mm_cid.active = 1;
-}
-
 void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
 {
         struct mm_cid_pcpu __percpu *pcpu = mm->mm_cid.pcpu;
@@ -10440,7 +10463,9 @@ void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
         for_each_possible_cpu(cpu)
                 per_cpu_ptr(pcpu, cpu)->cid = MM_CID_UNSET;
 
+        mm->mm_cid.max_cids = 0;
         mm->mm_cid.nr_cpus_allowed = p->nr_cpus_allowed;
+        mm->mm_cid.users = 0;
         raw_spin_lock_init(&mm->mm_cid.lock);
         cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask);
         bitmap_zero(mm_cidmask(mm), num_possible_cpus());
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 31f2e431db5e..d539fb269957 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3571,7 +3571,7 @@ static inline bool mm_cid_get(struct task_struct *t)
         struct mm_struct *mm = t->mm;
         unsigned int max_cids;
 
-        max_cids = min_t(int, READ_ONCE(mm->mm_cid.nr_cpus_allowed), atomic_read(&mm->mm_users));
+        max_cids = READ_ONCE(mm->mm_cid.max_cids);
 
         /* Try to reuse the last CID of this task */
         if (__mm_cid_get(t, t->mm_cid.last_cid, max_cids))
@@ -3614,7 +3614,6 @@ static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next)
 }
 
 #else /* !CONFIG_SCHED_MM_CID: */
-static inline void init_sched_mm_cid(struct task_struct *t) { }
 static inline void mm_cid_select(struct task_struct *t) { }
 static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next) { }
 #endif /* !CONFIG_SCHED_MM_CID */
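To see how the new @max_cids bound feeds into allocation, here is a stand-alone model of the selection order suggested by the mm_cid_get() hunk above: retry the task's last CID first, presumably falling back to a search for a free CID below the bound (that fallback lies outside the visible hunk, so it is an assumption here). All names (cid_model_*, NCIDS) are invented; the kernel version works on mm_cidmask() with atomic bitops under the locking rules documented in struct mm_mm_cid.

/*
 * User-space sketch, illustration only. cid_model_try_get() stands in
 * for __mm_cid_get() and cid_model_get() for mm_cid_get(); the scan
 * fallback is an assumption, not quoted from the patch.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define NCIDS 64

static bool cidmask[NCIDS];     /* stand-in for the MM cidmask bitmap */

/* Claim @cid if it is below the bound and free */
static bool cid_model_try_get(unsigned int cid, unsigned int max_cids)
{
        if (cid >= max_cids || cidmask[cid])
                return false;
        cidmask[cid] = true;
        return true;
}

/* Prefer @last_cid, else scan [0, max_cids) */
static int cid_model_get(unsigned int last_cid, unsigned int max_cids)
{
        if (cid_model_try_get(last_cid, max_cids))
                return (int)last_cid;
        for (unsigned int cid = 0; cid < max_cids; cid++) {
                if (cid_model_try_get(cid, max_cids))
                        return (int)cid;
        }
        return -1;      /* everything below the bound is taken */
}

int main(void)
{
        unsigned int max_cids = 2;      /* e.g. many threads pinned to 2 CPUs */

        assert(cid_model_get(1, max_cids) == 1);        /* last CID free: reused */
        assert(cid_model_get(1, max_cids) == 0);        /* taken: scan finds 0 */
        assert(cid_model_get(0, max_cids) == -1);       /* bound exhausted */
        printf("cid model OK\n");
        return 0;
}

Preferring last_cid keeps a task's ID stable as long as the constraints allow, which is what makes the ID usable as a compact per-task index from user space. It also explains why sched_mm_cid_fork() above presets last_cid to max_cids - 1: the first lookup of a new task starts from a valid in-range value instead of an arbitrary stale one.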