-rw-r--r--   include/linux/rseq_types.h    6
-rw-r--r--   kernel/fork.c                 1
-rw-r--r--   kernel/sched/core.c          59
-rw-r--r--   kernel/sched/sched.h          3
4 files changed, 50 insertions, 19 deletions
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
index d7e8071b626a..0fab369999b6 100644
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -117,14 +117,20 @@ struct mm_cid_pcpu {
/**
* struct mm_mm_cid - Storage for per MM CID data
* @pcpu: Per CPU storage for CIDs associated to a CPU
+ * @max_cids: The exclusive maximum CID value for allocation and convergence
* @nr_cpus_allowed: The number of CPUs in the per MM allowed CPUs map. The map
* is growth only.
+ * @users: The number of tasks sharing this MM. Separate from mm::mm_users
+ * as that is modified via mmget()/mmput() by other entities which
+ * do not actually share the MM.
* @lock: Spinlock to protect all fields except @pcpu. It also protects
* the MM cid cpumask and the MM cidmask bitmap.
*/
struct mm_mm_cid {
struct mm_cid_pcpu __percpu *pcpu;
+ unsigned int max_cids;
unsigned int nr_cpus_allowed;
+ unsigned int users;
raw_spinlock_t lock;
} ____cacheline_aligned_in_smp;
#else /* CONFIG_SCHED_MM_CID */
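
The two new fields turn the allocation ceiling into cached state: @users counts the tasks that can actually own a CID, and @max_cids holds the exclusive upper bound min(nr_cpus_allowed, users) so the hot path can read it with a single load. A minimal userspace sketch of that invariant (standalone illustration, not kernel code; the struct and helper names below are made up to mirror the fields above):

#include <stdio.h>

/* Illustrative stand-in for the kernel's struct mm_mm_cid */
struct cid_bounds {
        unsigned int max_cids;          /* exclusive upper bound for CID allocation */
        unsigned int nr_cpus_allowed;   /* growth-only count of allowed CPUs */
        unsigned int users;             /* tasks actually sharing the MM */
};

/* Mirrors what mm_update_max_cids() computes under mm_cid.lock */
static void update_max_cids(struct cid_bounds *b)
{
        b->max_cids = b->nr_cpus_allowed < b->users ? b->nr_cpus_allowed : b->users;
}

int main(void)
{
        struct cid_bounds b = { .nr_cpus_allowed = 8, .users = 0 };

        /* Twelve tasks attach to the MM: the ceiling saturates at the CPU count */
        for (int i = 0; i < 12; i++) {
                b.users++;
                update_max_cids(&b);
        }
        printf("max_cids = %u\n", b.max_cids);  /* prints 8 */

        /* Most of them exit again: the ceiling shrinks with the user count */
        b.users = 3;
        update_max_cids(&b);
        printf("max_cids = %u\n", b.max_cids);  /* prints 3 */
        return 0;
}

Every CID the allocator hands out stays in [0, max_cids), so the concurrency IDs user space observes via rseq remain below both the number of usable CPUs and the number of threads sharing the MM.
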
diff --git a/kernel/fork.c b/kernel/fork.c
index 74bc7c9f1bb3..6c23219e1169 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2455,6 +2455,7 @@ bad_fork_cleanup_namespaces:
exit_task_namespaces(p);
bad_fork_cleanup_mm:
if (p->mm) {
+ sched_mm_cid_exit(p);
mm_clear_owner(p->mm, p);
mmput(p->mm);
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 34b6c31eca3a..f9295c42da22 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4485,7 +4485,6 @@ static void __sched_fork(u64 clone_flags, struct task_struct *p)
init_numa_balancing(clone_flags, p);
p->wake_entry.u_flags = CSD_TYPE_TTWU;
p->migration_pending = NULL;
- init_sched_mm_cid(p);
}
DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -10371,15 +10370,27 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
#ifdef CONFIG_SCHED_MM_CID
/*
- * When a task exits, the MM CID held by the task is not longer required as
- * the task cannot return to user space.
+ * Update the CID range properties when the constraints change. Invoked via
+ * fork(), exit() and affinity changes.
*/
+static void mm_update_max_cids(struct mm_struct *mm)
+{
+ struct mm_mm_cid *mc = &mm->mm_cid;
+ unsigned int max_cids;
+
+ lockdep_assert_held(&mm->mm_cid.lock);
+
+ /* Calculate the new maximum constraint */
+ max_cids = min(mc->nr_cpus_allowed, mc->users);
+ WRITE_ONCE(mc->max_cids, max_cids);
+}
+
static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpumask *affmsk)
{
struct cpumask *mm_allowed;
unsigned int weight;
- if (!mm)
+ if (!mm || !READ_ONCE(mm->mm_cid.users))
return;
/*
@@ -10389,9 +10400,30 @@ static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpu
guard(raw_spinlock)(&mm->mm_cid.lock);
mm_allowed = mm_cpus_allowed(mm);
weight = cpumask_weighted_or(mm_allowed, mm_allowed, affmsk);
+ if (weight == mm->mm_cid.nr_cpus_allowed)
+ return;
WRITE_ONCE(mm->mm_cid.nr_cpus_allowed, weight);
+ mm_update_max_cids(mm);
+}
+
+void sched_mm_cid_fork(struct task_struct *t)
+{
+ struct mm_struct *mm = t->mm;
+
+ WARN_ON_ONCE(!mm || t->mm_cid.cid != MM_CID_UNSET);
+
+ guard(raw_spinlock)(&mm->mm_cid.lock);
+ t->mm_cid.active = 1;
+ mm->mm_cid.users++;
+ /* Preset last_cid for mm_cid_select() */
+ t->mm_cid.last_cid = READ_ONCE(mm->mm_cid.max_cids) - 1;
+ mm_update_max_cids(mm);
}
+/*
+ * When a task exits, the MM CID held by the task is no longer required as
+ * the task cannot return to user space.
+ */
void sched_mm_cid_exit(struct task_struct *t)
{
struct mm_struct *mm = t->mm;
@@ -10399,12 +10431,14 @@ void sched_mm_cid_exit(struct task_struct *t)
if (!mm || !t->mm_cid.active)
return;
- guard(preempt)();
+ guard(raw_spinlock)(&mm->mm_cid.lock);
t->mm_cid.active = 0;
+ mm->mm_cid.users--;
if (t->mm_cid.cid != MM_CID_UNSET) {
clear_bit(t->mm_cid.cid, mm_cidmask(mm));
t->mm_cid.cid = MM_CID_UNSET;
}
+ mm_update_max_cids(mm);
}
/* Deactivate MM CID allocation across execve() */
@@ -10416,22 +10450,11 @@ void sched_mm_cid_before_execve(struct task_struct *t)
/* Reactivate MM CID after successful execve() */
void sched_mm_cid_after_execve(struct task_struct *t)
{
- struct mm_struct *mm = t->mm;
-
- if (!mm)
- return;
-
+ sched_mm_cid_fork(t);
guard(preempt)();
- t->mm_cid.active = 1;
mm_cid_select(t);
}
-void sched_mm_cid_fork(struct task_struct *t)
-{
- WARN_ON_ONCE(!t->mm || t->mm_cid.cid != MM_CID_UNSET);
- t->mm_cid.active = 1;
-}
-
void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
{
struct mm_cid_pcpu __percpu *pcpu = mm->mm_cid.pcpu;
@@ -10440,7 +10463,9 @@ void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
for_each_possible_cpu(cpu)
per_cpu_ptr(pcpu, cpu)->cid = MM_CID_UNSET;
+ mm->mm_cid.max_cids = 0;
mm->mm_cid.nr_cpus_allowed = p->nr_cpus_allowed;
+ mm->mm_cid.users = 0;
raw_spin_lock_init(&mm->mm_cid.lock);
cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask);
bitmap_zero(mm_cidmask(mm), num_possible_cpus());
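
Taken together, the core.c changes make sched_mm_cid_fork() and sched_mm_cid_exit() the only places that modify mm_cid.users, both under mm_cid.lock and both followed by mm_update_max_cids(); the fork.c hunk above closes the error path by calling sched_mm_cid_exit() before mmput() when fork fails after the CID user was already counted. A compressed sketch of that bookkeeping (hypothetical helper names, with a plain unsigned long standing in for the per-MM cidmask; the real state lives in struct mm_mm_cid and in the task's mm_cid member):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define CID_UNSET       UINT_MAX

struct mm_sketch {
        unsigned long   cidmask;        /* one bit per allocated CID, bit 0 = CID 0 */
        unsigned int    users;          /* tasks sharing the MM */
        unsigned int    nr_cpus_allowed;
        unsigned int    max_cids;       /* cached min(nr_cpus_allowed, users) */
};

struct task_sketch {
        unsigned int    cid;            /* currently owned CID or CID_UNSET */
        bool            active;
};

static void update_max_cids(struct mm_sketch *mm)
{
        mm->max_cids = mm->nr_cpus_allowed < mm->users ? mm->nr_cpus_allowed : mm->users;
}

/* Roughly what sched_mm_cid_fork() does under mm_cid.lock */
static void cid_fork(struct mm_sketch *mm, struct task_sketch *t)
{
        t->active = true;
        t->cid = CID_UNSET;             /* an actual CID is picked later, on the CPU */
        mm->users++;
        update_max_cids(mm);
}

/* Roughly what sched_mm_cid_exit() does: drop the user and release the CID bit */
static void cid_exit(struct mm_sketch *mm, struct task_sketch *t)
{
        t->active = false;
        mm->users--;
        if (t->cid != CID_UNSET) {
                mm->cidmask &= ~(1UL << t->cid);
                t->cid = CID_UNSET;
        }
        update_max_cids(mm);
}

int main(void)
{
        struct mm_sketch mm = { .nr_cpus_allowed = 4 };
        struct task_sketch a = { .cid = CID_UNSET }, b = { .cid = CID_UNSET };

        cid_fork(&mm, &a);
        cid_fork(&mm, &b);

        a.cid = 0;                      /* pretend the scheduler handed task a CID 0 */
        mm.cidmask |= 1UL << a.cid;

        cid_exit(&mm, &a);              /* bit 0 is released, max_cids drops to 1 */
        printf("users=%u max_cids=%u cidmask=%#lx\n", mm.users, mm.max_cids, mm.cidmask);
        cid_exit(&mm, &b);
        return 0;
}

Because fork, exit and affinity updates all run under the same raw spinlock, the user count, the cidmask and the cached bound cannot disagree, which is what allows the hot path in sched.h to trust a single READ_ONCE() of max_cids.
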
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 31f2e431db5e..d539fb269957 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3571,7 +3571,7 @@ static inline bool mm_cid_get(struct task_struct *t)
struct mm_struct *mm = t->mm;
unsigned int max_cids;
- max_cids = min_t(int, READ_ONCE(mm->mm_cid.nr_cpus_allowed), atomic_read(&mm->mm_users));
+ max_cids = READ_ONCE(mm->mm_cid.max_cids);
/* Try to reuse the last CID of this task */
if (__mm_cid_get(t, t->mm_cid.last_cid, max_cids))
@@ -3614,7 +3614,6 @@ static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *n
}
#else /* !CONFIG_SCHED_MM_CID: */
-static inline void init_sched_mm_cid(struct task_struct *t) { }
static inline void mm_cid_select(struct task_struct *t) { }
static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next) { }
#endif /* !CONFIG_SCHED_MM_CID */
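
With the bound cached, mm_cid_get() no longer recomputes min(nr_cpus_allowed, mm_users) from two loads on every allocation (one of them an atomic_read() of mm_users, which also counts pins from entities that do not share the MM); it reads the converged max_cids once and allocates strictly below it. The hunk does not show __mm_cid_get() itself, so the following is only a sketch of the assumed shape of the lookup, with hypothetical names and a plain bitmap:

#include <stdbool.h>
#include <stdio.h>

#define CID_UNSET       (~0U)

/*
 * Hypothetical stand-in for __mm_cid_get(): claim @cid if it is below the
 * exclusive bound and not yet taken in the bitmap.
 */
static bool try_get_cid(unsigned long *cidmask, unsigned int cid, unsigned int max_cids)
{
        if (cid >= max_cids || (*cidmask & (1UL << cid)))
                return false;
        *cidmask |= 1UL << cid;
        return true;
}

/* Shape of the lookup in mm_cid_get(): retry last_cid first, then scan the range */
static unsigned int get_cid(unsigned long *cidmask, unsigned int last_cid, unsigned int max_cids)
{
        if (try_get_cid(cidmask, last_cid, max_cids))
                return last_cid;
        for (unsigned int cid = 0; cid < max_cids; cid++) {
                if (try_get_cid(cidmask, cid, max_cids))
                        return cid;
        }
        return CID_UNSET;               /* over-committed, caller retries later */
}

int main(void)
{
        unsigned long cidmask = 0;
        unsigned int max_cids = 3;      /* single load of the cached bound */

        printf("%u\n", get_cid(&cidmask, 2, max_cids)); /* reuses last_cid: prints 2 */
        printf("%u\n", get_cid(&cidmask, 2, max_cids)); /* 2 is taken: prints 0 */
        return 0;
}

Seen from here, the last_cid preset in sched_mm_cid_fork() (max_cids - 1, read before the new user is counted) simply points the new task's first lookup at the highest currently valid CID instead of an uninitialized value.
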