46 files changed, 1340 insertions, 451 deletions
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 422270d64820..54e581072617 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -109,6 +109,15 @@ config KEXEC_HANDOVER
 	  to keep data or state alive across the kexec. For this to work,
 	  both source and target kernels need to have this option enabled.
 
+config KEXEC_HANDOVER_DEBUG
+	bool "Enable Kexec Handover debug checks"
+	depends on KEXEC_HANDOVER
+	help
+	  This option enables extra sanity checks for the Kexec Handover
+	  subsystem. Since, KHO performance is crucial in live update
+	  scenarios and the extra code might be adding overhead it is
+	  only optionally enabled.
+
 config CRASH_DUMP
 	bool "kernel crash dumps"
 	default ARCH_DEFAULT_CRASH_DUMP
diff --git a/kernel/Makefile b/kernel/Makefile
index df3dd8291bb6..9fe722305c9b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
 obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
 obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
+obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup/
diff --git a/kernel/acct.c b/kernel/acct.c
index 61630110e29d..2a2b3c874acd 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -520,26 +520,23 @@ static void fill_ac(struct bsd_acct_struct *acct)
 static void acct_write_process(struct bsd_acct_struct *acct)
 {
 	struct file *file = acct->file;
-	const struct cred *cred;
 	acct_t *ac = &acct->ac;
 
 	/* Perform file operations on behalf of whoever enabled accounting */
-	cred = override_creds(file->f_cred);
-
-	/*
-	 * First check to see if there is enough free_space to continue
-	 * the process accounting system. Then get freeze protection. If
-	 * the fs is frozen, just skip the write as we could deadlock
-	 * the system otherwise.
-	 */
-	if (check_free_space(acct) && file_start_write_trylock(file)) {
-		/* it's been opened O_APPEND, so position is irrelevant */
-		loff_t pos = 0;
-		__kernel_write(file, ac, sizeof(acct_t), &pos);
-		file_end_write(file);
+	scoped_with_creds(file->f_cred) {
+		/*
+		 * First check to see if there is enough free_space to continue
+		 * the process accounting system. Then get freeze protection. If
+		 * the fs is frozen, just skip the write as we could deadlock
+		 * the system otherwise.
+		 */
+		if (check_free_space(acct) && file_start_write_trylock(file)) {
+			/* it's been opened O_APPEND, so position is irrelevant */
+			loff_t pos = 0;
+			__kernel_write(file, ac, sizeof(acct_t), &pos);
+			file_end_write(file);
+		}
 	}
-
-	revert_creds(cred);
 }
 
 static void do_acct_process(struct bsd_acct_struct *acct)
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 6ac35430c573..eec60b57bd3d 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -634,37 +634,24 @@ release_prog:
 int bpf_iter_new_fd(struct bpf_link *link)
 {
 	struct bpf_iter_link *iter_link;
-	struct file *file;
 	unsigned int flags;
-	int err, fd;
+	int err;
 
 	if (link->ops != &bpf_iter_link_lops)
 		return -EINVAL;
 
 	flags = O_RDONLY | O_CLOEXEC;
-	fd = get_unused_fd_flags(flags);
-	if (fd < 0)
-		return fd;
-
-	file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
-	if (IS_ERR(file)) {
-		err = PTR_ERR(file);
-		goto free_fd;
-	}
+
+	FD_PREPARE(fdf, flags, anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags));
+	if (fdf.err)
+		return fdf.err;
 
 	iter_link = container_of(link, struct bpf_iter_link, link);
-	err = prepare_seq_file(file, iter_link);
+	err = prepare_seq_file(fd_prepare_file(fdf), iter_link);
 	if (err)
-		goto free_file;
+		return err; /* Automatic cleanup handles fput */
 
-	fd_install(fd, file);
-	return fd;
-
-free_file:
-	fput(file);
-free_fd:
-	put_unused_fd(fd);
-	return err;
+	return fd_publish(fdf);
 }
 
 struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index eb25e70e0bdc..e4007fea4909 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -4169,7 +4169,8 @@ release_prog:
 }
 
 /**
- * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode
+ * bpf_task_work_schedule_signal_impl - Schedule BPF callback using task_work_add with TWA_SIGNAL
+ * mode
  * @task: Task struct for which callback should be scheduled
  * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
  * @map__map: bpf_map that embeds struct bpf_task_work in the values
@@ -4178,15 +4179,17 @@ release_prog:
  *
  * Return: 0 if task work has been scheduled successfully, negative error code otherwise
  */
-__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
-					      void *map__map, bpf_task_work_callback_t callback,
-					      void *aux__prog)
+__bpf_kfunc int bpf_task_work_schedule_signal_impl(struct task_struct *task,
+						   struct bpf_task_work *tw, void *map__map,
+						   bpf_task_work_callback_t callback,
+						   void *aux__prog)
 {
 	return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_SIGNAL);
 }
 
 /**
- * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode
+ * bpf_task_work_schedule_resume_impl - Schedule BPF callback using task_work_add with TWA_RESUME
+ * mode
  * @task: Task struct for which callback should be scheduled
  * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
  * @map__map: bpf_map that embeds struct bpf_task_work in the values
@@ -4195,9 +4198,10 @@ __bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct b
  *
  * Return: 0 if task work has been scheduled successfully, negative error code otherwise
  */
-__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw,
-					      void *map__map, bpf_task_work_callback_t callback,
-					      void *aux__prog)
+__bpf_kfunc int bpf_task_work_schedule_resume_impl(struct task_struct *task,
+						   struct bpf_task_work *tw, void *map__map,
+						   bpf_task_work_callback_t callback,
+						   void *aux__prog)
 {
 	return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME);
 }
@@ -4376,9 +4380,9 @@ BTF_ID_FLAGS(func, bpf_strnstr);
 #if defined(CONFIG_BPF_LSM) && defined(CONFIG_CGROUPS)
 BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU)
 #endif
-BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_stream_vprintk_impl, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_task_work_schedule_signal_impl, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_task_work_schedule_resume_impl, KF_TRUSTED_ARGS)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/stream.c b/kernel/bpf/stream.c
index eb6c5a21c2ef..ff16c631951b 100644
--- a/kernel/bpf/stream.c
+++ b/kernel/bpf/stream.c
@@ -355,7 +355,8 @@ __bpf_kfunc_start_defs();
  * Avoid using enum bpf_stream_id so that kfunc users don't have to pull in the
  * enum in headers.
  */
-__bpf_kfunc int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, u32 len__sz, void *aux__prog)
+__bpf_kfunc int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args,
+					u32 len__sz, void *aux__prog)
 {
 	struct bpf_bprintf_data data = {
 		.get_bin_args	= true,
diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c
index 0bbe412f854e..feecd8f4dbf9 100644
--- a/kernel/bpf/token.c
+++ b/kernel/bpf/token.c
@@ -110,16 +110,15 @@ const struct file_operations bpf_token_fops = {
 
 int bpf_token_create(union bpf_attr *attr)
 {
+	struct bpf_token *token __free(kfree) = NULL;
 	struct bpf_mount_opts *mnt_opts;
-	struct bpf_token *token = NULL;
 	struct user_namespace *userns;
 	struct inode *inode;
-	struct file *file;
 	CLASS(fd, f)(attr->token_create.bpffs_fd);
 	struct path path;
 	struct super_block *sb;
 	umode_t mode;
-	int err, fd;
+	int err;
 
 	if (fd_empty(f))
 		return -EBADF;
@@ -166,23 +165,20 @@ int bpf_token_create(union bpf_attr *attr)
 	inode->i_fop = &bpf_token_fops;
 	clear_nlink(inode); /* make sure it is unlinked */
 
-	file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops);
-	if (IS_ERR(file)) {
-		iput(inode);
-		return PTR_ERR(file);
-	}
+	FD_PREPARE(fdf, O_CLOEXEC,
+		   alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME,
+				     O_RDWR, &bpf_token_fops));
+	if (fdf.err)
+		return fdf.err;
 
 	token = kzalloc(sizeof(*token), GFP_USER);
-	if (!token) {
-		err = -ENOMEM;
-		goto out_file;
-	}
+	if (!token)
+		return -ENOMEM;
 
 	atomic64_set(&token->refcnt, 1);
 
-	/* remember bpffs owning userns for future ns_capable() checks */
-	token->userns = get_user_ns(userns);
-
+	/* remember bpffs owning userns for future ns_capable() checks. */
+	token->userns = userns;
 	token->allowed_cmds = mnt_opts->delegate_cmds;
 	token->allowed_maps = mnt_opts->delegate_maps;
 	token->allowed_progs = mnt_opts->delegate_progs;
@@ -190,24 +186,11 @@ int bpf_token_create(union bpf_attr *attr)
 
 	err = security_bpf_token_create(token, attr, &path);
 	if (err)
-		goto out_token;
-
-	fd = get_unused_fd_flags(O_CLOEXEC);
-	if (fd < 0) {
-		err = fd;
-		goto out_token;
-	}
-
-	file->private_data = token;
-	fd_install(fd, file);
-
-	return fd;
+		return err;
 
-out_token:
-	bpf_token_free(token);
-out_file:
-	fput(file);
-	return err;
+	get_user_ns(token->userns);
+	fd_prepare_file(fdf)->private_data = no_free_ptr(token);
+	return fd_publish(fdf);
 }
 
 int bpf_token_get_info_by_fd(struct bpf_token *token,
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 5949095e51c3..f2cb0b097093 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -479,11 +479,6 @@ again:
 		 * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
 		 * trampoline again, and retry register.
 		 */
-		/* reset fops->func and fops->trampoline for re-register */
-		tr->fops->func = NULL;
-		tr->fops->trampoline = 0;
-
-		/* free im memory and reallocate later */
 		bpf_tramp_image_free(im);
 		goto again;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ff40e5e65c43..fbe4bb91c564 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8866,7 +8866,7 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env,
 				   struct bpf_verifier_state *cur)
 {
 	struct bpf_func_state *fold, *fcur;
-	int i, fr;
+	int i, fr, num_slots;
 
 	reset_idmap_scratch(env);
 	for (fr = old->curframe; fr >= 0; fr--) {
@@ -8879,7 +8879,9 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env,
 					&fcur->regs[i],
 					&env->idmap_scratch);
 
-		for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
+		num_slots = min(fold->allocated_stack / BPF_REG_SIZE,
+				fcur->allocated_stack / BPF_REG_SIZE);
+		for (i = 0; i < num_slots; i++) {
 			if (!is_spilled_reg(&fold->stack[i]) ||
 			    !is_spilled_reg(&fcur->stack[i]))
 				continue;
@@ -12259,8 +12261,8 @@ enum special_kfunc_type {
 	KF_bpf_res_spin_lock_irqsave,
 	KF_bpf_res_spin_unlock_irqrestore,
 	KF___bpf_trap,
-	KF_bpf_task_work_schedule_signal,
-	KF_bpf_task_work_schedule_resume,
+	KF_bpf_task_work_schedule_signal_impl,
+	KF_bpf_task_work_schedule_resume_impl,
 };
 
 BTF_ID_LIST(special_kfunc_list)
@@ -12331,13 +12333,13 @@ BTF_ID(func, bpf_res_spin_unlock)
 BTF_ID(func, bpf_res_spin_lock_irqsave)
 BTF_ID(func, bpf_res_spin_unlock_irqrestore)
 BTF_ID(func, __bpf_trap)
-BTF_ID(func, bpf_task_work_schedule_signal)
-BTF_ID(func, bpf_task_work_schedule_resume)
+BTF_ID(func, bpf_task_work_schedule_signal_impl)
+BTF_ID(func, bpf_task_work_schedule_resume_impl)
 
 static bool is_task_work_add_kfunc(u32 func_id)
 {
-	return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
-	       func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
+	return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal_impl] ||
+	       func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume_impl];
 }
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index fdee387f0d6b..ae1eb7a85eb4 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -250,12 +250,9 @@ bool cgroup_enable_per_threadgroup_rwsem __read_mostly;
 
 /* cgroup namespace for init task */
 struct cgroup_namespace init_cgroup_ns = {
-	.ns.__ns_ref	= REFCOUNT_INIT(2),
+	.ns		= NS_COMMON_INIT(init_cgroup_ns),
 	.user_ns	= &init_user_ns,
-	.ns.ops		= &cgroupns_operations,
-	.ns.inum	= ns_init_inum(&init_cgroup_ns),
 	.root_cset	= &init_css_set,
-	.ns.ns_type	= ns_common_type(&init_cgroup_ns),
 };
 
 static struct file_system_type cgroup2_fs_type;
@@ -1522,9 +1519,9 @@ static struct cgroup *current_cgns_cgroup_dfl(void)
 	} else {
 		/*
 		 * NOTE: This function may be called from bpf_cgroup_from_id()
-		 * on a task which has already passed exit_task_namespaces() and
-		 * nsproxy == NULL. Fall back to cgrp_dfl_root which will make all
-		 * cgroups visible for lookups.
+		 * on a task which has already passed exit_nsproxy_namespaces()
+		 * and nsproxy == NULL. Fall back to cgrp_dfl_root which will
+		 * make all cgroups visible for lookups.
 		 */
 		return &cgrp_dfl_root.cgrp;
 	}
@@ -5363,7 +5360,6 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	struct cgroup_file_ctx *ctx = of->priv;
 	struct cgroup *src_cgrp, *dst_cgrp;
 	struct task_struct *task;
-	const struct cred *saved_cred;
 	ssize_t ret;
 	enum cgroup_attach_lock_mode lock_mode;
 
@@ -5386,11 +5382,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	 * permissions using the credentials from file open to protect against
 	 * inherited fd attacks.
 	 */
-	saved_cred = override_creds(of->file->f_cred);
-	ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
-					of->file->f_path.dentry->d_sb,
-					threadgroup, ctx->ns);
-	revert_creds(saved_cred);
+	scoped_with_creds(of->file->f_cred)
+		ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
+						of->file->f_path.dentry->d_sb,
+						threadgroup, ctx->ns);
 	if (ret)
 		goto out_finish;
 
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
index fdbe57578e68..db9617556dd7 100644
--- a/kernel/cgroup/namespace.c
+++ b/kernel/cgroup/namespace.c
@@ -30,7 +30,6 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
 	ret = ns_common_init(new_ns);
 	if (ret)
 		return ERR_PTR(ret);
-	ns_tree_add(new_ns);
 	return no_free_ptr(new_ns);
 }
 
@@ -86,6 +85,7 @@ struct cgroup_namespace *copy_cgroup_ns(u64 flags,
 	new_ns->ucounts = ucounts;
 	new_ns->root_cset = cset;
 
+	ns_tree_add(new_ns);
 	return new_ns;
 }
 
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 3b1c43382eec..99dac1aa972a 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -373,7 +373,7 @@ static int __crash_shrink_memory(struct resource *old_res,
 		old_res->start = 0;
 		old_res->end   = 0;
 	} else {
-		crashk_res.end = ram_res->start - 1;
+		old_res->end = ram_res->start - 1;
 	}
 
 	crash_free_reserved_phys_range(ram_res->start, ram_res->end);
diff --git a/kernel/cred.c b/kernel/cred.c
index dbf6b687dc5c..a6f686b30da1 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -35,33 +35,6 @@ do {									\
 
 static struct kmem_cache *cred_jar;
 
-/* init to 2 - one for init_task, one to ensure it is never freed */
-static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) };
-
-/*
- * The initial credentials for the initial task
- */
-struct cred init_cred = {
-	.usage			= ATOMIC_INIT(4),
-	.uid			= GLOBAL_ROOT_UID,
-	.gid			= GLOBAL_ROOT_GID,
-	.suid			= GLOBAL_ROOT_UID,
-	.sgid			= GLOBAL_ROOT_GID,
-	.euid			= GLOBAL_ROOT_UID,
-	.egid			= GLOBAL_ROOT_GID,
-	.fsuid			= GLOBAL_ROOT_UID,
-	.fsgid			= GLOBAL_ROOT_GID,
-	.securebits		= SECUREBITS_DEFAULT,
-	.cap_inheritable	= CAP_EMPTY_SET,
-	.cap_permitted		= CAP_FULL_SET,
-	.cap_effective		= CAP_FULL_SET,
-	.cap_bset		= CAP_FULL_SET,
-	.user			= INIT_USER,
-	.user_ns		= &init_user_ns,
-	.group_info		= &init_groups,
-	.ucounts		= &init_ucounts,
-};
-
 /*
  * The RCU callback to actually dispose of a set of credentials
  */
@@ -306,6 +279,7 @@ int copy_creds(struct task_struct *p, u64 clone_flags)
 		kdebug("share_creds(%p{%ld})",
 		       p->cred, atomic_long_read(&p->cred->usage));
 		inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
+		get_cred_namespaces(p);
 		return 0;
 	}
 
@@ -343,6 +317,8 @@ int copy_creds(struct task_struct *p, u64 clone_flags)
 
 	p->cred = p->real_cred = get_cred(new);
 	inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
+	get_cred_namespaces(p);
+
 	return 0;
 
 error_put:
@@ -435,10 +411,13 @@ int commit_creds(struct cred *new)
 	 */
 	if (new->user != old->user || new->user_ns != old->user_ns)
 		inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
+
 	rcu_assign_pointer(task->real_cred, new);
 	rcu_assign_pointer(task->cred, new);
 	if (new->user != old->user || new->user_ns != old->user_ns)
 		dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
+	if (new->user_ns != old->user_ns)
+		switch_cred_namespaces(old, new);
 
 	/* send notifications */
 	if (!uid_eq(new->uid,   old->uid)  ||
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 1f9ee9759426..f973e7e73c90 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -481,6 +481,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 		case PCI_P2PDMA_MAP_BUS_ADDR:
 			sg->dma_address = pci_p2pdma_bus_addr_map(&p2pdma_state,
 					sg_phys(sg));
+			sg_dma_len(sg) = sg->length;
 			sg_dma_mark_bus_address(sg);
 			continue;
 		default:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1fd347da9026..2c35acc2722b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11901,7 +11901,7 @@ static int cpu_clock_event_add(struct perf_event *event, int flags)
 
 static void cpu_clock_event_del(struct perf_event *event, int flags)
 {
-	cpu_clock_event_stop(event, flags);
+	cpu_clock_event_stop(event, PERF_EF_UPDATE);
 }
 
 static void cpu_clock_event_read(struct perf_event *event)
diff --git a/kernel/exit.c b/kernel/exit.c
index 9f74e8f1c431..988e16efd66b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -291,6 +291,7 @@ repeat:
 	write_unlock_irq(&tasklist_lock);
 	/* @thread_pid can't go away until free_pids() below */
 	proc_flush_pid(thread_pid);
+	exit_cred_namespaces(p);
 	add_device_randomness(&p->se.sum_exec_runtime,
 			      sizeof(p->se.sum_exec_runtime));
 	free_pids(post.pids);
@@ -962,7 +963,7 @@ void __noreturn do_exit(long code)
 	exit_fs(tsk);
 	if (group_dead)
 		disassociate_ctty(1);
-	exit_task_namespaces(tsk);
+	exit_nsproxy_namespaces(tsk);
 	exit_task_work(tsk);
 	exit_thread(tsk);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 3da0f08615a9..f1857672426e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2453,7 +2453,7 @@ bad_fork_cleanup_io:
 	if (p->io_context)
 		exit_io_context(p);
 bad_fork_cleanup_namespaces:
-	exit_task_namespaces(p);
+	exit_nsproxy_namespaces(p);
 bad_fork_cleanup_mm:
 	if (p->mm) {
 		mm_clear_owner(p->mm, p);
@@ -2487,6 +2487,7 @@ bad_fork_cleanup_delayacct:
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
 	dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
+	exit_cred_namespaces(p);
 	exit_creds(p);
 bad_fork_free:
 	WRITE_ONCE(p->__state, TASK_DEAD);
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index a08cc076f332..ffde93d051a4 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
 #include <linux/mm.h>
 #include "gcov.h"
 
-#if (__GNUC__ >= 14)
+#if (__GNUC__ >= 15)
+#define GCOV_COUNTERS			10
+#elif (__GNUC__ >= 14)
 #define GCOV_COUNTERS			9
 #elif (__GNUC__ >= 10)
 #define GCOV_COUNTERS			8
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index 76f0940fb485..03d12e27189f 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -8,6 +8,7 @@
 
 #define pr_fmt(fmt) "KHO: " fmt
 
+#include <linux/cleanup.h>
 #include <linux/cma.h>
 #include <linux/count_zeros.h>
 #include <linux/debugfs.h>
@@ -22,6 +23,7 @@
 
 #include <asm/early_ioremap.h>
 
+#include "kexec_handover_internal.h"
 /*
  * KHO is tightly coupled with mm init and needs access to some of mm
  * internal APIs.
@@ -67,10 +69,10 @@ early_param("kho", kho_parse_enable);
  * Keep track of memory that is to be preserved across KHO.
  *
  * The serializing side uses two levels of xarrays to manage chunks of per-order
- * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a
- * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations
- * each bitmap will cover 16M of address space. Thus, for 16G of memory at most
- * 512K of bitmap memory will be needed for order 0.
+ * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order
+ * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0
+ * allocations each bitmap will cover 128M of address space. Thus, for 16G of
+ * memory at most 512K of bitmap memory will be needed for order 0.
  *
  * This approach is fully incremental, as the serialization progresses folios
  * can continue be aggregated to the tracker. The final step, immediately prior
@@ -78,12 +80,14 @@ early_param("kho", kho_parse_enable);
  * successor kernel to parse.
  */
 
-#define PRESERVE_BITS (512 * 8)
+#define PRESERVE_BITS (PAGE_SIZE * 8)
 
 struct kho_mem_phys_bits {
 	DECLARE_BITMAP(preserve, PRESERVE_BITS);
 };
 
+static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE);
+
 struct kho_mem_phys {
 	/*
 	 * Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized
@@ -131,28 +135,28 @@ static struct kho_out kho_out = {
 	.finalized = false,
 };
 
-static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
+static void *xa_load_or_alloc(struct xarray *xa, unsigned long index)
 {
-	void *elm, *res;
+	void *res = xa_load(xa, index);
+
+	if (res)
+		return res;
 
-	elm = xa_load(xa, index);
-	if (elm)
-		return elm;
+	void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL);
 
-	elm = kzalloc(sz, GFP_KERNEL);
 	if (!elm)
 		return ERR_PTR(-ENOMEM);
 
+	if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE)))
+		return ERR_PTR(-EINVAL);
+
 	res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
 	if (xa_is_err(res))
-		res = ERR_PTR(xa_err(res));
-
-	if (res) {
-		kfree(elm);
+		return ERR_PTR(xa_err(res));
+	else if (res)
 		return res;
-	}
 
-	return elm;
+	return no_free_ptr(elm);
 }
 
 static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
@@ -167,12 +171,12 @@ static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
 		const unsigned long pfn_high = pfn >> order;
 
 		physxa = xa_load(&track->orders, order);
-		if (!physxa)
-			continue;
+		if (WARN_ON_ONCE(!physxa))
+			return;
 
 		bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
-		if (!bits)
-			continue;
+		if (WARN_ON_ONCE(!bits))
+			return;
 
 		clear_bit(pfn_high % PRESERVE_BITS, bits->preserve);
 
@@ -216,8 +220,7 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
 		}
 	}
 
-	bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS,
-				sizeof(*bits));
+	bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS);
 	if (IS_ERR(bits))
 		return PTR_ERR(bits);
 
@@ -345,15 +348,19 @@ static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE);
 static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
 					  unsigned long order)
 {
-	struct khoser_mem_chunk *chunk;
+	struct khoser_mem_chunk *chunk __free(free_page) = NULL;
 
-	chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	chunk = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!chunk)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
+
+	if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE)))
+		return ERR_PTR(-EINVAL);
+
 	chunk->hdr.order = order;
 	if (cur_chunk)
 		KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
-	return chunk;
+	return no_free_ptr(chunk);
 }
 
 static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
@@ -374,14 +381,17 @@ static int kho_mem_serialize(struct kho_serialization *ser)
 	struct khoser_mem_chunk *chunk = NULL;
 	struct kho_mem_phys *physxa;
 	unsigned long order;
+	int err = -ENOMEM;
 
 	xa_for_each(&ser->track.orders, order, physxa) {
 		struct kho_mem_phys_bits *bits;
 		unsigned long phys;
 
 		chunk = new_chunk(chunk, order);
-		if (!chunk)
+		if (IS_ERR(chunk)) {
+			err = PTR_ERR(chunk);
 			goto err_free;
+		}
 
 		if (!first_chunk)
 			first_chunk = chunk;
@@ -391,8 +401,10 @@ static int kho_mem_serialize(struct kho_serialization *ser)
 
 			if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
 				chunk = new_chunk(chunk, order);
-				if (!chunk)
+				if (IS_ERR(chunk)) {
+					err = PTR_ERR(chunk);
 					goto err_free;
+				}
 			}
 
 			elm = &chunk->bitmaps[chunk->hdr.num_elms];
@@ -409,7 +421,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
 
 err_free:
 	kho_mem_ser_free(first_chunk);
-	return -ENOMEM;
+	return err;
 }
 
 static void __init deserialize_bitmap(unsigned int order,
@@ -465,8 +477,8 @@ static void __init kho_mem_deserialize(const void *fdt)
  * area for early allocations that happen before page allocator is
  * initialized.
  */
-static struct kho_scratch *kho_scratch;
-static unsigned int kho_scratch_cnt;
+struct kho_scratch *kho_scratch;
+unsigned int kho_scratch_cnt;
 
 /*
  * The scratch areas are scaled by default as percent of memory allocated from
@@ -752,6 +764,9 @@ int kho_preserve_folio(struct folio *folio)
 	const unsigned int order = folio_order(folio);
 	struct kho_mem_track *track = &kho_out.ser.track;
 
+	if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
+		return -EINVAL;
+
 	return __kho_preserve_order(track, pfn, order);
 }
 EXPORT_SYMBOL_GPL(kho_preserve_folio);
@@ -775,6 +790,11 @@ int kho_preserve_pages(struct page *page, unsigned int nr_pages)
 	unsigned long failed_pfn = 0;
 	int err = 0;
 
+	if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT,
+					nr_pages << PAGE_SHIFT))) {
+		return -EINVAL;
+	}
+
 	while (pfn < end_pfn) {
 		const unsigned int order =
 			min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
@@ -862,16 +882,17 @@ err_free:
 	return NULL;
 }
 
-static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk)
+static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk,
+					 unsigned short order)
 {
 	struct kho_mem_track *track = &kho_out.ser.track;
 	unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
 
 	__kho_unpreserve(track, pfn, pfn + 1);
 
-	for (int i = 0; chunk->phys[i]; i++) {
+	for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) {
 		pfn = PHYS_PFN(chunk->phys[i]);
-		__kho_unpreserve(track, pfn, pfn + 1);
+		__kho_unpreserve(track, pfn, pfn + (1 << order));
 	}
 }
 
@@ -882,7 +903,7 @@ static void kho_vmalloc_free_chunks(struct kho_vmalloc *kho_vmalloc)
 	while (chunk) {
 		struct kho_vmalloc_chunk *tmp = chunk;
 
-		kho_vmalloc_unpreserve_chunk(chunk);
+		kho_vmalloc_unpreserve_chunk(chunk, kho_vmalloc->order);
 
 		chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
 		free_page((unsigned long)tmp);
@@ -992,7 +1013,7 @@ void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
 	while (chunk) {
 		struct page *page;
 
-		for (int i = 0; chunk->phys[i]; i++) {
+		for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) {
 			phys_addr_t phys = chunk->phys[i];
 
 			if (idx + contig_pages > total_pages)
diff --git a/kernel/kexec_handover_debug.c b/kernel/kexec_handover_debug.c
new file mode 100644
index 000000000000..6efb696f5426
--- /dev/null
+++ b/kernel/kexec_handover_debug.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kexec_handover_debug.c - kexec handover optional debug functionality
+ * Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#define pr_fmt(fmt) "KHO: " fmt
+
+#include "kexec_handover_internal.h"
+
+bool kho_scratch_overlap(phys_addr_t phys, size_t size)
+{
+	phys_addr_t scratch_start, scratch_end;
+	unsigned int i;
+
+	for (i = 0; i < kho_scratch_cnt; i++) {
+		scratch_start = kho_scratch[i].addr;
+		scratch_end = kho_scratch[i].addr + kho_scratch[i].size;
+
+		if (phys < scratch_end && (phys + size) > scratch_start)
+			return true;
+	}
+
+	return false;
+}
diff --git a/kernel/kexec_handover_internal.h b/kernel/kexec_handover_internal.h
new file mode 100644
index 000000000000..3c3c7148ceed
--- /dev/null
+++ b/kernel/kexec_handover_internal.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H
+#define LINUX_KEXEC_HANDOVER_INTERNAL_H
+
+#include <linux/kexec_handover.h>
+#include <linux/types.h>
+
+extern struct kho_scratch *kho_scratch;
+extern unsigned int kho_scratch_cnt;
+
+#ifdef CONFIG_KEXEC_HANDOVER_DEBUG
+bool kho_scratch_overlap(phys_addr_t phys, size_t size);
+#else
+static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size)
+{
+	return false;
+}
+#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */
+
+#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 949103fd8e9b..2c6b02d4699b 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -78,16 +78,8 @@ void debug_mutex_unlock(struct mutex *lock)
 	}
 }
 
-void debug_mutex_init(struct mutex *lock, const char *name,
-		      struct lock_class_key *key)
+void debug_mutex_init(struct mutex *lock)
 {
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	/*
-	 * Make sure we are not reinitializing a held lock:
-	 */
-	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
-	lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
-#endif
 	lock->magic = lock;
 }
 
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index de7d6702cd96..2a1d165b3167 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -43,8 +43,7 @@
 # define MUTEX_WARN_ON(cond)
 #endif
 
-void
-__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
+static void __mutex_init_generic(struct mutex *lock)
 {
 	atomic_long_set(&lock->owner, 0);
 	raw_spin_lock_init(&lock->wait_lock);
@@ -52,10 +51,8 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
 	osq_lock_init(&lock->osq);
 #endif
-
-	debug_mutex_init(lock, name, key);
+	debug_mutex_init(lock);
 }
-EXPORT_SYMBOL(__mutex_init);
 
 static inline struct task_struct *__owner_task(unsigned long owner)
 {
@@ -142,6 +139,11 @@ static inline bool __mutex_trylock(struct mutex *lock)
  * There is nothing that would stop spreading the lockdep annotations outwards
  * except more code.
  */
+void mutex_init_generic(struct mutex *lock)
+{
+	__mutex_init_generic(lock);
+}
+EXPORT_SYMBOL(mutex_init_generic);
 
 /*
  * Optimistic trylock that only works in the uncontended case. Make sure to
@@ -166,7 +168,21 @@ static __always_inline bool __mutex_unlock_fast(struct mutex *lock)
 
 	return atomic_long_try_cmpxchg_release(&lock->owner, &curr, 0UL);
 }
-#endif
+
+#else /* !CONFIG_DEBUG_LOCK_ALLOC */
+
+void mutex_init_lockep(struct mutex *lock, const char *name, struct lock_class_key *key)
+{
+	__mutex_init_generic(lock);
+
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+	lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
+}
+EXPORT_SYMBOL(mutex_init_lockep);
+#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
 
 static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag)
 {
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 2e8080a9bee3..9ad4da8cea00 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -59,8 +59,7 @@ extern void debug_mutex_add_waiter(struct mutex *lock,
 extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 				      struct task_struct *task);
 extern void debug_mutex_unlock(struct mutex *lock);
-extern void debug_mutex_init(struct mutex *lock, const char *name,
-			     struct lock_class_key *key);
+extern void debug_mutex_init(struct mutex *lock);
 #else /* CONFIG_DEBUG_MUTEXES */
 # define debug_mutex_lock_common(lock, waiter)		do { } while (0)
 # define debug_mutex_wake_waiter(lock, waiter)		do { } while (0)
@@ -68,6 +67,6 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
 # define debug_mutex_add_waiter(lock, waiter, ti)	do { } while (0)
 # define debug_mutex_remove_waiter(lock, waiter, ti)	do { } while (0)
 # define debug_mutex_unlock(lock)			do { } while (0)
-# define debug_mutex_init(lock, name, key)		do { } while (0)
+# define debug_mutex_init(lock)				do { } while (0)
 #endif /* !CONFIG_DEBUG_MUTEXES */
 #endif /* CONFIG_PREEMPT_RT */
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index bafd5af98eae..59dbd29cb219 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -515,13 +515,11 @@ void rt_mutex_debug_task_free(struct task_struct *task)
 
 #ifdef CONFIG_PREEMPT_RT
 /* Mutexes */
-void __mutex_rt_init(struct mutex *mutex, const char *name,
-		     struct lock_class_key *key)
+static void __mutex_rt_init_generic(struct mutex *mutex)
 {
+	rt_mutex_base_init(&mutex->rtmutex);
 	debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
-	lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP);
 }
-EXPORT_SYMBOL(__mutex_rt_init);
 
 static __always_inline int __mutex_lock_common(struct mutex *lock,
 					       unsigned int state,
@@ -542,6 +540,13 @@ static __always_inline int __mutex_lock_common(struct mutex *lock,
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
+void mutex_rt_init_lockdep(struct mutex *mutex, const char *name, struct lock_class_key *key)
+{
+	__mutex_rt_init_generic(mutex);
+	lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP);
+}
+EXPORT_SYMBOL(mutex_rt_init_lockdep);
+
 void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 {
 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
@@ -598,6 +603,12 @@ int __sched _mutex_trylock_nest_lock(struct mutex *lock,
 EXPORT_SYMBOL_GPL(_mutex_trylock_nest_lock);
 #else /* CONFIG_DEBUG_LOCK_ALLOC */
 
+void mutex_rt_init_generic(struct mutex *mutex)
+{
+	__mutex_rt_init_generic(mutex);
+}
+EXPORT_SYMBOL(mutex_rt_init_generic);
+
 void __sched mutex_lock(struct mutex *lock)
 {
 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 87b03d2e41db..2338b3adfb55 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -184,8 +184,8 @@ void do_raw_read_unlock(rwlock_t *lock)
 static inline void debug_write_lock_before(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
-	RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+	RWLOCK_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion");
+	RWLOCK_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(),
 							lock, "cpu recursion");
 }
 
diff --git a/kernel/nscommon.c b/kernel/nscommon.c
index c1fb2bad6d72..bdc3c86231d3 100644
--- a/kernel/nscommon.c
+++ b/kernel/nscommon.c
@@ -1,7 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
 
 #include <linux/ns_common.h>
+#include <linux/nstree.h>
 #include <linux/proc_ns.h>
+#include <linux/user_namespace.h>
 #include <linux/vfsdebug.h>
 
 #ifdef CONFIG_DEBUG_VFS
@@ -52,26 +55,257 @@ static void ns_debug(struct ns_common *ns, const struct proc_ns_operations *ops)
 
 int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum)
 {
+	int ret = 0;
+
 	refcount_set(&ns->__ns_ref, 1);
 	ns->stashed = NULL;
 	ns->ops = ops;
 	ns->ns_id = 0;
 	ns->ns_type = ns_type;
-	RB_CLEAR_NODE(&ns->ns_tree_node);
-	INIT_LIST_HEAD(&ns->ns_list_node);
+	ns_tree_node_init(&ns->ns_tree_node);
+	ns_tree_node_init(&ns->ns_unified_node);
+	ns_tree_node_init(&ns->ns_owner_node);
+	ns_tree_root_init(&ns->ns_owner_root);
 
 #ifdef CONFIG_DEBUG_VFS
 	ns_debug(ns, ops);
 #endif
 
-	if (inum) {
+	if (inum)
 		ns->inum = inum;
-		return 0;
-	}
-	return proc_alloc_inum(&ns->inum);
+	else
+		ret = proc_alloc_inum(&ns->inum);
+	if (ret)
+		return ret;
+	/*
+	 * Tree ref starts at 0. It's incremented when namespace enters
+	 * active use (installed in nsproxy) and decremented when all
+	 * active uses are gone. Initial namespaces are always active.
+	 */
+	if (is_ns_init_inum(ns))
+		atomic_set(&ns->__ns_ref_active, 1);
+	else
+		atomic_set(&ns->__ns_ref_active, 0);
+	return 0;
 }
 
 void __ns_common_free(struct ns_common *ns)
 {
 	proc_free_inum(ns->inum);
 }
+
+struct ns_common *__must_check ns_owner(struct ns_common *ns)
+{
+	struct user_namespace *owner;
+
+	if (unlikely(!ns->ops))
+		return NULL;
+	VFS_WARN_ON_ONCE(!ns->ops->owner);
+	owner = ns->ops->owner(ns);
+	VFS_WARN_ON_ONCE(!owner && ns != to_ns_common(&init_user_ns));
+	if (!owner)
+		return NULL;
+	/* Skip init_user_ns as it's always active */
+	if (owner == &init_user_ns)
+		return NULL;
+	return to_ns_common(owner);
+}
+
+/*
+ * The active reference count works by having each namespace that gets
+ * created take a single active reference on its owning user namespace.
+ * That single reference is only released once the child namespace's
+ * active count itself goes down.
+ *
+ * A regular namespace tree might look as follow:
+ * Legend:
+ * + : adding active reference
+ * - : dropping active reference
+ * x : always active (initial namespace)
+ *
+ *
+ *                 net_ns          pid_ns
+ *                       \        /
+ *                        +      +
+ *                        user_ns1 (2)
+ *                            |
+ *                 ipc_ns     |     uts_ns
+ *                       \    |    /
+ *                        +   +   +
+ *                        user_ns2 (3)
+ *                            |
+ *            cgroup_ns       |       mnt_ns
+ *                     \      |      /
+ *                      x     x     x
+ *                      init_user_ns (1)
+ *
+ * If both net_ns and pid_ns put their last active reference on
+ * themselves it will cascade to user_ns1 dropping its own active
+ * reference and dropping one active reference on user_ns2:
+ *
+ *                 net_ns          pid_ns
+ *                       \        /
+ *                        -      -
+ *                        user_ns1 (0)
+ *                            |
+ *                 ipc_ns     |     uts_ns
+ *                       \    |    /
+ *                        +   -   +
+ *                        user_ns2 (2)
+ *                            |
+ *            cgroup_ns       |       mnt_ns
+ *                     \      |      /
+ *                      x     x     x
+ *                      init_user_ns (1)
+ *
+ * The iteration stops once we reach a namespace that still has active
+ * references.
+ */
+void __ns_ref_active_put(struct ns_common *ns)
+{
+	/* Initial namespaces are always active. */
+	if (is_ns_init_id(ns))
+		return;
+
+	if (!atomic_dec_and_test(&ns->__ns_ref_active)) {
+		VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) < 0);
+		return;
+	}
+
+	VFS_WARN_ON_ONCE(is_ns_init_id(ns));
+	VFS_WARN_ON_ONCE(!__ns_ref_read(ns));
+
+	for (;;) {
+		ns = ns_owner(ns);
+		if (!ns)
+			return;
+		VFS_WARN_ON_ONCE(is_ns_init_id(ns));
+		if (!atomic_dec_and_test(&ns->__ns_ref_active)) {
+			VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) < 0);
+			return;
+		}
+	}
+}
+
+/*
+ * The active reference count works by having each namespace that gets
+ * created take a single active reference on its owning user namespace.
+ * That single reference is only released once the child namespace's
+ * active count itself goes down. This makes it possible to efficiently
+ * resurrect a namespace tree:
+ *
+ * A regular namespace tree might look as follow:
+ * Legend:
+ * + : adding active reference
+ * - : dropping active reference
+ * x : always active (initial namespace)
+ *
+ *
+ *                 net_ns          pid_ns
+ *                       \        /
+ *                        +      +
+ *                        user_ns1 (2)
+ *                            |
+ *                 ipc_ns     |     uts_ns
+ *                       \    |    /
+ *                        +   +   +
+ *                        user_ns2 (3)
+ *                            |
+ *            cgroup_ns       |       mnt_ns
+ *                     \      |      /
+ *                      x     x     x
+ *                      init_user_ns (1)
+ *
+ * If both net_ns and pid_ns put their last active reference on
+ * themselves it will cascade to user_ns1 dropping its own active
+ * reference and dropping one active reference on user_ns2:
+ *
+ *                 net_ns          pid_ns
+ *                       \        /
+ *                        -      -
+ *                        user_ns1 (0)
+ *                            |
+ *                 ipc_ns     |     uts_ns
+ *                       \    |    /
+ *                        +   -   +
+ *                        user_ns2 (2)
+ *                            |
+ *            cgroup_ns       |       mnt_ns
+ *                     \      |      /
+ *                      x     x     x
+ *                      init_user_ns (1)
+ *
+ * Assume the whole tree is dead but all namespaces are still active:
+ *
+ *                 net_ns          pid_ns
+ *                       \        /
+ *                        -      -
+ *                        user_ns1 (0)
+ *                            |
+ *                 ipc_ns     |     uts_ns
+ *                       \    |    /
+ *                        -   -   -
+ *                        user_ns2 (0)
+ *                            |
+ *            cgroup_ns       |       mnt_ns
+ *                     \      |      /
+ *                      x     x     x
+ *                      init_user_ns (1)
+ *
+ * Now assume the net_ns gets resurrected (.e.g., via the SIOCGSKNS ioctl()):
+ *
+ *                 net_ns          pid_ns
+ *                       \        /
+ *                        +      -
+ *                        user_ns1 (0)
+ *                            |
+ *                 ipc_ns     |     uts_ns
+ *                       \    |    /
+ *                        -   +   -
+ *                        user_ns2 (0)
+ *                            |
+ *            cgroup_ns       |       mnt_ns
+ *                     \      |      /
+ *                      x     x     x
+ *                      init_user_ns (1)
+ *
+ * If net_ns had a zero reference count and we bumped it we also need to
+ * take another reference on its owning user namespace. Similarly, if
+ * pid_ns had a zero reference count it also needs to take another
+ * reference on its owning user namespace. So both net_ns and pid_ns
+ * will each have their own reference on the owning user namespace.
+ *
+ * If the owning user namespace user_ns1 had a zero reference count then
+ * it also needs to take another reference on its owning user namespace
+ * and so on.
+ */
+void __ns_ref_active_get(struct ns_common *ns)
+{
+	int prev;
+
+	/* Initial namespaces are always active. */
+	if (is_ns_init_id(ns))
+		return;
+
+	/* If we didn't resurrect the namespace we're done. */
+	prev = atomic_fetch_add(1, &ns->__ns_ref_active);
+	VFS_WARN_ON_ONCE(prev < 0);
+	if (likely(prev))
+		return;
+
+	/*
+	 * We did resurrect it. Walk the ownership hierarchy upwards
+	 * until we found an owning user namespace that is active.
+	 */
+	for (;;) {
+		ns = ns_owner(ns);
+		if (!ns)
+			return;
+
+		VFS_WARN_ON_ONCE(is_ns_init_id(ns));
+		prev = atomic_fetch_add(1, &ns->__ns_ref_active);
+		VFS_WARN_ON_ONCE(prev < 0);
+		if (likely(prev))
+			return;
+	}
+}
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 19aa64ab08c8..259c4b4f1eeb 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,6 +26,7 @@
 #include <linux/syscalls.h>
 #include <linux/cgroup.h>
 #include <linux/perf_event.h>
+#include <linux/nstree.h>
 
 static struct kmem_cache *nsproxy_cachep;
 
@@ -59,6 +60,25 @@ static inline struct nsproxy *create_nsproxy(void)
 	return nsproxy;
 }
 
+static inline void nsproxy_free(struct nsproxy *ns)
+{
+	put_mnt_ns(ns->mnt_ns);
+	put_uts_ns(ns->uts_ns);
+	put_ipc_ns(ns->ipc_ns);
+	put_pid_ns(ns->pid_ns_for_children);
+	put_time_ns(ns->time_ns);
+	put_time_ns(ns->time_ns_for_children);
+	put_cgroup_ns(ns->cgroup_ns);
+	put_net(ns->net_ns);
+	kmem_cache_free(nsproxy_cachep, ns);
+}
+
+void deactivate_nsproxy(struct nsproxy *ns)
+{
+	nsproxy_ns_active_put(ns);
+	nsproxy_free(ns);
+}
+
 /*
  * Create new nsproxy and all of its the associated namespaces.
  * Return the newly created nsproxy.  Do not attach this to the task,
@@ -179,23 +199,11 @@ int copy_namespaces(u64 flags, struct task_struct *tsk)
 	if ((flags & CLONE_VM) == 0)
 		timens_on_fork(new_ns, tsk);
 
+	nsproxy_ns_active_get(new_ns);
 	tsk->nsproxy = new_ns;
 	return 0;
 }
 
-void free_nsproxy(struct nsproxy *ns)
-{
-	put_mnt_ns(ns->mnt_ns);
-	put_uts_ns(ns->uts_ns);
-	put_ipc_ns(ns->ipc_ns);
-	put_pid_ns(ns->pid_ns_for_children);
-	put_time_ns(ns->time_ns);
-	put_time_ns(ns->time_ns_for_children);
-	put_cgroup_ns(ns->cgroup_ns);
-	put_net(ns->net_ns);
-	kmem_cache_free(nsproxy_cachep, ns);
-}
-
 /*
  * Called from unshare. Unshare all the namespaces part of nsproxy.
  * On success, returns the new nsproxy.
@@ -232,6 +240,9 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
 
 	might_sleep();
 
+	if (new)
+		nsproxy_ns_active_get(new);
+
 	task_lock(p);
 	ns = p->nsproxy;
 	p->nsproxy = new;
@@ -241,11 +252,27 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
 		put_nsproxy(ns);
 }
 
-void exit_task_namespaces(struct task_struct *p)
+void exit_nsproxy_namespaces(struct task_struct *p)
 {
 	switch_task_namespaces(p, NULL);
 }
 
+void switch_cred_namespaces(const struct cred *old, const struct cred *new)
+{
+	ns_ref_active_get(new->user_ns);
+	ns_ref_active_put(old->user_ns);
+}
+
+void get_cred_namespaces(struct task_struct *tsk)
+{
+	ns_ref_active_get(tsk->real_cred->user_ns);
+}
+
+void exit_cred_namespaces(struct task_struct *tsk)
+{
+	ns_ref_active_put(tsk->real_cred->user_ns);
+}
+
 int exec_task_namespaces(void)
 {
 	struct task_struct *tsk = current;
@@ -315,7 +342,7 @@ static void put_nsset(struct nsset *nsset)
 	if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS))
 		free_fs_struct(nsset->fs);
 	if (nsset->nsproxy)
-		free_nsproxy(nsset->nsproxy);
+		nsproxy_free(nsset->nsproxy);
 }
 
 static int prepare_nsset(unsigned flags, struct nsset *nsset)
diff --git a/kernel/nstree.c b/kernel/nstree.c
index b24a320a11a6..f36c59e6951d 100644
--- a/kernel/nstree.c
+++ b/kernel/nstree.c
@@ -1,140 +1,261 @@
 // SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
 
 #include <linux/nstree.h>
 #include <linux/proc_ns.h>
+#include <linux/rculist.h>
 #include <linux/vfsdebug.h>
+#include <linux/syscalls.h>
+#include <linux/user_namespace.h>
 
-/**
- * struct ns_tree - Namespace tree
- * @ns_tree: Rbtree of namespaces of a particular type
- * @ns_list: Sequentially walkable list of all namespaces of this type
- * @ns_tree_lock: Seqlock to protect the tree and list
- * @type: type of namespaces in this tree
- */
-struct ns_tree {
-       struct rb_root ns_tree;
-       struct list_head ns_list;
-       seqlock_t ns_tree_lock;
-       int type;
+static __cacheline_aligned_in_smp DEFINE_SEQLOCK(ns_tree_lock);
+
+DEFINE_LOCK_GUARD_0(ns_tree_writer,
+		    write_seqlock(&ns_tree_lock),
+		    write_sequnlock(&ns_tree_lock))
+
+DEFINE_LOCK_GUARD_0(ns_tree_locked_reader,
+		    read_seqlock_excl(&ns_tree_lock),
+		    read_sequnlock_excl(&ns_tree_lock))
+
+static struct ns_tree_root ns_unified_root = { /* protected by ns_tree_lock */
+	.ns_rb = RB_ROOT,
+	.ns_list_head = LIST_HEAD_INIT(ns_unified_root.ns_list_head),
 };
 
-struct ns_tree mnt_ns_tree = {
-	.ns_tree = RB_ROOT,
-	.ns_list = LIST_HEAD_INIT(mnt_ns_tree.ns_list),
-	.ns_tree_lock = __SEQLOCK_UNLOCKED(mnt_ns_tree.ns_tree_lock),
-	.type = CLONE_NEWNS,
+struct ns_tree_root mnt_ns_tree = {
+	.ns_rb = RB_ROOT,
+	.ns_list_head = LIST_HEAD_INIT(mnt_ns_tree.ns_list_head),
 };
 
-struct ns_tree net_ns_tree = {
-	.ns_tree = RB_ROOT,
-	.ns_list = LIST_HEAD_INIT(net_ns_tree.ns_list),
-	.ns_tree_lock = __SEQLOCK_UNLOCKED(net_ns_tree.ns_tree_lock),
-	.type = CLONE_NEWNET,
+struct ns_tree_root net_ns_tree = {
+	.ns_rb = RB_ROOT,
+	.ns_list_head = LIST_HEAD_INIT(net_ns_tree.ns_list_head),
 };
 EXPORT_SYMBOL_GPL(net_ns_tree);
 
-struct ns_tree uts_ns_tree = {
-	.ns_tree = RB_ROOT,
-	.ns_list = LIST_HEAD_INIT(uts_ns_tree.ns_list),
-	.ns_tree_lock = __SEQLOCK_UNLOCKED(uts_ns_tree.ns_tree_lock),
-	.type = CLONE_NEWUTS,
+struct ns_tree_root uts_ns_tree = {
+	.ns_rb = RB_ROOT,
+	.ns_list_head = LIST_HEAD_INIT(uts_ns_tree.ns_list_head),
 };
 
-struct ns_tree user_ns_tree = {
-	.ns_tree = RB_ROOT,
-	.ns_list = LIST_HEAD_INIT(user_ns_tree.ns_list),
-	.ns_tree_lock = __SEQLOCK_UNLOCKED(user_ns_tree.ns_tree_lock),
-	.type = CLONE_NEWUSER,
+struct ns_tree_root user_ns_tree = {
+	.ns_rb = RB_ROOT,
+	.ns_list_head = LIST_HEAD_INIT(user_ns_tree.ns_list_head),
 };
 
-struct ns_tree ipc_ns_tree = {
-	.ns_tree = RB_ROOT,
-	.ns_list = LIST_HEAD_INIT(ipc_ns_tree.ns_list),
-	.ns_tree_lock = __SEQLOCK_UNLOCKED(ipc_ns_tree.ns_tree_lock),
-	.type = CLONE_NEWIPC,
+struct ns_tree_root ipc_ns_tree = {
+	.ns_rb = RB_ROOT,
+	.ns_list_head = LIST_HEAD_INIT(ipc_ns_tree.ns_list_head),
 };
 
-struct ns_tree pid_ns_tree = {
-	.ns_tree = RB_ROOT,
-	.ns_list = LIST_HEAD_INIT(pid_ns_tree.ns_list),
-	.ns_tree_lock = __SEQLOCK_UNLOCKED(pid_ns_tree.ns_tree_lock),
-	.type = CLONE_NEWPID,
+struct ns_tree_root pid_ns_tree = {
+	.ns_rb = RB_ROOT,
+	.ns_list_head = LIST_HEAD_INIT(pid_ns_tree.ns_list_head),
 };
 
-struct ns_tree cgroup_ns_tree = {
-	.ns_tree = RB_ROOT,
-	.ns_list = LIST_HEAD_INIT(cgroup_ns_tree.ns_list),
-	.ns_tree_lock = __SEQLOCK_UNLOCKED(cgroup_ns_tree.ns_tree_lock),
-	.type = CLONE_NEWCGROUP,
+struct ns_tree_root cgroup_ns_tree = {
+	.ns_rb = RB_ROOT,
+	.ns_list_head = LIST_HEAD_INIT(cgroup_ns_tree.ns_list_head),
 };
 
-struct ns_tree time_ns_tree = {
-	.ns_tree = RB_ROOT,
-	.ns_list = LIST_HEAD_INIT(time_ns_tree.ns_list),
-	.ns_tree_lock = __SEQLOCK_UNLOCKED(time_ns_tree.ns_tree_lock),
-	.type = CLONE_NEWTIME,
+struct ns_tree_root time_ns_tree = {
+	.ns_rb = RB_ROOT,
+	.ns_list_head = LIST_HEAD_INIT(time_ns_tree.ns_list_head),
 };
 
-DEFINE_COOKIE(namespace_cookie);
+/**
+ * ns_tree_node_init - Initialize a namespace tree node
+ * @node: The node to initialize
+ *
+ * Initializes both the rbtree node and list entry.
+ */
+void ns_tree_node_init(struct ns_tree_node *node)
+{
+	RB_CLEAR_NODE(&node->ns_node);
+	INIT_LIST_HEAD(&node->ns_list_entry);
+}
+
+/**
+ * ns_tree_root_init - Initialize a namespace tree root
+ * @root: The root to initialize
+ *
+ * Initializes both the rbtree root and list head.
+ */
+void ns_tree_root_init(struct ns_tree_root *root)
+{
+	root->ns_rb = RB_ROOT;
+	INIT_LIST_HEAD(&root->ns_list_head);
+}
+
+/**
+ * ns_tree_node_empty - Check if a namespace tree node is empty
+ * @node: The node to check
+ *
+ * Returns true if the node is not in any tree.
+ */
+bool ns_tree_node_empty(const struct ns_tree_node *node)
+{
+	return RB_EMPTY_NODE(&node->ns_node);
+}
+
+/**
+ * ns_tree_node_add - Add a node to a namespace tree
+ * @node: The node to add
+ * @root: The tree root to add to
+ * @cmp: Comparison function for rbtree insertion
+ *
+ * Adds the node to both the rbtree and the list, maintaining sorted order.
+ * The list is maintained in the same order as the rbtree to enable efficient
+ * iteration.
+ *
+ * Returns: NULL if insertion succeeded, existing node if duplicate found
+ */
+struct rb_node *ns_tree_node_add(struct ns_tree_node *node,
+				  struct ns_tree_root *root,
+				  int (*cmp)(struct rb_node *, const struct rb_node *))
+{
+	struct rb_node *ret, *prev;
+
+	/* Add to rbtree */
+	ret = rb_find_add_rcu(&node->ns_node, &root->ns_rb, cmp);
+
+	/* Add to list in sorted order */
+	prev = rb_prev(&node->ns_node);
+	if (!prev) {
+		/* No previous node, add at head */
+		list_add_rcu(&node->ns_list_entry, &root->ns_list_head);
+	} else {
+		/* Add after previous node */
+		struct ns_tree_node *prev_node;
+		prev_node = rb_entry(prev, struct ns_tree_node, ns_node);
+		list_add_rcu(&node->ns_list_entry, &prev_node->ns_list_entry);
+	}
+
+	return ret;
+}
+
+/**
+ * ns_tree_node_del - Remove a node from a namespace tree
+ * @node: The node to remove
+ * @root: The tree root to remove from
+ *
+ * Removes the node from both the rbtree and the list atomically.
+ */
+void ns_tree_node_del(struct ns_tree_node *node, struct ns_tree_root *root)
+{
+	rb_erase(&node->ns_node, &root->ns_rb);
+	RB_CLEAR_NODE(&node->ns_node);
+	list_bidir_del_rcu(&node->ns_list_entry);
+}
 
 static inline struct ns_common *node_to_ns(const struct rb_node *node)
 {
 	if (!node)
 		return NULL;
-	return rb_entry(node, struct ns_common, ns_tree_node);
+	return rb_entry(node, struct ns_common, ns_tree_node.ns_node);
 }
 
-static inline int ns_cmp(struct rb_node *a, const struct rb_node *b)
+static inline struct ns_common *node_to_ns_unified(const struct rb_node *node)
 {
-	struct ns_common *ns_a = node_to_ns(a);
-	struct ns_common *ns_b = node_to_ns(b);
-	u64 ns_id_a = ns_a->ns_id;
-	u64 ns_id_b = ns_b->ns_id;
+	if (!node)
+		return NULL;
+	return rb_entry(node, struct ns_common, ns_unified_node.ns_node);
+}
 
-	if (ns_id_a < ns_id_b)
+static inline struct ns_common *node_to_ns_owner(const struct rb_node *node)
+{
+	if (!node)
+		return NULL;
+	return rb_entry(node, struct ns_common, ns_owner_node.ns_node);
+}
+
+static int ns_id_cmp(u64 id_a, u64 id_b)
+{
+	if (id_a < id_b)
 		return -1;
-	if (ns_id_a > ns_id_b)
+	if (id_a > id_b)
 		return 1;
 	return 0;
 }
 
-void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)
+static int ns_cmp(struct rb_node *a, const struct rb_node *b)
+{
+	return ns_id_cmp(node_to_ns(a)->ns_id, node_to_ns(b)->ns_id);
+}
+
+static int ns_cmp_unified(struct rb_node *a, const struct rb_node *b)
+{
+	return ns_id_cmp(node_to_ns_unified(a)->ns_id, node_to_ns_unified(b)->ns_id);
+}
+
+static int ns_cmp_owner(struct rb_node *a, const struct rb_node *b)
 {
-	struct rb_node *node, *prev;
+	return ns_id_cmp(node_to_ns_owner(a)->ns_id, node_to_ns_owner(b)->ns_id);
+}
+
+void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree_root *ns_tree)
+{
+	struct rb_node *node;
+	const struct proc_ns_operations *ops = ns->ops;
 
 	VFS_WARN_ON_ONCE(!ns->ns_id);
 
-	write_seqlock(&ns_tree->ns_tree_lock);
+	guard(ns_tree_writer)();
 
-	VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type);
+	/* Add to per-type tree and list */
+	node = ns_tree_node_add(&ns->ns_tree_node, ns_tree, ns_cmp);
 
-	node = rb_find_add_rcu(&ns->ns_tree_node, &ns_tree->ns_tree, ns_cmp);
-	/*
-	 * If there's no previous entry simply add it after the
-	 * head and if there is add it after the previous entry.
-	 */
-	prev = rb_prev(&ns->ns_tree_node);
-	if (!prev)
-		list_add_rcu(&ns->ns_list_node, &ns_tree->ns_list);
-	else
-		list_add_rcu(&ns->ns_list_node, &node_to_ns(prev)->ns_list_node);
+	/* Add to unified tree and list */
+	ns_tree_node_add(&ns->ns_unified_node, &ns_unified_root, ns_cmp_unified);
+
+	/* Add to owner's tree if applicable */
+	if (ops) {
+		struct user_namespace *user_ns;
 
-	write_sequnlock(&ns_tree->ns_tree_lock);
+		VFS_WARN_ON_ONCE(!ops->owner);
+		user_ns = ops->owner(ns);
+		if (user_ns) {
+			struct ns_common *owner = &user_ns->ns;
+			VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER);
+
+			/* Insert into owner's tree and list */
+			ns_tree_node_add(&ns->ns_owner_node, &owner->ns_owner_root, ns_cmp_owner);
+		} else {
+			/* Only the initial user namespace doesn't have an owner. */
+			VFS_WARN_ON_ONCE(ns != to_ns_common(&init_user_ns));
+		}
+	}
 
 	VFS_WARN_ON_ONCE(node);
 }
 
-void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree)
+void __ns_tree_remove(struct ns_common *ns, struct ns_tree_root *ns_tree)
 {
-	VFS_WARN_ON_ONCE(RB_EMPTY_NODE(&ns->ns_tree_node));
-	VFS_WARN_ON_ONCE(list_empty(&ns->ns_list_node));
-	VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type);
+	const struct proc_ns_operations *ops = ns->ops;
+	struct user_namespace *user_ns;
+
+	VFS_WARN_ON_ONCE(ns_tree_node_empty(&ns->ns_tree_node));
+	VFS_WARN_ON_ONCE(list_empty(&ns->ns_tree_node.ns_list_entry));
+
+	write_seqlock(&ns_tree_lock);
+
+	/* Remove from per-type tree and list */
+	ns_tree_node_del(&ns->ns_tree_node, ns_tree);
+
+	/* Remove from unified tree and list */
+	ns_tree_node_del(&ns->ns_unified_node, &ns_unified_root);
 
-	write_seqlock(&ns_tree->ns_tree_lock);
-	rb_erase(&ns->ns_tree_node, &ns_tree->ns_tree);
-	list_bidir_del_rcu(&ns->ns_list_node);
-	RB_CLEAR_NODE(&ns->ns_tree_node);
-	write_sequnlock(&ns_tree->ns_tree_lock);
+	/* Remove from owner's tree if applicable */
+	if (ops) {
+		user_ns = ops->owner(ns);
+		if (user_ns) {
+			struct ns_common *owner = &user_ns->ns;
+			ns_tree_node_del(&ns->ns_owner_node, &owner->ns_owner_root);
+		}
+	}
+
+	write_sequnlock(&ns_tree_lock);
 }
 EXPORT_SYMBOL_GPL(__ns_tree_remove);
 
@@ -150,8 +271,19 @@ static int ns_find(const void *key, const struct rb_node *node)
 	return 0;
 }
 
+static int ns_find_unified(const void *key, const struct rb_node *node)
+{
+	const u64 ns_id = *(u64 *)key;
+	const struct ns_common *ns = node_to_ns_unified(node);
 
-static struct ns_tree *ns_tree_from_type(int ns_type)
+	if (ns_id < ns->ns_id)
+		return -1;
+	if (ns_id > ns->ns_id)
+		return 1;
+	return 0;
+}
+
+static struct ns_tree_root *ns_tree_from_type(int ns_type)
 {
 	switch (ns_type) {
 	case CLONE_NEWCGROUP:
@@ -175,73 +307,507 @@ static struct ns_tree *ns_tree_from_type(int ns_type)
 	return NULL;
 }
 
-struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type)
+static struct ns_common *__ns_unified_tree_lookup_rcu(u64 ns_id)
 {
-	struct ns_tree *ns_tree;
 	struct rb_node *node;
 	unsigned int seq;
 
-	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage");
+	do {
+		seq = read_seqbegin(&ns_tree_lock);
+		node = rb_find_rcu(&ns_id, &ns_unified_root.ns_rb, ns_find_unified);
+		if (node)
+			break;
+	} while (read_seqretry(&ns_tree_lock, seq));
+
+	return node_to_ns_unified(node);
+}
+
+static struct ns_common *__ns_tree_lookup_rcu(u64 ns_id, int ns_type)
+{
+	struct ns_tree_root *ns_tree;
+	struct rb_node *node;
+	unsigned int seq;
 
 	ns_tree = ns_tree_from_type(ns_type);
 	if (!ns_tree)
 		return NULL;
 
 	do {
-		seq = read_seqbegin(&ns_tree->ns_tree_lock);
-		node = rb_find_rcu(&ns_id, &ns_tree->ns_tree, ns_find);
+		seq = read_seqbegin(&ns_tree_lock);
+		node = rb_find_rcu(&ns_id, &ns_tree->ns_rb, ns_find);
 		if (node)
 			break;
-	} while (read_seqretry(&ns_tree->ns_tree_lock, seq));
+	} while (read_seqretry(&ns_tree_lock, seq));
 
-	if (!node)
-		return NULL;
+	return node_to_ns(node);
+}
 
-	VFS_WARN_ON_ONCE(node_to_ns(node)->ns_type != ns_type);
+struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type)
+{
+	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage");
 
-	return node_to_ns(node);
+	if (ns_type)
+		return __ns_tree_lookup_rcu(ns_id, ns_type);
+
+	return __ns_unified_tree_lookup_rcu(ns_id);
 }
 
 /**
- * ns_tree_adjoined_rcu - find the next/previous namespace in the same
+ * __ns_tree_adjoined_rcu - find the next/previous namespace in the same
  * tree
  * @ns: namespace to start from
+ * @ns_tree: namespace tree to search in
  * @previous: if true find the previous namespace, otherwise the next
  *
  * Find the next or previous namespace in the same tree as @ns. If
  * there is no next/previous namespace, -ENOENT is returned.
  */
 struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
-					 struct ns_tree *ns_tree, bool previous)
+					 struct ns_tree_root *ns_tree, bool previous)
 {
 	struct list_head *list;
 
 	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_adjoined_rcu() usage");
 
 	if (previous)
-		list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_list_node));
+		list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_tree_node.ns_list_entry));
 	else
-		list = rcu_dereference(list_next_rcu(&ns->ns_list_node));
-	if (list_is_head(list, &ns_tree->ns_list))
+		list = rcu_dereference(list_next_rcu(&ns->ns_tree_node.ns_list_entry));
+	if (list_is_head(list, &ns_tree->ns_list_head))
 		return ERR_PTR(-ENOENT);
 
-	VFS_WARN_ON_ONCE(list_entry_rcu(list, struct ns_common, ns_list_node)->ns_type != ns_tree->type);
-
-	return list_entry_rcu(list, struct ns_common, ns_list_node);
+	return list_entry_rcu(list, struct ns_common, ns_tree_node.ns_list_entry);
 }
 
 /**
- * ns_tree_gen_id - generate a new namespace id
+ * __ns_tree_gen_id - generate a new namespace id
  * @ns: namespace to generate id for
+ * @id: if non-zero, this is the initial namespace and this is a fixed id
  *
  * Generates a new namespace id and assigns it to the namespace. All
  * namespaces types share the same id space and thus can be compared
  * directly. IOW, when two ids of two namespace are equal, they are
  * identical.
  */
-u64 ns_tree_gen_id(struct ns_common *ns)
+u64 __ns_tree_gen_id(struct ns_common *ns, u64 id)
 {
-	guard(preempt)();
-	ns->ns_id = gen_cookie_next(&namespace_cookie);
+	static atomic64_t namespace_cookie = ATOMIC64_INIT(NS_LAST_INIT_ID + 1);
+
+	if (id)
+		ns->ns_id = id;
+	else
+		ns->ns_id = atomic64_inc_return(&namespace_cookie);
 	return ns->ns_id;
 }
+
+struct klistns {
+	u64 __user *uns_ids;
+	u32 nr_ns_ids;
+	u64 last_ns_id;
+	u64 user_ns_id;
+	u32 ns_type;
+	struct user_namespace *user_ns;
+	bool userns_capable;
+	struct ns_common *first_ns;
+};
+
+static void __free_klistns_free(const struct klistns *kls)
+{
+	if (kls->user_ns_id != LISTNS_CURRENT_USER)
+		put_user_ns(kls->user_ns);
+	if (kls->first_ns && kls->first_ns->ops)
+		kls->first_ns->ops->put(kls->first_ns);
+}
+
+#define NS_ALL (PID_NS | USER_NS | MNT_NS | UTS_NS | IPC_NS | NET_NS | CGROUP_NS | TIME_NS)
+
+static int copy_ns_id_req(const struct ns_id_req __user *req,
+			  struct ns_id_req *kreq)
+{
+	int ret;
+	size_t usize;
+
+	BUILD_BUG_ON(sizeof(struct ns_id_req) != NS_ID_REQ_SIZE_VER0);
+
+	ret = get_user(usize, &req->size);
+	if (ret)
+		return -EFAULT;
+	if (unlikely(usize > PAGE_SIZE))
+		return -E2BIG;
+	if (unlikely(usize < NS_ID_REQ_SIZE_VER0))
+		return -EINVAL;
+	memset(kreq, 0, sizeof(*kreq));
+	ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize);
+	if (ret)
+		return ret;
+	if (kreq->spare != 0)
+		return -EINVAL;
+	if (kreq->ns_type & ~NS_ALL)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+static inline int prepare_klistns(struct klistns *kls, struct ns_id_req *kreq,
+				  u64 __user *ns_ids, size_t nr_ns_ids)
+{
+	kls->last_ns_id = kreq->ns_id;
+	kls->user_ns_id = kreq->user_ns_id;
+	kls->nr_ns_ids	= nr_ns_ids;
+	kls->ns_type	= kreq->ns_type;
+	kls->uns_ids	= ns_ids;
+	return 0;
+}
+
+/*
+ * Lookup a namespace owned by owner with id >= ns_id.
+ * Returns the namespace with the smallest id that is >= ns_id.
+ */
+static struct ns_common *lookup_ns_owner_at(u64 ns_id, struct ns_common *owner)
+{
+	struct ns_common *ret = NULL;
+	struct rb_node *node;
+
+	VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER);
+
+	guard(ns_tree_locked_reader)();
+
+	node = owner->ns_owner_root.ns_rb.rb_node;
+	while (node) {
+		struct ns_common *ns;
+
+		ns = node_to_ns_owner(node);
+		if (ns_id <= ns->ns_id) {
+			ret = ns;
+			if (ns_id == ns->ns_id)
+				break;
+			node = node->rb_left;
+		} else {
+			node = node->rb_right;
+		}
+	}
+
+	if (ret)
+		ret = ns_get_unless_inactive(ret);
+	return ret;
+}
+
+static struct ns_common *lookup_ns_id(u64 mnt_ns_id, int ns_type)
+{
+	struct ns_common *ns;
+
+	guard(rcu)();
+	ns = ns_tree_lookup_rcu(mnt_ns_id, ns_type);
+	if (!ns)
+		return NULL;
+
+	if (!ns_get_unless_inactive(ns))
+		return NULL;
+
+	return ns;
+}
+
+static inline bool __must_check ns_requested(const struct klistns *kls,
+					     const struct ns_common *ns)
+{
+	return !kls->ns_type || (kls->ns_type & ns->ns_type);
+}
+
+static inline bool __must_check may_list_ns(const struct klistns *kls,
+					    struct ns_common *ns)
+{
+	if (kls->user_ns) {
+		if (kls->userns_capable)
+			return true;
+	} else {
+		struct ns_common *owner;
+		struct user_namespace *user_ns;
+
+		owner = ns_owner(ns);
+		if (owner)
+			user_ns = to_user_ns(owner);
+		else
+			user_ns = &init_user_ns;
+		if (ns_capable_noaudit(user_ns, CAP_SYS_ADMIN))
+			return true;
+	}
+
+	if (is_current_namespace(ns))
+		return true;
+
+	if (ns->ns_type != CLONE_NEWUSER)
+		return false;
+
+	if (ns_capable_noaudit(to_user_ns(ns), CAP_SYS_ADMIN))
+		return true;
+
+	return false;
+}
+
+static inline void ns_put(struct ns_common *ns)
+{
+	if (ns && ns->ops)
+		ns->ops->put(ns);
+}
+
+DEFINE_FREE(ns_put, struct ns_common *, if (!IS_ERR_OR_NULL(_T)) ns_put(_T))
+
+static inline struct ns_common *__must_check legitimize_ns(const struct klistns *kls,
+							   struct ns_common *candidate)
+{
+	struct ns_common *ns __free(ns_put) = NULL;
+
+	if (!ns_requested(kls, candidate))
+		return NULL;
+
+	ns = ns_get_unless_inactive(candidate);
+	if (!ns)
+		return NULL;
+
+	if (!may_list_ns(kls, ns))
+		return NULL;
+
+	return no_free_ptr(ns);
+}
+
+static ssize_t do_listns_userns(struct klistns *kls)
+{
+	u64 __user *ns_ids = kls->uns_ids;
+	size_t nr_ns_ids = kls->nr_ns_ids;
+	struct ns_common *ns = NULL, *first_ns = NULL, *prev = NULL;
+	const struct list_head *head;
+	ssize_t ret;
+
+	VFS_WARN_ON_ONCE(!kls->user_ns_id);
+
+	if (kls->user_ns_id == LISTNS_CURRENT_USER)
+		ns = to_ns_common(current_user_ns());
+	else if (kls->user_ns_id)
+		ns = lookup_ns_id(kls->user_ns_id, CLONE_NEWUSER);
+	if (!ns)
+		return -EINVAL;
+	kls->user_ns = to_user_ns(ns);
+
+	/*
+	 * Use the rbtree to find the first namespace we care about and
+	 * then use it's list entry to iterate from there.
+	 */
+	if (kls->last_ns_id) {
+		kls->first_ns = lookup_ns_owner_at(kls->last_ns_id + 1, ns);
+		if (!kls->first_ns)
+			return -ENOENT;
+		first_ns = kls->first_ns;
+	}
+
+	ret = 0;
+	head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head;
+	kls->userns_capable = ns_capable_noaudit(kls->user_ns, CAP_SYS_ADMIN);
+
+	rcu_read_lock();
+
+	if (!first_ns)
+		first_ns = list_entry_rcu(head->next, typeof(*first_ns), ns_owner_node.ns_list_entry);
+
+	ns = first_ns;
+	list_for_each_entry_from_rcu(ns, head, ns_owner_node.ns_list_entry) {
+		struct ns_common *valid;
+
+		if (!nr_ns_ids)
+			break;
+
+		valid = legitimize_ns(kls, ns);
+		if (!valid)
+			continue;
+
+		rcu_read_unlock();
+
+		ns_put(prev);
+		prev = valid;
+
+		if (put_user(valid->ns_id, ns_ids + ret)) {
+			ns_put(prev);
+			return -EFAULT;
+		}
+
+		nr_ns_ids--;
+		ret++;
+
+		rcu_read_lock();
+	}
+
+	rcu_read_unlock();
+	ns_put(prev);
+	return ret;
+}
+
+/*
+ * Lookup a namespace with id >= ns_id in either the unified tree or a type-specific tree.
+ * Returns the namespace with the smallest id that is >= ns_id.
+ */
+static struct ns_common *lookup_ns_id_at(u64 ns_id, int ns_type)
+{
+	struct ns_common *ret = NULL;
+	struct ns_tree_root *ns_tree = NULL;
+	struct rb_node *node;
+
+	if (ns_type) {
+		ns_tree = ns_tree_from_type(ns_type);
+		if (!ns_tree)
+			return NULL;
+	}
+
+	guard(ns_tree_locked_reader)();
+
+	if (ns_tree)
+		node = ns_tree->ns_rb.rb_node;
+	else
+		node = ns_unified_root.ns_rb.rb_node;
+
+	while (node) {
+		struct ns_common *ns;
+
+		if (ns_type)
+			ns = node_to_ns(node);
+		else
+			ns = node_to_ns_unified(node);
+
+		if (ns_id <= ns->ns_id) {
+			if (ns_type)
+				ret = node_to_ns(node);
+			else
+				ret = node_to_ns_unified(node);
+			if (ns_id == ns->ns_id)
+				break;
+			node = node->rb_left;
+		} else {
+			node = node->rb_right;
+		}
+	}
+
+	if (ret)
+		ret = ns_get_unless_inactive(ret);
+	return ret;
+}
+
+static inline struct ns_common *first_ns_common(const struct list_head *head,
+						struct ns_tree_root *ns_tree)
+{
+	if (ns_tree)
+		return list_entry_rcu(head->next, struct ns_common, ns_tree_node.ns_list_entry);
+	return list_entry_rcu(head->next, struct ns_common, ns_unified_node.ns_list_entry);
+}
+
+static inline struct ns_common *next_ns_common(struct ns_common *ns,
+					       struct ns_tree_root *ns_tree)
+{
+	if (ns_tree)
+		return list_entry_rcu(ns->ns_tree_node.ns_list_entry.next, struct ns_common, ns_tree_node.ns_list_entry);
+	return list_entry_rcu(ns->ns_unified_node.ns_list_entry.next, struct ns_common, ns_unified_node.ns_list_entry);
+}
+
+static inline bool ns_common_is_head(struct ns_common *ns,
+				     const struct list_head *head,
+				     struct ns_tree_root *ns_tree)
+{
+	if (ns_tree)
+		return &ns->ns_tree_node.ns_list_entry == head;
+	return &ns->ns_unified_node.ns_list_entry == head;
+}
+
+static ssize_t do_listns(struct klistns *kls)
+{
+	u64 __user *ns_ids = kls->uns_ids;
+	size_t nr_ns_ids = kls->nr_ns_ids;
+	struct ns_common *ns, *first_ns = NULL, *prev = NULL;
+	struct ns_tree_root *ns_tree = NULL;
+	const struct list_head *head;
+	u32 ns_type;
+	ssize_t ret;
+
+	if (hweight32(kls->ns_type) == 1)
+		ns_type = kls->ns_type;
+	else
+		ns_type = 0;
+
+	if (ns_type) {
+		ns_tree = ns_tree_from_type(ns_type);
+		if (!ns_tree)
+			return -EINVAL;
+	}
+
+	if (kls->last_ns_id) {
+		kls->first_ns = lookup_ns_id_at(kls->last_ns_id + 1, ns_type);
+		if (!kls->first_ns)
+			return -ENOENT;
+		first_ns = kls->first_ns;
+	}
+
+	ret = 0;
+	if (ns_tree)
+		head = &ns_tree->ns_list_head;
+	else
+		head = &ns_unified_root.ns_list_head;
+
+	rcu_read_lock();
+
+	if (!first_ns)
+		first_ns = first_ns_common(head, ns_tree);
+
+	for (ns = first_ns; !ns_common_is_head(ns, head, ns_tree) && nr_ns_ids;
+	     ns = next_ns_common(ns, ns_tree)) {
+		struct ns_common *valid;
+
+		valid = legitimize_ns(kls, ns);
+		if (!valid)
+			continue;
+
+		rcu_read_unlock();
+
+		ns_put(prev);
+		prev = valid;
+
+		if (put_user(valid->ns_id, ns_ids + ret)) {
+			ns_put(prev);
+			return -EFAULT;
+		}
+
+		nr_ns_ids--;
+		ret++;
+
+		rcu_read_lock();
+	}
+
+	rcu_read_unlock();
+	ns_put(prev);
+	return ret;
+}
+
+SYSCALL_DEFINE4(listns, const struct ns_id_req __user *, req,
+		u64 __user *, ns_ids, size_t, nr_ns_ids, unsigned int, flags)
+{
+	struct klistns klns __free(klistns_free) = {};
+	const size_t maxcount = 1000000;
+	struct ns_id_req kreq;
+	ssize_t ret;
+
+	if (flags)
+		return -EINVAL;
+
+	if (unlikely(nr_ns_ids > maxcount))
+		return -EOVERFLOW;
+
+	if (!access_ok(ns_ids, nr_ns_ids * sizeof(*ns_ids)))
+		return -EFAULT;
+
+	ret = copy_ns_id_req(req, &kreq);
+	if (ret)
+		return ret;
+
+	ret = prepare_klistns(&klns, &kreq, ns_ids, nr_ns_ids);
+	if (ret)
+		return ret;
+
+	if (kreq.user_ns_id)
+		return do_listns_userns(&klns);
+
+	return do_listns(&klns);
+}
diff --git a/kernel/pid.c b/kernel/pid.c
index 4fffec767a63..a31771bc89c1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -71,21 +71,16 @@ static int pid_max_max = PID_MAX_LIMIT;
  * the scheme scales to up to 4 million PIDs, runtime.
  */
 struct pid_namespace init_pid_ns = {
-	.ns.__ns_ref = REFCOUNT_INIT(2),
+	.ns = NS_COMMON_INIT(init_pid_ns),
 	.idr = IDR_INIT(init_pid_ns.idr),
 	.pid_allocated = PIDNS_ADDING,
 	.level = 0,
 	.child_reaper = &init_task,
 	.user_ns = &init_user_ns,
-	.ns.inum = ns_init_inum(&init_pid_ns),
-#ifdef CONFIG_PID_NS
-	.ns.ops = &pidns_operations,
-#endif
 	.pid_max = PID_MAX_DEFAULT,
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
 	.memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,
 #endif
-	.ns.ns_type = ns_common_type(&init_pid_ns),
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
@@ -117,9 +112,13 @@ static void delayed_put_pid(struct rcu_head *rhp)
 void free_pid(struct pid *pid)
 {
 	int i;
+	struct pid_namespace *active_ns;
 
 	lockdep_assert_not_held(&tasklist_lock);
 
+	active_ns = pid->numbers[pid->level].ns;
+	ns_ref_active_put(active_ns);
+
 	spin_lock(&pidmap_lock);
 	for (i = 0; i <= pid->level; i++) {
 		struct upid *upid = pid->numbers + i;
@@ -283,6 +282,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 	}
 	spin_unlock(&pidmap_lock);
 	idr_preload_end();
+	ns_ref_active_get(ns);
 
 	return pid;
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 650be58d8d18..e48f5de41361 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -184,7 +184,7 @@ struct pid_namespace *copy_pid_ns(u64 flags,
 
 void put_pid_ns(struct pid_namespace *ns)
 {
-	if (ns && ns != &init_pid_ns && ns_ref_put(ns))
+	if (ns && ns_ref_put(ns))
 		schedule_work(&ns->work);
 }
 EXPORT_SYMBOL_GPL(put_pid_ns);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 53166ef86ba4..26e45f86b955 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -821,8 +821,7 @@ int hibernate(void)
 		goto Restore;
 
 	ksys_sync_helper();
-	if (filesystem_freeze_enabled)
-		filesystems_freeze();
+	filesystems_freeze(filesystem_freeze_enabled);
 
 	error = freeze_processes();
 	if (error)
@@ -928,8 +927,7 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data)
 	if (error)
 		goto restore;
 
-	if (filesystem_freeze_enabled)
-		filesystems_freeze();
+	filesystems_freeze(filesystem_freeze_enabled);
 
 	error = freeze_processes();
 	if (error)
@@ -1079,8 +1077,7 @@ static int software_resume(void)
 	if (error)
 		goto Restore;
 
-	if (filesystem_freeze_enabled)
-		filesystems_freeze();
+	filesystems_freeze(filesystem_freeze_enabled);
 
 	pm_pr_dbg("Preparing processes for hibernation restore.\n");
 	error = freeze_processes();
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index b4ca17c2fecf..3d4ebedad69f 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -375,8 +375,7 @@ static int suspend_prepare(suspend_state_t state)
 	if (error)
 		goto Restore;
 
-	if (filesystem_freeze_enabled)
-		filesystems_freeze();
+	filesystems_freeze(filesystem_freeze_enabled);
 	trace_suspend_resume(TPS("freeze_processes"), 0, true);
 	error = suspend_freeze_processes();
 	trace_suspend_resume(TPS("freeze_processes"), 0, false);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 0beff7eeaaba..70ae21f7370d 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -635,7 +635,7 @@ struct cmp_data {
 };
 
 /* Indicates the image size after compression */
-static atomic_t compressed_size = ATOMIC_INIT(0);
+static atomic64_t compressed_size = ATOMIC_INIT(0);
 
 /*
  * Compression function that runs in its own thread.
@@ -664,7 +664,7 @@ static int compress_threadfn(void *data)
 		d->ret = crypto_acomp_compress(d->cr);
 		d->cmp_len = d->cr->dlen;
 
-		atomic_set(&compressed_size, atomic_read(&compressed_size) + d->cmp_len);
+		atomic64_add(d->cmp_len, &compressed_size);
 		atomic_set_release(&d->stop, 1);
 		wake_up(&d->done);
 	}
@@ -689,14 +689,14 @@ static int save_compressed_image(struct swap_map_handle *handle,
 	ktime_t start;
 	ktime_t stop;
 	size_t off;
-	unsigned thr, run_threads, nr_threads;
+	unsigned int thr, run_threads, nr_threads;
 	unsigned char *page = NULL;
 	struct cmp_data *data = NULL;
 	struct crc_data *crc = NULL;
 
 	hib_init_batch(&hb);
 
-	atomic_set(&compressed_size, 0);
+	atomic64_set(&compressed_size, 0);
 
 	/*
 	 * We'll limit the number of threads for compression to limit memory
@@ -877,11 +877,14 @@ out_finish:
 	stop = ktime_get();
 	if (!ret)
 		ret = err2;
-	if (!ret)
+	if (!ret) {
+		swsusp_show_speed(start, stop, nr_to_write, "Wrote");
+		pr_info("Image size after compression: %lld kbytes\n",
+			(atomic64_read(&compressed_size) / 1024));
 		pr_info("Image saving done\n");
-	swsusp_show_speed(start, stop, nr_to_write, "Wrote");
-	pr_info("Image size after compression: %d kbytes\n",
-		(atomic_read(&compressed_size) / 1024));
+	} else {
+		pr_err("Image saving failed: %d\n", ret);
+	}
 
 out_clean:
 	hib_finish_batch(&hb);
@@ -899,7 +902,8 @@ out_clean:
 		}
 		vfree(data);
 	}
-	if (page) free_page((unsigned long)page);
+	if (page)
+		free_page((unsigned long)page);
 
 	return ret;
 }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 7097de2c8cda..4f97896887ec 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -313,10 +313,8 @@ static u64 read_sum_exec_runtime(struct task_struct *t)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
 	struct signal_struct *sig = tsk->signal;
-	u64 utime, stime;
 	struct task_struct *t;
-	unsigned int seq, nextseq;
-	unsigned long flags;
+	u64 utime, stime;
 
 	/*
 	 * Update current task runtime to account pending time since last
@@ -329,27 +327,19 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 	if (same_thread_group(current, tsk))
 		(void) task_sched_runtime(current);
 
-	rcu_read_lock();
-	/* Attempt a lockless read on the first round. */
-	nextseq = 0;
-	do {
-		seq = nextseq;
-		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+	guard(rcu)();
+	scoped_seqlock_read (&sig->stats_lock, ss_lock_irqsave) {
 		times->utime = sig->utime;
 		times->stime = sig->stime;
 		times->sum_exec_runtime = sig->sum_sched_runtime;
 
-		for_each_thread(tsk, t) {
+		__for_each_thread(sig, t) {
 			task_cputime(t, &utime, &stime);
 			times->utime += utime;
 			times->stime += stime;
 			times->sum_exec_runtime += read_sum_exec_runtime(t);
 		}
-		/* If lockless access failed, take the lock. */
-		nextseq = 1;
-	} while (need_seqretry(&sig->stats_lock, seq));
-	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
-	rcu_read_unlock();
+	}
 }
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index ecb251e883ea..979484dab2d3 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -25,7 +25,7 @@ static struct scx_sched __rcu *scx_root;
  * guarantee system safety. Maintain a dedicated task list which contains every
  * task between its fork and eventual free.
  */
-static DEFINE_SPINLOCK(scx_tasks_lock);
+static DEFINE_RAW_SPINLOCK(scx_tasks_lock);
 static LIST_HEAD(scx_tasks);
 
 /* ops enable/disable */
@@ -476,7 +476,7 @@ static void scx_task_iter_start(struct scx_task_iter *iter)
 	BUILD_BUG_ON(__SCX_DSQ_ITER_ALL_FLAGS &
 		     ((1U << __SCX_DSQ_LNODE_PRIV_SHIFT) - 1));
 
-	spin_lock_irq(&scx_tasks_lock);
+	raw_spin_lock_irq(&scx_tasks_lock);
 
 	iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR };
 	list_add(&iter->cursor.tasks_node, &scx_tasks);
@@ -507,14 +507,14 @@ static void scx_task_iter_unlock(struct scx_task_iter *iter)
 	__scx_task_iter_rq_unlock(iter);
 	if (iter->list_locked) {
 		iter->list_locked = false;
-		spin_unlock_irq(&scx_tasks_lock);
+		raw_spin_unlock_irq(&scx_tasks_lock);
 	}
 }
 
 static void __scx_task_iter_maybe_relock(struct scx_task_iter *iter)
 {
 	if (!iter->list_locked) {
-		spin_lock_irq(&scx_tasks_lock);
+		raw_spin_lock_irq(&scx_tasks_lock);
 		iter->list_locked = true;
 	}
 }
@@ -2940,9 +2940,9 @@ void scx_post_fork(struct task_struct *p)
 		}
 	}
 
-	spin_lock_irq(&scx_tasks_lock);
+	raw_spin_lock_irq(&scx_tasks_lock);
 	list_add_tail(&p->scx.tasks_node, &scx_tasks);
-	spin_unlock_irq(&scx_tasks_lock);
+	raw_spin_unlock_irq(&scx_tasks_lock);
 
 	percpu_up_read(&scx_fork_rwsem);
 }
@@ -2966,9 +2966,9 @@ void sched_ext_free(struct task_struct *p)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&scx_tasks_lock, flags);
+	raw_spin_lock_irqsave(&scx_tasks_lock, flags);
 	list_del_init(&p->scx.tasks_node);
-	spin_unlock_irqrestore(&scx_tasks_lock, flags);
+	raw_spin_unlock_irqrestore(&scx_tasks_lock, flags);
 
 	/*
 	 * @p is off scx_tasks and wholly ours. scx_enable()'s READY -> ENABLED
@@ -4276,7 +4276,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 		size_t avail, used;
 		bool idle;
 
-		rq_lock(rq, &rf);
+		rq_lock_irqsave(rq, &rf);
 
 		idle = list_empty(&rq->scx.runnable_list) &&
 			rq->curr->sched_class == &idle_sched_class;
@@ -4345,7 +4345,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 		list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node)
 			scx_dump_task(&s, &dctx, p, ' ');
 	next:
-		rq_unlock(rq, &rf);
+		rq_unlock_irqrestore(rq, &rf);
 	}
 
 	dump_newline(&s);
@@ -4479,8 +4479,11 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
 		goto err_free_gdsqs;
 
 	sch->helper = kthread_run_worker(0, "sched_ext_helper");
-	if (!sch->helper)
+	if (IS_ERR(sch->helper)) {
+		ret = PTR_ERR(sch->helper);
 		goto err_free_pcpu;
+	}
+
 	sched_set_fifo(sch->helper->task);
 
 	atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
@@ -5321,8 +5324,8 @@ void __init init_sched_ext_class(void)
 		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL, n));
 		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_preempt, GFP_KERNEL, n));
 		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n));
-		init_irq_work(&rq->scx.deferred_irq_work, deferred_irq_workfn);
-		init_irq_work(&rq->scx.kick_cpus_irq_work, kick_cpus_irq_workfn);
+		rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn);
+		rq->scx.kick_cpus_irq_work = IRQ_WORK_INIT_HARD(kick_cpus_irq_workfn);
 
 		if (cpu_online(cpu))
 			cpu_rq(cpu)->scx.flags |= SCX_RQ_ONLINE;
@@ -6401,7 +6404,7 @@ __bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf)
 
 	guard(rcu)();
 
-	sch = rcu_dereference(sch);
+	sch = rcu_dereference(scx_root);
 	if (unlikely(!sch))
 		return;
 
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 5b6997f4dc3d..e76be24b132c 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -478,11 +478,8 @@ const struct proc_ns_operations timens_for_children_operations = {
 };
 
 struct time_namespace init_time_ns = {
-	.ns.ns_type	= ns_common_type(&init_time_ns),
-	.ns.__ns_ref	= REFCOUNT_INIT(3),
+	.ns		= NS_COMMON_INIT(init_time_ns),
 	.user_ns	= &init_user_ns,
-	.ns.inum	= ns_init_inum(&init_time_ns),
-	.ns.ops		= &timens_operations,
 	.frozen_offsets	= true,
 };
 
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index aa3120104a51..56e17b625c72 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -475,12 +475,6 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event,
 	if (!kc->timer_create)
 		return -EOPNOTSUPP;
 
-	new_timer = alloc_posix_timer();
-	if (unlikely(!new_timer))
-		return -EAGAIN;
-
-	spin_lock_init(&new_timer->it_lock);
-
 	/* Special case for CRIU to restore timers with a given timer ID. */
 	if (unlikely(current->signal->timer_create_restore_ids)) {
 		if (copy_from_user(&req_id, created_timer_id, sizeof(req_id)))
@@ -490,6 +484,12 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event,
 			return -EINVAL;
 	}
 
+	new_timer = alloc_posix_timer();
+	if (unlikely(!new_timer))
+		return -EAGAIN;
+
+	spin_lock_init(&new_timer->it_lock);
+
 	/*
 	 * Add the timer to the hash table. The timer is not yet valid
 	 * after insertion, but has a unique ID allocated.
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c527b421c865..466e083c8272 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1152,16 +1152,15 @@ static bool report_idle_softirq(void)
 			return false;
 	}
 
-	if (ratelimit >= 10)
-		return false;
-
 	/* On RT, softirq handling may be waiting on some lock */
 	if (local_bh_blocked())
 		return false;
 
-	pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
-		pending);
-	ratelimit++;
+	if (ratelimit < 10) {
+		pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
+			pending);
+		ratelimit++;
+	}
 
 	return true;
 }
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3a4d3b2e3f74..4790da895203 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -3060,29 +3060,34 @@ static const struct attribute_group aux_clock_enable_attr_group = {
 static int __init tk_aux_sysfs_init(void)
 {
 	struct kobject *auxo, *tko = kobject_create_and_add("time", kernel_kobj);
+	int ret = -ENOMEM;
 
 	if (!tko)
-		return -ENOMEM;
+		return ret;
 
 	auxo = kobject_create_and_add("aux_clocks", tko);
-	if (!auxo) {
-		kobject_put(tko);
-		return -ENOMEM;
-	}
+	if (!auxo)
+		goto err_clean;
 
 	for (int i = 0; i < MAX_AUX_CLOCKS; i++) {
 		char id[2] = { [0] = '0' + i, };
 		struct kobject *clk = kobject_create_and_add(id, auxo);
 
-		if (!clk)
-			return -ENOMEM;
-
-		int ret = sysfs_create_group(clk, &aux_clock_enable_attr_group);
+		if (!clk) {
+			ret = -ENOMEM;
+			goto err_clean;
+		}
 
+		ret = sysfs_create_group(clk, &aux_clock_enable_attr_group);
 		if (ret)
-			return ret;
+			goto err_clean;
 	}
 	return 0;
+
+err_clean:
+	kobject_put(auxo);
+	kobject_put(tko);
+	return ret;
 }
 late_initcall(tk_aux_sysfs_init);
 
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 553fa469d7cc..d5ebb1d927ea 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1458,10 +1458,11 @@ static int __try_to_del_timer_sync(struct timer_list *timer, bool shutdown)
 
 	base = lock_timer_base(timer, &flags);
 
-	if (base->running_timer != timer)
+	if (base->running_timer != timer) {
 		ret = detach_if_pending(timer, base, true);
-	if (shutdown)
-		timer->function = NULL;
+		if (shutdown)
+			timer->function = NULL;
+	}
 
 	raw_spin_unlock_irqrestore(&base->lock, flags);
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 42bd2ba68a82..59cfacb8a5bb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1971,7 +1971,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops)
  */
 static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
 					 struct ftrace_hash *old_hash,
-					 struct ftrace_hash *new_hash)
+					 struct ftrace_hash *new_hash,
+					 bool update_target)
 {
 	struct ftrace_page *pg;
 	struct dyn_ftrace *rec, *end = NULL;
@@ -2006,10 +2007,13 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
 		if (rec->flags & FTRACE_FL_DISABLED)
 			continue;
 
-		/* We need to update only differences of filter_hash */
+		/*
+		 * Unless we are updating the target of a direct function,
+		 * we only need to update differences of filter_hash
+		 */
 		in_old = !!ftrace_lookup_ip(old_hash, rec->ip);
 		in_new = !!ftrace_lookup_ip(new_hash, rec->ip);
-		if (in_old == in_new)
+		if (!update_target && (in_old == in_new))
 			continue;
 
 		if (in_new) {
@@ -2020,7 +2024,16 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
 				if (is_ipmodify)
 					goto rollback;
 
-				FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT);
+				/*
+				 * If this is called by __modify_ftrace_direct()
+				 * then it is only changing where the direct
+				 * pointer is jumping to, and the record already
+				 * points to a direct trampoline. If it isn't,
+				 * then it is a bug to update ipmodify on a direct
+				 * caller.
+				 */
+				FTRACE_WARN_ON(!update_target &&
+					       (rec->flags & FTRACE_FL_DIRECT));
 
 				/*
 				 * Another ops with IPMODIFY is already
@@ -2076,7 +2089,7 @@ static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops)
 	if (ftrace_hash_empty(hash))
 		hash = NULL;
 
-	return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash);
+	return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash, false);
 }
 
 /* Disabling always succeeds */
@@ -2087,7 +2100,7 @@ static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops)
 	if (ftrace_hash_empty(hash))
 		hash = NULL;
 
-	__ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH);
+	__ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH, false);
 }
 
 static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
@@ -2101,7 +2114,7 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
 	if (ftrace_hash_empty(new_hash))
 		new_hash = NULL;
 
-	return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash);
+	return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash, false);
 }
 
 static void print_ip_ins(const char *fmt, const unsigned char *p)
@@ -5953,6 +5966,17 @@ static void register_ftrace_direct_cb(struct rcu_head *rhp)
 	free_ftrace_hash(fhp);
 }
 
+static void reset_direct(struct ftrace_ops *ops, unsigned long addr)
+{
+	struct ftrace_hash *hash = ops->func_hash->filter_hash;
+
+	remove_direct_functions_hash(hash, addr);
+
+	/* cleanup for possible another register call */
+	ops->func = NULL;
+	ops->trampoline = 0;
+}
+
 /**
  * register_ftrace_direct - Call a custom trampoline directly
  * for multiple functions registered in @ops
@@ -6048,6 +6072,8 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 	ops->direct_call = addr;
 
 	err = register_ftrace_function_nolock(ops);
+	if (err)
+		reset_direct(ops, addr);
 
  out_unlock:
 	mutex_unlock(&direct_mutex);
@@ -6080,7 +6106,6 @@ EXPORT_SYMBOL_GPL(register_ftrace_direct);
 int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr,
 			     bool free_filters)
 {
-	struct ftrace_hash *hash = ops->func_hash->filter_hash;
 	int err;
 
 	if (check_direct_multi(ops))
@@ -6090,13 +6115,9 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr,
 
 	mutex_lock(&direct_mutex);
 	err = unregister_ftrace_function(ops);
-	remove_direct_functions_hash(hash, addr);
+	reset_direct(ops, addr);
 	mutex_unlock(&direct_mutex);
 
-	/* cleanup for possible another register call */
-	ops->func = NULL;
-	ops->trampoline = 0;
-
 	if (free_filters)
 		ftrace_free_filter(ops);
 	return err;
@@ -6106,7 +6127,7 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_direct);
 static int
 __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 {
-	struct ftrace_hash *hash;
+	struct ftrace_hash *hash = ops->func_hash->filter_hash;
 	struct ftrace_func_entry *entry, *iter;
 	static struct ftrace_ops tmp_ops = {
 		.func		= ftrace_stub,
@@ -6127,12 +6148,20 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 		return err;
 
 	/*
+	 * Call __ftrace_hash_update_ipmodify() here, so that we can call
+	 * ops->ops_func for the ops. This is needed because the above
+	 * register_ftrace_function_nolock() worked on tmp_ops.
+	 */
+	err = __ftrace_hash_update_ipmodify(ops, hash, hash, true);
+	if (err)
+		goto out;
+
+	/*
 	 * Now the ftrace_ops_list_func() is called to do the direct callers.
 	 * We can safely change the direct functions attached to each entry.
 	 */
 	mutex_lock(&ftrace_lock);
 
-	hash = ops->func_hash->filter_hash;
 	size = 1 << hash->size_bits;
 	for (i = 0; i < size; i++) {
 		hlist_for_each_entry(iter, &hash->buckets[i], hlist) {
@@ -6147,6 +6176,7 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 
 	mutex_unlock(&ftrace_lock);
 
+out:
 	/* Removing the tmp_ops will add the updated direct callers to the functions */
 	unregister_ftrace_function(&tmp_ops);
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d1e527cf2aae..304e93597126 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8781,8 +8781,18 @@ static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
 	put_snapshot_map(iter->tr);
 }
 
+static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	/*
+	 * Trace buffer mappings require the complete buffer including
+	 * the meta page. Partial mappings are not supported.
+	 */
+	return -EINVAL;
+}
+
 static const struct vm_operations_struct tracing_buffers_vmops = {
 	.close		= tracing_buffers_mmap_close,
+	.may_split      = tracing_buffers_may_split,
 };
 
 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index c428dafe7496..b15854c75d4f 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -1449,12 +1449,7 @@ static struct trace_event_functions user_event_funcs = {
 
 static int user_event_set_call_visible(struct user_event *user, bool visible)
 {
-	int ret;
-	const struct cred *old_cred;
-	struct cred *cred;
-
-	cred = prepare_creds();
-
+	CLASS(prepare_creds, cred)();
 	if (!cred)
 		return -ENOMEM;
 
@@ -1469,17 +1464,12 @@ static int user_event_set_call_visible(struct user_event *user, bool visible)
 	 */
 	cred->fsuid = GLOBAL_ROOT_UID;
 
-	old_cred = override_creds(cred);
-
-	if (visible)
-		ret = trace_add_event_call(&user->call);
-	else
-		ret = trace_remove_event_call(&user->call);
-
-	revert_creds(old_cred);
-	put_cred(cred);
+	scoped_with_creds(cred) {
+		if (visible)
+			return trace_add_event_call(&user->call);
 
-	return ret;
+		return trace_remove_event_call(&user->call);
+	}
 }
 
 static int destroy_user_event(struct user_event *user)
diff --git a/kernel/user.c b/kernel/user.c
index 0163665914c9..7aef4e679a6a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -35,6 +35,7 @@ EXPORT_SYMBOL_GPL(init_binfmt_misc);
  * and 1 for... ?
  */
 struct user_namespace init_user_ns = {
+	.ns = NS_COMMON_INIT(init_user_ns),
 	.uid_map = {
 		{
 			.extent[0] = {
@@ -65,14 +66,8 @@ struct user_namespace init_user_ns = {
 			.nr_extents = 1,
 		},
 	},
-	.ns.ns_type = ns_common_type(&init_user_ns),
-	.ns.__ns_ref = REFCOUNT_INIT(3),
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
-	.ns.inum = ns_init_inum(&init_user_ns),
-#ifdef CONFIG_USER_NS
-	.ns.ops = &userns_operations,
-#endif
 	.flags = USERNS_INIT_FLAGS,
 #ifdef CONFIG_KEYS
 	.keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list),
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index 7e45559521af..52f89f1137da 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -119,9 +119,9 @@ static bool post_one_notification(struct watch_queue *wqueue,
 	offset = note % WATCH_QUEUE_NOTES_PER_PAGE * WATCH_QUEUE_NOTE_SIZE;
 	get_page(page);
 	len = n->info & WATCH_INFO_LENGTH;
-	p = kmap_atomic(page);
+	p = kmap_local_page(page);
 	memcpy(p + offset, n, len);
-	kunmap_atomic(p);
+	kunmap_local(p);
 
 	buf = pipe_buf(pipe, head);
 	buf->page = page;