summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-02 17:31:22 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-02 17:31:22 -0800
commitd348c22394ad3c8eaf7bc693cb0ca0edc2ec5246 (patch)
tree57cbdede16efa659dc64f5ab78b82a8e78596272 /kernel
parent959bfe496bbaf3daa5dca32d397e29ea12471779 (diff)
parent7cede21e9f04f16a456d3c3c8a9a8899c8d84757 (diff)
Merge tag 'pm-6.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "There are quite a few interesting things here, including new hardware support, new features, some bug fixes and documentation updates. In addition, there are a usual bunch of minor fixes and cleanups all over. In the new hardware support category, there are intel_pstate and intel_rapl driver updates to support new processors, Panther Lake, Wildcat Lake, Noval Lake, and Diamond Rapids in the OOB mode, OPP and bandwidth allocation support in the tegra186 cpufreq driver, and JH7110S SOC support in dt-platdev cpufreq. The new features are the PM QoS CPU latency limit for suspend-to-idle, the netlink support for the energy model management, support for terminating system suspend via a wakeup event during the sync of file systems, configurable number of hibernation compression threads, the runtime PM auto-cleanup macros, and the "poweroff" PM event that is expected to be used during system shutdown. Bugs are mostly fixed in cpuidle governors, but there are also fixes elsewhere, like in the amd-pstate cpufreq driver. Documentation updates include, but are not limited to, a new doc on debugging shutdown hangs, cross-referencing fixes and cleanups in the intel_pstate documentation, and updates of comments in the core hibernation code. Specifics: - Introduce and document a QoS limit on CPU exit latency during wakeup from suspend-to-idle (Ulf Hansson) - Add support for building libcpupower statically (Zuo An) - Add support for sending netlink notifications to user space on energy model updates (Changwoo Mini, Peng Fan) - Minor improvements to the Rust OPP interface (Tamir Duberstein) - Fixes to scope-based pointers in the OPP library (Viresh Kumar) - Use residency threshold in polling state override decisions in the menu cpuidle governor (Aboorva Devarajan) - Add sanity check for exit latency and target residency in the cpufreq core (Rafael Wysocki) - Use this_cpu_ptr() where possible in the teo governor (Christian Loehle) - Rework the handling of tick wakeups in the teo cpuidle governor to increase the likelihood of stopping the scheduler tick in the cases when tick wakeups can be counted as non-timer ones (Rafael Wysocki) - Fix a reverse condition in the teo cpuidle governor and drop a misguided target residency check from it (Rafael Wysocki) - Clean up multiple minor defects in the teo cpuidle governor (Rafael Wysocki) - Update header inclusion to make it follow the Include What You Use principle (Andy Shevchenko) - Enable MSR-based RAPL PMU support in the intel_rapl power capping driver and arrange for using it on the Panther Lake and Wildcat Lake processors (Kuppuswamy Sathyanarayanan) - Add support for Nova Lake and Wildcat Lake processors to the intel_rapl power capping driver (Kaushlendra Kumar, Srinivas Pandruvada) - Add OPP and bandwidth support for Tegra186 (Aaron Kling) - Optimizations for parameter array handling in the amd-pstate cpufreq driver (Mario Limonciello) - Fix for mode changes with offline CPUs in the amd-pstate cpufreq driver (Gautham Shenoy) - Preserve freq_table_sorted across suspend/hibernate in the cpufreq core (Zihuan Zhang) - Adjust energy model rules for Intel hybrid platforms in the intel_pstate cpufreq driver and improve printing of debug messages in it (Rafael Wysocki) - Replace deprecated strcpy() in cpufreq_unregister_governor() (Thorsten Blum) - Fix duplicate hyperlink target errors in the intel_pstate cpufreq driver documentation and use :ref: directive for internal linking in it (Swaraj Gaikwad, Bagas Sanjaya) - Add Diamond Rapids OOB mode support to the intel_pstate cpufreq driver (Kuppuswamy Sathyanarayanan) - Use mutex guard for driver locking in the intel_pstate driver and eliminate some code duplication from it (Rafael Wysocki) - Replace udelay() with usleep_range() in ACPI cpufreq (Kaushlendra Kumar) - Minor improvements to various cpufreq drivers (Christian Marangi, Hal Feng, Jie Zhan, Marco Crivellari, Miaoqian Lin, and Shuhao Fu) - Replace snprintf() with scnprintf() in show_trace_dev_match() (Kaushlendra Kumar) - Fix memory allocation error handling in pm_vt_switch_required() (Malaya Kumar Rout) - Introduce CALL_PM_OP() macro and use it to simplify code in generic PM operations (Kaushlendra Kumar) - Add module param to backtrace all CPUs in the device power management watchdog (Sergey Senozhatsky) - Rework message printing in swsusp_save() (Rafael Wysocki) - Make it possible to change the number of hibernation compression threads (Xueqin Luo) - Clarify that only cgroup1 freezer uses PM freezer (Tejun Heo) - Add document on debugging shutdown hangs to PM documentation and correct a mistaken configuration option in it (Mario Limonciello) - Shut down wakeup source timer before removing the wakeup source from the list (Kaushlendra Kumar, Rafael Wysocki) - Introduce new PMSG_POWEROFF event for system shutdown handling with the help of PM device callbacks (Mario Limonciello) - Make pm_test delay interruptible by wakeup events (Riwen Lu) - Clean up kernel-doc comment style usage in the core hibernation code and remove unuseful comments from it (Sunday Adelodun, Rafael Wysocki) - Add support for handling wakeup events and aborting the suspend process while it is syncing file systems (Samuel Wu, Rafael Wysocki) - Add WQ_UNBOUND to pm_wq workqueue (Marco Crivellari) - Add runtime PM wrapper macros for ACQUIRE()/ACQUIRE_ERR() and use them in the PCI core and the ACPI TAD driver (Rafael Wysocki) - Improve runtime PM in the ACPI TAD driver (Rafael Wysocki) - Update pm_runtime_allow/forbid() documentation (Rafael Wysocki) - Fix typos in runtime.c comments (Malaya Kumar Rout) - Move governor.h from devfreq under include/linux/ and rename to devfreq-governor.h to allow devfreq governor definitions in out of drivers/devfreq/ (Dmitry Baryshkov) - Use min() to improve readability in tegra30-devfreq.c (Thorsten Blum) - Fix potential use-after-free issue of OPP handling in hisi_uncore_freq.c (Pengjie Zhang) - Fix typo in DFSO_DOWNDIFFERENTIAL macro name in governor_simpleondemand.c in devfreq (Riwen Lu)" * tag 'pm-6.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (96 commits) PM / devfreq: Fix typo in DFSO_DOWNDIFFERENTIAL macro name cpuidle: Warn instead of bailing out if target residency check fails cpuidle: Update header inclusion Documentation: power/cpuidle: Document the CPU system wakeup latency QoS cpuidle: Respect the CPU system wakeup QoS limit for cpuidle sched: idle: Respect the CPU system wakeup QoS limit for s2idle pmdomain: Respect the CPU system wakeup QoS limit for cpuidle pmdomain: Respect the CPU system wakeup QoS limit for s2idle PM: QoS: Introduce a CPU system wakeup QoS limit cpuidle: governors: teo: Add missing space to the description PM: hibernate: Extra cleanup of comments in swap handling code PM / devfreq: tegra30: use min to simplify actmon_cpu_to_emc_rate PM / devfreq: hisi: Fix potential UAF in OPP handling PM / devfreq: Move governor.h to a public header location powercap: intel_rapl: Enable MSR-based RAPL PMU support powercap: intel_rapl: Prepare read_raw() interface for atomic-context callers cpufreq: qcom-nvmem: fix compilation warning for qcom_cpufreq_ipq806x_match_list PM: sleep: Call pm_sleep_fs_sync() instead of ksys_sync_helper() PM: sleep: Add support for wakeup during filesystem sync cpufreq: ACPI: Replace udelay() with usleep_range() ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup/legacy_freezer.c2
-rw-r--r--kernel/freezer.c2
-rw-r--r--kernel/power/Kconfig11
-rw-r--r--kernel/power/Makefile4
-rw-r--r--kernel/power/console.c8
-rw-r--r--kernel/power/em_netlink.c308
-rw-r--r--kernel/power/em_netlink.h39
-rw-r--r--kernel/power/em_netlink_autogen.c48
-rw-r--r--kernel/power/em_netlink_autogen.h23
-rw-r--r--kernel/power/energy_model.c90
-rw-r--r--kernel/power/hibernate.c6
-rw-r--r--kernel/power/main.c81
-rw-r--r--kernel/power/power.h1
-rw-r--r--kernel/power/qos.c106
-rw-r--r--kernel/power/snapshot.c13
-rw-r--r--kernel/power/suspend.c12
-rw-r--r--kernel/power/swap.c256
-rw-r--r--kernel/power/user.c4
-rw-r--r--kernel/sched/idle.c12
19 files changed, 886 insertions, 140 deletions
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
index dd9417425d92..915b02f65980 100644
--- a/kernel/cgroup/legacy_freezer.c
+++ b/kernel/cgroup/legacy_freezer.c
@@ -63,7 +63,7 @@ static struct freezer *parent_freezer(struct freezer *freezer)
return css_freezer(freezer->css.parent);
}
-bool cgroup_freezing(struct task_struct *task)
+bool cgroup1_freezing(struct task_struct *task)
{
bool ret;
diff --git a/kernel/freezer.c b/kernel/freezer.c
index ddc11a8bd2ea..a76bf957fb32 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -44,7 +44,7 @@ bool freezing_slow_path(struct task_struct *p)
if (tsk_is_oom_victim(p))
return false;
- if (pm_nosig_freezing || cgroup_freezing(p))
+ if (pm_nosig_freezing || cgroup1_freezing(p))
return true;
if (pm_freezing && !(p->flags & PF_KTHREAD))
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 54a623680019..05337f437cca 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -202,6 +202,17 @@ config PM_WAKELOCKS_GC
depends on PM_WAKELOCKS
default y
+config PM_QOS_CPU_SYSTEM_WAKEUP
+ bool "User space interface for CPU system wakeup QoS"
+ depends on CPU_IDLE
+ help
+ Enable this to allow user space via the cpu_wakeup_latency file to
+ specify a CPU system wakeup latency limit.
+
+ This may be particularly useful for platforms supporting multiple low
+ power states for CPUs during system-wide suspend and s2idle in
+ particular.
+
config PM
bool "Device power management core functionality"
help
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 874ad834dc8d..773e2789412b 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -21,4 +21,6 @@ obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
-obj-$(CONFIG_ENERGY_MODEL) += energy_model.o
+obj-$(CONFIG_ENERGY_MODEL) += em.o
+em-y := energy_model.o
+em-$(CONFIG_NET) += em_netlink_autogen.o em_netlink.o
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 19c48aa5355d..a906a0ac0f9b 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -44,9 +44,10 @@ static LIST_HEAD(pm_vt_switch_list);
* no_console_suspend argument has been passed on the command line, VT
* switches will occur.
*/
-void pm_vt_switch_required(struct device *dev, bool required)
+int pm_vt_switch_required(struct device *dev, bool required)
{
struct pm_vt_switch *entry, *tmp;
+ int ret = 0;
mutex_lock(&vt_switch_mutex);
list_for_each_entry(tmp, &pm_vt_switch_list, head) {
@@ -58,8 +59,10 @@ void pm_vt_switch_required(struct device *dev, bool required)
}
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
+ if (!entry) {
+ ret = -ENOMEM;
goto out;
+ }
entry->required = required;
entry->dev = dev;
@@ -67,6 +70,7 @@ void pm_vt_switch_required(struct device *dev, bool required)
list_add(&entry->head, &pm_vt_switch_list);
out:
mutex_unlock(&vt_switch_mutex);
+ return ret;
}
EXPORT_SYMBOL(pm_vt_switch_required);
diff --git a/kernel/power/em_netlink.c b/kernel/power/em_netlink.c
new file mode 100644
index 000000000000..4b85da138a06
--- /dev/null
+++ b/kernel/power/em_netlink.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * Generic netlink for energy model.
+ *
+ * Copyright (c) 2025 Valve Corporation.
+ * Author: Changwoo Min <changwoo@igalia.com>
+ */
+
+#define pr_fmt(fmt) "energy_model: " fmt
+
+#include <linux/energy_model.h>
+#include <net/sock.h>
+#include <net/genetlink.h>
+#include <uapi/linux/energy_model.h>
+
+#include "em_netlink.h"
+#include "em_netlink_autogen.h"
+
+#define EM_A_PD_CPUS_LEN 256
+
+/*************************** Command encoding ********************************/
+static int __em_nl_get_pd_size(struct em_perf_domain *pd, void *data)
+{
+ char cpus_buf[EM_A_PD_CPUS_LEN];
+ int *tot_msg_sz = data;
+ int msg_sz, cpus_sz;
+
+ cpus_sz = snprintf(cpus_buf, sizeof(cpus_buf), "%*pb",
+ cpumask_pr_args(to_cpumask(pd->cpus)));
+
+ msg_sz = nla_total_size(0) + /* EM_A_PDS_PD */
+ nla_total_size(sizeof(u32)) + /* EM_A_PD_PD_ID */
+ nla_total_size_64bit(sizeof(u64)) + /* EM_A_PD_FLAGS */
+ nla_total_size(cpus_sz); /* EM_A_PD_CPUS */
+
+ *tot_msg_sz += nlmsg_total_size(genlmsg_msg_size(msg_sz));
+ return 0;
+}
+
+static int __em_nl_get_pd(struct em_perf_domain *pd, void *data)
+{
+ char cpus_buf[EM_A_PD_CPUS_LEN];
+ struct sk_buff *msg = data;
+ struct nlattr *entry;
+
+ entry = nla_nest_start(msg, EM_A_PDS_PD);
+ if (!entry)
+ goto out_cancel_nest;
+
+ if (nla_put_u32(msg, EM_A_PD_PD_ID, pd->id))
+ goto out_cancel_nest;
+
+ if (nla_put_u64_64bit(msg, EM_A_PD_FLAGS, pd->flags, EM_A_PD_PAD))
+ goto out_cancel_nest;
+
+ snprintf(cpus_buf, sizeof(cpus_buf), "%*pb",
+ cpumask_pr_args(to_cpumask(pd->cpus)));
+ if (nla_put_string(msg, EM_A_PD_CPUS, cpus_buf))
+ goto out_cancel_nest;
+
+ nla_nest_end(msg, entry);
+
+ return 0;
+
+out_cancel_nest:
+ nla_nest_cancel(msg, entry);
+
+ return -EMSGSIZE;
+}
+
+int em_nl_get_pds_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ void *hdr;
+ int cmd = info->genlhdr->cmd;
+ int ret = -EMSGSIZE, msg_sz = 0;
+
+ for_each_em_perf_domain(__em_nl_get_pd_size, &msg_sz);
+
+ msg = genlmsg_new(msg_sz, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put_reply(msg, info, &em_nl_family, 0, cmd);
+ if (!hdr)
+ goto out_free_msg;
+
+ ret = for_each_em_perf_domain(__em_nl_get_pd, msg);
+ if (ret)
+ goto out_cancel_msg;
+
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+
+out_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+out_free_msg:
+ nlmsg_free(msg);
+
+ return ret;
+}
+
+static struct em_perf_domain *__em_nl_get_pd_table_id(struct nlattr **attrs)
+{
+ struct em_perf_domain *pd;
+ int id;
+
+ if (!attrs[EM_A_PD_TABLE_PD_ID])
+ return NULL;
+
+ id = nla_get_u32(attrs[EM_A_PD_TABLE_PD_ID]);
+ pd = em_perf_domain_get_by_id(id);
+ return pd;
+}
+
+static int __em_nl_get_pd_table_size(const struct em_perf_domain *pd)
+{
+ int id_sz, ps_sz;
+
+ id_sz = nla_total_size(sizeof(u32)); /* EM_A_PD_TABLE_PD_ID */
+ ps_sz = nla_total_size(0) + /* EM_A_PD_TABLE_PS */
+ nla_total_size_64bit(sizeof(u64)) + /* EM_A_PS_PERFORMANCE */
+ nla_total_size_64bit(sizeof(u64)) + /* EM_A_PS_FREQUENCY */
+ nla_total_size_64bit(sizeof(u64)) + /* EM_A_PS_POWER */
+ nla_total_size_64bit(sizeof(u64)) + /* EM_A_PS_COST */
+ nla_total_size_64bit(sizeof(u64)); /* EM_A_PS_FLAGS */
+ ps_sz *= pd->nr_perf_states;
+
+ return nlmsg_total_size(genlmsg_msg_size(id_sz + ps_sz));
+}
+
+static int __em_nl_get_pd_table(struct sk_buff *msg, const struct em_perf_domain *pd)
+{
+ struct em_perf_state *table, *ps;
+ struct nlattr *entry;
+ int i;
+
+ if (nla_put_u32(msg, EM_A_PD_TABLE_PD_ID, pd->id))
+ goto out_err;
+
+ rcu_read_lock();
+ table = em_perf_state_from_pd((struct em_perf_domain *)pd);
+
+ for (i = 0; i < pd->nr_perf_states; i++) {
+ ps = &table[i];
+
+ entry = nla_nest_start(msg, EM_A_PD_TABLE_PS);
+ if (!entry)
+ goto out_unlock_ps;
+
+ if (nla_put_u64_64bit(msg, EM_A_PS_PERFORMANCE,
+ ps->performance, EM_A_PS_PAD))
+ goto out_cancel_ps_nest;
+ if (nla_put_u64_64bit(msg, EM_A_PS_FREQUENCY,
+ ps->frequency, EM_A_PS_PAD))
+ goto out_cancel_ps_nest;
+ if (nla_put_u64_64bit(msg, EM_A_PS_POWER,
+ ps->power, EM_A_PS_PAD))
+ goto out_cancel_ps_nest;
+ if (nla_put_u64_64bit(msg, EM_A_PS_COST,
+ ps->cost, EM_A_PS_PAD))
+ goto out_cancel_ps_nest;
+ if (nla_put_u64_64bit(msg, EM_A_PS_FLAGS,
+ ps->flags, EM_A_PS_PAD))
+ goto out_cancel_ps_nest;
+
+ nla_nest_end(msg, entry);
+ }
+ rcu_read_unlock();
+ return 0;
+
+out_cancel_ps_nest:
+ nla_nest_cancel(msg, entry);
+out_unlock_ps:
+ rcu_read_unlock();
+out_err:
+ return -EMSGSIZE;
+}
+
+int em_nl_get_pd_table_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ int cmd = info->genlhdr->cmd;
+ int msg_sz, ret = -EMSGSIZE;
+ struct em_perf_domain *pd;
+ struct sk_buff *msg;
+ void *hdr;
+
+ pd = __em_nl_get_pd_table_id(info->attrs);
+ if (!pd)
+ return -EINVAL;
+
+ msg_sz = __em_nl_get_pd_table_size(pd);
+
+ msg = genlmsg_new(msg_sz, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put_reply(msg, info, &em_nl_family, 0, cmd);
+ if (!hdr)
+ goto out_free_msg;
+
+ ret = __em_nl_get_pd_table(msg, pd);
+ if (ret)
+ goto out_free_msg;
+
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+out_free_msg:
+ nlmsg_free(msg);
+ return ret;
+}
+
+
+/**************************** Event encoding *********************************/
+static void __em_notify_pd_table(const struct em_perf_domain *pd, int ntf_type)
+{
+ struct sk_buff *msg;
+ int msg_sz, ret = -EMSGSIZE;
+ void *hdr;
+
+ if (!genl_has_listeners(&em_nl_family, &init_net, EM_NLGRP_EVENT))
+ return;
+
+ msg_sz = __em_nl_get_pd_table_size(pd);
+
+ msg = genlmsg_new(msg_sz, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ hdr = genlmsg_put(msg, 0, 0, &em_nl_family, 0, ntf_type);
+ if (!hdr)
+ goto out_free_msg;
+
+ ret = __em_nl_get_pd_table(msg, pd);
+ if (ret)
+ goto out_free_msg;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast(&em_nl_family, msg, 0, EM_NLGRP_EVENT, GFP_KERNEL);
+
+ return;
+
+out_free_msg:
+ nlmsg_free(msg);
+ return;
+}
+
+void em_notify_pd_created(const struct em_perf_domain *pd)
+{
+ __em_notify_pd_table(pd, EM_CMD_PD_CREATED);
+}
+
+void em_notify_pd_updated(const struct em_perf_domain *pd)
+{
+ __em_notify_pd_table(pd, EM_CMD_PD_UPDATED);
+}
+
+static int __em_notify_pd_deleted_size(const struct em_perf_domain *pd)
+{
+ int id_sz = nla_total_size(sizeof(u32)); /* EM_A_PD_TABLE_PD_ID */
+
+ return nlmsg_total_size(genlmsg_msg_size(id_sz));
+}
+
+void em_notify_pd_deleted(const struct em_perf_domain *pd)
+{
+ struct sk_buff *msg;
+ void *hdr;
+ int msg_sz;
+
+ if (!genl_has_listeners(&em_nl_family, &init_net, EM_NLGRP_EVENT))
+ return;
+
+ msg_sz = __em_notify_pd_deleted_size(pd);
+
+ msg = genlmsg_new(msg_sz, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ hdr = genlmsg_put(msg, 0, 0, &em_nl_family, 0, EM_CMD_PD_DELETED);
+ if (!hdr)
+ goto out_free_msg;
+
+ if (nla_put_u32(msg, EM_A_PD_TABLE_PD_ID, pd->id)) {
+ goto out_free_msg;
+ }
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast(&em_nl_family, msg, 0, EM_NLGRP_EVENT, GFP_KERNEL);
+
+ return;
+
+out_free_msg:
+ nlmsg_free(msg);
+ return;
+}
+
+/**************************** Initialization *********************************/
+static int __init em_netlink_init(void)
+{
+ return genl_register_family(&em_nl_family);
+}
+postcore_initcall(em_netlink_init);
diff --git a/kernel/power/em_netlink.h b/kernel/power/em_netlink.h
new file mode 100644
index 000000000000..583d7f1c3939
--- /dev/null
+++ b/kernel/power/em_netlink.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * Generic netlink for energy model.
+ *
+ * Copyright (c) 2025 Valve Corporation.
+ * Author: Changwoo Min <changwoo@igalia.com>
+ */
+#ifndef _EM_NETLINK_H
+#define _EM_NETLINK_H
+
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_NET)
+int for_each_em_perf_domain(int (*cb)(struct em_perf_domain*, void *),
+ void *data);
+struct em_perf_domain *em_perf_domain_get_by_id(int id);
+void em_notify_pd_created(const struct em_perf_domain *pd);
+void em_notify_pd_deleted(const struct em_perf_domain *pd);
+void em_notify_pd_updated(const struct em_perf_domain *pd);
+#else
+static inline
+int for_each_em_perf_domain(int (*cb)(struct em_perf_domain*, void *),
+ void *data)
+{
+ return -EINVAL;
+}
+static inline
+struct em_perf_domain *em_perf_domain_get_by_id(int id)
+{
+ return NULL;
+}
+
+static inline void em_notify_pd_created(const struct em_perf_domain *pd) {}
+
+static inline void em_notify_pd_deleted(const struct em_perf_domain *pd) {}
+
+static inline void em_notify_pd_updated(const struct em_perf_domain *pd) {}
+#endif
+
+#endif /* _EM_NETLINK_H */
diff --git a/kernel/power/em_netlink_autogen.c b/kernel/power/em_netlink_autogen.c
new file mode 100644
index 000000000000..a7a09ab1d1c2
--- /dev/null
+++ b/kernel/power/em_netlink_autogen.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/em.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "em_netlink_autogen.h"
+
+#include <uapi/linux/energy_model.h>
+
+/* EM_CMD_GET_PD_TABLE - do */
+static const struct nla_policy em_get_pd_table_nl_policy[EM_A_PD_TABLE_PD_ID + 1] = {
+ [EM_A_PD_TABLE_PD_ID] = { .type = NLA_U32, },
+};
+
+/* Ops table for em */
+static const struct genl_split_ops em_nl_ops[] = {
+ {
+ .cmd = EM_CMD_GET_PDS,
+ .doit = em_nl_get_pds_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = EM_CMD_GET_PD_TABLE,
+ .doit = em_nl_get_pd_table_doit,
+ .policy = em_get_pd_table_nl_policy,
+ .maxattr = EM_A_PD_TABLE_PD_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
+};
+
+static const struct genl_multicast_group em_nl_mcgrps[] = {
+ [EM_NLGRP_EVENT] = { "event", },
+};
+
+struct genl_family em_nl_family __ro_after_init = {
+ .name = EM_FAMILY_NAME,
+ .version = EM_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = em_nl_ops,
+ .n_split_ops = ARRAY_SIZE(em_nl_ops),
+ .mcgrps = em_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(em_nl_mcgrps),
+};
diff --git a/kernel/power/em_netlink_autogen.h b/kernel/power/em_netlink_autogen.h
new file mode 100644
index 000000000000..78ce609641f1
--- /dev/null
+++ b/kernel/power/em_netlink_autogen.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/em.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_EM_GEN_H
+#define _LINUX_EM_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/energy_model.h>
+
+int em_nl_get_pds_doit(struct sk_buff *skb, struct genl_info *info);
+int em_nl_get_pd_table_doit(struct sk_buff *skb, struct genl_info *info);
+
+enum {
+ EM_NLGRP_EVENT,
+};
+
+extern struct genl_family em_nl_family;
+
+#endif /* _LINUX_EM_GEN_H */
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 5f17d2e8e954..11af9f64aa82 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -17,12 +17,24 @@
#include <linux/sched/topology.h>
#include <linux/slab.h>
+#include "em_netlink.h"
+
/*
* Mutex serializing the registrations of performance domains and letting
* callbacks defined by drivers sleep.
*/
static DEFINE_MUTEX(em_pd_mutex);
+/*
+ * Manage performance domains with IDs. One can iterate the performance domains
+ * through the list and pick one with their associated ID. The mutex serializes
+ * the list access. When holding em_pd_list_mutex, em_pd_mutex should not be
+ * taken to avoid potential deadlock.
+ */
+static DEFINE_IDA(em_pd_ida);
+static LIST_HEAD(em_pd_list);
+static DEFINE_MUTEX(em_pd_list_mutex);
+
static void em_cpufreq_update_efficiencies(struct device *dev,
struct em_perf_state *table);
static void em_check_capacity_update(void);
@@ -116,6 +128,16 @@ static int em_debug_flags_show(struct seq_file *s, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(em_debug_flags);
+static int em_debug_id_show(struct seq_file *s, void *unused)
+{
+ struct em_perf_domain *pd = s->private;
+
+ seq_printf(s, "%d\n", pd->id);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(em_debug_id);
+
static void em_debug_create_pd(struct device *dev)
{
struct em_dbg_info *em_dbg;
@@ -132,6 +154,8 @@ static void em_debug_create_pd(struct device *dev)
debugfs_create_file("flags", 0444, d, dev->em_pd,
&em_debug_flags_fops);
+ debugfs_create_file("id", 0444, d, dev->em_pd, &em_debug_id_fops);
+
em_dbg = devm_kcalloc(dev, dev->em_pd->nr_perf_states,
sizeof(*em_dbg), GFP_KERNEL);
if (!em_dbg)
@@ -328,6 +352,8 @@ int em_dev_update_perf_domain(struct device *dev,
em_table_free(old_table);
mutex_unlock(&em_pd_mutex);
+
+ em_notify_pd_updated(pd);
return 0;
}
EXPORT_SYMBOL_GPL(em_dev_update_perf_domain);
@@ -396,7 +422,7 @@ static int em_create_pd(struct device *dev, int nr_states,
struct em_perf_table *em_table;
struct em_perf_domain *pd;
struct device *cpu_dev;
- int cpu, ret, num_cpus;
+ int cpu, ret, num_cpus, id;
if (_is_cpu_device(dev)) {
num_cpus = cpumask_weight(cpus);
@@ -420,6 +446,13 @@ static int em_create_pd(struct device *dev, int nr_states,
pd->nr_perf_states = nr_states;
+ INIT_LIST_HEAD(&pd->node);
+
+ id = ida_alloc(&em_pd_ida, GFP_KERNEL);
+ if (id < 0)
+ return -ENOMEM;
+ pd->id = id;
+
em_table = em_table_alloc(pd);
if (!em_table)
goto free_pd;
@@ -444,6 +477,7 @@ free_pd_table:
kfree(em_table);
free_pd:
kfree(pd);
+ ida_free(&em_pd_ida, id);
return -EINVAL;
}
@@ -659,8 +693,16 @@ int em_dev_register_pd_no_update(struct device *dev, unsigned int nr_states,
unlock:
mutex_unlock(&em_pd_mutex);
+ if (ret)
+ return ret;
- return ret;
+ mutex_lock(&em_pd_list_mutex);
+ list_add_tail(&dev->em_pd->node, &em_pd_list);
+ mutex_unlock(&em_pd_list_mutex);
+
+ em_notify_pd_created(dev->em_pd);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(em_dev_register_pd_no_update);
@@ -678,6 +720,12 @@ void em_dev_unregister_perf_domain(struct device *dev)
if (_is_cpu_device(dev))
return;
+ mutex_lock(&em_pd_list_mutex);
+ list_del_init(&dev->em_pd->node);
+ mutex_unlock(&em_pd_list_mutex);
+
+ em_notify_pd_deleted(dev->em_pd);
+
/*
* The mutex separates all register/unregister requests and protects
* from potential clean-up/setup issues in the debugfs directories.
@@ -689,6 +737,8 @@ void em_dev_unregister_perf_domain(struct device *dev)
em_table_free(rcu_dereference_protected(dev->em_pd->em_table,
lockdep_is_held(&em_pd_mutex)));
+ ida_free(&em_pd_ida, dev->em_pd->id);
+
kfree(dev->em_pd);
dev->em_pd = NULL;
mutex_unlock(&em_pd_mutex);
@@ -958,3 +1008,39 @@ void em_rebuild_sched_domains(void)
*/
schedule_work(&rebuild_sd_work);
}
+
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_NET)
+int for_each_em_perf_domain(int (*cb)(struct em_perf_domain*, void *),
+ void *data)
+{
+ struct em_perf_domain *pd;
+
+ lockdep_assert_not_held(&em_pd_mutex);
+ guard(mutex)(&em_pd_list_mutex);
+
+ list_for_each_entry(pd, &em_pd_list, node) {
+ int ret;
+
+ ret = cb(pd, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+struct em_perf_domain *em_perf_domain_get_by_id(int id)
+{
+ struct em_perf_domain *pd;
+
+ lockdep_assert_not_held(&em_pd_mutex);
+ guard(mutex)(&em_pd_list_mutex);
+
+ list_for_each_entry(pd, &em_pd_list, node) {
+ if (pd->id == id)
+ return pd;
+ }
+
+ return NULL;
+}
+#endif
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 26e45f86b955..af8d07bafe02 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -820,7 +820,10 @@ int hibernate(void)
if (error)
goto Restore;
- ksys_sync_helper();
+ error = pm_sleep_fs_sync();
+ if (error)
+ goto Notify;
+
filesystems_freeze(filesystem_freeze_enabled);
error = freeze_processes();
@@ -891,6 +894,7 @@ int hibernate(void)
freezer_test_done = false;
Exit:
filesystems_thaw();
+ Notify:
pm_notifier_call_chain(PM_POST_HIBERNATION);
Restore:
pm_restore_console();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 549f51ca3a1e..03b2c5495c77 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -18,6 +18,8 @@
#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <linux/pm_runtime.h>
+#include <linux/atomic.h>
+#include <linux/wait.h>
#include "power.h"
@@ -92,6 +94,61 @@ void ksys_sync_helper(void)
}
EXPORT_SYMBOL_GPL(ksys_sync_helper);
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+/* Wakeup events handling resolution while syncing file systems in jiffies */
+#define PM_FS_SYNC_WAKEUP_RESOLUTION 5
+
+static atomic_t pm_fs_sync_count = ATOMIC_INIT(0);
+static struct workqueue_struct *pm_fs_sync_wq;
+static DECLARE_WAIT_QUEUE_HEAD(pm_fs_sync_wait);
+
+static bool pm_fs_sync_completed(void)
+{
+ return atomic_read(&pm_fs_sync_count) == 0;
+}
+
+static void pm_fs_sync_work_fn(struct work_struct *work)
+{
+ ksys_sync_helper();
+
+ if (atomic_dec_and_test(&pm_fs_sync_count))
+ wake_up(&pm_fs_sync_wait);
+}
+static DECLARE_WORK(pm_fs_sync_work, pm_fs_sync_work_fn);
+
+/**
+ * pm_sleep_fs_sync() - Sync file systems in an interruptible way
+ *
+ * Return: 0 on successful file system sync, or -EBUSY if the file system sync
+ * was aborted.
+ */
+int pm_sleep_fs_sync(void)
+{
+ pm_wakeup_clear(0);
+
+ /*
+ * Take back-to-back sleeps into account by queuing a subsequent fs sync
+ * only if the previous fs sync is running or is not queued. Multiple fs
+ * syncs increase the likelihood of saving the latest files immediately
+ * before sleep.
+ */
+ if (!work_pending(&pm_fs_sync_work)) {
+ atomic_inc(&pm_fs_sync_count);
+ queue_work(pm_fs_sync_wq, &pm_fs_sync_work);
+ }
+
+ while (!pm_fs_sync_completed()) {
+ if (pm_wakeup_pending())
+ return -EBUSY;
+
+ wait_event_timeout(pm_fs_sync_wait, pm_fs_sync_completed(),
+ PM_FS_SYNC_WAKEUP_RESOLUTION);
+ }
+
+ return 0;
+}
+#endif /* CONFIG_SUSPEND || CONFIG_HIBERNATION */
+
/* Routines for PM-transition notifications */
static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
@@ -231,10 +288,10 @@ static ssize_t mem_sleep_store(struct kobject *kobj, struct kobj_attribute *attr
power_attr(mem_sleep);
/*
- * sync_on_suspend: invoke ksys_sync_helper() before suspend.
+ * sync_on_suspend: Sync file systems before suspend.
*
- * show() returns whether ksys_sync_helper() is invoked before suspend.
- * store() accepts 0 or 1. 0 disables ksys_sync_helper() and 1 enables it.
+ * show() returns whether file systems sync before suspend is enabled.
+ * store() accepts 0 or 1. 0 disables file systems sync and 1 enables it.
*/
bool sync_on_suspend_enabled = !IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC);
@@ -1066,16 +1123,26 @@ static const struct attribute_group *attr_groups[] = {
struct workqueue_struct *pm_wq;
EXPORT_SYMBOL_GPL(pm_wq);
-static int __init pm_start_workqueue(void)
+static int __init pm_start_workqueues(void)
{
- pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0);
+ pm_wq = alloc_workqueue("pm", WQ_FREEZABLE | WQ_UNBOUND, 0);
+ if (!pm_wq)
+ return -ENOMEM;
- return pm_wq ? 0 : -ENOMEM;
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+ pm_fs_sync_wq = alloc_ordered_workqueue("pm_fs_sync", 0);
+ if (!pm_fs_sync_wq) {
+ destroy_workqueue(pm_wq);
+ return -ENOMEM;
+ }
+#endif
+
+ return 0;
}
static int __init pm_init(void)
{
- int error = pm_start_workqueue();
+ int error = pm_start_workqueues();
if (error)
return error;
hibernate_image_size_init();
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 7ccd709af93f..75b63843886e 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -19,6 +19,7 @@ struct swsusp_info {
} __aligned(PAGE_SIZE);
#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION)
+extern int pm_sleep_fs_sync(void);
extern bool filesystem_freeze_enabled;
#endif
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 4244b069442e..f7d8064e9adc 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -415,6 +415,105 @@ static struct miscdevice cpu_latency_qos_miscdev = {
.fops = &cpu_latency_qos_fops,
};
+#ifdef CONFIG_PM_QOS_CPU_SYSTEM_WAKEUP
+/* The CPU system wakeup latency QoS. */
+static struct pm_qos_constraints cpu_wakeup_latency_constraints = {
+ .list = PLIST_HEAD_INIT(cpu_wakeup_latency_constraints.list),
+ .target_value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT,
+ .default_value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT,
+ .no_constraint_value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT,
+ .type = PM_QOS_MIN,
+};
+
+/**
+ * cpu_wakeup_latency_qos_limit - Current CPU system wakeup latency QoS limit.
+ *
+ * Returns the current CPU system wakeup latency QoS limit that may have been
+ * requested by user space.
+ */
+s32 cpu_wakeup_latency_qos_limit(void)
+{
+ return pm_qos_read_value(&cpu_wakeup_latency_constraints);
+}
+
+static int cpu_wakeup_latency_qos_open(struct inode *inode, struct file *filp)
+{
+ struct pm_qos_request *req;
+
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ req->qos = &cpu_wakeup_latency_constraints;
+ pm_qos_update_target(req->qos, &req->node, PM_QOS_ADD_REQ,
+ PM_QOS_RESUME_LATENCY_NO_CONSTRAINT);
+ filp->private_data = req;
+
+ return 0;
+}
+
+static int cpu_wakeup_latency_qos_release(struct inode *inode,
+ struct file *filp)
+{
+ struct pm_qos_request *req = filp->private_data;
+
+ filp->private_data = NULL;
+ pm_qos_update_target(req->qos, &req->node, PM_QOS_REMOVE_REQ,
+ PM_QOS_RESUME_LATENCY_NO_CONSTRAINT);
+ kfree(req);
+
+ return 0;
+}
+
+static ssize_t cpu_wakeup_latency_qos_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ s32 value = pm_qos_read_value(&cpu_wakeup_latency_constraints);
+
+ return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32));
+}
+
+static ssize_t cpu_wakeup_latency_qos_write(struct file *filp,
+ const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ struct pm_qos_request *req = filp->private_data;
+ s32 value;
+
+ if (count == sizeof(s32)) {
+ if (copy_from_user(&value, buf, sizeof(s32)))
+ return -EFAULT;
+ } else {
+ int ret;
+
+ ret = kstrtos32_from_user(buf, count, 16, &value);
+ if (ret)
+ return ret;
+ }
+
+ if (value < 0)
+ return -EINVAL;
+
+ pm_qos_update_target(req->qos, &req->node, PM_QOS_UPDATE_REQ, value);
+
+ return count;
+}
+
+static const struct file_operations cpu_wakeup_latency_qos_fops = {
+ .open = cpu_wakeup_latency_qos_open,
+ .release = cpu_wakeup_latency_qos_release,
+ .read = cpu_wakeup_latency_qos_read,
+ .write = cpu_wakeup_latency_qos_write,
+ .llseek = noop_llseek,
+};
+
+static struct miscdevice cpu_wakeup_latency_qos_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "cpu_wakeup_latency",
+ .fops = &cpu_wakeup_latency_qos_fops,
+};
+#endif /* CONFIG_PM_QOS_CPU_SYSTEM_WAKEUP */
+
static int __init cpu_latency_qos_init(void)
{
int ret;
@@ -424,6 +523,13 @@ static int __init cpu_latency_qos_init(void)
pr_err("%s: %s setup failed\n", __func__,
cpu_latency_qos_miscdev.name);
+#ifdef CONFIG_PM_QOS_CPU_SYSTEM_WAKEUP
+ ret = misc_register(&cpu_wakeup_latency_qos_miscdev);
+ if (ret < 0)
+ pr_err("%s: %s setup failed\n", __func__,
+ cpu_wakeup_latency_qos_miscdev.name);
+#endif
+
return ret;
}
late_initcall(cpu_latency_qos_init);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 645f42e40478..0a946932d5c1 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -2110,22 +2110,20 @@ asmlinkage __visible int swsusp_save(void)
{
unsigned int nr_pages, nr_highmem;
- pr_info("Creating image:\n");
+ pm_deferred_pr_dbg("Creating image\n");
drain_local_pages(NULL);
nr_pages = count_data_pages();
nr_highmem = count_highmem_pages();
- pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
+ pm_deferred_pr_dbg("Need to copy %u pages\n", nr_pages + nr_highmem);
if (!enough_free_mem(nr_pages, nr_highmem)) {
- pr_err("Not enough free memory\n");
+ pm_deferred_pr_dbg("Not enough free memory for image creation\n");
return -ENOMEM;
}
- if (swsusp_alloc(&copy_bm, nr_pages, nr_highmem)) {
- pr_err("Memory allocation failed\n");
+ if (swsusp_alloc(&copy_bm, nr_pages, nr_highmem))
return -ENOMEM;
- }
/*
* During allocating of suspend pagedir, new cold pages may appear.
@@ -2144,7 +2142,8 @@ asmlinkage __visible int swsusp_save(void)
nr_zero_pages = nr_pages - nr_copy_pages;
nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
- pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
+ pm_deferred_pr_dbg("Image created (%d pages copied, %d zero pages)\n",
+ nr_copy_pages, nr_zero_pages);
return 0;
}
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 3d4ebedad69f..2da4482bb6eb 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -344,10 +344,14 @@ MODULE_PARM_DESC(pm_test_delay,
static int suspend_test(int level)
{
#ifdef CONFIG_PM_DEBUG
+ int i;
+
if (pm_test_level == level) {
pr_info("suspend debug: Waiting for %d second(s).\n",
pm_test_delay);
- mdelay(pm_test_delay * 1000);
+ for (i = 0; i < pm_test_delay && !pm_wakeup_pending(); i++)
+ msleep(1000);
+
return 1;
}
#endif /* !CONFIG_PM_DEBUG */
@@ -589,7 +593,11 @@ static int enter_state(suspend_state_t state)
if (sync_on_suspend_enabled) {
trace_suspend_resume(TPS("sync_filesystems"), 0, true);
- ksys_sync_helper();
+
+ error = pm_sleep_fs_sync();
+ if (error)
+ goto Unlock;
+
trace_suspend_resume(TPS("sync_filesystems"), 0, false);
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 70ae21f7370d..33a186373bef 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -46,19 +46,18 @@ static bool clean_pages_on_read;
static bool clean_pages_on_decompress;
/*
- * The swap map is a data structure used for keeping track of each page
- * written to a swap partition. It consists of many swap_map_page
- * structures that contain each an array of MAP_PAGE_ENTRIES swap entries.
- * These structures are stored on the swap and linked together with the
- * help of the .next_swap member.
+ * The swap map is a data structure used for keeping track of each page
+ * written to a swap partition. It consists of many swap_map_page structures
+ * that contain each an array of MAP_PAGE_ENTRIES swap entries. These
+ * structures are stored on the swap and linked together with the help of the
+ * .next_swap member.
*
- * The swap map is created during suspend. The swap map pages are
- * allocated and populated one at a time, so we only need one memory
- * page to set up the entire structure.
+ * The swap map is created during suspend. The swap map pages are allocated and
+ * populated one at a time, so we only need one memory page to set up the entire
+ * structure.
*
- * During resume we pick up all swap_map_page structures into a list.
+ * During resume we pick up all swap_map_page structures into a list.
*/
-
#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
/*
@@ -89,10 +88,8 @@ struct swap_map_page_list {
};
/*
- * The swap_map_handle structure is used for handling swap in
- * a file-alike way
+ * The swap_map_handle structure is used for handling swap in a file-alike way.
*/
-
struct swap_map_handle {
struct swap_map_page *cur;
struct swap_map_page_list *maps;
@@ -117,10 +114,9 @@ struct swsusp_header {
static struct swsusp_header *swsusp_header;
/*
- * The following functions are used for tracing the allocated
- * swap pages, so that they can be freed in case of an error.
+ * The following functions are used for tracing the allocated swap pages, so
+ * that they can be freed in case of an error.
*/
-
struct swsusp_extent {
struct rb_node node;
unsigned long start;
@@ -170,15 +166,14 @@ static int swsusp_extents_insert(unsigned long swap_offset)
return 0;
}
-/*
- * alloc_swapdev_block - allocate a swap page and register that it has
- * been allocated, so that it can be freed in case of an error.
- */
-
sector_t alloc_swapdev_block(int swap)
{
unsigned long offset;
+ /*
+ * Allocate a swap page and register that it has been allocated, so that
+ * it can be freed in case of an error.
+ */
offset = swp_offset(get_swap_page_of_type(swap));
if (offset) {
if (swsusp_extents_insert(offset))
@@ -189,16 +184,14 @@ sector_t alloc_swapdev_block(int swap)
return 0;
}
-/*
- * free_all_swap_pages - free swap pages allocated for saving image data.
- * It also frees the extents used to register which swap entries had been
- * allocated.
- */
-
void free_all_swap_pages(int swap)
{
struct rb_node *node;
+ /*
+ * Free swap pages allocated for saving image data. It also frees the
+ * extents used to register which swap entries had been allocated.
+ */
while ((node = swsusp_extents.rb_node)) {
struct swsusp_extent *ext;
@@ -303,6 +296,7 @@ static int hib_wait_io(struct hib_bio_batch *hb)
/*
* Saving part
*/
+
static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
{
int error;
@@ -336,16 +330,14 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
*/
unsigned int swsusp_header_flags;
-/**
- * swsusp_swap_check - check if the resume device is a swap device
- * and get its index (if so)
- *
- * This is called before saving image
- */
static int swsusp_swap_check(void)
{
int res;
+ /*
+ * Check if the resume device is a swap device and get its index (if so).
+ * This is called before saving the image.
+ */
if (swsusp_resume_device)
res = swap_type_of(swsusp_resume_device, swsusp_resume_block);
else
@@ -362,13 +354,6 @@ static int swsusp_swap_check(void)
return 0;
}
-/**
- * write_page - Write one page to given swap location.
- * @buf: Address we're writing.
- * @offset: Offset of the swap page we're writing to.
- * @hb: bio completion batch
- */
-
static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
{
gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
@@ -519,17 +504,14 @@ static int swap_writer_finish(struct swap_map_handle *handle,
CMP_HEADER, PAGE_SIZE)
#define CMP_SIZE (CMP_PAGES * PAGE_SIZE)
-/* Maximum number of threads for compression/decompression. */
-#define CMP_THREADS 3
+/* Default number of threads for compression/decompression. */
+#define CMP_THREADS 3
+static unsigned int hibernate_compression_threads = CMP_THREADS;
/* Minimum/maximum number of pages for read buffering. */
#define CMP_MIN_RD_PAGES 1024
#define CMP_MAX_RD_PAGES 8192
-/**
- * save_image - save the suspend image data
- */
-
static int save_image(struct swap_map_handle *handle,
struct snapshot_handle *snapshot,
unsigned int nr_to_write)
@@ -585,13 +567,48 @@ struct crc_data {
wait_queue_head_t go; /* start crc update */
wait_queue_head_t done; /* crc update done */
u32 *crc32; /* points to handle's crc32 */
- size_t *unc_len[CMP_THREADS]; /* uncompressed lengths */
- unsigned char *unc[CMP_THREADS]; /* uncompressed data */
+ size_t **unc_len; /* uncompressed lengths */
+ unsigned char **unc; /* uncompressed data */
};
-/*
- * CRC32 update function that runs in its own thread.
- */
+static struct crc_data *alloc_crc_data(int nr_threads)
+{
+ struct crc_data *crc;
+
+ crc = kzalloc(sizeof(*crc), GFP_KERNEL);
+ if (!crc)
+ return NULL;
+
+ crc->unc = kcalloc(nr_threads, sizeof(*crc->unc), GFP_KERNEL);
+ if (!crc->unc)
+ goto err_free_crc;
+
+ crc->unc_len = kcalloc(nr_threads, sizeof(*crc->unc_len), GFP_KERNEL);
+ if (!crc->unc_len)
+ goto err_free_unc;
+
+ return crc;
+
+err_free_unc:
+ kfree(crc->unc);
+err_free_crc:
+ kfree(crc);
+ return NULL;
+}
+
+static void free_crc_data(struct crc_data *crc)
+{
+ if (!crc)
+ return;
+
+ if (crc->thr)
+ kthread_stop(crc->thr);
+
+ kfree(crc->unc_len);
+ kfree(crc->unc);
+ kfree(crc);
+}
+
static int crc32_threadfn(void *data)
{
struct crc_data *d = data;
@@ -616,6 +633,7 @@ static int crc32_threadfn(void *data)
}
return 0;
}
+
/*
* Structure used for data compression.
*/
@@ -637,9 +655,6 @@ struct cmp_data {
/* Indicates the image size after compression */
static atomic64_t compressed_size = ATOMIC_INIT(0);
-/*
- * Compression function that runs in its own thread.
- */
static int compress_threadfn(void *data)
{
struct cmp_data *d = data;
@@ -671,12 +686,6 @@ static int compress_threadfn(void *data)
return 0;
}
-/**
- * save_compressed_image - Save the suspend image data after compression.
- * @handle: Swap map handle to use for saving the image.
- * @snapshot: Image to read data from.
- * @nr_to_write: Number of pages to save.
- */
static int save_compressed_image(struct swap_map_handle *handle,
struct snapshot_handle *snapshot,
unsigned int nr_to_write)
@@ -703,7 +712,7 @@ static int save_compressed_image(struct swap_map_handle *handle,
* footprint.
*/
nr_threads = num_online_cpus() - 1;
- nr_threads = clamp_val(nr_threads, 1, CMP_THREADS);
+ nr_threads = clamp_val(nr_threads, 1, hibernate_compression_threads);
page = (void *)__get_free_page(GFP_NOIO | __GFP_HIGH);
if (!page) {
@@ -719,7 +728,7 @@ static int save_compressed_image(struct swap_map_handle *handle,
goto out_clean;
}
- crc = kzalloc(sizeof(*crc), GFP_KERNEL);
+ crc = alloc_crc_data(nr_threads);
if (!crc) {
pr_err("Failed to allocate crc\n");
ret = -ENOMEM;
@@ -888,11 +897,7 @@ out_finish:
out_clean:
hib_finish_batch(&hb);
- if (crc) {
- if (crc->thr)
- kthread_stop(crc->thr);
- kfree(crc);
- }
+ free_crc_data(crc);
if (data) {
for (thr = 0; thr < nr_threads; thr++) {
if (data[thr].thr)
@@ -908,13 +913,6 @@ out_clean:
return ret;
}
-/**
- * enough_swap - Make sure we have enough swap to save the image.
- *
- * Returns TRUE or FALSE after checking the total amount of swap
- * space available from the resume partition.
- */
-
static int enough_swap(unsigned int nr_pages)
{
unsigned int free_swap = count_swap_pages(root_swap, 1);
@@ -927,15 +925,16 @@ static int enough_swap(unsigned int nr_pages)
}
/**
- * swsusp_write - Write entire image and metadata.
- * @flags: flags to pass to the "boot" kernel in the image header
+ * swsusp_write - Write entire image and metadata.
+ * @flags: flags to pass to the "boot" kernel in the image header
+ *
+ * It is important _NOT_ to umount filesystems at this point. We want them
+ * synced (in case something goes wrong) but we DO not want to mark filesystem
+ * clean: it is not. (And it does not matter, if we resume correctly, we'll mark
+ * system clean, anyway.)
*
- * It is important _NOT_ to umount filesystems at this point. We want
- * them synced (in case something goes wrong) but we DO not want to mark
- * filesystem clean: it is not. (And it does not matter, if we resume
- * correctly, we'll mark system clean, anyway.)
+ * Return: 0 on success, negative error code on failure.
*/
-
int swsusp_write(unsigned int flags)
{
struct swap_map_handle handle;
@@ -978,8 +977,8 @@ out_finish:
}
/*
- * The following functions allow us to read data using a swap map
- * in a file-like way.
+ * The following functions allow us to read data using a swap map in a file-like
+ * way.
*/
static void release_swap_reader(struct swap_map_handle *handle)
@@ -1081,12 +1080,6 @@ static int swap_reader_finish(struct swap_map_handle *handle)
return 0;
}
-/**
- * load_image - load the image using the swap map handle
- * @handle and the snapshot handle @snapshot
- * (assume there are @nr_pages pages to load)
- */
-
static int load_image(struct swap_map_handle *handle,
struct snapshot_handle *snapshot,
unsigned int nr_to_read)
@@ -1157,9 +1150,6 @@ struct dec_data {
unsigned char cmp[CMP_SIZE]; /* compressed buffer */
};
-/*
- * Decompression function that runs in its own thread.
- */
static int decompress_threadfn(void *data)
{
struct dec_data *d = data;
@@ -1194,12 +1184,6 @@ static int decompress_threadfn(void *data)
return 0;
}
-/**
- * load_compressed_image - Load compressed image data and decompress it.
- * @handle: Swap map handle to use for loading data.
- * @snapshot: Image to copy uncompressed data into.
- * @nr_to_read: Number of pages to load.
- */
static int load_compressed_image(struct swap_map_handle *handle,
struct snapshot_handle *snapshot,
unsigned int nr_to_read)
@@ -1227,7 +1211,7 @@ static int load_compressed_image(struct swap_map_handle *handle,
* footprint.
*/
nr_threads = num_online_cpus() - 1;
- nr_threads = clamp_val(nr_threads, 1, CMP_THREADS);
+ nr_threads = clamp_val(nr_threads, 1, hibernate_compression_threads);
page = vmalloc_array(CMP_MAX_RD_PAGES, sizeof(*page));
if (!page) {
@@ -1243,7 +1227,7 @@ static int load_compressed_image(struct swap_map_handle *handle,
goto out_clean;
}
- crc = kzalloc(sizeof(*crc), GFP_KERNEL);
+ crc = alloc_crc_data(nr_threads);
if (!crc) {
pr_err("Failed to allocate crc\n");
ret = -ENOMEM;
@@ -1510,11 +1494,7 @@ out_clean:
hib_finish_batch(&hb);
for (i = 0; i < ring_size; i++)
free_page((unsigned long)page[i]);
- if (crc) {
- if (crc->thr)
- kthread_stop(crc->thr);
- kfree(crc);
- }
+ free_crc_data(crc);
if (data) {
for (thr = 0; thr < nr_threads; thr++) {
if (data[thr].thr)
@@ -1533,8 +1513,9 @@ out_clean:
* swsusp_read - read the hibernation image.
* @flags_p: flags passed by the "frozen" kernel in the image header should
* be written into this memory location
+ *
+ * Return: 0 on success, negative error code on failure.
*/
-
int swsusp_read(unsigned int *flags_p)
{
int error;
@@ -1571,8 +1552,9 @@ static void *swsusp_holder;
/**
* swsusp_check - Open the resume device and check for the swsusp signature.
* @exclusive: Open the resume device exclusively.
+ *
+ * Return: 0 if a valid image is found, negative error code otherwise.
*/
-
int swsusp_check(bool exclusive)
{
void *holder = exclusive ? &swsusp_holder : NULL;
@@ -1622,7 +1604,6 @@ put:
/**
* swsusp_close - close resume device.
*/
-
void swsusp_close(void)
{
if (IS_ERR(hib_resume_bdev_file)) {
@@ -1634,9 +1615,10 @@ void swsusp_close(void)
}
/**
- * swsusp_unmark - Unmark swsusp signature in the resume device
+ * swsusp_unmark - Unmark swsusp signature in the resume device
+ *
+ * Return: 0 on success, negative error code on failure.
*/
-
#ifdef CONFIG_SUSPEND
int swsusp_unmark(void)
{
@@ -1662,8 +1644,46 @@ int swsusp_unmark(void)
}
#endif
+static ssize_t hibernate_compression_threads_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", hibernate_compression_threads);
+}
+
+static ssize_t hibernate_compression_threads_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned long val;
+
+ if (kstrtoul(buf, 0, &val))
+ return -EINVAL;
+
+ if (val < 1)
+ return -EINVAL;
+
+ hibernate_compression_threads = val;
+ return n;
+}
+power_attr(hibernate_compression_threads);
+
+static struct attribute *g[] = {
+ &hibernate_compression_threads_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group attr_group = {
+ .attrs = g,
+};
+
static int __init swsusp_header_init(void)
{
+ int error;
+
+ error = sysfs_create_group(power_kobj, &attr_group);
+ if (error)
+ return -ENOMEM;
+
swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
if (!swsusp_header)
panic("Could not allocate memory for swsusp_header\n");
@@ -1671,3 +1691,19 @@ static int __init swsusp_header_init(void)
}
core_initcall(swsusp_header_init);
+
+static int __init hibernate_compression_threads_setup(char *str)
+{
+ int rc = kstrtouint(str, 0, &hibernate_compression_threads);
+
+ if (rc)
+ return rc;
+
+ if (hibernate_compression_threads < 1)
+ hibernate_compression_threads = CMP_THREADS;
+
+ return 1;
+
+}
+
+__setup("hibernate_compression_threads=", hibernate_compression_threads_setup);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 3f9e3efb9f6e..4401cfe26e5c 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -278,7 +278,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
if (data->frozen)
break;
- ksys_sync_helper();
+ error = pm_sleep_fs_sync();
+ if (error)
+ break;
error = freeze_processes();
if (error)
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 1cb7a3d70e65..c174afe1dd17 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -131,12 +131,13 @@ void __cpuidle default_idle_call(void)
}
static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev,
+ u64 max_latency_ns)
{
if (current_clr_polling_and_test())
return -EBUSY;
- return cpuidle_enter_s2idle(drv, dev);
+ return cpuidle_enter_s2idle(drv, dev, max_latency_ns);
}
static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
@@ -205,12 +206,13 @@ static void cpuidle_idle_call(void)
u64 max_latency_ns;
if (idle_should_enter_s2idle()) {
+ max_latency_ns = cpu_wakeup_latency_qos_limit() *
+ NSEC_PER_USEC;
- entered_state = call_cpuidle_s2idle(drv, dev);
+ entered_state = call_cpuidle_s2idle(drv, dev,
+ max_latency_ns);
if (entered_state > 0)
goto exit_idle;
-
- max_latency_ns = U64_MAX;
} else {
max_latency_ns = dev->forced_idle_latency_limit_ns;
}