From d42044aad6528e0c9533dbaf836d1b0fbb19fe2d Mon Sep 17 00:00:00 2001 From: David Reaver Date: Sun, 12 Jan 2025 07:26:55 -0800 Subject: PM: hibernate: Replace deprecated kmap_atomic() with kmap_local_page() kmap_atomic() is deprecated and should be replaced with kmap_local_page() [1][2]. kmap_local_page() is faster in kernels with HIGHMEM enabled, can take page faults, and allows preemption. According to [2], this replacement is safe as long as the code between kmap_atomic() and kunmap_atomic() does not implicitly depend on disabling page faults or preemption. In all of the call sites in this patch, the only thing happening between mapping and unmapping pages is copy_page() calls, and I don't suspect they depend on disabling page faults or preemption. Link: https://lwn.net/Articles/836144/ [1] Link: https://docs.kernel.org/mm/highmem.html#temporary-virtual-mappings [2] Signed-off-by: David Reaver Link: https://patch.msgid.link/20250112152658.20132-1-me@davidreaver.com Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index c9fb559a6399..4e6e24e8b854 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2270,9 +2270,9 @@ int snapshot_read_next(struct snapshot_handle *handle) */ void *kaddr; - kaddr = kmap_atomic(page); + kaddr = kmap_local_page(page); copy_page(buffer, kaddr); - kunmap_atomic(kaddr); + kunmap_local(kaddr); handle->buffer = buffer; } else { handle->buffer = page_address(page); @@ -2561,9 +2561,9 @@ static void copy_last_highmem_page(void) if (last_highmem_page) { void *dst; - dst = kmap_atomic(last_highmem_page); + dst = kmap_local_page(last_highmem_page); copy_page(dst, buffer); - kunmap_atomic(dst); + kunmap_local(dst); last_highmem_page = NULL; } } @@ -2881,13 +2881,13 @@ static inline void swap_two_pages_data(struct page *p1, struct page *p2, { void *kaddr1, *kaddr2; - kaddr1 = kmap_atomic(p1); - kaddr2 = kmap_atomic(p2); + kaddr1 = kmap_local_page(p1); + kaddr2 = kmap_local_page(p2); copy_page(buf, kaddr1); copy_page(kaddr1, kaddr2); copy_page(kaddr2, buf); - kunmap_atomic(kaddr2); - kunmap_atomic(kaddr1); + kunmap_local(kaddr2); + kunmap_local(kaddr1); } /** -- cgit v1.2.3 From 3e5eee147b7b0f5a93f56beffe34e81fdd00fa0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 18 Feb 2025 21:11:42 +0100 Subject: PM: Block enabling of runtime PM during system suspend If device_prepare() runs on a device that has never had runtime PM enabled so far, it may reasonably assume that runtime PM will not be enabled for that device during the system suspend-resume cycle currently in progress, but this has never been guaranteed. To verify this assumption, make device_prepare() arrange for triggering a device warning accompanied by a call trace dump if runtime PM is enabled for such a device after it has returned. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/6131109.lOV4Wx5bFT@rjwysocki.net --- drivers/base/power/main.c | 9 +++++++++ drivers/base/power/runtime.c | 24 ++++++++++++++++++++++++ include/linux/pm.h | 1 + include/linux/pm_runtime.h | 4 ++++ 4 files changed, 38 insertions(+) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index dffa2aa1ba7d..acabd9f3e60f 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1109,6 +1109,8 @@ static void device_complete(struct device *dev, pm_message_t state) device_unlock(dev); out: + /* If enabling runtime PM for the device is blocked, unblock it. */ + pm_runtime_unblock(dev); pm_runtime_put(dev); } @@ -1815,6 +1817,13 @@ static int device_prepare(struct device *dev, pm_message_t state) * it again during the complete phase. */ pm_runtime_get_noresume(dev); + /* + * If runtime PM is disabled for the device at this point and it has + * never been enabled so far, it should not be enabled until this system + * suspend-resume cycle is complete, so prepare to trigger a warning on + * subsequent attempts to enable it. + */ + pm_runtime_block_if_disabled(dev); if (dev->power.syscore) return 0; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index a5aed89e1a6b..797ea38ceba7 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1460,6 +1460,26 @@ int pm_runtime_barrier(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_barrier); +void pm_runtime_block_if_disabled(struct device *dev) +{ + spin_lock_irq(&dev->power.lock); + + if (dev->power.disable_depth && dev->power.last_status == RPM_INVALID) + dev->power.last_status = RPM_BLOCKED; + + spin_unlock_irq(&dev->power.lock); +} + +void pm_runtime_unblock(struct device *dev) +{ + spin_lock_irq(&dev->power.lock); + + if (dev->power.last_status == RPM_BLOCKED) + dev->power.last_status = RPM_INVALID; + + spin_unlock_irq(&dev->power.lock); +} + void __pm_runtime_disable(struct device *dev, bool check_resume) { spin_lock_irq(&dev->power.lock); @@ -1518,6 +1538,10 @@ void pm_runtime_enable(struct device *dev) if (--dev->power.disable_depth > 0) goto out; + if (dev->power.last_status == RPM_BLOCKED) { + dev_warn(dev, "Attempt to enable runtime PM when it is blocked\n"); + dump_stack(); + } dev->power.last_status = RPM_INVALID; dev->power.accounting_timestamp = ktime_get_mono_fast_ns(); diff --git a/include/linux/pm.h b/include/linux/pm.h index 78855d794342..6ca6f34c58c3 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -597,6 +597,7 @@ enum rpm_status { RPM_RESUMING, RPM_SUSPENDED, RPM_SUSPENDING, + RPM_BLOCKED, }; /* diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 72c62e1171ca..10769119867b 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -77,6 +77,8 @@ extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); +extern void pm_runtime_block_if_disabled(struct device *dev); +extern void pm_runtime_unblock(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); extern void pm_runtime_allow(struct device *dev); @@ -271,6 +273,8 @@ static inline int pm_runtime_get_if_active(struct device *dev) static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { 
return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } +static inline void pm_runtime_block_if_disabled(struct device *dev) {} +static inline void pm_runtime_unblock(struct device *dev) {} static inline void pm_runtime_enable(struct device *dev) {} static inline void __pm_runtime_disable(struct device *dev, bool c) {} static inline void pm_runtime_allow(struct device *dev) {} -- cgit v1.2.3 From 758cc55ce3d5d79e8f98adbd03ad2cd29133af33 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 18 Feb 2025 21:13:09 +0100 Subject: PM: runtime: Introduce pm_runtime_blocked() Introduce a new helper function called pm_runtime_blocked() for checking the power.last_status value indicating whether or not enabling runtime PM for the given device has been blocked (which happens in the "prepare" phase of system-wide suspend if runtime PM is disabled for the given device at that point). Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/4632087.LvFx2qVVIh@rjwysocki.net --- drivers/base/power/runtime.c | 17 +++++++++++++++++ include/linux/pm_runtime.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 797ea38ceba7..c0f5a9f89299 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1555,6 +1555,23 @@ out: } EXPORT_SYMBOL_GPL(pm_runtime_enable); +bool pm_runtime_blocked(struct device *dev) +{ + bool ret; + + /* + * dev->power.last_status is a bit field, so in case it is updated via + * RMW, read it under the spin lock. + */ + spin_lock_irq(&dev->power.lock); + + ret = dev->power.last_status == RPM_BLOCKED; + + spin_unlock_irq(&dev->power.lock); + + return ret; +} + static void pm_runtime_disable_action(void *data) { pm_runtime_dont_use_autosuspend(data); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 10769119867b..aea0395c10a1 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -81,6 +81,7 @@ extern void pm_runtime_block_if_disabled(struct device *dev); extern void pm_runtime_unblock(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); +extern bool pm_runtime_blocked(struct device *dev); extern void pm_runtime_allow(struct device *dev); extern void pm_runtime_forbid(struct device *dev); extern void pm_runtime_no_callbacks(struct device *dev); @@ -277,6 +278,7 @@ static inline void pm_runtime_block_if_disabled(struct device *dev) {} static inline void pm_runtime_unblock(struct device *dev) {} static inline void pm_runtime_enable(struct device *dev) {} static inline void __pm_runtime_disable(struct device *dev, bool c) {} +static inline bool pm_runtime_blocked(struct device *dev) { return true; } static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} -- cgit v1.2.3 From bca84a7b93fdc744d79d94423c2cb905b1832310 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 18 Feb 2025 21:16:48 +0100 Subject: PM: sleep: Use DPM_FLAG_SMART_SUSPEND conditionally A recent discussion has revealed that using DPM_FLAG_SMART_SUSPEND unconditionally is generally problematic because it may lead to situations in which the device's runtime PM information is internally inconsistent or does not reflect its real state [1]. 
For this reason, change the handling of DPM_FLAG_SMART_SUSPEND so that it is only taken into account if it is consistently set by the drivers of all devices having any PM callbacks throughout dependency graphs in accordance with the following rules: - The "smart suspend" feature is only enabled for devices whose drivers ask for it (that is, set DPM_FLAG_SMART_SUSPEND) and for devices without PM callbacks unless they have never had runtime PM enabled. - The "smart suspend" feature is not enabled for a device if it has not been enabled for the device's parent unless the parent does not take children into account or it has never had runtime PM enabled. - The "smart suspend" feature is not enabled for a device if it has not been enabled for one of the device's suppliers taking runtime PM into account unless that supplier has never had runtime PM enabled. Namely, introduce a new device PM flag called smart_suspend that is only set if the above conditions are met and update all DPM_FLAG_SMART_SUSPEND users to check power.smart_suspend instead of checking DPM_FLAG_SMART_SUSPEND directly. At the same time, drop the power.set_active flag introduced recently in commit 3775fc538f53 ("PM: sleep: core: Synchronize runtime PM status of parents and children") because it is now sufficient to check power.smart_suspend along with the dev_pm_skip_resume() return value to decide whether or not pm_runtime_set_active() needs to be called for the device. Link: https://lore.kernel.org/linux-pm/CAPDyKFroyU3YDSfw_Y6k3giVfajg3NQGwNWeteJWqpW29BojhQ@mail.gmail.com/ [1] Fixes: 7585946243d6 ("PM: sleep: core: Restrict power.set_active propagation") Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Acked-by: Bjorn Helgaas # drivers/pci Link: https://patch.msgid.link/1914558.tdWV9SEqCh@rjwysocki.net --- drivers/acpi/device_pm.c | 4 +-- drivers/base/power/main.c | 63 ++++++++++++++++++++++++++++++++++++----------- drivers/mfd/intel-lpss.c | 2 +- drivers/pci/pci-driver.c | 6 ++--- include/linux/device.h | 9 +++++++ include/linux/pm.h | 2 +- 6 files changed, 64 insertions(+), 22 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 3b4d048c4941..dbd4446025ec 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1161,7 +1161,7 @@ EXPORT_SYMBOL_GPL(acpi_subsys_complete); */ int acpi_subsys_suspend(struct device *dev) { - if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) || + if (!dev_pm_smart_suspend(dev) || acpi_dev_needs_resume(dev, ACPI_COMPANION(dev))) pm_runtime_resume(dev); @@ -1320,7 +1320,7 @@ EXPORT_SYMBOL_GPL(acpi_subsys_restore_early); */ int acpi_subsys_poweroff(struct device *dev) { - if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) || + if (!dev_pm_smart_suspend(dev) || acpi_dev_needs_resume(dev, ACPI_COMPANION(dev))) pm_runtime_resume(dev); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index acabd9f3e60f..cc9903065900 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -656,15 +656,13 @@ static void device_resume_noirq(struct device *dev, pm_message_t state, bool asy * so change its status accordingly. * * Otherwise, the device is going to be resumed, so set its PM-runtime - * status to "active" unless its power.set_active flag is clear, in + * status to "active" unless its power.smart_suspend flag is clear, in * which case it is not necessary to update its PM-runtime status.
*/ - if (skip_resume) { + if (skip_resume) pm_runtime_set_suspended(dev); - } else if (dev->power.set_active) { + else if (dev_pm_smart_suspend(dev)) pm_runtime_set_active(dev); - dev->power.set_active = false; - } if (dev->pm_domain) { info = "noirq power domain "; @@ -1282,14 +1280,8 @@ Skip: dev->power.may_skip_resume)) dev->power.must_resume = true; - if (dev->power.must_resume) { - if (dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) { - dev->power.set_active = true; - if (dev->parent && !dev->parent->power.ignore_children) - dev->parent->power.set_active = true; - } + if (dev->power.must_resume) dpm_superior_set_must_resume(dev); - } Complete: complete_all(&dev->power.completion); @@ -1797,6 +1789,49 @@ int dpm_suspend(pm_message_t state) return error; } +static void device_prepare_smart_suspend(struct device *dev) +{ + struct device_link *link; + int idx; + + /* + * The "smart suspend" feature is enabled for devices whose drivers ask + * for it and for devices without PM callbacks unless runtime PM is + * disabled and enabling it is blocked for them. + * + * However, if "smart suspend" is not enabled for the device's parent + * or any of its suppliers that take runtime PM into account, it cannot + * be enabled for the device either. + */ + dev->power.smart_suspend = (dev->power.no_pm_callbacks || + dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) && + !pm_runtime_blocked(dev); + + if (!dev_pm_smart_suspend(dev)) + return; + + if (dev->parent && !dev_pm_smart_suspend(dev->parent) && + !dev->parent->power.ignore_children && !pm_runtime_blocked(dev->parent)) { + dev->power.smart_suspend = false; + return; + } + + idx = device_links_read_lock(); + + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { + if (!(link->flags | DL_FLAG_PM_RUNTIME)) + continue; + + if (!dev_pm_smart_suspend(link->supplier) && + !pm_runtime_blocked(link->supplier)) { + dev->power.smart_suspend = false; + break; + } + } + + device_links_read_unlock(idx); +} + /** * device_prepare - Prepare a device for system power transition. * @dev: Device to handle. @@ -1858,6 +1893,7 @@ unlock: pm_runtime_put(dev); return ret; } + device_prepare_smart_suspend(dev); /* * A positive return value from ->prepare() means "this device appears * to be runtime-suspended and its state is fine, so if it really is @@ -2033,6 +2069,5 @@ void device_pm_check_callbacks(struct device *dev) bool dev_pm_skip_suspend(struct device *dev) { - return dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) && - pm_runtime_status_suspended(dev); + return dev_pm_smart_suspend(dev) && pm_runtime_status_suspended(dev); } diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c index 3ba05ebb9035..63d6694f7145 100644 --- a/drivers/mfd/intel-lpss.c +++ b/drivers/mfd/intel-lpss.c @@ -480,7 +480,7 @@ EXPORT_SYMBOL_NS_GPL(intel_lpss_remove, "INTEL_LPSS"); static int resume_lpss_device(struct device *dev, void *data) { - if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) + if (!dev_pm_smart_suspend(dev)) pm_runtime_resume(dev); return 0; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index f57ea36d125d..02726f36beb5 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -812,8 +812,7 @@ static int pci_pm_suspend(struct device *dev) * suspend callbacks can cope with runtime-suspended devices, it is * better to resume the device from runtime suspend here. 
*/ - if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) || - pci_dev_need_resume(pci_dev)) { + if (!dev_pm_smart_suspend(dev) || pci_dev_need_resume(pci_dev)) { pm_runtime_resume(dev); pci_dev->state_saved = false; } else { @@ -1151,8 +1150,7 @@ static int pci_pm_poweroff(struct device *dev) } /* The reason to do that is the same as in pci_pm_suspend(). */ - if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) || - pci_dev_need_resume(pci_dev)) { + if (!dev_pm_smart_suspend(dev) || pci_dev_need_resume(pci_dev)) { pm_runtime_resume(dev); pci_dev->state_saved = false; } else { diff --git a/include/linux/device.h b/include/linux/device.h index 80a5b3268986..615282365052 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1025,6 +1025,15 @@ static inline bool dev_pm_test_driver_flags(struct device *dev, u32 flags) return !!(dev->power.driver_flags & flags); } +static inline bool dev_pm_smart_suspend(struct device *dev) +{ +#ifdef CONFIG_PM_SLEEP + return dev->power.smart_suspend; +#else + return false; +#endif +} + static inline void device_lock(struct device *dev) { mutex_lock(&dev->mutex); diff --git a/include/linux/pm.h b/include/linux/pm.h index 6ca6f34c58c3..24647108f0ad 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -680,8 +680,8 @@ struct dev_pm_info { bool syscore:1; bool no_pm_callbacks:1; /* Owned by the PM core */ bool async_in_progress:1; /* Owned by the PM core */ + bool smart_suspend:1; /* Owned by the PM core */ bool must_resume:1; /* Owned by the PM core */ - bool set_active:1; /* Owned by the PM core */ bool may_skip_resume:1; /* Set by subsystems */ #else bool should_wakeup:1; -- cgit v1.2.3 From 520a552f19d55825108ab83da093084c9afb50e9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 18 Feb 2025 21:20:46 +0100 Subject: PM: sleep: Avoid unnecessary checks in device_prepare_smart_suspend() Add an optimization (on top of previous changes) to avoid calling pm_runtime_blocked(), which involves acquiring the device's PM spinlock, for devices with no PM callbacks and runtime PM "blocked". Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/2978873.e9J7NaK4W3@rjwysocki.net --- drivers/base/power/main.c | 16 +++++++++------- drivers/base/power/runtime.c | 9 +++++++-- include/linux/pm_runtime.h | 4 ++-- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index cc9903065900..a06ef91fbdb9 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1796,16 +1796,14 @@ static void device_prepare_smart_suspend(struct device *dev) /* * The "smart suspend" feature is enabled for devices whose drivers ask - * for it and for devices without PM callbacks unless runtime PM is - * disabled and enabling it is blocked for them. + * for it and for devices without PM callbacks. * * However, if "smart suspend" is not enabled for the device's parent * or any of its suppliers that take runtime PM into account, it cannot * be enabled for the device either. 
*/ - dev->power.smart_suspend = (dev->power.no_pm_callbacks || - dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) && - !pm_runtime_blocked(dev); + dev->power.smart_suspend = dev->power.no_pm_callbacks || + dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); if (!dev_pm_smart_suspend(dev)) return; @@ -1843,6 +1841,7 @@ static void device_prepare_smart_suspend(struct device *dev) static int device_prepare(struct device *dev, pm_message_t state) { int (*callback)(struct device *) = NULL; + bool no_runtime_pm; int ret = 0; /* @@ -1858,7 +1857,7 @@ static int device_prepare(struct device *dev, pm_message_t state) * suspend-resume cycle is complete, so prepare to trigger a warning on * subsequent attempts to enable it. */ - pm_runtime_block_if_disabled(dev); + no_runtime_pm = pm_runtime_block_if_disabled(dev); if (dev->power.syscore) return 0; @@ -1893,7 +1892,10 @@ unlock: pm_runtime_put(dev); return ret; } - device_prepare_smart_suspend(dev); + /* Do not enable "smart suspend" for devices without runtime PM. */ + if (!no_runtime_pm) + device_prepare_smart_suspend(dev); + /* * A positive return value from ->prepare() means "this device appears * to be runtime-suspended and its state is fine, so if it really is diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index c0f5a9f89299..e772e45d30f3 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1460,14 +1460,19 @@ int pm_runtime_barrier(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_barrier); -void pm_runtime_block_if_disabled(struct device *dev) +bool pm_runtime_block_if_disabled(struct device *dev) { + bool ret; + spin_lock_irq(&dev->power.lock); - if (dev->power.disable_depth && dev->power.last_status == RPM_INVALID) + ret = dev->power.disable_depth && dev->power.last_status == RPM_INVALID; + if (ret) dev->power.last_status = RPM_BLOCKED; spin_unlock_irq(&dev->power.lock); + + return ret; } void pm_runtime_unblock(struct device *dev) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index aea0395c10a1..01ead602aedd 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -77,7 +77,7 @@ extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); -extern void pm_runtime_block_if_disabled(struct device *dev); +extern bool pm_runtime_block_if_disabled(struct device *dev); extern void pm_runtime_unblock(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); @@ -274,7 +274,7 @@ static inline int pm_runtime_get_if_active(struct device *dev) static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } -static inline void pm_runtime_block_if_disabled(struct device *dev) {} +static inline bool pm_runtime_block_if_disabled(struct device *dev) { return true; } static inline void pm_runtime_unblock(struct device *dev) {} static inline void pm_runtime_enable(struct device *dev) {} static inline void __pm_runtime_disable(struct device *dev, bool c) {} -- cgit v1.2.3 From 52323ed1444ea5c2a5f1754ea0a2d9c8c216ccdf Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Mon, 24 Feb 2025 09:31:39 +0800 Subject: PM: hibernate: Avoid deadlock in hibernate_compressor_param_set() syzbot 
reported a deadlock in lock_system_sleep() (see below). The write operation to "/sys/module/hibernate/parameters/compressor" conflicts with the registration of ieee80211 device, resulting in a deadlock when attempting to acquire system_transition_mutex under param_lock. To avoid this deadlock, change hibernate_compressor_param_set() to use mutex_trylock() for attempting to acquire system_transition_mutex and return -EBUSY when it fails. Task flags need not be saved or adjusted before calling mutex_trylock(&system_transition_mutex) because the caller is not going to end up waiting for this mutex and if it runs concurrently with system suspend in progress, it will be frozen properly when it returns to user space. syzbot report: syz-executor895/5833 is trying to acquire lock: ffffffff8e0828c8 (system_transition_mutex){+.+.}-{4:4}, at: lock_system_sleep+0x87/0xa0 kernel/power/main.c:56 but task is already holding lock: ffffffff8e07dc68 (param_lock){+.+.}-{4:4}, at: kernel_param_lock kernel/params.c:607 [inline] ffffffff8e07dc68 (param_lock){+.+.}-{4:4}, at: param_attr_store+0xe6/0x300 kernel/params.c:586 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (param_lock){+.+.}-{4:4}: __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 ieee80211_rate_control_ops_get net/mac80211/rate.c:220 [inline] rate_control_alloc net/mac80211/rate.c:266 [inline] ieee80211_init_rate_ctrl_alg+0x18d/0x6b0 net/mac80211/rate.c:1015 ieee80211_register_hw+0x20cd/0x4060 net/mac80211/main.c:1531 mac80211_hwsim_new_radio+0x304e/0x54e0 drivers/net/wireless/virtual/mac80211_hwsim.c:5558 init_mac80211_hwsim+0x432/0x8c0 drivers/net/wireless/virtual/mac80211_hwsim.c:6910 do_one_initcall+0x128/0x700 init/main.c:1257 do_initcall_level init/main.c:1319 [inline] do_initcalls init/main.c:1335 [inline] do_basic_setup init/main.c:1354 [inline] kernel_init_freeable+0x5c7/0x900 init/main.c:1568 kernel_init+0x1c/0x2b0 init/main.c:1457 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 -> #2 (rtnl_mutex){+.+.}-{4:4}: __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 wg_pm_notification drivers/net/wireguard/device.c:80 [inline] wg_pm_notification+0x49/0x180 drivers/net/wireguard/device.c:64 notifier_call_chain+0xb7/0x410 kernel/notifier.c:85 notifier_call_chain_robust kernel/notifier.c:120 [inline] blocking_notifier_call_chain_robust kernel/notifier.c:345 [inline] blocking_notifier_call_chain_robust+0xc9/0x170 kernel/notifier.c:333 pm_notifier_call_chain_robust+0x27/0x60 kernel/power/main.c:102 snapshot_open+0x189/0x2b0 kernel/power/user.c:77 misc_open+0x35a/0x420 drivers/char/misc.c:179 chrdev_open+0x237/0x6a0 fs/char_dev.c:414 do_dentry_open+0x735/0x1c40 fs/open.c:956 vfs_open+0x82/0x3f0 fs/open.c:1086 do_open fs/namei.c:3830 [inline] path_openat+0x1e88/0x2d80 fs/namei.c:3989 do_filp_open+0x20c/0x470 fs/namei.c:4016 do_sys_openat2+0x17a/0x1e0 fs/open.c:1428 do_sys_open fs/open.c:1443 [inline] __do_sys_openat fs/open.c:1459 [inline] __se_sys_openat fs/open.c:1454 [inline] __x64_sys_openat+0x175/0x210 fs/open.c:1454 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #1 ((pm_chain_head).rwsem){++++}-{4:4}: down_read+0x9a/0x330 kernel/locking/rwsem.c:1524 blocking_notifier_call_chain_robust kernel/notifier.c:344 [inline] 
blocking_notifier_call_chain_robust+0xa9/0x170 kernel/notifier.c:333 pm_notifier_call_chain_robust+0x27/0x60 kernel/power/main.c:102 snapshot_open+0x189/0x2b0 kernel/power/user.c:77 misc_open+0x35a/0x420 drivers/char/misc.c:179 chrdev_open+0x237/0x6a0 fs/char_dev.c:414 do_dentry_open+0x735/0x1c40 fs/open.c:956 vfs_open+0x82/0x3f0 fs/open.c:1086 do_open fs/namei.c:3830 [inline] path_openat+0x1e88/0x2d80 fs/namei.c:3989 do_filp_open+0x20c/0x470 fs/namei.c:4016 do_sys_openat2+0x17a/0x1e0 fs/open.c:1428 do_sys_open fs/open.c:1443 [inline] __do_sys_openat fs/open.c:1459 [inline] __se_sys_openat fs/open.c:1454 [inline] __x64_sys_openat+0x175/0x210 fs/open.c:1454 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (system_transition_mutex){+.+.}-{4:4}: check_prev_add kernel/locking/lockdep.c:3163 [inline] check_prevs_add kernel/locking/lockdep.c:3282 [inline] validate_chain kernel/locking/lockdep.c:3906 [inline] __lock_acquire+0x249e/0x3c40 kernel/locking/lockdep.c:5228 lock_acquire.part.0+0x11b/0x380 kernel/locking/lockdep.c:5851 __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 lock_system_sleep+0x87/0xa0 kernel/power/main.c:56 hibernate_compressor_param_set+0x1c/0x210 kernel/power/hibernate.c:1452 param_attr_store+0x18f/0x300 kernel/params.c:588 module_attr_store+0x55/0x80 kernel/params.c:924 sysfs_kf_write+0x117/0x170 fs/sysfs/file.c:139 kernfs_fop_write_iter+0x33d/0x500 fs/kernfs/file.c:334 new_sync_write fs/read_write.c:586 [inline] vfs_write+0x5ae/0x1150 fs/read_write.c:679 ksys_write+0x12b/0x250 fs/read_write.c:731 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f other info that might help us debug this: Chain exists of: system_transition_mutex --> rtnl_mutex --> param_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(param_lock); lock(rtnl_mutex); lock(param_lock); lock(system_transition_mutex); *** DEADLOCK *** Reported-by: syzbot+ace60642828c074eb913@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ace60642828c074eb913 Signed-off-by: Lizhi Xu Link: https://patch.msgid.link/20250224013139.3994500-1-lizhi.xu@windriver.com [ rjw: New subject matching the code changes, changelog edits ] Signed-off-by: Rafael J. 
Wysocki --- kernel/power/hibernate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 10a01af63a80..b129ed1d25a8 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1446,10 +1446,10 @@ static const char * const comp_alg_enabled[] = { static int hibernate_compressor_param_set(const char *compressor, const struct kernel_param *kp) { - unsigned int sleep_flags; int index, ret; - sleep_flags = lock_system_sleep(); + if (!mutex_trylock(&system_transition_mutex)) + return -EBUSY; index = sysfs_match_string(comp_alg_enabled, compressor); if (index >= 0) { @@ -1461,7 +1461,7 @@ static int hibernate_compressor_param_set(const char *compressor, ret = index; } - unlock_system_sleep(sleep_flags); + mutex_unlock(&system_transition_mutex); if (ret) pr_debug("Cannot set specified compressor %s\n", -- cgit v1.2.3 From e8195f0630f1c4c2465074fe81b5fda19efd3148 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 24 Feb 2025 15:00:49 +0800 Subject: PM: sleep: Suppress sleeping parent warning in special case Currently, if power.no_callbacks is set, device_prepare() will also set power.direct_complete for the device. If power.direct_complete is set in device_resume(), the clearing of power.is_prepared will be skipped and if new children appear under the device at that point, a warning will be printed. After commit (f76b168b6f11 PM: Rename dev_pm_info.in_suspend to is_prepared), power.is_prepared is generally cleared in device_resume() before invoking the resume callback for the device which allows that callback to add new children without triggering the warning, but this does not happen for devices with power.direct_complete set. This problem is visible in USB where usb_set_interface() can be called before device_complete() clears power.is_prepared for interface devices and since ep devices are added then, the warning is printed: usb 1-1: reset high-speed USB device number 3 using ci_hdrc ep_81: PM: parent 1-1:1.1 should not be sleeping PM: resume devices took 0.936 seconds Since it is legitimate to add the ep devices at that point, the warning above is not particularly useful, so get rid of it by clearing power.is_prepared in device_resume() for devices with power.direct_complete set if they have no PM callbacks, in which case they need not actually resume for the new children to work. Suggested-by: Rafael J. Wysocki Signed-off-by: Xu Yang Link: https://patch.msgid.link/20250224070049.3338646-1-xu.yang_2@nxp.com [ rjw: New subject, changelog edits, rephrased new code comment ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index a06ef91fbdb9..e4103d29a21a 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -928,6 +928,13 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) goto Complete; if (dev->power.direct_complete) { + /* + * Allow new children to be added under the device after this + * point if it has no PM callbacks. + */ + if (dev->power.no_pm_callbacks) + dev->power.is_prepared = false; + /* Match the pm_runtime_disable() in device_suspend(). 
*/ pm_runtime_enable(dev); goto Complete; -- cgit v1.2.3 From 630d55e038728f6f5917da051984a5ec515d152e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 21 Feb 2025 05:02:19 -0800 Subject: PM: wakeup: Remove needless return in three void APIs Remove needless 'return' in the following void APIs: __pm_wakeup_event() pm_wakeup_event() pm_wakeup_hard_event() Since both the API and callee involved are void functions. Signed-off-by: Zijun Hu Link: https://patch.msgid.link/20250221-rmv_return-v1-14-cc8dff275827@quicinc.com Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index d501c09c60cd..51e0e8dd5f9e 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -205,17 +205,17 @@ static inline void device_set_awake_path(struct device *dev) static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) { - return pm_wakeup_ws_event(ws, msec, false); + pm_wakeup_ws_event(ws, msec, false); } static inline void pm_wakeup_event(struct device *dev, unsigned int msec) { - return pm_wakeup_dev_event(dev, msec, false); + pm_wakeup_dev_event(dev, msec, false); } static inline void pm_wakeup_hard_event(struct device *dev) { - return pm_wakeup_dev_event(dev, 0, true); + pm_wakeup_dev_event(dev, 0, true); } /** -- cgit v1.2.3 From eeb87d17aceab7803a5a5bcb6cf2817b745157cf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Feb 2025 11:53:50 +0100 Subject: PM: sleep: Adjust check before setting power.must_resume The check before setting power.must_resume in device_suspend_noirq() does not take power.child_count into account, but it should do that, so use pm_runtime_need_not_resume() in it for this purpose and adjust the comment next to it accordingly. Fixes: 107d47b2b95e ("PM: sleep: core: Simplify the SMART_SUSPEND flag handling") Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/3353728.44csPzL39Z@rjwysocki.net --- drivers/base/power/main.c | 13 ++++++------- drivers/base/power/runtime.c | 2 +- include/linux/pm_runtime.h | 2 ++ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index e4103d29a21a..b03cdbc75b6d 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1277,14 +1277,13 @@ Skip: dev->power.is_noirq_suspended = true; /* - * Skipping the resume of devices that were in use right before the - * system suspend (as indicated by their PM-runtime usage counters) - * would be suboptimal. Also resume them if doing that is not allowed - * to be skipped. + * Devices must be resumed unless they are explicitly allowed to be left + * in suspend, but even in that case skipping the resume of devices that + * were in use right before the system suspend (as indicated by their + * runtime PM usage counters and child counters) would be suboptimal. 
*/ - if (atomic_read(&dev->power.usage_count) > 1 || - !(dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME) && - dev->power.may_skip_resume)) + if (!(dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME) && + dev->power.may_skip_resume) || !pm_runtime_need_not_resume(dev)) dev->power.must_resume = true; if (dev->power.must_resume) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index e772e45d30f3..3d08eb677177 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1906,7 +1906,7 @@ void pm_runtime_drop_link(struct device_link *link) pm_request_idle(link->supplier); } -static bool pm_runtime_need_not_resume(struct device *dev) +bool pm_runtime_need_not_resume(struct device *dev) { return atomic_read(&dev->power.usage_count) <= 1 && (atomic_read(&dev->power.child_count) == 0 || diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 01ead602aedd..4c5b74b745d5 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -66,6 +66,7 @@ static inline bool queue_pm_work(struct work_struct *work) extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); +extern bool pm_runtime_need_not_resume(struct device *dev); extern int pm_runtime_force_suspend(struct device *dev); extern int pm_runtime_force_resume(struct device *dev); @@ -244,6 +245,7 @@ static inline bool queue_pm_work(struct work_struct *work) { return false; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } +static inline bool pm_runtime_need_not_resume(struct device *dev) {return true; } static inline int pm_runtime_force_suspend(struct device *dev) { return 0; } static inline int pm_runtime_force_resume(struct device *dev) { return 0; } -- cgit v1.2.3 From cb88c229fe77ea16cef2b9c8154cf44d331818a6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Feb 2025 11:45:52 +0100 Subject: PM: sleep: Update power.smart_suspend under PM spinlock Put the update of the power.smart_suspend device flag under the PM spinlock of the device in case multiple bit fields in struct dev_pm_info occupy one memory location which needs to be updated via RMW every time any of these bit fields is updated. The lock in question is already held around the power.direct_complete flag update in device_prepare() for the same reason, so this change does not add locking-related overhead to the code. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/2368159.ElGaqSPkdT@rjwysocki.net --- drivers/base/power/main.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index b03cdbc75b6d..2f86d7cfdbbc 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1795,9 +1795,10 @@ int dpm_suspend(pm_message_t state) return error; } -static void device_prepare_smart_suspend(struct device *dev) +static bool device_prepare_smart_suspend(struct device *dev) { struct device_link *link; + bool ret = true; int idx; /* @@ -1808,17 +1809,13 @@ static void device_prepare_smart_suspend(struct device *dev) * or any of its suppliers that take runtime PM into account, it cannot * be enabled for the device either. 
*/ - dev->power.smart_suspend = dev->power.no_pm_callbacks || - dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - - if (!dev_pm_smart_suspend(dev)) - return; + if (!dev->power.no_pm_callbacks && + !dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) + return false; if (dev->parent && !dev_pm_smart_suspend(dev->parent) && - !dev->parent->power.ignore_children && !pm_runtime_blocked(dev->parent)) { - dev->power.smart_suspend = false; - return; - } + !dev->parent->power.ignore_children && !pm_runtime_blocked(dev->parent)) + return false; idx = device_links_read_lock(); @@ -1828,12 +1825,14 @@ static void device_prepare_smart_suspend(struct device *dev) if (!dev_pm_smart_suspend(link->supplier) && !pm_runtime_blocked(link->supplier)) { - dev->power.smart_suspend = false; + ret = false; break; } } device_links_read_unlock(idx); + + return ret; } /** @@ -1847,7 +1846,7 @@ static void device_prepare_smart_suspend(struct device *dev) static int device_prepare(struct device *dev, pm_message_t state) { int (*callback)(struct device *) = NULL; - bool no_runtime_pm; + bool smart_suspend; int ret = 0; /* @@ -1863,7 +1862,7 @@ static int device_prepare(struct device *dev, pm_message_t state) * suspend-resume cycle is complete, so prepare to trigger a warning on * subsequent attempts to enable it. */ - no_runtime_pm = pm_runtime_block_if_disabled(dev); + smart_suspend = !pm_runtime_block_if_disabled(dev); if (dev->power.syscore) return 0; @@ -1899,9 +1898,12 @@ unlock: pm_runtime_put(dev); return ret; } /* Do not enable "smart suspend" for devices without runtime PM. */ - if (!no_runtime_pm) - device_prepare_smart_suspend(dev); + if (smart_suspend) + smart_suspend = device_prepare_smart_suspend(dev); + + spin_lock_irq(&dev->power.lock); + dev->power.smart_suspend = smart_suspend; /* * A positive return value from ->prepare() means "this device appears * to be runtime-suspended and its state is fine, so if it really is @@ -1909,11 +1911,12 @@ unlock: * will do the same thing with all of its descendants". This only * applies to suspend transitions, however. */ - spin_lock_irq(&dev->power.lock); dev->power.direct_complete = state.event == PM_EVENT_SUSPEND && (ret > 0 || dev->power.no_pm_callbacks) && !dev_pm_test_driver_flags(dev, DPM_FLAG_NO_DIRECT_COMPLETE); + spin_unlock_irq(&dev->power.lock); + return 0; } -- cgit v1.2.3 From 1476bb20eec33bd68b67c7bb7a5d62063af8148d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Feb 2025 11:47:33 +0100 Subject: PM: runtime: Convert pm_runtime_blocked() to static inline The comment in pm_runtime_blocked() is actually wrong: power.last_status is not a bit field. Its data type is an enum and so one can reasonably assume that partial updates of it will not be observed. Accordingly, pm_runtime_blocked() can be converted to a static inline function and the related locking overhead can be eliminated, so long as it is only used in system suspend/resume code paths because power.last_status is not expected to be updated concurrently while that code is running. Signed-off-by: Rafael J.
Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/1923449.tdWV9SEqCh@rjwysocki.net --- drivers/base/power/runtime.c | 17 ----------------- include/linux/pm_runtime.h | 12 +++++++++++- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 3d08eb677177..42a58ed45a08 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1560,23 +1560,6 @@ out: } EXPORT_SYMBOL_GPL(pm_runtime_enable); -bool pm_runtime_blocked(struct device *dev) -{ - bool ret; - - /* - * dev->power.last_status is a bit field, so in case it is updated via - * RMW, read it under the spin lock. - */ - spin_lock_irq(&dev->power.lock); - - ret = dev->power.last_status == RPM_BLOCKED; - - spin_unlock_irq(&dev->power.lock); - - return ret; -} - static void pm_runtime_disable_action(void *data) { pm_runtime_dont_use_autosuspend(data); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 4c5b74b745d5..7fb5a459847e 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -82,7 +82,6 @@ extern bool pm_runtime_block_if_disabled(struct device *dev); extern void pm_runtime_unblock(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); -extern bool pm_runtime_blocked(struct device *dev); extern void pm_runtime_allow(struct device *dev); extern void pm_runtime_forbid(struct device *dev); extern void pm_runtime_no_callbacks(struct device *dev); @@ -200,6 +199,17 @@ static inline bool pm_runtime_enabled(struct device *dev) return !dev->power.disable_depth; } +/** + * pm_runtime_blocked - Check if runtime PM enabling is blocked. + * @dev: Target device. + * + * Do not call this function outside system suspend/resume code paths. + */ +static inline bool pm_runtime_blocked(struct device *dev) +{ + return dev->power.last_status == RPM_BLOCKED; +} + /** * pm_runtime_has_no_callbacks - Check if runtime PM callbacks may be present. * @dev: Target device. -- cgit v1.2.3 From a84c2a885bc62a61e08fbcd9976a2a40400470c0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Feb 2025 11:49:12 +0100 Subject: PM: core: Tweak pm_runtime_block_if_disabled() return value Modify pm_runtime_block_if_disabled() to return true when runtime PM is disabled for the device, regardless of the power.last_status value. This effectively prevents "smart suspend" from being enabled for devices with runtime PM disabled in device_prepare(), even transiently, so update the related comment in that function accordingly. If a device has runtime PM disabled in device_prepare(), it is not actually known whether or not runtime PM will be enabled for that device going forward, so it is more appropriate to postpone the "smart suspend" optimization for the device in the given system suspend-resume cycle than to enable it and get confused going forward. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/13718674.uLZWGnKmhe@rjwysocki.net --- drivers/base/power/main.c | 2 +- drivers/base/power/runtime.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 2f86d7cfdbbc..9215ec9f326b 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1897,7 +1897,7 @@ unlock: pm_runtime_put(dev); return ret; } - /* Do not enable "smart suspend" for devices without runtime PM. 
*/ + /* Do not enable "smart suspend" for devices with disabled runtime PM. */ if (smart_suspend) smart_suspend = device_prepare_smart_suspend(dev); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 42a58ed45a08..18e40dce460a 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1466,8 +1466,8 @@ bool pm_runtime_block_if_disabled(struct device *dev) spin_lock_irq(&dev->power.lock); - ret = dev->power.disable_depth && dev->power.last_status == RPM_INVALID; - if (ret) + ret = !pm_runtime_enabled(dev); + if (ret && dev->power.last_status == RPM_INVALID) dev->power.last_status = RPM_BLOCKED; spin_unlock_irq(&dev->power.lock); -- cgit v1.2.3 From 3038b22bc098565b93cfb3cc8933b89701feb407 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 25 Feb 2025 17:38:58 +0100 Subject: PM: sleep: Rename power.async_in_progress to power.work_in_progress Rename the async_in_progress field in struct dev_pm_info to work_in_progress as after subsequent changes it will mean work in general rather than just async work. No functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/3338693.aeNJFYEL58@rjwysocki.net --- drivers/base/power/main.c | 12 ++++++------ include/linux/pm.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 9215ec9f326b..edf5a4af8b03 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -602,7 +602,7 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) reinit_completion(&dev->power.completion); if (is_async(dev)) { - dev->power.async_in_progress = true; + dev->power.work_in_progress = true; get_device(dev); @@ -614,9 +614,9 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) /* * Because async_schedule_dev_nocall() above has returned false or it * has not been called at all, func() is not running and it is safe to - * update the async_in_progress flag without extra synchronization. + * update the work_in_progress flag without extra synchronization. 
*/ - dev->power.async_in_progress = false; + dev->power.work_in_progress = false; return false; } @@ -736,7 +736,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) dev = to_device(dpm_noirq_list.next); list_move_tail(&dev->power.entry, &dpm_late_early_list); - if (!dev->power.async_in_progress) { + if (!dev->power.work_in_progress) { get_device(dev); mutex_unlock(&dpm_list_mtx); @@ -876,7 +876,7 @@ void dpm_resume_early(pm_message_t state) dev = to_device(dpm_late_early_list.next); list_move_tail(&dev->power.entry, &dpm_suspended_list); - if (!dev->power.async_in_progress) { + if (!dev->power.work_in_progress) { get_device(dev); mutex_unlock(&dpm_list_mtx); @@ -1049,7 +1049,7 @@ void dpm_resume(pm_message_t state) dev = to_device(dpm_suspended_list.next); list_move_tail(&dev->power.entry, &dpm_prepared_list); - if (!dev->power.async_in_progress) { + if (!dev->power.work_in_progress) { get_device(dev); mutex_unlock(&dpm_list_mtx); diff --git a/include/linux/pm.h b/include/linux/pm.h index 24647108f0ad..63a8dffda787 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -679,7 +679,7 @@ struct dev_pm_info { bool wakeup_path:1; bool syscore:1; bool no_pm_callbacks:1; /* Owned by the PM core */ - bool async_in_progress:1; /* Owned by the PM core */ + bool work_in_progress:1; /* Owned by the PM core */ bool smart_suspend:1; /* Owned by the PM core */ bool must_resume:1; /* Owned by the PM core */ bool may_skip_resume:1; /* Set by subsystems */ -- cgit v1.2.3 From 628ccd80529223c19d22e06086be6dd87d064e0c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 25 Feb 2025 17:39:40 +0100 Subject: PM: sleep: Rearrange dpm_async_fn() and async state clearing In preparation for subsequent changes, move the power.completion reinitialization along with clearing power.work_in_progress into a separate function called dpm_clear_async_state() and rearrange dpm_async_fn() to get rid of unnecessary indentation. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Link: https://patch.msgid.link/8494650.T7Z3S40VBb@rjwysocki.net --- drivers/base/power/main.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index edf5a4af8b03..d240dc352b1f 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -599,27 +599,34 @@ static bool is_async(struct device *dev) static bool dpm_async_fn(struct device *dev, async_func_t func) { - reinit_completion(&dev->power.completion); + if (!is_async(dev)) + return false; - if (is_async(dev)) { - dev->power.work_in_progress = true; + dev->power.work_in_progress = true; - get_device(dev); + get_device(dev); - if (async_schedule_dev_nocall(func, dev)) - return true; + if (async_schedule_dev_nocall(func, dev)) + return true; + + put_device(dev); - put_device(dev); - } /* - * Because async_schedule_dev_nocall() above has returned false or it - * has not been called at all, func() is not running and it is safe to - * update the work_in_progress flag without extra synchronization. + * async_schedule_dev_nocall() above has returned false, so func() is + * not running and it is safe to update power.work_in_progress without + * extra synchronization. 
*/ dev->power.work_in_progress = false; + return false; } +static void dpm_clear_async_state(struct device *dev) +{ + reinit_completion(&dev->power.completion); + dev->power.work_in_progress = false; +} + /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. @@ -729,8 +736,10 @@ static void dpm_noirq_resume_devices(pm_message_t state) * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) + list_for_each_entry(dev, &dpm_noirq_list, power.entry) { + dpm_clear_async_state(dev); dpm_async_fn(dev, async_resume_noirq); + } while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); @@ -869,8 +878,10 @@ void dpm_resume_early(pm_message_t state) * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. */ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) + list_for_each_entry(dev, &dpm_late_early_list, power.entry) { + dpm_clear_async_state(dev); dpm_async_fn(dev, async_resume_early); + } while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); @@ -1042,8 +1053,10 @@ void dpm_resume(pm_message_t state) * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. */ - list_for_each_entry(dev, &dpm_suspended_list, power.entry) + list_for_each_entry(dev, &dpm_suspended_list, power.entry) { + dpm_clear_async_state(dev); dpm_async_fn(dev, async_resume); + } while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); @@ -1320,6 +1333,7 @@ static int dpm_noirq_suspend_devices(pm_message_t state) list_move(&dev->power.entry, &dpm_noirq_list); + dpm_clear_async_state(dev); if (dpm_async_fn(dev, async_suspend_noirq)) continue; @@ -1497,6 +1511,7 @@ int dpm_suspend_late(pm_message_t state) list_move(&dev->power.entry, &dpm_late_early_list); + dpm_clear_async_state(dev); if (dpm_async_fn(dev, async_suspend_late)) continue; @@ -1764,6 +1779,7 @@ int dpm_suspend(pm_message_t state) list_move(&dev->power.entry, &dpm_suspended_list); + dpm_clear_async_state(dev); if (dpm_async_fn(dev, async_suspend)) continue; -- cgit v1.2.3 From 13b4f9e126cb55b65430bae7e704cea23c78620c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 7 Mar 2025 02:17:50 +0000 Subject: PM: sleep: Remove unused pm_generic_ wrappers pm_generic_thaw_early() has been unused since 2016's commit 294f47ffd55c ("PM / Domains: Remove redundant system PM callbacks") pm_generic_freeze_late() has been unused since 2019's commit 3cd7957e85e6 ("ACPI: PM: Simplify and fix PM domain hibernation callbacks") Remove them. Signed-off-by: Dr. David Alan Gilbert Link: https://patch.msgid.link/20250307021750.457600-1-linux@treblig.org Signed-off-by: Rafael J. Wysocki --- drivers/base/power/generic_ops.c | 24 ------------------------ include/linux/pm.h | 4 ---- 2 files changed, 28 deletions(-) diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 4fa525668cb7..6502720bb564 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -114,18 +114,6 @@ int pm_generic_freeze_noirq(struct device *dev) } EXPORT_SYMBOL_GPL(pm_generic_freeze_noirq); -/** - * pm_generic_freeze_late - Generic freeze_late callback for subsystems. - * @dev: Device to freeze. 
- */ -int pm_generic_freeze_late(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->freeze_late ? pm->freeze_late(dev) : 0; -} -EXPORT_SYMBOL_GPL(pm_generic_freeze_late); - /** * pm_generic_freeze - Generic freeze callback for subsystems. * @dev: Device to freeze. @@ -186,18 +174,6 @@ int pm_generic_thaw_noirq(struct device *dev) } EXPORT_SYMBOL_GPL(pm_generic_thaw_noirq); -/** - * pm_generic_thaw_early - Generic thaw_early callback for subsystems. - * @dev: Device to thaw. - */ -int pm_generic_thaw_early(struct device *dev) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - - return pm && pm->thaw_early ? pm->thaw_early(dev) : 0; -} -EXPORT_SYMBOL_GPL(pm_generic_thaw_early); - /** * pm_generic_thaw - Generic thaw callback for subsystems. * @dev: Device to thaw. diff --git a/include/linux/pm.h b/include/linux/pm.h index 63a8dffda787..f0bd8fbae4f2 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -839,10 +839,8 @@ extern int pm_generic_resume_early(struct device *dev); extern int pm_generic_resume_noirq(struct device *dev); extern int pm_generic_resume(struct device *dev); extern int pm_generic_freeze_noirq(struct device *dev); -extern int pm_generic_freeze_late(struct device *dev); extern int pm_generic_freeze(struct device *dev); extern int pm_generic_thaw_noirq(struct device *dev); -extern int pm_generic_thaw_early(struct device *dev); extern int pm_generic_thaw(struct device *dev); extern int pm_generic_restore_noirq(struct device *dev); extern int pm_generic_restore_early(struct device *dev); @@ -884,10 +882,8 @@ static inline void dpm_for_each_dev(void *data, void (*fn)(struct device *, void #define pm_generic_resume_noirq NULL #define pm_generic_resume NULL #define pm_generic_freeze_noirq NULL -#define pm_generic_freeze_late NULL #define pm_generic_freeze NULL #define pm_generic_thaw_noirq NULL -#define pm_generic_thaw_early NULL #define pm_generic_thaw NULL #define pm_generic_restore_noirq NULL #define pm_generic_restore_early NULL -- cgit v1.2.3 From 0f42194c6b22d687fd53c8aea5413cf976366672 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 11 Mar 2025 17:08:22 +0100 Subject: PM: s2idle: Drop redundant locks when entering s2idle The calls to cpus_read_lock|unlock() protect us from getting CPUs hotplugged while entering suspend-to-idle. However, when s2idle_enter() is called we should be far beyond the point when CPUs may be hotplugged. Let's therefore simplify the code and drop the use of the lock. Signed-off-by: Ulf Hansson Link: https://patch.msgid.link/20250311160827.1129643-2-ulf.hansson@linaro.org [ rjw: Rewrote the new comment ] Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 09f8397bae15..1876abf1be15 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -91,6 +91,12 @@ static void s2idle_enter(void) { trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, true); + /* + * The correctness of the code below depends on the number of online + * CPUs being stable, but CPUs cannot be taken offline or put online + * while it is running. + */ + raw_spin_lock_irq(&s2idle_lock); if (pm_wakeup_pending()) goto out; @@ -98,8 +104,6 @@ static void s2idle_enter(void) s2idle_state = S2IDLE_STATE_ENTER; raw_spin_unlock_irq(&s2idle_lock); - cpus_read_lock(); - /* Push all the CPUs into the idle loop.
*/ wake_up_all_idle_cpus(); /* Make the current CPU wait so it can enter the idle loop too. */ @@ -112,8 +116,6 @@ static void s2idle_enter(void) */ wake_up_all_idle_cpus(); - cpus_read_unlock(); - raw_spin_lock_irq(&s2idle_lock); out: -- cgit v1.2.3 From 4b7d654258e0dd69a7d00be387b388f9d7544912 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 11 Mar 2025 17:08:23 +0100 Subject: PM: s2idle: Extend comment in s2idle_enter() The s2idle_lock must be held while checking for a pending wakeup and while moving into S2IDLE_STATE_ENTER, to make sure a wakeup doesn't get lost. Let's extend the comment in the code to make this clear. Signed-off-by: Ulf Hansson Link: https://patch.msgid.link/20250311160827.1129643-3-ulf.hansson@linaro.org [ rjw: Rewrote the new comment ] Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 1876abf1be15..6fae1e0a331c 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -95,8 +95,12 @@ static void s2idle_enter(void) * The correctness of the code below depends on the number of online * CPUs being stable, but CPUs cannot be taken offline or put online * while it is running. + * + * The s2idle_lock must be acquired before the pending wakeup check to + * prevent pm_system_wakeup() from running as a whole between that check + * and the subsequent s2idle_state update in which case a wakeup event + * would get lost. */ - raw_spin_lock_irq(&s2idle_lock); if (pm_wakeup_pending()) goto out; -- cgit v1.2.3 From 956af869a2b7a1ab1f67bf8a74c51897f6f6715d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Mar 2025 11:47:19 +0100 Subject: PM: sleep: core: Fix indentation in dpm_wait_for_children() The body of dpm_wait_for_children() is indented by 7 spaces instead of a single TAB. Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/9c8ff2b103c3ba7b0d27bdc8248b05e3b1dc9551.1741776430.git.geert+renesas@glider.be Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d240dc352b1f..1b0eb83f7061 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -249,7 +249,7 @@ static int dpm_wait_fn(struct device *dev, void *async_ptr) static void dpm_wait_for_children(struct device *dev, bool async) { - device_for_each_child(dev, &async, dpm_wait_fn); + device_for_each_child(dev, &async, dpm_wait_fn); } static void dpm_wait_for_suppliers(struct device *dev, bool async) -- cgit v1.2.3 From 03f1444016b71feffa1dfb8a51f15ba592f94b13 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 13 Mar 2025 17:00:00 +0100 Subject: PM: sleep: Fix handling devices with direct_complete set on errors When dpm_suspend() fails, some devices with power.direct_complete set may not have been handled by device_suspend() yet, so runtime PM has not been disabled for them even though power.direct_complete is set. Since device_resume() expects that runtime PM has been disabled for all devices with power.direct_complete set, it will attempt to re-enable runtime PM for the devices that have not been processed by device_suspend(), which does not make sense.
Had those devices had runtime PM disabled before device_suspend() ran, device_resume() would have inadvertently enabled runtime PM for them, but this is not expected to happen because it would require ->prepare() callbacks to return positive values for devices with runtime PM disabled, which would be invalid. In practice, this issue is most likely benign because pm_runtime_enable() will not allow the "disable depth" counter to underflow, but it causes a warning message to be printed for each affected device. To allow device_resume() to distinguish the "direct complete" devices that have been processed by device_suspend() from those which have not been handled by it, make device_suspend() set power.is_suspended for "direct complete" devices. Next, move the power.is_suspended check in device_resume() before the power.direct_complete check in it to make it skip the "direct complete" devices that have not been handled by device_suspend(). This change is based on a preliminary patch from Saravana Kannan. Fixes: aae4518b3124 ("PM / sleep: Mechanism to avoid resuming runtime-suspended devices unnecessarily") Link: https://lore.kernel.org/linux-pm/20241114220921.2529905-2-saravanak@google.com/ Reported-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki Reviewed-by: Saravana Kannan Link: https://patch.msgid.link/12627587.O9o76ZdvQC@rjwysocki.net --- drivers/base/power/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 1b0eb83f7061..ad50018b8047 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -938,6 +938,9 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) if (dev->power.syscore) goto Complete; + if (!dev->power.is_suspended) + goto Complete; + if (dev->power.direct_complete) { /* * Allow new children to be added under the device after this @@ -963,9 +966,6 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) */ dev->power.is_prepared = false; - if (!dev->power.is_suspended) - goto Unlock; - if (dev->pm_domain) { info = "power domain "; callback = pm_op(&dev->pm_domain->ops, state); @@ -1005,7 +1005,6 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) error = dpm_run_callback(callback, dev, state, info); dev->power.is_suspended = false; - Unlock: device_unlock(dev); dpm_watchdog_clear(&wd); @@ -1669,6 +1668,7 @@ static int device_suspend(struct device *dev, pm_message_t state, bool async) pm_runtime_disable(dev); if (pm_runtime_status_suspended(dev)) { pm_dev_dbg(dev, state, "direct-complete "); + dev->power.is_suspended = true; goto Complete; } -- cgit v1.2.3 From 3860cbe239639503e56bd4365c6bf4cb957ef04e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 19 Mar 2025 11:43:24 +0000 Subject: PM: sleep: Fix bit masking operation The mask operation link->flags | DL_FLAG_PM_RUNTIME is always true, which is incorrect. The mask operation should use the bitwise & operator instead. Fix this. Fixes: bca84a7b93fd ("PM: sleep: Use DPM_FLAG_SMART_SUSPEND conditionally") Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20250319114324.791829-1-colin.i.king@gmail.com Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index ad50018b8047..ac2a197c1234 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1836,7 +1836,7 @@ static bool device_prepare_smart_suspend(struct device *dev) idx = device_links_read_lock(); list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { - if (!(link->flags | DL_FLAG_PM_RUNTIME)) + if (!(link->flags & DL_FLAG_PM_RUNTIME)) continue; if (!dev_pm_smart_suspend(link->supplier) && -- cgit v1.2.3
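To make the one-character fix in the last patch above concrete, here is a minimal user-space sketch of the bug. This is plain C, not kernel code; the flag value and all names are invented for illustration and do not reflect the real DL_FLAG_PM_RUNTIME definition:

#include <stdio.h>

/* Illustrative flag value only; the real DL_FLAG_PM_RUNTIME is defined in
 * include/linux/device.h and its exact value is irrelevant to the point
 * made here. */
#define EXAMPLE_FLAG_PM_RUNTIME (1u << 2)

int main(void)
{
	unsigned int flags = 0;	/* a link with no flags set at all */

	/*
	 * Buggy form: OR-ing in a non-zero constant always yields a non-zero
	 * result, so !(flags | EXAMPLE_FLAG_PM_RUNTIME) is always false and a
	 * "continue" guarded by it would never be taken.
	 */
	if (!(flags | EXAMPLE_FLAG_PM_RUNTIME))
		printf("'|' form: link skipped (never printed)\n");
	else
		printf("'|' form: link processed even though the flag is not set\n");

	/*
	 * Fixed form: AND-ing isolates the bit, so the link is skipped
	 * precisely when the flag is absent.
	 */
	if (!(flags & EXAMPLE_FLAG_PM_RUNTIME))
		printf("'&' form: link correctly skipped, flag not set\n");
	else
		printf("'&' form: link processed\n");

	return 0;
}

Compiling and running this prints the "processed even though the flag is not set" line for the buggy form and the "correctly skipped" line for the fixed form, which is exactly the behavioral difference the one-character change restores.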
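The two s2idle patches above both turn on a classic ordering rule: the lock protecting the state must be taken before the pending-wakeup check, so that a wakeup reported in between the check and the state update cannot be lost. The following is a minimal user-space analogue using POSIX threads, offered only as a sketch of the pattern; all names are invented for illustration and none of the kernel's actual primitives (s2idle_lock, pm_wakeup_pending(), pm_system_wakeup()) appear in it:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static bool wakeup_pending;
static int entered;	/* analogue of moving the state machine to "enter" */

/* Analogue of the wakeup source: mark a wakeup under the same lock. */
static void *report_wakeup(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&state_lock);
	wakeup_pending = true;
	pthread_mutex_unlock(&state_lock);
	return NULL;
}

/*
 * Correct ordering: the lock is taken before the pending check, so the
 * wakeup reporter cannot run in its entirety between the check and the
 * state update; the wakeup is either seen here and the entry aborted, or
 * it is reported only after the state has already been updated and can be
 * acted upon afterwards.
 */
static bool try_enter(void)
{
	bool ok = false;

	pthread_mutex_lock(&state_lock);
	if (!wakeup_pending) {
		entered = 1;
		ok = true;
	}
	pthread_mutex_unlock(&state_lock);
	return ok;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, report_wakeup, NULL);
	printf("enter %s\n", try_enter() ? "succeeded" : "aborted by wakeup");
	pthread_join(t, NULL);
	return 0;
}

Had the pending check been performed before taking the lock, a wakeup arriving between the unlocked check and the locked state update would be missed, which is the race the extended comment in s2idle_enter() documents.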