summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/pm/cpuidle.rst72
-rw-r--r--Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml55
-rw-r--r--Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml10
-rw-r--r--drivers/base/power/main.c26
-rw-r--r--drivers/base/power/sysfs.c1
-rw-r--r--drivers/base/power/wakeirq.c26
-rw-r--r--drivers/cpufreq/Kconfig6
-rw-r--r--drivers/cpufreq/Kconfig.arm8
-rw-r--r--drivers/cpufreq/Makefile1
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c36
-rw-r--r--drivers/cpufreq/airoha-cpufreq.c152
-rw-r--r--drivers/cpufreq/amd-pstate-trace.h52
-rw-r--r--drivers/cpufreq/amd-pstate-ut.c12
-rw-r--r--drivers/cpufreq/amd-pstate.c483
-rw-r--r--drivers/cpufreq/amd-pstate.h3
-rw-r--r--drivers/cpufreq/apple-soc-cpufreq.c56
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c4
-rw-r--r--drivers/cpufreq/cpufreq.c9
-rw-r--r--drivers/cpufreq/intel_pstate.c60
-rw-r--r--drivers/cpufreq/powernv-cpufreq.c3
-rw-r--r--drivers/cpufreq/qcom-cpufreq-hw.c34
-rw-r--r--drivers/cpufreq/scmi-cpufreq.c45
-rw-r--r--drivers/cpufreq/sparc-us2e-cpufreq.c2
-rw-r--r--drivers/cpufreq/sparc-us3-cpufreq.c2
-rw-r--r--drivers/cpuidle/governors/teo.c91
-rw-r--r--drivers/idle/intel_idle.c1
-rw-r--r--drivers/opp/core.c99
-rw-r--r--drivers/opp/of.c4
-rw-r--r--drivers/opp/opp.h1
-rw-r--r--include/linux/energy_model.h2
-rw-r--r--include/linux/pm_opp.h13
-rw-r--r--include/linux/pm_wakeirq.h6
-rw-r--r--include/linux/pm_wakeup.h17
-rw-r--r--kernel/power/Kconfig21
-rw-r--r--kernel/power/autosleep.c1
-rw-r--r--kernel/power/energy_model.c17
-rw-r--r--kernel/power/power.h2
-rw-r--r--kernel/sched/cpufreq_schedutil.c37
38 files changed, 982 insertions, 488 deletions
diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index 19754beb5a4e..eb58d7a5affd 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -269,27 +269,7 @@ Namely, when invoked to select an idle state for a CPU (i.e. an idle state that
the CPU will ask the processor hardware to enter), it attempts to predict the
idle duration and uses the predicted value for idle state selection.
-It first obtains the time until the closest timer event with the assumption
-that the scheduler tick will be stopped. That time, referred to as the *sleep
-length* in what follows, is the upper bound on the time before the next CPU
-wakeup. It is used to determine the sleep length range, which in turn is needed
-to get the sleep length correction factor.
-
-The ``menu`` governor maintains two arrays of sleep length correction factors.
-One of them is used when tasks previously running on the given CPU are waiting
-for some I/O operations to complete and the other one is used when that is not
-the case. Each array contains several correction factor values that correspond
-to different sleep length ranges organized so that each range represented in the
-array is approximately 10 times wider than the previous one.
-
-The correction factor for the given sleep length range (determined before
-selecting the idle state for the CPU) is updated after the CPU has been woken
-up and the closer the sleep length is to the observed idle duration, the closer
-to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
-The sleep length is multiplied by the correction factor for the range that it
-falls into to obtain the first approximation of the predicted idle duration.
-
-Next, the governor uses a simple pattern recognition algorithm to refine its
+It first uses a simple pattern recognition algorithm to obtain a preliminary
idle duration prediction. Namely, it saves the last 8 observed idle duration
values and, when predicting the idle duration next time, it computes the average
and variance of them. If the variance is small (smaller than 400 square
@@ -301,29 +281,39 @@ Again, if the variance of them is small (in the above sense), the average is
taken as the "typical interval" value and so on, until either the "typical
interval" is determined or too many data points are disregarded, in which case
the "typical interval" is assumed to equal "infinity" (the maximum unsigned
-integer value). The "typical interval" computed this way is compared with the
-sleep length multiplied by the correction factor and the minimum of the two is
-taken as the predicted idle duration.
-
-Then, the governor computes an extra latency limit to help "interactive"
-workloads. It uses the observation that if the exit latency of the selected
-idle state is comparable with the predicted idle duration, the total time spent
-in that state probably will be very short and the amount of energy to save by
-entering it will be relatively small, so likely it is better to avoid the
-overhead related to entering that state and exiting it. Thus selecting a
-shallower state is likely to be a better option then. The first approximation
-of the extra latency limit is the predicted idle duration itself which
-additionally is divided by a value depending on the number of tasks that
-previously ran on the given CPU and now they are waiting for I/O operations to
-complete. The result of that division is compared with the latency limit coming
-from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
-framework and the minimum of the two is taken as the limit for the idle states'
-exit latency.
+integer value).
+
+If the "typical interval" computed this way is long enough, the governor obtains
+the time until the closest timer event with the assumption that the scheduler
+tick will be stopped. That time, referred to as the *sleep length* in what follows,
+is the upper bound on the time before the next CPU wakeup. It is used to determine
+the sleep length range, which in turn is needed to get the sleep length correction
+factor.
+
+The ``menu`` governor maintains an array containing several correction factor
+values that correspond to different sleep length ranges organized so that each
+range represented in the array is approximately 10 times wider than the previous
+one.
+
+The correction factor for the given sleep length range (determined before
+selecting the idle state for the CPU) is updated after the CPU has been woken
+up and the closer the sleep length is to the observed idle duration, the closer
+to 1 the correction factor becomes (it must fall between 0 and 1 inclusive).
+The sleep length is multiplied by the correction factor for the range that it
+falls into to obtain an approximation of the predicted idle duration that is
+compared to the "typical interval" determined previously and the minimum of
+the two is taken as the idle duration prediction.
+
+If the "typical interval" value is small, which means that the CPU is likely
+to be woken up soon enough, the sleep length computation is skipped as it may
+be costly and the idle duration is simply predicted to equal the "typical
+interval" value.
Now, the governor is ready to walk the list of idle states and choose one of
them. For this purpose, it compares the target residency of each state with
-the predicted idle duration and the exit latency of it with the computed latency
-limit. It selects the state with the target residency closest to the predicted
+the predicted idle duration and the exit latency of it with the with the latency
+limit coming from the power management quality of service, or `PM QoS <cpu-pm-qos_>`_,
+framework. It selects the state with the target residency closest to the predicted
idle duration, but still below it, and exit latency that does not exceed the
limit.
diff --git a/Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml b/Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml
new file mode 100644
index 000000000000..7d4510b3219c
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/airoha,en7581-cpufreq.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cpufreq/airoha,en7581-cpufreq.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Airoha EN7581 CPUFreq
+
+maintainers:
+ - Christian Marangi <ansuelsmth@gmail.com>
+
+description: |
+ On newer Airoha SoC, CPU Frequency is scaled indirectly with SMC commands
+ to ATF.
+
+ A virtual clock is exposed. This virtual clock is a get-only clock and
+ is used to expose the current global CPU clock. The frequency info comes
+ by the output of the SMC command that reports the clock in MHz.
+
+ The SMC sets the CPU clock by providing an index, this is modelled as
+ performance states in a power domain.
+
+ CPUs can't be individually scaled as the CPU frequency is shared across
+ all CPUs and is global.
+
+properties:
+ compatible:
+ const: airoha,en7581-cpufreq
+
+ '#clock-cells':
+ const: 0
+
+ '#power-domain-cells':
+ const: 0
+
+ operating-points-v2: true
+
+required:
+ - compatible
+ - '#clock-cells'
+ - '#power-domain-cells'
+ - operating-points-v2
+
+additionalProperties: false
+
+examples:
+ - |
+ performance-domain {
+ compatible = "airoha,en7581-cpufreq";
+
+ operating-points-v2 = <&cpu_smcc_opp_table>;
+
+ #power-domain-cells = <0>;
+ #clock-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml b/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml
index 76cb9726660e..896276b8c6bb 100644
--- a/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml
+++ b/Documentation/devicetree/bindings/cpufreq/apple,cluster-cpufreq.yaml
@@ -24,9 +24,17 @@ properties:
- apple,t8112-cluster-cpufreq
- const: apple,cluster-cpufreq
- items:
- - const: apple,t6000-cluster-cpufreq
+ - enum:
+ - apple,s8000-cluster-cpufreq
+ - apple,t8010-cluster-cpufreq
+ - apple,t8015-cluster-cpufreq
+ - apple,t6000-cluster-cpufreq
- const: apple,t8103-cluster-cpufreq
- const: apple,cluster-cpufreq
+ - items:
+ - const: apple,t7000-cluster-cpufreq
+ - const: apple,s5l8960x-cluster-cpufreq
+ - const: apple,s5l8960x-cluster-cpufreq
reg:
maxItems: 1
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 4a67e83300e1..cbc9a7a75def 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -496,6 +496,7 @@ struct dpm_watchdog {
struct device *dev;
struct task_struct *tsk;
struct timer_list timer;
+ bool fatal;
};
#define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \
@@ -512,11 +513,23 @@ struct dpm_watchdog {
static void dpm_watchdog_handler(struct timer_list *t)
{
struct dpm_watchdog *wd = from_timer(wd, t, timer);
+ struct timer_list *timer = &wd->timer;
+ unsigned int time_left;
+
+ if (wd->fatal) {
+ dev_emerg(wd->dev, "**** DPM device timeout ****\n");
+ show_stack(wd->tsk, NULL, KERN_EMERG);
+ panic("%s %s: unrecoverable failure\n",
+ dev_driver_string(wd->dev), dev_name(wd->dev));
+ }
+
+ time_left = CONFIG_DPM_WATCHDOG_TIMEOUT - CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;
+ dev_warn(wd->dev, "**** DPM device timeout after %u seconds; %u seconds until panic ****\n",
+ CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT, time_left);
+ show_stack(wd->tsk, NULL, KERN_WARNING);
- dev_emerg(wd->dev, "**** DPM device timeout ****\n");
- show_stack(wd->tsk, NULL, KERN_EMERG);
- panic("%s %s: unrecoverable failure\n",
- dev_driver_string(wd->dev), dev_name(wd->dev));
+ wd->fatal = true;
+ mod_timer(timer, jiffies + HZ * time_left);
}
/**
@@ -530,10 +543,11 @@ static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev)
wd->dev = dev;
wd->tsk = current;
+ wd->fatal = CONFIG_DPM_WATCHDOG_TIMEOUT == CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;
timer_setup_on_stack(timer, dpm_watchdog_handler, 0);
/* use same timeout value for both suspend and resume */
- timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT;
+ timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;
add_timer(timer);
}
@@ -914,7 +928,7 @@ static void device_resume(struct device *dev, pm_message_t state, bool async)
goto Complete;
if (dev->power.direct_complete) {
- /* Match the pm_runtime_disable() in __device_suspend(). */
+ /* Match the pm_runtime_disable() in device_suspend(). */
pm_runtime_enable(dev);
goto Complete;
}
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index f8163b559bf9..f84018125b46 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -6,7 +6,6 @@
#include <linux/export.h>
#include <linux/pm_qos.h>
#include <linux/pm_runtime.h>
-#include <linux/pm_wakeup.h>
#include <linux/atomic.h>
#include <linux/jiffies.h>
#include "power.h"
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 5a5a9e978e85..8aa28c08b289 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -103,6 +103,32 @@ void dev_pm_clear_wake_irq(struct device *dev)
}
EXPORT_SYMBOL_GPL(dev_pm_clear_wake_irq);
+static void devm_pm_clear_wake_irq(void *dev)
+{
+ dev_pm_clear_wake_irq(dev);
+}
+
+/**
+ * devm_pm_set_wake_irq - device-managed variant of dev_pm_set_wake_irq
+ * @dev: Device entry
+ * @irq: Device IO interrupt
+ *
+ *
+ * Attach a device IO interrupt as a wake IRQ, same with dev_pm_set_wake_irq,
+ * but the device will be auto clear wake capability on driver detach.
+ */
+int devm_pm_set_wake_irq(struct device *dev, int irq)
+{
+ int ret;
+
+ ret = dev_pm_set_wake_irq(dev, irq);
+ if (ret)
+ return ret;
+
+ return devm_add_action_or_reset(dev, devm_pm_clear_wake_irq, dev);
+}
+EXPORT_SYMBOL_GPL(devm_pm_set_wake_irq);
+
/**
* handle_threaded_wake_irq - Handler for dedicated wake-up interrupts
* @irq: Device specific dedicated wake-up interrupt
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 92a83a9bb2e1..d64b07ec48e5 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -232,7 +232,7 @@ config CPUFREQ_VIRT
If in doubt, say N.
config CPUFREQ_DT_PLATDEV
- tristate "Generic DT based cpufreq platdev driver"
+ bool "Generic DT based cpufreq platdev driver"
depends on OF
help
This adds a generic DT based cpufreq platdev driver for frequency
@@ -325,8 +325,6 @@ config QORIQ_CPUFREQ
This adds the CPUFreq driver support for Freescale QorIQ SoCs
which are capable of changing the CPU's frequency dynamically.
-endif
-
config ACPI_CPPC_CPUFREQ
tristate "CPUFreq driver based on the ACPI CPPC spec"
depends on ACPI_PROCESSOR
@@ -355,4 +353,6 @@ config ACPI_CPPC_CPUFREQ_FIE
If in doubt, say N.
+endif
+
endmenu
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 5f7e13e60c80..704e84d00639 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -15,6 +15,14 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
To compile this driver as a module, choose M here: the
module will be called sun50i-cpufreq-nvmem.
+config ARM_AIROHA_SOC_CPUFREQ
+ tristate "Airoha EN7581 SoC CPUFreq support"
+ depends on ARCH_AIROHA || COMPILE_TEST
+ select PM_OPP
+ default ARCH_AIROHA
+ help
+ This adds the CPUFreq driver for Airoha EN7581 SoCs.
+
config ARM_APPLE_SOC_CPUFREQ
tristate "Apple Silicon SoC CPUFreq support"
depends on ARCH_APPLE || (COMPILE_TEST && 64BIT)
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index d35a28dd9463..890fff99f37d 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o
##################################################################################
# ARM SoC drivers
+obj-$(CONFIG_ARM_AIROHA_SOC_CPUFREQ) += airoha-cpufreq.o
obj-$(CONFIG_ARM_APPLE_SOC_CPUFREQ) += apple-soc-cpufreq.o
obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o
obj-$(CONFIG_ARM_ARMADA_8K_CPUFREQ) += armada-8k-cpufreq.o
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index c9ebacf5c88e..302df42d6887 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -623,7 +623,14 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
#endif
#ifdef CONFIG_ACPI_CPPC_LIB
-static u64 get_max_boost_ratio(unsigned int cpu)
+/*
+ * get_max_boost_ratio: Computes the max_boost_ratio as the ratio
+ * between the highest_perf and the nominal_perf.
+ *
+ * Returns the max_boost_ratio for @cpu. Returns the CPPC nominal
+ * frequency via @nominal_freq if it is non-NULL pointer.
+ */
+static u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq)
{
struct cppc_perf_caps perf_caps;
u64 highest_perf, nominal_perf;
@@ -652,6 +659,9 @@ static u64 get_max_boost_ratio(unsigned int cpu)
nominal_perf = perf_caps.nominal_perf;
+ if (nominal_freq)
+ *nominal_freq = perf_caps.nominal_freq;
+
if (!highest_perf || !nominal_perf) {
pr_debug("CPU%d: highest or nominal performance missing\n", cpu);
return 0;
@@ -664,8 +674,12 @@ static u64 get_max_boost_ratio(unsigned int cpu)
return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
}
+
#else
-static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; }
+static inline u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq)
+{
+ return 0;
+}
#endif
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -675,9 +689,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
struct acpi_cpufreq_data *data;
unsigned int cpu = policy->cpu;
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ u64 max_boost_ratio, nominal_freq = 0;
unsigned int valid_states = 0;
unsigned int result = 0;
- u64 max_boost_ratio;
unsigned int i;
#ifdef CONFIG_SMP
static int blacklisted;
@@ -827,16 +841,20 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
}
freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
- max_boost_ratio = get_max_boost_ratio(cpu);
+ max_boost_ratio = get_max_boost_ratio(cpu, &nominal_freq);
if (max_boost_ratio) {
- unsigned int freq = freq_table[0].frequency;
+ unsigned int freq = nominal_freq;
/*
- * Because the loop above sorts the freq_table entries in the
- * descending order, freq is the maximum frequency in the table.
- * Assume that it corresponds to the CPPC nominal frequency and
- * use it to set cpuinfo.max_freq.
+ * The loop above sorts the freq_table entries in the
+ * descending order. If ACPI CPPC has not advertised
+ * the nominal frequency (this is possible in CPPC
+ * revisions prior to 3), then use the first entry in
+ * the pstate table as a proxy for nominal frequency.
*/
+ if (!freq)
+ freq = freq_table[0].frequency;
+
policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
} else {
/*
diff --git a/drivers/cpufreq/airoha-cpufreq.c b/drivers/cpufreq/airoha-cpufreq.c
new file mode 100644
index 000000000000..4fe39eadd163
--- /dev/null
+++ b/drivers/cpufreq/airoha-cpufreq.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bitfield.h>
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include "cpufreq-dt.h"
+
+struct airoha_cpufreq_priv {
+ int opp_token;
+ struct dev_pm_domain_list *pd_list;
+ struct platform_device *cpufreq_dt;
+};
+
+static struct platform_device *cpufreq_pdev;
+
+/* NOP function to disable OPP from setting clock */
+static int airoha_cpufreq_config_clks_nop(struct device *dev,
+ struct opp_table *opp_table,
+ struct dev_pm_opp *opp,
+ void *data, bool scaling_down)
+{
+ return 0;
+}
+
+static const char * const airoha_cpufreq_clk_names[] = { "cpu", NULL };
+static const char * const airoha_cpufreq_pd_names[] = { "perf" };
+
+static int airoha_cpufreq_probe(struct platform_device *pdev)
+{
+ const struct dev_pm_domain_attach_data attach_data = {
+ .pd_names = airoha_cpufreq_pd_names,
+ .num_pd_names = ARRAY_SIZE(airoha_cpufreq_pd_names),
+ .pd_flags = PD_FLAG_DEV_LINK_ON | PD_FLAG_REQUIRED_OPP,
+ };
+ struct dev_pm_opp_config config = {
+ .clk_names = airoha_cpufreq_clk_names,
+ .config_clks = airoha_cpufreq_config_clks_nop,
+ };
+ struct platform_device *cpufreq_dt;
+ struct airoha_cpufreq_priv *priv;
+ struct device *dev = &pdev->dev;
+ struct device *cpu_dev;
+ int ret;
+
+ /* CPUs refer to the same OPP table */
+ cpu_dev = get_cpu_device(0);
+ if (!cpu_dev)
+ return -ENODEV;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ /* Set OPP table conf with NOP config_clks */
+ priv->opp_token = dev_pm_opp_set_config(cpu_dev, &config);
+ if (priv->opp_token < 0)
+ return dev_err_probe(dev, priv->opp_token, "Failed to set OPP config\n");
+
+ /* Attach PM for OPP */
+ ret = dev_pm_domain_attach_list(cpu_dev, &attach_data,
+ &priv->pd_list);
+ if (ret)
+ goto clear_opp_config;
+
+ cpufreq_dt = platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
+ ret = PTR_ERR_OR_ZERO(cpufreq_dt);
+ if (ret) {
+ dev_err(dev, "failed to create cpufreq-dt device: %d\n", ret);
+ goto detach_pm;
+ }
+
+ priv->cpufreq_dt = cpufreq_dt;
+ platform_set_drvdata(pdev, priv);
+
+ return 0;
+
+detach_pm:
+ dev_pm_domain_detach_list(priv->pd_list);
+clear_opp_config:
+ dev_pm_opp_clear_config(priv->opp_token);
+
+ return ret;
+}
+
+static void airoha_cpufreq_remove(struct platform_device *pdev)
+{
+ struct airoha_cpufreq_priv *priv = platform_get_drvdata(pdev);
+
+ platform_device_unregister(priv->cpufreq_dt);
+
+ dev_pm_domain_detach_list(priv->pd_list);
+
+ dev_pm_opp_clear_config(priv->opp_token);
+}
+
+static struct platform_driver airoha_cpufreq_driver = {
+ .probe = airoha_cpufreq_probe,
+ .remove = airoha_cpufreq_remove,
+ .driver = {
+ .name = "airoha-cpufreq",
+ },
+};
+
+static const struct of_device_id airoha_cpufreq_match_list[] __initconst = {
+ { .compatible = "airoha,en7581" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, airoha_cpufreq_match_list);
+
+static int __init airoha_cpufreq_init(void)
+{
+ struct device_node *np = of_find_node_by_path("/");
+ const struct of_device_id *match;
+ int ret;
+
+ if (!np)
+ return -ENODEV;
+
+ match = of_match_node(airoha_cpufreq_match_list, np);
+ of_node_put(np);
+ if (!match)
+ return -ENODEV;
+
+ ret = platform_driver_register(&airoha_cpufreq_driver);
+ if (unlikely(ret < 0))
+ return ret;
+
+ cpufreq_pdev = platform_device_register_data(NULL, "airoha-cpufreq",
+ -1, match, sizeof(*match));
+ ret = PTR_ERR_OR_ZERO(cpufreq_pdev);
+ if (ret)
+ platform_driver_unregister(&airoha_cpufreq_driver);
+
+ return ret;
+}
+module_init(airoha_cpufreq_init);
+
+static void __exit airoha_cpufreq_exit(void)
+{
+ platform_device_unregister(cpufreq_pdev);
+ platform_driver_unregister(&airoha_cpufreq_driver);
+}
+module_exit(airoha_cpufreq_exit);
+
+MODULE_AUTHOR("Christian Marangi <ansuelsmth@gmail.com>");
+MODULE_DESCRIPTION("CPUfreq driver for Airoha SoCs");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/amd-pstate-trace.h b/drivers/cpufreq/amd-pstate-trace.h
index 35f38ae67fb1..8d692415d905 100644
--- a/drivers/cpufreq/amd-pstate-trace.h
+++ b/drivers/cpufreq/amd-pstate-trace.h
@@ -32,7 +32,6 @@ TRACE_EVENT(amd_pstate_perf,
u64 aperf,
u64 tsc,
unsigned int cpu_id,
- bool changed,
bool fast_switch
),
@@ -44,7 +43,6 @@ TRACE_EVENT(amd_pstate_perf,
aperf,
tsc,
cpu_id,
- changed,
fast_switch
),
@@ -57,7 +55,6 @@ TRACE_EVENT(amd_pstate_perf,
__field(unsigned long long, aperf)
__field(unsigned long long, tsc)
__field(unsigned int, cpu_id)
- __field(bool, changed)
__field(bool, fast_switch)
),
@@ -70,11 +67,10 @@ TRACE_EVENT(amd_pstate_perf,
__entry->aperf = aperf;
__entry->tsc = tsc;
__entry->cpu_id = cpu_id;
- __entry->changed = changed;
__entry->fast_switch = fast_switch;
),
- TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s",
+ TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u fast_switch=%s",
(unsigned long)__entry->min_perf,
(unsigned long)__entry->target_perf,
(unsigned long)__entry->capacity,
@@ -83,11 +79,55 @@ TRACE_EVENT(amd_pstate_perf,
(unsigned long long)__entry->aperf,
(unsigned long long)__entry->tsc,
(unsigned int)__entry->cpu_id,
- (__entry->changed) ? "true" : "false",
(__entry->fast_switch) ? "true" : "false"
)
);
+TRACE_EVENT(amd_pstate_epp_perf,
+
+ TP_PROTO(unsigned int cpu_id,
+ unsigned int highest_perf,
+ unsigned int epp,
+ unsigned int min_perf,
+ unsigned int max_perf,
+ bool boost
+ ),
+
+ TP_ARGS(cpu_id,
+ highest_perf,
+ epp,
+ min_perf,
+ max_perf,
+ boost),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, cpu_id)
+ __field(unsigned int, highest_perf)
+ __field(unsigned int, epp)
+ __field(unsigned int, min_perf)
+ __field(unsigned int, max_perf)
+ __field(bool, boost)
+ ),
+
+ TP_fast_assign(
+ __entry->cpu_id = cpu_id;
+ __entry->highest_perf = highest_perf;
+ __entry->epp = epp;
+ __entry->min_perf = min_perf;
+ __entry->max_perf = max_perf;
+ __entry->boost = boost;
+ ),
+
+ TP_printk("cpu%u: [%u<->%u]/%u, epp=%u, boost=%u",
+ (unsigned int)__entry->cpu_id,
+ (unsigned int)__entry->min_perf,
+ (unsigned int)__entry->max_perf,
+ (unsigned int)__entry->highest_perf,
+ (unsigned int)__entry->epp,
+ (bool)__entry->boost
+ )
+);
+
#endif /* _AMD_PSTATE_TRACE_H */
/* This part must be outside protection */
diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
index a261d7300951..3a0a380c3590 100644
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -207,7 +207,6 @@ static void amd_pstate_ut_check_freq(u32 index)
int cpu = 0;
struct cpufreq_policy *policy = NULL;
struct amd_cpudata *cpudata = NULL;
- u32 nominal_freq_khz;
for_each_possible_cpu(cpu) {
policy = cpufreq_cpu_get(cpu);
@@ -215,14 +214,13 @@ static void amd_pstate_ut_check_freq(u32 index)
break;
cpudata = policy->driver_data;
- nominal_freq_khz = cpudata->nominal_freq*1000;
- if (!((cpudata->max_freq >= nominal_freq_khz) &&
- (nominal_freq_khz > cpudata->lowest_nonlinear_freq) &&
+ if (!((cpudata->max_freq >= cpudata->nominal_freq) &&
+ (cpudata->nominal_freq > cpudata->lowest_nonlinear_freq) &&
(cpudata->lowest_nonlinear_freq > cpudata->min_freq) &&
(cpudata->min_freq > 0))) {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
- __func__, cpu, cpudata->max_freq, nominal_freq_khz,
+ __func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
cpudata->lowest_nonlinear_freq, cpudata->min_freq);
goto skip_test;
}
@@ -236,13 +234,13 @@ static void amd_pstate_ut_check_freq(u32 index)
if (cpudata->boost_supported) {
if ((policy->max == cpudata->max_freq) ||
- (policy->max == nominal_freq_khz))
+ (policy->max == cpudata->nominal_freq))
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
else {
amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
__func__, cpu, policy->max, cpudata->max_freq,
- nominal_freq_khz);
+ cpudata->nominal_freq);
goto skip_test;
}
} else {
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 66e5dfc711c0..dd9b8d6993d6 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -22,6 +22,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/bitfield.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -88,6 +89,11 @@ static bool cppc_enabled;
static bool amd_pstate_prefcore = true;
static struct quirk_entry *quirks;
+#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
+
/*
* AMD Energy Preference Performance (EPP)
* The EPP is used in the CCLK DPM controller to drive
@@ -180,120 +186,145 @@ static inline int get_mode_idx_from_str(const char *str, size_t size)
static DEFINE_MUTEX(amd_pstate_limits_lock);
static DEFINE_MUTEX(amd_pstate_driver_lock);
-static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
+static s16 msr_get_epp(struct amd_cpudata *cpudata)
{
- u64 epp;
+ u64 value;
int ret;
- if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- if (!cppc_req_cached) {
- epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
- &cppc_req_cached);
- if (epp)
- return epp;
- }
- epp = (cppc_req_cached >> 24) & 0xFF;
- } else {
- ret = cppc_get_epp_perf(cpudata->cpu, &epp);
- if (ret < 0) {
- pr_debug("Could not retrieve energy perf value (%d)\n", ret);
- return -EIO;
- }
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
+ if (ret < 0) {
+ pr_debug("Could not retrieve energy perf value (%d)\n", ret);
+ return ret;
}
- return (s16)(epp & 0xff);
+ return FIELD_GET(AMD_CPPC_EPP_PERF_MASK, value);
}
-static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
+DEFINE_STATIC_CALL(amd_pstate_get_epp, msr_get_epp);
+
+static inline s16 amd_pstate_get_epp(struct amd_cpudata *cpudata)
{
- s16 epp;
- int index = -EINVAL;
+ return static_call(amd_pstate_get_epp)(cpudata);
+}
- epp = amd_pstate_get_epp(cpudata, 0);
- if (epp < 0)
- return epp;
+static s16 shmem_get_epp(struct amd_cpudata *cpudata)
+{
+ u64 epp;
+ int ret;
- switch (epp) {
- case AMD_CPPC_EPP_PERFORMANCE:
- index = EPP_INDEX_PERFORMANCE;
- break;
- case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
- index = EPP_INDEX_BALANCE_PERFORMANCE;
- break;
- case AMD_CPPC_EPP_BALANCE_POWERSAVE:
- index = EPP_INDEX_BALANCE_POWERSAVE;
- break;
- case AMD_CPPC_EPP_POWERSAVE:
- index = EPP_INDEX_POWERSAVE;
- break;
- default:
- break;
+ ret = cppc_get_epp_perf(cpudata->cpu, &epp);
+ if (ret < 0) {
+ pr_debug("Could not retrieve energy perf value (%d)\n", ret);
+ return ret;
}
- return index;
+ return (s16)(epp & 0xff);
}
-static void msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
- u32 des_perf, u32 max_perf, bool fast_switch)
+static int msr_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
+ u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
{
- if (fast_switch)
- wrmsrl(MSR_AMD_CPPC_REQ, READ_ONCE(cpudata->cppc_req_cached));
- else
- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
- READ_ONCE(cpudata->cppc_req_cached));
+ u64 value, prev;
+
+ value = prev = READ_ONCE(cpudata->cppc_req_cached);
+
+ value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
+ AMD_CPPC_DES_PERF_MASK | AMD_CPPC_EPP_PERF_MASK);
+ value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
+ value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, des_perf);
+ value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
+ value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
+
+ if (value == prev)
+ return 0;
+
+ if (fast_switch) {
+ wrmsrl(MSR_AMD_CPPC_REQ, value);
+ return 0;
+ } else {
+ int ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+
+ if (ret)
+ return ret;
+ }
+
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+ WRITE_ONCE(cpudata->epp_cached, epp);
+
+ return 0;
}
DEFINE_STATIC_CALL(amd_pstate_update_perf, msr_update_perf);
-static inline void amd_pstate_update_perf(struct amd_cpudata *cpudata,
+static inline int amd_pstate_update_perf(struct amd_cpudata *cpudata,
u32 min_perf, u32 des_perf,
- u32 max_perf, bool fast_switch)
+ u32 max_perf, u32 epp,
+ bool fast_switch)
{
- static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
- max_perf, fast_switch);
+ return static_call(amd_pstate_update_perf)(cpudata, min_perf, des_perf,
+ max_perf, epp, fast_switch);
}
-static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
+static int msr_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
+ u64 value, prev;
int ret;
- struct cppc_perf_ctrls perf_ctrls;
- if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- u64 value = READ_ONCE(cpudata->cppc_req_cached);
-
- value &= ~GENMASK_ULL(31, 24);
- value |= (u64)epp << 24;
- WRITE_ONCE(cpudata->cppc_req_cached, value);
+ value = prev = READ_ONCE(cpudata->cppc_req_cached);
+ value &= ~AMD_CPPC_EPP_PERF_MASK;
+ value |= FIELD_PREP(AMD_CPPC_EPP_PERF_MASK, epp);
- ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
- if (!ret)
- cpudata->epp_cached = epp;
- } else {
- amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
- cpudata->max_limit_perf, false);
+ if (value == prev)
+ return 0;
- perf_ctrls.energy_perf = epp;
- ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
- if (ret) {
- pr_debug("failed to set energy perf value (%d)\n", ret);
- return ret;
- }
- cpudata->epp_cached = epp;
+ ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+ if (ret) {
+ pr_err("failed to set energy perf value (%d)\n", ret);
+ return ret;
}
+ /* update both so that msr_update_perf() can effectively check */
+ WRITE_ONCE(cpudata->epp_cached, epp);
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+
return ret;
}
-static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
- int pref_index)
+DEFINE_STATIC_CALL(amd_pstate_set_epp, msr_set_epp);
+
+static inline int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
+{
+ return static_call(amd_pstate_set_epp)(cpudata, epp);
+}
+
+static int shmem_set_epp(struct amd_cpudata *cpudata, u32 epp)
{
- int epp = -EINVAL;
int ret;
+ struct cppc_perf_ctrls perf_ctrls;
+
+ if (epp == cpudata->epp_cached)
+ return 0;
+
+ perf_ctrls.energy_perf = epp;
+ ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
+ if (ret) {
+ pr_debug("failed to set energy perf value (%d)\n", ret);
+ return ret;
+ }
+ WRITE_ONCE(cpudata->epp_cached, epp);
+
+ return ret;
+}
+
+static int amd_pstate_set_energy_pref_index(struct cpufreq_policy *policy,
+ int pref_index)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ int epp;
if (!pref_index)
epp = cpudata->epp_default;
-
- if (epp == -EINVAL)
+ else
epp = epp_values[pref_index];
if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
@@ -301,9 +332,15 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
return -EBUSY;
}
- ret = amd_pstate_set_epp(cpudata, epp);
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ epp,
+ FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
+ FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached),
+ policy->boost_enabled);
+ }
- return ret;
+ return amd_pstate_set_epp(cpudata, epp);
}
static inline int msr_cppc_enable(bool enable)
@@ -442,17 +479,23 @@ static inline int amd_pstate_init_perf(struct amd_cpudata *cpudata)
return static_call(amd_pstate_init_perf)(cpudata);
}
-static void shmem_update_perf(struct amd_cpudata *cpudata,
- u32 min_perf, u32 des_perf,
- u32 max_perf, bool fast_switch)
+static int shmem_update_perf(struct amd_cpudata *cpudata, u32 min_perf,
+ u32 des_perf, u32 max_perf, u32 epp, bool fast_switch)
{
struct cppc_perf_ctrls perf_ctrls;
+ if (cppc_state == AMD_PSTATE_ACTIVE) {
+ int ret = shmem_set_epp(cpudata, epp);
+
+ if (ret)
+ return ret;
+ }
+
perf_ctrls.max_perf = max_perf;
perf_ctrls.min_perf = min_perf;
perf_ctrls.desired_perf = des_perf;
- cppc_set_perf(cpudata->cpu, &perf_ctrls);
+ return cppc_set_perf(cpudata->cpu, &perf_ctrls);
}
static inline bool amd_pstate_sample(struct amd_cpudata *cpudata)
@@ -493,14 +536,8 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
{
unsigned long max_freq;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpudata->cpu);
- u64 prev = READ_ONCE(cpudata->cppc_req_cached);
u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
- u64 value = prev;
- min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
- cpudata->max_limit_perf);
- max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
- cpudata->max_limit_perf);
des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
max_freq = READ_ONCE(cpudata->max_limit_freq);
@@ -511,34 +548,18 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
des_perf = 0;
}
- value &= ~AMD_CPPC_MIN_PERF(~0L);
- value |= AMD_CPPC_MIN_PERF(min_perf);
-
- value &= ~AMD_CPPC_DES_PERF(~0L);
- value |= AMD_CPPC_DES_PERF(des_perf);
-
/* limit the max perf when core performance boost feature is disabled */
if (!cpudata->boost_supported)
max_perf = min_t(unsigned long, nominal_perf, max_perf);
- value &= ~AMD_CPPC_MAX_PERF(~0L);
- value |= AMD_CPPC_MAX_PERF(max_perf);
-
if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) {
trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq,
cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc,
- cpudata->cpu, (value != prev), fast_switch);
+ cpudata->cpu, fast_switch);
}
- if (value == prev)
- goto cpufreq_policy_put;
+ amd_pstate_update_perf(cpudata, min_perf, des_perf, max_perf, 0, fast_switch);
- WRITE_ONCE(cpudata->cppc_req_cached, value);
-
- amd_pstate_update_perf(cpudata, min_perf, des_perf,
- max_perf, fast_switch);
-
-cpufreq_policy_put:
cpufreq_cpu_put(policy);
}
@@ -570,7 +591,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq;
+ u32 max_limit_perf, min_limit_perf, max_perf, max_freq;
struct amd_cpudata *cpudata = policy->driver_data;
max_perf = READ_ONCE(cpudata->highest_perf);
@@ -578,12 +599,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
max_limit_perf = div_u64(policy->max * max_perf, max_freq);
min_limit_perf = div_u64(policy->min * max_perf, max_freq);
- lowest_perf = READ_ONCE(cpudata->lowest_perf);
- if (min_limit_perf < lowest_perf)
- min_limit_perf = lowest_perf;
-
- if (max_limit_perf < min_limit_perf)
- max_limit_perf = min_limit_perf;
+ if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+ min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
@@ -704,8 +721,8 @@ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on)
if (on)
policy->cpuinfo.max_freq = max_freq;
- else if (policy->cpuinfo.max_freq > nominal_freq * 1000)
- policy->cpuinfo.max_freq = nominal_freq * 1000;
+ else if (policy->cpuinfo.max_freq > nominal_freq)
+ policy->cpuinfo.max_freq = nominal_freq;
policy->max = policy->cpuinfo.max_freq;
@@ -727,12 +744,11 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
pr_err("Boost mode is not supported by this processor or SBIOS\n");
return -EOPNOTSUPP;
}
- mutex_lock(&amd_pstate_driver_lock);
+ guard(mutex)(&amd_pstate_driver_lock);
+
ret = amd_pstate_cpu_boost_update(policy, state);
- WRITE_ONCE(cpudata->boost_state, !ret ? state : false);
policy->boost_enabled = !ret ? state : false;
refresh_frequency_limits(policy);
- mutex_unlock(&amd_pstate_driver_lock);
return ret;
}
@@ -752,9 +768,6 @@ static int amd_pstate_init_boost_support(struct amd_cpudata *cpudata)
goto exit_err;
}
- /* at least one CPU supports CPB, even if others fail later on to set up */
- current_pstate_driver->boost_enabled = true;
-
ret = rdmsrl_on_cpu(cpudata->cpu, MSR_K7_HWCR, &boost_val);
if (ret) {
pr_err_once("failed to read initial CPU boost state!\n");
@@ -802,7 +815,7 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
* sched_set_itmt_support(true) has been called and it is valid to
* update them at any time after it has been called.
*/
- sched_set_itmt_core_prio((int)READ_ONCE(cpudata->highest_perf), cpudata->cpu);
+ sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);
schedule_work(&sched_prefcore_work);
}
@@ -823,7 +836,8 @@ static void amd_pstate_update_limits(unsigned int cpu)
if (!amd_pstate_prefcore)
return;
- mutex_lock(&amd_pstate_driver_lock);
+ guard(mutex)(&amd_pstate_driver_lock);
+
ret = amd_get_highest_perf(cpu, &cur_high);
if (ret)
goto free_cpufreq_put;
@@ -843,7 +857,6 @@ free_cpufreq_put:
if (!highest_perf_changed)
cpufreq_update_policy(cpu);
- mutex_unlock(&amd_pstate_driver_lock);
}
/*
@@ -895,9 +908,8 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
{
int ret;
u32 min_freq, max_freq;
- u32 nominal_perf, nominal_freq;
+ u32 highest_perf, nominal_perf, nominal_freq;
u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
- u32 boost_ratio, lowest_nonlinear_ratio;
struct cppc_perf_caps cppc_perf;
ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
@@ -905,29 +917,25 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
return ret;
if (quirks && quirks->lowest_freq)
- min_freq = quirks->lowest_freq * 1000;
+ min_freq = quirks->lowest_freq;
else
- min_freq = cppc_perf.lowest_freq * 1000;
+ min_freq = cppc_perf.lowest_freq;
if (quirks && quirks->nominal_freq)
- nominal_freq = quirks->nominal_freq ;
+ nominal_freq = quirks->nominal_freq;
else
nominal_freq = cppc_perf.nominal_freq;
+ highest_perf = READ_ONCE(cpudata->highest_perf);
nominal_perf = READ_ONCE(cpudata->nominal_perf);
-
- boost_ratio = div_u64(cpudata->highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
- max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
+ max_freq = div_u64((u64)highest_perf * nominal_freq, nominal_perf);
lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
- lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
- nominal_perf);
- lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
-
- WRITE_ONCE(cpudata->min_freq, min_freq);
- WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
- WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
- WRITE_ONCE(cpudata->max_freq, max_freq);
+ lowest_nonlinear_freq = div_u64((u64)nominal_freq * lowest_nonlinear_perf, nominal_perf);
+ WRITE_ONCE(cpudata->min_freq, min_freq * 1000);
+ WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq * 1000);
+ WRITE_ONCE(cpudata->nominal_freq, nominal_freq * 1000);
+ WRITE_ONCE(cpudata->max_freq, max_freq * 1000);
/**
* Below values need to be initialized correctly, otherwise driver will fail to load
@@ -937,13 +945,13 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
*/
if (min_freq <= 0 || max_freq <= 0 || nominal_freq <= 0 || min_freq > max_freq) {
pr_err("min_freq(%d) or max_freq(%d) or nominal_freq(%d) value is incorrect\n",
- min_freq, max_freq, nominal_freq * 1000);
+ min_freq, max_freq, nominal_freq);
return -EINVAL;
}
- if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq * 1000) {
+ if (lowest_nonlinear_freq <= min_freq || lowest_nonlinear_freq > nominal_freq) {
pr_err("lowest_nonlinear_freq(%d) value is out of range [min_freq(%d), nominal_freq(%d)]\n",
- lowest_nonlinear_freq, min_freq, nominal_freq * 1000);
+ lowest_nonlinear_freq, min_freq, nominal_freq);
return -EINVAL;
}
@@ -1160,7 +1168,6 @@ static ssize_t show_energy_performance_available_preferences(
static ssize_t store_energy_performance_preference(
struct cpufreq_policy *policy, const char *buf, size_t count)
{
- struct amd_cpudata *cpudata = policy->driver_data;
char str_preference[21];
ssize_t ret;
@@ -1172,11 +1179,11 @@ static ssize_t store_energy_performance_preference(
if (ret < 0)
return -EINVAL;
- mutex_lock(&amd_pstate_limits_lock);
- ret = amd_pstate_set_energy_pref_index(cpudata, ret);
- mutex_unlock(&amd_pstate_limits_lock);
+ guard(mutex)(&amd_pstate_limits_lock);
+
+ ret = amd_pstate_set_energy_pref_index(policy, ret);
- return ret ?: count;
+ return ret ? ret : count;
}
static ssize_t show_energy_performance_preference(
@@ -1185,9 +1192,22 @@ static ssize_t show_energy_performance_preference(
struct amd_cpudata *cpudata = policy->driver_data;
int preference;
- preference = amd_pstate_get_energy_pref_index(cpudata);
- if (preference < 0)
- return preference;
+ switch (cpudata->epp_cached) {
+ case AMD_CPPC_EPP_PERFORMANCE:
+ preference = EPP_INDEX_PERFORMANCE;
+ break;
+ case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
+ preference = EPP_INDEX_BALANCE_PERFORMANCE;
+ break;
+ case AMD_CPPC_EPP_BALANCE_POWERSAVE:
+ preference = EPP_INDEX_BALANCE_POWERSAVE;
+ break;
+ case AMD_CPPC_EPP_POWERSAVE:
+ preference = EPP_INDEX_POWERSAVE;
+ break;
+ default:
+ return -EINVAL;
+ }
return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
}
@@ -1236,6 +1256,9 @@ static int amd_pstate_register_driver(int mode)
return ret;
}
+ /* at least one CPU supports CPB */
+ current_pstate_driver->boost_enabled = cpu_feature_enabled(X86_FEATURE_CPB);
+
ret = cpufreq_register_driver(current_pstate_driver);
if (ret) {
amd_pstate_driver_cleanup();
@@ -1340,13 +1363,10 @@ EXPORT_SYMBOL_GPL(amd_pstate_update_status);
static ssize_t status_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- ssize_t ret;
- mutex_lock(&amd_pstate_driver_lock);
- ret = amd_pstate_show_status(buf);
- mutex_unlock(&amd_pstate_driver_lock);
+ guard(mutex)(&amd_pstate_driver_lock);
- return ret;
+ return amd_pstate_show_status(buf);
}
static ssize_t status_store(struct device *a, struct device_attribute *b,
@@ -1355,9 +1375,8 @@ static ssize_t status_store(struct device *a, struct device_attribute *b,
char *p = memchr(buf, '\n', count);
int ret;
- mutex_lock(&amd_pstate_driver_lock);
+ guard(mutex)(&amd_pstate_driver_lock);
ret = amd_pstate_update_status(buf, p ? p - buf : count);
- mutex_unlock(&amd_pstate_driver_lock);
return ret < 0 ? ret : count;
}
@@ -1451,7 +1470,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
return -ENOMEM;
cpudata->cpu = policy->cpu;
- cpudata->epp_policy = 0;
ret = amd_pstate_init_perf(cpudata);
if (ret)
@@ -1477,8 +1495,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
policy->driver_data = cpudata;
- cpudata->epp_cached = cpudata->epp_default = amd_pstate_get_epp(cpudata, 0);
-
policy->min = policy->cpuinfo.min_freq;
policy->max = policy->cpuinfo.max_freq;
@@ -1489,10 +1505,13 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
* the default cpufreq governor is neither powersave nor performance.
*/
if (amd_pstate_acpi_pm_profile_server() ||
- amd_pstate_acpi_pm_profile_undefined())
+ amd_pstate_acpi_pm_profile_undefined()) {
policy->policy = CPUFREQ_POLICY_PERFORMANCE;
- else
+ cpudata->epp_default = amd_pstate_get_epp(cpudata);
+ } else {
policy->policy = CPUFREQ_POLICY_POWERSAVE;
+ cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
+ }
if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
@@ -1505,6 +1524,9 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
return ret;
WRITE_ONCE(cpudata->cppc_cap1_cached, value);
}
+ ret = amd_pstate_set_epp(cpudata, cpudata->epp_default);
+ if (ret)
+ return ret;
current_pstate_driver->adjust_perf = NULL;
@@ -1530,51 +1552,24 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
- u32 max_perf, min_perf;
- u64 value;
- s16 epp;
+ u32 epp;
- max_perf = READ_ONCE(cpudata->highest_perf);
- min_perf = READ_ONCE(cpudata->lowest_perf);
amd_pstate_update_min_max_limit(policy);
- max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
- cpudata->max_limit_perf);
- min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
- cpudata->max_limit_perf);
- value = READ_ONCE(cpudata->cppc_req_cached);
-
if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
- min_perf = min(cpudata->nominal_perf, max_perf);
-
- /* Initial min/max values for CPPC Performance Controls Register */
- value &= ~AMD_CPPC_MIN_PERF(~0L);
- value |= AMD_CPPC_MIN_PERF(min_perf);
-
- value &= ~AMD_CPPC_MAX_PERF(~0L);
- value |= AMD_CPPC_MAX_PERF(max_perf);
-
- /* CPPC EPP feature require to set zero to the desire perf bit */
- value &= ~AMD_CPPC_DES_PERF(~0L);
- value |= AMD_CPPC_DES_PERF(0);
-
- cpudata->epp_policy = cpudata->policy;
+ epp = 0;
+ else
+ epp = READ_ONCE(cpudata->epp_cached);
- /* Get BIOS pre-defined epp value */
- epp = amd_pstate_get_epp(cpudata, value);
- if (epp < 0) {
- /**
- * This return value can only be negative for shared_memory
- * systems where EPP register read/write not supported.
- */
- return epp;
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf, epp,
+ cpudata->min_limit_perf,
+ cpudata->max_limit_perf,
+ policy->boost_enabled);
}
- if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
- epp = 0;
-
- WRITE_ONCE(cpudata->cppc_req_cached, value);
- return amd_pstate_set_epp(cpudata, epp);
+ return amd_pstate_update_perf(cpudata, cpudata->min_limit_perf, 0U,
+ cpudata->max_limit_perf, epp, false);
}
static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
@@ -1603,87 +1598,63 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
return 0;
}
-static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
+static int amd_pstate_epp_reenable(struct cpufreq_policy *policy)
{
- struct cppc_perf_ctrls perf_ctrls;
- u64 value, max_perf;
+ struct amd_cpudata *cpudata = policy->driver_data;
+ u64 max_perf;
int ret;
ret = amd_pstate_cppc_enable(true);
if (ret)
pr_err("failed to enable amd pstate during resume, return %d\n", ret);
- value = READ_ONCE(cpudata->cppc_req_cached);
max_perf = READ_ONCE(cpudata->highest_perf);
- if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
- } else {
- perf_ctrls.max_perf = max_perf;
- cppc_set_perf(cpudata->cpu, &perf_ctrls);
- perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
- cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ cpudata->epp_cached,
+ FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cpudata->cppc_req_cached),
+ max_perf, policy->boost_enabled);
}
+
+ return amd_pstate_update_perf(cpudata, 0, 0, max_perf, cpudata->epp_cached, false);
}
static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
+ int ret;
pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
- if (cppc_state == AMD_PSTATE_ACTIVE) {
- amd_pstate_epp_reenable(cpudata);
- cpudata->suspended = false;
- }
+ ret = amd_pstate_epp_reenable(policy);
+ if (ret)
+ return ret;
+ cpudata->suspended = false;
return 0;
}
-static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
-{
- struct amd_cpudata *cpudata = policy->driver_data;
- struct cppc_perf_ctrls perf_ctrls;
- int min_perf;
- u64 value;
-
- min_perf = READ_ONCE(cpudata->lowest_perf);
- value = READ_ONCE(cpudata->cppc_req_cached);
-
- mutex_lock(&amd_pstate_limits_lock);
- if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
- cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;
-
- /* Set max perf same as min perf */
- value &= ~AMD_CPPC_MAX_PERF(~0L);
- value |= AMD_CPPC_MAX_PERF(min_perf);
- value &= ~AMD_CPPC_MIN_PERF(~0L);
- value |= AMD_CPPC_MIN_PERF(min_perf);
- wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
- } else {
- perf_ctrls.desired_perf = 0;
- perf_ctrls.min_perf = min_perf;
- perf_ctrls.max_perf = min_perf;
- cppc_set_perf(cpudata->cpu, &perf_ctrls);
- perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
- cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
- }
- mutex_unlock(&amd_pstate_limits_lock);
-}
-
static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
{
struct amd_cpudata *cpudata = policy->driver_data;
-
- pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
+ int min_perf;
if (cpudata->suspended)
return 0;
- if (cppc_state == AMD_PSTATE_ACTIVE)
- amd_pstate_epp_offline(policy);
+ min_perf = READ_ONCE(cpudata->lowest_perf);
- return 0;
+ guard(mutex)(&amd_pstate_limits_lock);
+
+ if (trace_amd_pstate_epp_perf_enabled()) {
+ trace_amd_pstate_epp_perf(cpudata->cpu, cpudata->highest_perf,
+ AMD_CPPC_EPP_BALANCE_POWERSAVE,
+ min_perf, min_perf, policy->boost_enabled);
+ }
+
+ return amd_pstate_update_perf(cpudata, min_perf, 0, min_perf,
+ AMD_CPPC_EPP_BALANCE_POWERSAVE, false);
}
static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
@@ -1711,12 +1682,10 @@ static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
struct amd_cpudata *cpudata = policy->driver_data;
if (cpudata->suspended) {
- mutex_lock(&amd_pstate_limits_lock);
+ guard(mutex)(&amd_pstate_limits_lock);
/* enable amd pstate from suspend state*/
- amd_pstate_epp_reenable(cpudata);
-
- mutex_unlock(&amd_pstate_limits_lock);
+ amd_pstate_epp_reenable(policy);
cpudata->suspended = false;
}
@@ -1869,6 +1838,8 @@ static int __init amd_pstate_init(void)
static_call_update(amd_pstate_cppc_enable, shmem_cppc_enable);
static_call_update(amd_pstate_init_perf, shmem_init_perf);
static_call_update(amd_pstate_update_perf, shmem_update_perf);
+ static_call_update(amd_pstate_get_epp, shmem_get_epp);
+ static_call_update(amd_pstate_set_epp, shmem_set_epp);
}
if (amd_pstate_prefcore) {
diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
index cd573bc6b6db..9747e3be6cee 100644
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -57,7 +57,6 @@ struct amd_aperf_mperf {
* @hw_prefcore: check whether HW supports preferred core featue.
* Only when hw_prefcore and early prefcore param are true,
* AMD P-State driver supports preferred core featue.
- * @epp_policy: Last saved policy used to set energy-performance preference
* @epp_cached: Cached CPPC energy-performance preference value
* @policy: Cpufreq policy value
* @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
@@ -94,13 +93,11 @@ struct amd_cpudata {
bool hw_prefcore;
/* EPP feature related attributes*/
- s16 epp_policy;
s16 epp_cached;
u32 policy;
u64 cppc_cap1_cached;
bool suspended;
s16 epp_default;
- bool boost_state;
};
/*
diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c
index 4dcacab9b4bf..269b18c62d04 100644
--- a/drivers/cpufreq/apple-soc-cpufreq.c
+++ b/drivers/cpufreq/apple-soc-cpufreq.c
@@ -22,11 +22,14 @@
#include <linux/pm_opp.h>
#include <linux/slab.h>
-#define APPLE_DVFS_CMD 0x20
-#define APPLE_DVFS_CMD_BUSY BIT(31)
-#define APPLE_DVFS_CMD_SET BIT(25)
-#define APPLE_DVFS_CMD_PS2 GENMASK(16, 12)
-#define APPLE_DVFS_CMD_PS1 GENMASK(4, 0)
+#define APPLE_DVFS_CMD 0x20
+#define APPLE_DVFS_CMD_BUSY BIT(31)
+#define APPLE_DVFS_CMD_SET BIT(25)
+#define APPLE_DVFS_CMD_PS1_S5L8960X GENMASK(24, 22)
+#define APPLE_DVFS_CMD_PS1_S5L8960X_SHIFT 22
+#define APPLE_DVFS_CMD_PS2 GENMASK(15, 12)
+#define APPLE_DVFS_CMD_PS1 GENMASK(4, 0)
+#define APPLE_DVFS_CMD_PS1_SHIFT 0
/* Same timebase as CPU counter (24MHz) */
#define APPLE_DVFS_LAST_CHG_TIME 0x38
@@ -35,6 +38,9 @@
* Apple ran out of bits and had to shift this in T8112...
*/
#define APPLE_DVFS_STATUS 0x50
+#define APPLE_DVFS_STATUS_CUR_PS_S5L8960X GENMASK(5, 3)
+#define APPLE_DVFS_STATUS_CUR_PS_SHIFT_S5L8960X 3
+#define APPLE_DVFS_STATUS_TGT_PS_S5L8960X GENMASK(2, 0)
#define APPLE_DVFS_STATUS_CUR_PS_T8103 GENMASK(7, 4)
#define APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103 4
#define APPLE_DVFS_STATUS_TGT_PS_T8103 GENMASK(3, 0)
@@ -52,12 +58,15 @@
#define APPLE_DVFS_PLL_FACTOR_MULT GENMASK(31, 16)
#define APPLE_DVFS_PLL_FACTOR_DIV GENMASK(15, 0)
-#define APPLE_DVFS_TRANSITION_TIMEOUT 100
+#define APPLE_DVFS_TRANSITION_TIMEOUT 400
struct apple_soc_cpufreq_info {
+ bool has_ps2;
u64 max_pstate;
u64 cur_pstate_mask;
u64 cur_pstate_shift;
+ u64 ps1_mask;
+ u64 ps1_shift;
};
struct apple_cpu_priv {
@@ -68,25 +77,47 @@ struct apple_cpu_priv {
static struct cpufreq_driver apple_soc_cpufreq_driver;
+static const struct apple_soc_cpufreq_info soc_s5l8960x_info = {
+ .has_ps2 = false,
+ .max_pstate = 7,
+ .cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_S5L8960X,
+ .cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_S5L8960X,
+ .ps1_mask = APPLE_DVFS_CMD_PS1_S5L8960X,
+ .ps1_shift = APPLE_DVFS_CMD_PS1_S5L8960X_SHIFT,
+};
+
static const struct apple_soc_cpufreq_info soc_t8103_info = {
+ .has_ps2 = true,
.max_pstate = 15,
.cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8103,
.cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8103,
+ .ps1_mask = APPLE_DVFS_CMD_PS1,
+ .ps1_shift = APPLE_DVFS_CMD_PS1_SHIFT,
};
static const struct apple_soc_cpufreq_info soc_t8112_info = {
+ .has_ps2 = false,
.max_pstate = 31,
.cur_pstate_mask = APPLE_DVFS_STATUS_CUR_PS_T8112,
.cur_pstate_shift = APPLE_DVFS_STATUS_CUR_PS_SHIFT_T8112,
+ .ps1_mask = APPLE_DVFS_CMD_PS1,
+ .ps1_shift = APPLE_DVFS_CMD_PS1_SHIFT,
};
static const struct apple_soc_cpufreq_info soc_default_info = {
+ .has_ps2 = false,
.max_pstate = 15,
.cur_pstate_mask = 0, /* fallback */
+ .ps1_mask = APPLE_DVFS_CMD_PS1,
+ .ps1_shift = APPLE_DVFS_CMD_PS1_SHIFT,
};
static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = {
{
+ .compatible = "apple,s5l8960x-cluster-cpufreq",
+ .data = &soc_s5l8960x_info,
+ },
+ {
.compatible = "apple,t8103-cluster-cpufreq",
.data = &soc_t8103_info,
},
@@ -109,7 +140,7 @@ static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu)
unsigned int pstate;
if (priv->info->cur_pstate_mask) {
- u64 reg = readq_relaxed(priv->reg_base + APPLE_DVFS_STATUS);
+ u32 reg = readl_relaxed(priv->reg_base + APPLE_DVFS_STATUS);
pstate = (reg & priv->info->cur_pstate_mask) >> priv->info->cur_pstate_shift;
} else {
@@ -148,9 +179,12 @@ static int apple_soc_cpufreq_set_target(struct cpufreq_policy *policy,
return -EIO;
}
- reg &= ~(APPLE_DVFS_CMD_PS1 | APPLE_DVFS_CMD_PS2);
- reg |= FIELD_PREP(APPLE_DVFS_CMD_PS1, pstate);
- reg |= FIELD_PREP(APPLE_DVFS_CMD_PS2, pstate);
+ reg &= ~priv->info->ps1_mask;
+ reg |= pstate << priv->info->ps1_shift;
+ if (priv->info->has_ps2) {
+ reg &= ~APPLE_DVFS_CMD_PS2;
+ reg |= FIELD_PREP(APPLE_DVFS_CMD_PS2, pstate);
+ }
reg |= APPLE_DVFS_CMD_SET;
writeq_relaxed(reg, priv->reg_base + APPLE_DVFS_CMD);
@@ -275,7 +309,7 @@ static int apple_soc_cpufreq_init(struct cpufreq_policy *policy)
transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
if (!transition_latency)
- transition_latency = CPUFREQ_ETERNAL;
+ transition_latency = APPLE_DVFS_TRANSITION_TIMEOUT * NSEC_PER_USEC;
policy->cpuinfo.transition_latency = transition_latency;
policy->dvfs_possible_from_any_cpu = true;
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 2a3e8bd317c9..2aa00769cf09 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -103,6 +103,8 @@ static const struct of_device_id allowlist[] __initconst = {
* platforms using "operating-points-v2" property.
*/
static const struct of_device_id blocklist[] __initconst = {
+ { .compatible = "airoha,en7581", },
+
{ .compatible = "allwinner,sun50i-a100" },
{ .compatible = "allwinner,sun50i-h6", },
{ .compatible = "allwinner,sun50i-h616", },
@@ -235,5 +237,3 @@ create_pdev:
sizeof(struct cpufreq_dt_platform_data)));
}
core_initcall(cpufreq_dt_platdev_init);
-MODULE_DESCRIPTION("Generic DT based cpufreq platdev driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1a4cae54a01b..1076e37a18ad 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -25,6 +25,7 @@
#include <linux/mutex.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
+#include <linux/string_choices.h>
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/tick.h>
@@ -602,12 +603,12 @@ static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr,
if (cpufreq_boost_trigger_state(enable)) {
pr_err("%s: Cannot %s BOOST!\n",
- __func__, enable ? "enable" : "disable");
+ __func__, str_enable_disable(enable));
return -EINVAL;
}
pr_debug("%s: cpufreq BOOST %s\n",
- __func__, enable ? "enabled" : "disabled");
+ __func__, str_enabled_disabled(enable));
return count;
}
@@ -1538,7 +1539,7 @@ static int cpufreq_online(unsigned int cpu)
/*
* Register with the energy model before
- * sugov_eas_rebuild_sd() is called, which will result
+ * em_rebuild_sched_domains() is called, which will result
* in rebuilding of the sched domains, which should only be done
* once the energy model is properly initialized for the policy
* first.
@@ -2812,7 +2813,7 @@ err_reset_state:
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
pr_err("%s: Cannot %s BOOST\n",
- __func__, state ? "enable" : "disable");
+ __func__, str_enable_disable(state));
return ret;
}
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b8e2396a708a..9c4cc01fd51a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -28,6 +28,7 @@
#include <linux/pm_qos.h>
#include <linux/bitfield.h>
#include <trace/events/power.h>
+#include <linux/units.h>
#include <asm/cpu.h>
#include <asm/div64.h>
@@ -302,11 +303,11 @@ static bool hwp_is_hybrid;
static struct cpufreq_driver *intel_pstate_driver __read_mostly;
-#define HYBRID_SCALING_FACTOR 78741
+#define HYBRID_SCALING_FACTOR_ADL 78741
#define HYBRID_SCALING_FACTOR_MTL 80000
#define HYBRID_SCALING_FACTOR_LNL 86957
-static int hybrid_scaling_factor = HYBRID_SCALING_FACTOR;
+static int hybrid_scaling_factor;
static inline int core_get_scaling(void)
{
@@ -414,18 +415,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu)
static int intel_pstate_cppc_get_scaling(int cpu)
{
struct cppc_perf_caps cppc_perf;
- int ret;
-
- ret = cppc_get_perf_caps(cpu, &cppc_perf);
/*
- * If the nominal frequency and the nominal performance are not
- * zero and the ratio between them is not 100, return the hybrid
- * scaling factor.
+ * Compute the perf-to-frequency scaling factor for the given CPU if
+ * possible, unless it would be 0.
*/
- if (!ret && cppc_perf.nominal_perf && cppc_perf.nominal_freq &&
- cppc_perf.nominal_perf * 100 != cppc_perf.nominal_freq)
- return hybrid_scaling_factor;
+ if (!cppc_get_perf_caps(cpu, &cppc_perf) &&
+ cppc_perf.nominal_perf && cppc_perf.nominal_freq)
+ return div_u64(cppc_perf.nominal_freq * KHZ_PER_MHZ,
+ cppc_perf.nominal_perf);
return core_get_scaling();
}
@@ -2211,24 +2209,30 @@ static void hybrid_get_type(void *data)
static int hwp_get_cpu_scaling(int cpu)
{
- u8 cpu_type = 0;
+ if (hybrid_scaling_factor) {
+ u8 cpu_type = 0;
- smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1);
- /* P-cores have a smaller perf level-to-freqency scaling factor. */
- if (cpu_type == 0x40)
- return hybrid_scaling_factor;
+ smp_call_function_single(cpu, hybrid_get_type, &cpu_type, 1);
- /* Use default core scaling for E-cores */
- if (cpu_type == 0x20)
+ /*
+ * Return the hybrid scaling factor for P-cores and use the
+ * default core scaling for E-cores.
+ */
+ if (cpu_type == 0x40)
+ return hybrid_scaling_factor;
+
+ if (cpu_type == 0x20)
+ return core_get_scaling();
+ }
+
+ /* Use core scaling on non-hybrid systems. */
+ if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
return core_get_scaling();
/*
- * If reached here, this system is either non-hybrid (like Tiger
- * Lake) or hybrid-capable (like Alder Lake or Raptor Lake) with
- * no E cores (in which case CPUID for hybrid support is 0).
- *
- * The CPPC nominal_frequency field is 0 for non-hybrid systems,
- * so the default core scaling will be used for them.
+ * The system is hybrid, but the hybrid scaling factor is not known or
+ * the CPU type is not one of the above, so use CPPC to compute the
+ * scaling factor for this CPU.
*/
return intel_pstate_cppc_get_scaling(cpu);
}
@@ -2709,7 +2713,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
}
cpu->epp_powersave = -EINVAL;
- cpu->epp_policy = 0;
+ cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
intel_pstate_get_cpu_pstates(cpu);
@@ -3665,8 +3669,12 @@ static const struct x86_cpu_id intel_epp_default[] = {
};
static const struct x86_cpu_id intel_hybrid_scaling_factor[] = {
+ X86_MATCH_VFM(INTEL_ALDERLAKE, HYBRID_SCALING_FACTOR_ADL),
+ X86_MATCH_VFM(INTEL_ALDERLAKE_L, HYBRID_SCALING_FACTOR_ADL),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE, HYBRID_SCALING_FACTOR_ADL),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_P, HYBRID_SCALING_FACTOR_ADL),
+ X86_MATCH_VFM(INTEL_RAPTORLAKE_S, HYBRID_SCALING_FACTOR_ADL),
X86_MATCH_VFM(INTEL_METEORLAKE_L, HYBRID_SCALING_FACTOR_MTL),
- X86_MATCH_VFM(INTEL_ARROWLAKE, HYBRID_SCALING_FACTOR_MTL),
X86_MATCH_VFM(INTEL_LUNARLAKE_M, HYBRID_SCALING_FACTOR_LNL),
{}
};
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 8de759247771..ae79d909943b 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -18,6 +18,7 @@
#include <linux/of.h>
#include <linux/reboot.h>
#include <linux/slab.h>
+#include <linux/string_choices.h>
#include <linux/cpu.h>
#include <linux/hashtable.h>
#include <trace/events/power.h>
@@ -281,7 +282,7 @@ next:
pr_info("cpufreq pstate min 0x%x nominal 0x%x max 0x%x\n", pstate_min,
pstate_nominal, pstate_max);
pr_info("Workload Optimized Frequency is %s in the platform\n",
- (powernv_pstate_info.wof_enabled) ? "enabled" : "disabled");
+ str_enabled_disabled(powernv_pstate_info.wof_enabled));
pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids);
if (!pstate_ids) {
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index 98129565acb8..b2e7e89feaac 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -143,14 +143,12 @@ static unsigned long qcom_lmh_get_throttle_freq(struct qcom_cpufreq_data *data)
}
/* Get the frequency requested by the cpufreq core for the CPU */
-static unsigned int qcom_cpufreq_get_freq(unsigned int cpu)
+static unsigned int qcom_cpufreq_get_freq(struct cpufreq_policy *policy)
{
struct qcom_cpufreq_data *data;
const struct qcom_cpufreq_soc_data *soc_data;
- struct cpufreq_policy *policy;
unsigned int index;
- policy = cpufreq_cpu_get_raw(cpu);
if (!policy)
return 0;
@@ -163,12 +161,10 @@ static unsigned int qcom_cpufreq_get_freq(unsigned int cpu)
return policy->freq_table[index].frequency;
}
-static unsigned int qcom_cpufreq_hw_get(unsigned int cpu)
+static unsigned int __qcom_cpufreq_hw_get(struct cpufreq_policy *policy)
{
struct qcom_cpufreq_data *data;
- struct cpufreq_policy *policy;
- policy = cpufreq_cpu_get_raw(cpu);
if (!policy)
return 0;
@@ -177,7 +173,12 @@ static unsigned int qcom_cpufreq_hw_get(unsigned int cpu)
if (data->throttle_irq >= 0)
return qcom_lmh_get_throttle_freq(data) / HZ_PER_KHZ;
- return qcom_cpufreq_get_freq(cpu);
+ return qcom_cpufreq_get_freq(policy);
+}
+
+static unsigned int qcom_cpufreq_hw_get(unsigned int cpu)
+{
+ return __qcom_cpufreq_hw_get(cpufreq_cpu_get_raw(cpu));
}
static unsigned int qcom_cpufreq_hw_fast_switch(struct cpufreq_policy *policy,
@@ -363,7 +364,7 @@ static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
* If h/w throttled frequency is higher than what cpufreq has requested
* for, then stop polling and switch back to interrupt mechanism.
*/
- if (throttled_freq >= qcom_cpufreq_get_freq(cpu))
+ if (throttled_freq >= qcom_cpufreq_get_freq(cpufreq_cpu_get_raw(cpu)))
enable_irq(data->throttle_irq);
else
mod_delayed_work(system_highpri_wq, &data->throttle_work,
@@ -441,7 +442,6 @@ static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
return data->throttle_irq;
data->cancel_throttle = false;
- data->policy = policy;
mutex_init(&data->throttle_lock);
INIT_DEFERRABLE_WORK(&data->throttle_work, qcom_lmh_dcvs_poll);
@@ -552,6 +552,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
policy->driver_data = data;
policy->dvfs_possible_from_any_cpu = true;
+ data->policy = policy;
ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy);
if (ret) {
@@ -622,11 +623,24 @@ static unsigned long qcom_cpufreq_hw_recalc_rate(struct clk_hw *hw, unsigned lon
{
struct qcom_cpufreq_data *data = container_of(hw, struct qcom_cpufreq_data, cpu_clk);
- return qcom_lmh_get_throttle_freq(data);
+ return __qcom_cpufreq_hw_get(data->policy) * HZ_PER_KHZ;
+}
+
+/*
+ * Since we cannot determine the closest rate of the target rate, let's just
+ * return the actual rate at which the clock is running at. This is needed to
+ * make clk_set_rate() API work properly.
+ */
+static int qcom_cpufreq_hw_determine_rate(struct clk_hw *hw, struct clk_rate_request *req)
+{
+ req->rate = qcom_cpufreq_hw_recalc_rate(hw, 0);
+
+ return 0;
}
static const struct clk_ops qcom_cpufreq_hw_clk_ops = {
.recalc_rate = qcom_cpufreq_hw_recalc_rate,
+ .determine_rate = qcom_cpufreq_hw_determine_rate,
};
static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 07d6f9a9b7c8..b8fe758aeb01 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -16,6 +16,7 @@
#include <linux/export.h>
#include <linux/module.h>
#include <linux/pm_opp.h>
+#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/scmi_protocol.h>
#include <linux/types.h>
@@ -26,6 +27,8 @@ struct scmi_data {
int nr_opp;
struct device *cpu_dev;
cpumask_var_t opp_shared_cpus;
+ struct notifier_block limit_notify_nb;
+ struct freq_qos_request limits_freq_req;
};
static struct scmi_protocol_handle *ph;
@@ -174,6 +177,22 @@ static struct freq_attr *scmi_cpufreq_hw_attr[] = {
NULL,
};
+static int scmi_limit_notify_cb(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct scmi_data *priv = container_of(nb, struct scmi_data, limit_notify_nb);
+ struct scmi_perf_limits_report *limit_notify = data;
+ unsigned int limit_freq_khz;
+ int ret;
+
+ limit_freq_khz = limit_notify->range_max_freq / HZ_PER_KHZ;
+
+ ret = freq_qos_update_request(&priv->limits_freq_req, limit_freq_khz);
+ if (ret < 0)
+ pr_warn("failed to update freq constraint: %d\n", ret);
+
+ return NOTIFY_OK;
+}
+
static int scmi_cpufreq_init(struct cpufreq_policy *policy)
{
int ret, nr_opp, domain;
@@ -181,6 +200,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
struct device *cpu_dev;
struct scmi_data *priv;
struct cpufreq_frequency_table *freq_table;
+ struct scmi_device *sdev = cpufreq_get_driver_data();
cpu_dev = get_cpu_device(policy->cpu);
if (!cpu_dev) {
@@ -294,6 +314,23 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
}
}
+ ret = freq_qos_add_request(&policy->constraints, &priv->limits_freq_req, FREQ_QOS_MAX,
+ FREQ_QOS_MAX_DEFAULT_VALUE);
+ if (ret < 0) {
+ dev_err(cpu_dev, "failed to add qos limits request: %d\n", ret);
+ goto out_free_table;
+ }
+
+ priv->limit_notify_nb.notifier_call = scmi_limit_notify_cb;
+ ret = sdev->handle->notify_ops->event_notifier_register(sdev->handle, SCMI_PROTOCOL_PERF,
+ SCMI_EVENT_PERFORMANCE_LIMITS_CHANGED,
+ &priv->domain_id,
+ &priv->limit_notify_nb);
+ if (ret)
+ dev_warn(&sdev->dev,
+ "failed to register for limits change notifier for domain %d\n",
+ priv->domain_id);
+
return 0;
out_free_table:
@@ -313,7 +350,13 @@ out_free_priv:
static void scmi_cpufreq_exit(struct cpufreq_policy *policy)
{
struct scmi_data *priv = policy->driver_data;
+ struct scmi_device *sdev = cpufreq_get_driver_data();
+ sdev->handle->notify_ops->event_notifier_unregister(sdev->handle, SCMI_PROTOCOL_PERF,
+ SCMI_EVENT_PERFORMANCE_LIMITS_CHANGED,
+ &priv->domain_id,
+ &priv->limit_notify_nb);
+ freq_qos_remove_request(&priv->limits_freq_req);
dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
dev_pm_opp_remove_all_dynamic(priv->cpu_dev);
free_cpumask_var(priv->opp_shared_cpus);
@@ -372,6 +415,8 @@ static int scmi_cpufreq_probe(struct scmi_device *sdev)
if (!handle)
return -ENODEV;
+ scmi_cpufreq_driver.driver_data = sdev;
+
perf_ops = handle->devm_protocol_get(sdev, SCMI_PROTOCOL_PERF, &ph);
if (IS_ERR(perf_ops))
return PTR_ERR(perf_ops);
diff --git a/drivers/cpufreq/sparc-us2e-cpufreq.c b/drivers/cpufreq/sparc-us2e-cpufreq.c
index 8a0cd5312a59..15899dd77c08 100644
--- a/drivers/cpufreq/sparc-us2e-cpufreq.c
+++ b/drivers/cpufreq/sparc-us2e-cpufreq.c
@@ -323,7 +323,7 @@ static int __init us2e_freq_init(void)
impl = ((ver >> 32) & 0xffff);
if (manuf == 0x17 && impl == 0x13) {
- us2e_freq_table = kzalloc(NR_CPUS * sizeof(*us2e_freq_table),
+ us2e_freq_table = kcalloc(NR_CPUS, sizeof(*us2e_freq_table),
GFP_KERNEL);
if (!us2e_freq_table)
return -ENOMEM;
diff --git a/drivers/cpufreq/sparc-us3-cpufreq.c b/drivers/cpufreq/sparc-us3-cpufreq.c
index b50f9d13e6d2..de50a2f3b124 100644
--- a/drivers/cpufreq/sparc-us3-cpufreq.c
+++ b/drivers/cpufreq/sparc-us3-cpufreq.c
@@ -171,7 +171,7 @@ static int __init us3_freq_init(void)
impl == CHEETAH_PLUS_IMPL ||
impl == JAGUAR_IMPL ||
impl == PANTHER_IMPL)) {
- us3_freq_table = kzalloc(NR_CPUS * sizeof(*us3_freq_table),
+ us3_freq_table = kcalloc(NR_CPUS, sizeof(*us3_freq_table),
GFP_KERNEL);
if (!us3_freq_table)
return -ENOMEM;
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index f2992f92d8db..173ddcac540a 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -10,25 +10,27 @@
* DOC: teo-description
*
* The idea of this governor is based on the observation that on many systems
- * timer events are two or more orders of magnitude more frequent than any
- * other interrupts, so they are likely to be the most significant cause of CPU
- * wakeups from idle states. Moreover, information about what happened in the
- * (relatively recent) past can be used to estimate whether or not the deepest
- * idle state with target residency within the (known) time till the closest
- * timer event, referred to as the sleep length, is likely to be suitable for
- * the upcoming CPU idle period and, if not, then which of the shallower idle
- * states to choose instead of it.
+ * timer interrupts are two or more orders of magnitude more frequent than any
+ * other interrupt types, so they are likely to dominate CPU wakeup patterns.
+ * Moreover, in principle, the time when the next timer event is going to occur
+ * can be determined at the idle state selection time, although doing that may
+ * be costly, so it can be regarded as the most reliable source of information
+ * for idle state selection.
*
- * Of course, non-timer wakeup sources are more important in some use cases
- * which can be covered by taking a few most recent idle time intervals of the
- * CPU into account. However, even in that context it is not necessary to
- * consider idle duration values greater than the sleep length, because the
- * closest timer will ultimately wake up the CPU anyway unless it is woken up
- * earlier.
+ * Of course, non-timer wakeup sources are more important in some use cases,
+ * but even then it is generally unnecessary to consider idle duration values
+ * greater than the time time till the next timer event, referred as the sleep
+ * length in what follows, because the closest timer will ultimately wake up the
+ * CPU anyway unless it is woken up earlier.
*
- * Thus this governor estimates whether or not the prospective idle duration of
- * a CPU is likely to be significantly shorter than the sleep length and selects
- * an idle state for it accordingly.
+ * However, since obtaining the sleep length may be costly, the governor first
+ * checks if it can select a shallow idle state using wakeup pattern information
+ * from recent times, in which case it can do without knowing the sleep length
+ * at all. For this purpose, it counts CPU wakeup events and looks for an idle
+ * state whose target residency has not exceeded the idle duration (measured
+ * after wakeup) in the majority of relevant recent cases. If the target
+ * residency of that state is small enough, it may be used right away and the
+ * sleep length need not be determined.
*
* The computations carried out by this governor are based on using bins whose
* boundaries are aligned with the target residency parameter values of the CPU
@@ -39,7 +41,11 @@
* idle state 2, the third bin spans from the target residency of idle state 2
* up to, but not including, the target residency of idle state 3 and so on.
* The last bin spans from the target residency of the deepest idle state
- * supplied by the driver to infinity.
+ * supplied by the driver to the scheduler tick period length or to infinity if
+ * the tick period length is less than the target residency of that state. In
+ * the latter case, the governor also counts events with the measured idle
+ * duration between the tick period length and the target residency of the
+ * deepest idle state.
*
* Two metrics called "hits" and "intercepts" are associated with each bin.
* They are updated every time before selecting an idle state for the given CPU
@@ -49,47 +55,46 @@
* sleep length and the idle duration measured after CPU wakeup fall into the
* same bin (that is, the CPU appears to wake up "on time" relative to the sleep
* length). In turn, the "intercepts" metric reflects the relative frequency of
- * situations in which the measured idle duration is so much shorter than the
- * sleep length that the bin it falls into corresponds to an idle state
- * shallower than the one whose bin is fallen into by the sleep length (these
- * situations are referred to as "intercepts" below).
+ * non-timer wakeup events for which the measured idle duration falls into a bin
+ * that corresponds to an idle state shallower than the one whose bin is fallen
+ * into by the sleep length (these events are also referred to as "intercepts"
+ * below).
*
* In order to select an idle state for a CPU, the governor takes the following
* steps (modulo the possible latency constraint that must be taken into account
* too):
*
- * 1. Find the deepest CPU idle state whose target residency does not exceed
- * the current sleep length (the candidate idle state) and compute 2 sums as
- * follows:
+ * 1. Find the deepest enabled CPU idle state (the candidate idle state) and
+ * compute 2 sums as follows:
*
- * - The sum of the "hits" and "intercepts" metrics for the candidate state
- * and all of the deeper idle states (it represents the cases in which the
- * CPU was idle long enough to avoid being intercepted if the sleep length
- * had been equal to the current one).
+ * - The sum of the "hits" metric for all of the idle states shallower than
+ * the candidate one (it represents the cases in which the CPU was likely
+ * woken up by a timer).
*
- * - The sum of the "intercepts" metrics for all of the idle states shallower
- * than the candidate one (it represents the cases in which the CPU was not
- * idle long enough to avoid being intercepted if the sleep length had been
- * equal to the current one).
+ * - The sum of the "intercepts" metric for all of the idle states shallower
+ * than the candidate one (it represents the cases in which the CPU was
+ * likely woken up by a non-timer wakeup source).
*
- * 2. If the second sum is greater than the first one the CPU is likely to wake
- * up early, so look for an alternative idle state to select.
+ * 2. If the second sum computed in step 1 is greater than a half of the sum of
+ * both metrics for the candidate state bin and all subsequent bins(if any),
+ * a shallower idle state is likely to be more suitable, so look for it.
*
- * - Traverse the idle states shallower than the candidate one in the
+ * - Traverse the enabled idle states shallower than the candidate one in the
* descending order.
*
* - For each of them compute the sum of the "intercepts" metrics over all
* of the idle states between it and the candidate one (including the
* former and excluding the latter).
*
- * - If each of these sums that needs to be taken into account (because the
- * check related to it has indicated that the CPU is likely to wake up
- * early) is greater than a half of the corresponding sum computed in step
- * 1 (which means that the target residency of the state in question had
- * not exceeded the idle duration in over a half of the relevant cases),
- * select the given idle state instead of the candidate one.
+ * - If this sum is greater than a half of the second sum computed in step 1,
+ * use the given idle state as the new candidate one.
*
- * 3. By default, select the candidate state.
+ * 3. If the current candidate state is state 0 or its target residency is short
+ * enough, return it and prevent the scheduler tick from being stopped.
+ *
+ * 4. Obtain the sleep length value and check if it is below the target
+ * residency of the current candidate state, in which case a new shallower
+ * candidate state needs to be found, so look for it.
*/
#include <linux/cpuidle.h>
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index ac4d8faa3886..23d0cd27a581 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1651,6 +1651,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &idle_cpu_snr),
X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &idle_cpu_grr),
X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &idle_cpu_srf),
+ X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &idle_cpu_srf),
{}
};
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 0311b18319a4..73e9a3b2f29b 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -101,11 +101,55 @@ struct opp_table *_find_opp_table(struct device *dev)
* representation in the OPP table and manage the clock configuration themselves
* in an platform specific way.
*/
-static bool assert_single_clk(struct opp_table *opp_table)
+static bool assert_single_clk(struct opp_table *opp_table,
+ unsigned int __always_unused index)
{
return !WARN_ON(opp_table->clk_count > 1);
}
+/*
+ * Returns true if clock table is large enough to contain the clock index.
+ */
+static bool assert_clk_index(struct opp_table *opp_table,
+ unsigned int index)
+{
+ return opp_table->clk_count > index;
+}
+
+/*
+ * Returns true if bandwidth table is large enough to contain the bandwidth index.
+ */
+static bool assert_bandwidth_index(struct opp_table *opp_table,
+ unsigned int index)
+{
+ return opp_table->path_count > index;
+}
+
+/**
+ * dev_pm_opp_get_bw() - Gets the bandwidth corresponding to an opp
+ * @opp: opp for which bandwidth has to be returned for
+ * @peak: select peak or average bandwidth
+ * @index: bandwidth index
+ *
+ * Return: bandwidth in kBps, else return 0
+ */
+unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index)
+{
+ if (IS_ERR_OR_NULL(opp)) {
+ pr_err("%s: Invalid parameters\n", __func__);
+ return 0;
+ }
+
+ if (index >= opp->opp_table->path_count)
+ return 0;
+
+ if (!opp->bandwidth)
+ return 0;
+
+ return peak ? opp->bandwidth[index].peak : opp->bandwidth[index].avg;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_bw);
+
/**
* dev_pm_opp_get_voltage() - Gets the voltage corresponding to an opp
* @opp: opp for which voltage has to be returned for
@@ -499,12 +543,12 @@ static struct dev_pm_opp *_opp_table_find_key(struct opp_table *opp_table,
unsigned long (*read)(struct dev_pm_opp *opp, int index),
bool (*compare)(struct dev_pm_opp **opp, struct dev_pm_opp *temp_opp,
unsigned long opp_key, unsigned long key),
- bool (*assert)(struct opp_table *opp_table))
+ bool (*assert)(struct opp_table *opp_table, unsigned int index))
{
struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
/* Assert that the requirement is met */
- if (assert && !assert(opp_table))
+ if (assert && !assert(opp_table, index))
return ERR_PTR(-EINVAL);
mutex_lock(&opp_table->lock);
@@ -532,7 +576,7 @@ _find_key(struct device *dev, unsigned long *key, int index, bool available,
unsigned long (*read)(struct dev_pm_opp *opp, int index),
bool (*compare)(struct dev_pm_opp **opp, struct dev_pm_opp *temp_opp,
unsigned long opp_key, unsigned long key),
- bool (*assert)(struct opp_table *opp_table))
+ bool (*assert)(struct opp_table *opp_table, unsigned int index))
{
struct opp_table *opp_table;
struct dev_pm_opp *opp;
@@ -555,7 +599,7 @@ _find_key(struct device *dev, unsigned long *key, int index, bool available,
static struct dev_pm_opp *_find_key_exact(struct device *dev,
unsigned long key, int index, bool available,
unsigned long (*read)(struct dev_pm_opp *opp, int index),
- bool (*assert)(struct opp_table *opp_table))
+ bool (*assert)(struct opp_table *opp_table, unsigned int index))
{
/*
* The value of key will be updated here, but will be ignored as the
@@ -568,7 +612,7 @@ static struct dev_pm_opp *_find_key_exact(struct device *dev,
static struct dev_pm_opp *_opp_table_find_key_ceil(struct opp_table *opp_table,
unsigned long *key, int index, bool available,
unsigned long (*read)(struct dev_pm_opp *opp, int index),
- bool (*assert)(struct opp_table *opp_table))
+ bool (*assert)(struct opp_table *opp_table, unsigned int index))
{
return _opp_table_find_key(opp_table, key, index, available, read,
_compare_ceil, assert);
@@ -577,7 +621,7 @@ static struct dev_pm_opp *_opp_table_find_key_ceil(struct opp_table *opp_table,
static struct dev_pm_opp *_find_key_ceil(struct device *dev, unsigned long *key,
int index, bool available,
unsigned long (*read)(struct dev_pm_opp *opp, int index),
- bool (*assert)(struct opp_table *opp_table))
+ bool (*assert)(struct opp_table *opp_table, unsigned int index))
{
return _find_key(dev, key, index, available, read, _compare_ceil,
assert);
@@ -586,7 +630,7 @@ static struct dev_pm_opp *_find_key_ceil(struct device *dev, unsigned long *key,
static struct dev_pm_opp *_find_key_floor(struct device *dev,
unsigned long *key, int index, bool available,
unsigned long (*read)(struct dev_pm_opp *opp, int index),
- bool (*assert)(struct opp_table *opp_table))
+ bool (*assert)(struct opp_table *opp_table, unsigned int index))
{
return _find_key(dev, key, index, available, read, _compare_floor,
assert);
@@ -647,7 +691,8 @@ struct dev_pm_opp *
dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq,
u32 index, bool available)
{
- return _find_key_exact(dev, freq, index, available, _read_freq, NULL);
+ return _find_key_exact(dev, freq, index, available, _read_freq,
+ assert_clk_index);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact_indexed);
@@ -707,7 +752,8 @@ struct dev_pm_opp *
dev_pm_opp_find_freq_ceil_indexed(struct device *dev, unsigned long *freq,
u32 index)
{
- return _find_key_ceil(dev, freq, index, true, _read_freq, NULL);
+ return _find_key_ceil(dev, freq, index, true, _read_freq,
+ assert_clk_index);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil_indexed);
@@ -760,7 +806,7 @@ struct dev_pm_opp *
dev_pm_opp_find_freq_floor_indexed(struct device *dev, unsigned long *freq,
u32 index)
{
- return _find_key_floor(dev, freq, index, true, _read_freq, NULL);
+ return _find_key_floor(dev, freq, index, true, _read_freq, assert_clk_index);
}
EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor_indexed);
@@ -878,7 +924,8 @@ struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw,
unsigned long temp = *bw;
struct dev_pm_opp *opp;
- opp = _find_key_ceil(dev, &temp, index, true, _read_bw, NULL);
+ opp = _find_key_ceil(dev, &temp, index, true, _read_bw,
+ assert_bandwidth_index);
*bw = temp;
return opp;
}
@@ -909,7 +956,8 @@ struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev,
unsigned long temp = *bw;
struct dev_pm_opp *opp;
- opp = _find_key_floor(dev, &temp, index, true, _read_bw, NULL);
+ opp = _find_key_floor(dev, &temp, index, true, _read_bw,
+ assert_bandwidth_index);
*bw = temp;
return opp;
}
@@ -1480,11 +1528,6 @@ err:
return ERR_PTR(ret);
}
-void _get_opp_table_kref(struct opp_table *opp_table)
-{
- kref_get(&opp_table->kref);
-}
-
static struct opp_table *_update_opp_table_clk(struct device *dev,
struct opp_table *opp_table,
bool getclk)
@@ -1645,6 +1688,17 @@ static void _opp_table_kref_release(struct kref *kref)
kfree(opp_table);
}
+void _get_opp_table_kref(struct opp_table *opp_table)
+{
+ kref_get(&opp_table->kref);
+}
+
+void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table)
+{
+ _get_opp_table_kref(opp_table);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_table_ref);
+
void dev_pm_opp_put_opp_table(struct opp_table *opp_table)
{
kref_put_mutex(&opp_table->kref, _opp_table_kref_release,
@@ -1679,6 +1733,7 @@ void dev_pm_opp_get(struct dev_pm_opp *opp)
{
kref_get(&opp->kref);
}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get);
void dev_pm_opp_put(struct dev_pm_opp *opp)
{
@@ -1702,7 +1757,7 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq)
if (IS_ERR(opp_table))
return;
- if (!assert_single_clk(opp_table))
+ if (!assert_single_clk(opp_table, 0))
goto put_table;
mutex_lock(&opp_table->lock);
@@ -2054,7 +2109,7 @@ int _opp_add_v1(struct opp_table *opp_table, struct device *dev,
unsigned long tol, u_volt = data->u_volt;
int ret;
- if (!assert_single_clk(opp_table))
+ if (!assert_single_clk(opp_table, 0))
return -EINVAL;
new_opp = _opp_allocate(opp_table);
@@ -2810,7 +2865,7 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
return r;
}
- if (!assert_single_clk(opp_table)) {
+ if (!assert_single_clk(opp_table, 0)) {
r = -EINVAL;
goto put_table;
}
@@ -2886,7 +2941,7 @@ int dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq,
return r;
}
- if (!assert_single_clk(opp_table)) {
+ if (!assert_single_clk(opp_table, 0)) {
r = -EINVAL;
goto put_table;
}
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index fd5ed2858258..a24f76f5fd01 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -926,7 +926,7 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table,
ret = _of_opp_alloc_required_opps(opp_table, new_opp);
if (ret)
- goto free_opp;
+ goto put_node;
if (!of_property_read_u32(np, "clock-latency-ns", &val))
new_opp->clock_latency_ns = val;
@@ -976,6 +976,8 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table,
free_required_opps:
_of_opp_free_required_opps(opp_table, new_opp);
+put_node:
+ of_node_put(np);
free_opp:
_opp_free(new_opp);
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index 430651e7424a..5c7c81190e41 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -250,7 +250,6 @@ struct opp_table {
};
/* Routines internal to opp core */
-void dev_pm_opp_get(struct dev_pm_opp *opp);
bool _opp_remove_all_static(struct opp_table *opp_table);
void _get_opp_table_kref(struct opp_table *opp_table);
int _get_opp_count(struct opp_table *opp_table);
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 752e0b297582..78318d49276d 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -179,6 +179,7 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
int em_dev_update_chip_binning(struct device *dev);
int em_update_performance_limits(struct em_perf_domain *pd,
unsigned long freq_min_khz, unsigned long freq_max_khz);
+void em_rebuild_sched_domains(void);
/**
* em_pd_get_efficient_state() - Get an efficient performance state from the EM
@@ -404,6 +405,7 @@ int em_update_performance_limits(struct em_perf_domain *pd,
{
return -EINVAL;
}
+static inline void em_rebuild_sched_domains(void) {}
#endif
#endif
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 568183e3e641..c247317aae38 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -100,8 +100,11 @@ struct dev_pm_opp_data {
#if defined(CONFIG_PM_OPP)
struct opp_table *dev_pm_opp_get_opp_table(struct device *dev);
+void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table);
void dev_pm_opp_put_opp_table(struct opp_table *opp_table);
+unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index);
+
unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *supplies);
@@ -158,6 +161,7 @@ struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev,
struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev,
unsigned int *bw, int index);
+void dev_pm_opp_get(struct dev_pm_opp *opp);
void dev_pm_opp_put(struct dev_pm_opp *opp);
int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp);
@@ -203,8 +207,15 @@ static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *
return ERR_PTR(-EOPNOTSUPP);
}
+static inline void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table) {}
+
static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {}
+static inline unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index)
+{
+ return 0;
+}
+
static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
{
return 0;
@@ -334,6 +345,8 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev,
return ERR_PTR(-EOPNOTSUPP);
}
+static inline void dev_pm_opp_get(struct dev_pm_opp *opp) {}
+
static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {}
static inline int
diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h
index d9642c6cf852..25b63ed51b76 100644
--- a/include/linux/pm_wakeirq.h
+++ b/include/linux/pm_wakeirq.h
@@ -10,6 +10,7 @@ extern int dev_pm_set_wake_irq(struct device *dev, int irq);
extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq);
extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq);
extern void dev_pm_clear_wake_irq(struct device *dev);
+extern int devm_pm_set_wake_irq(struct device *dev, int irq);
#else /* !CONFIG_PM */
@@ -32,5 +33,10 @@ static inline void dev_pm_clear_wake_irq(struct device *dev)
{
}
+static inline int devm_pm_set_wake_irq(struct device *dev, int irq)
+{
+ return 0;
+}
+
#endif /* CONFIG_PM */
#endif /* _LINUX_PM_WAKEIRQ_H */
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 222f7530806c..d501c09c60cd 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -240,4 +240,21 @@ static inline int device_init_wakeup(struct device *dev, bool enable)
return 0;
}
+static void device_disable_wakeup(void *dev)
+{
+ device_init_wakeup(dev, false);
+}
+
+/**
+ * devm_device_init_wakeup - Resource managed device wakeup initialization.
+ * @dev: Device to handle.
+ *
+ * This function is the devm managed version of device_init_wakeup(dev, true).
+ */
+static inline int devm_device_init_wakeup(struct device *dev)
+{
+ device_init_wakeup(dev, true);
+ return devm_add_action_or_reset(dev, device_disable_wakeup, dev);
+}
+
#endif /* _LINUX_PM_WAKEUP_H */
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index afce8130d8b9..ca947ed32e3d 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -257,11 +257,30 @@ config DPM_WATCHDOG
boot session.
config DPM_WATCHDOG_TIMEOUT
- int "Watchdog timeout in seconds"
+ int "Watchdog timeout to panic in seconds"
range 1 120
default 120
depends on DPM_WATCHDOG
+config DPM_WATCHDOG_WARNING_TIMEOUT
+ int "Watchdog timeout to warn in seconds"
+ range 1 DPM_WATCHDOG_TIMEOUT
+ default DPM_WATCHDOG_TIMEOUT
+ depends on DPM_WATCHDOG
+ help
+ If the DPM watchdog warning timeout and main timeout are
+ different then a non-fatal warning (with a stack trace of
+ the stuck suspend routine) will be printed when the warning
+ timeout expires. If the suspend routine gets un-stuck
+ before the main timeout expires then no other action is
+ taken. If the routine continues to be stuck and the main
+ timeout expires then an emergency-level message and stack
+ trace will be printed and the system will panic.
+
+ If the warning timeout is equal to the main timeout (the
+ default) then the warning will never happen and the system
+ will jump straight to panic when the main timeout expires.
+
config PM_TRACE
bool
help
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c
index b29c8aca7486..865df641b97c 100644
--- a/kernel/power/autosleep.c
+++ b/kernel/power/autosleep.c
@@ -9,7 +9,6 @@
#include <linux/device.h>
#include <linux/mutex.h>
-#include <linux/pm_wakeup.h>
#include "power.h"
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index d07faf42eace..3874f0e97651 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -908,3 +908,20 @@ int em_update_performance_limits(struct em_perf_domain *pd,
return 0;
}
EXPORT_SYMBOL_GPL(em_update_performance_limits);
+
+static void rebuild_sd_workfn(struct work_struct *work)
+{
+ rebuild_sched_domains_energy();
+}
+
+void em_rebuild_sched_domains(void)
+{
+ static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
+
+ /*
+ * When called from the cpufreq_register_driver() path, the
+ * cpu_hotplug_lock is already held, so use a work item to
+ * avoid nested locking in rebuild_sched_domains().
+ */
+ schedule_work(&rebuild_sd_work);
+}
diff --git a/kernel/power/power.h b/kernel/power/power.h
index de0e6b1077f2..c352dea2f67b 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -110,7 +110,7 @@ extern int hibernate_preallocate_memory(void);
extern void clear_or_poison_free_pages(void);
-/**
+/*
* Auxiliary structure used for reading the snapshot image data and
* metadata from and writing them to the list of page backup entries
* (PBEs) which is the main data structure of swsusp.
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 28c77904ea74..a2a29e3fffca 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -83,7 +83,7 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
if (unlikely(sg_policy->limits_changed)) {
sg_policy->limits_changed = false;
- sg_policy->need_freq_update = true;
+ sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
return true;
}
@@ -96,7 +96,7 @@ static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
unsigned int next_freq)
{
if (sg_policy->need_freq_update)
- sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
+ sg_policy->need_freq_update = false;
else if (sg_policy->next_freq == next_freq)
return false;
@@ -604,31 +604,6 @@ static const struct kobj_type sugov_tunables_ktype = {
/********************** cpufreq governor interface *********************/
-#ifdef CONFIG_ENERGY_MODEL
-static void rebuild_sd_workfn(struct work_struct *work)
-{
- rebuild_sched_domains_energy();
-}
-
-static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
-
-/*
- * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
- * on governor changes to make sure the scheduler knows about it.
- */
-static void sugov_eas_rebuild_sd(void)
-{
- /*
- * When called from the cpufreq_register_driver() path, the
- * cpu_hotplug_lock is already held, so use a work item to
- * avoid nested locking in rebuild_sched_domains().
- */
- schedule_work(&rebuild_sd_work);
-}
-#else
-static inline void sugov_eas_rebuild_sd(void) { };
-#endif
-
struct cpufreq_governor schedutil_gov;
static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
@@ -784,7 +759,11 @@ static int sugov_init(struct cpufreq_policy *policy)
goto fail;
out:
- sugov_eas_rebuild_sd();
+ /*
+ * Schedutil is the preferred governor for EAS, so rebuild sched domains
+ * on governor changes to make sure the scheduler knows about them.
+ */
+ em_rebuild_sched_domains();
mutex_unlock(&global_tunables_lock);
return 0;
@@ -826,7 +805,7 @@ static void sugov_exit(struct cpufreq_policy *policy)
sugov_policy_free(sg_policy);
cpufreq_disable_fast_switch(policy);
- sugov_eas_rebuild_sd();
+ em_rebuild_sched_domains();
}
static int sugov_start(struct cpufreq_policy *policy)