From a0df77348ab37687c9513bc2dd772cbc6eadbe63 Mon Sep 17 00:00:00 2001 From: Tao Wang Date: Tue, 23 May 2017 16:13:18 +0800 Subject: cpufreq: dt: Add support for hi3660 Add the compatible string for supporting the generic device tree cpufreq-dt driver on Hisilicon's 3660 SoC. Signed-off-by: Tao Wang Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 921b4a6c3d16..1c262923fe58 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -31,6 +31,7 @@ static const struct of_device_id machines[] __initconst = { { .compatible = "arm,integrator-ap", }, { .compatible = "arm,integrator-cp", }, + { .compatible = "hisilicon,hi3660", }, { .compatible = "hisilicon,hi6220", }, { .compatible = "fsl,imx27", }, -- cgit v1.2.3 From 3fafb4e77279ed375077e95f54ee687b481c24d2 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Tue, 30 May 2017 18:57:18 +0300 Subject: cpufreq: imx6q: imx6ull should use the same flow as imx6ul This fixes an issue with imx6ull where setting the frequency to 528Mhz would actually set the ARM clock to 324Mhz. Signed-off-by: Octavian Purdila Signed-off-by: Leonard Crestez Acked-by: Viresh Kumar Reviewed-by: Fabio Estevam Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/imx6q-cpufreq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 9c13f097fd8c..b6edd3ccaa55 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -101,7 +101,8 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) * - Reprogram pll1_sys_clk and reparent pll1_sw_clk back to it * - Disable pll2_pfd2_396m_clk */ - if (of_machine_is_compatible("fsl,imx6ul")) { + if (of_machine_is_compatible("fsl,imx6ul") || + of_machine_is_compatible("fsl,imx6ull")) { /* * When changing pll1_sw_clk's parent to pll1_sys_clk, * CPU may run at higher than 528MHz, this will lead to @@ -215,7 +216,8 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) goto put_clk; } - if (of_machine_is_compatible("fsl,imx6ul")) { + if (of_machine_is_compatible("fsl,imx6ul") || + of_machine_is_compatible("fsl,imx6ull")) { pll2_bus_clk = clk_get(cpu_dev, "pll2_bus"); secondary_sel_clk = clk_get(cpu_dev, "secondary_sel"); if (IS_ERR(pll2_bus_clk) || IS_ERR(secondary_sel_clk)) { -- cgit v1.2.3 From e773f5c7e8c6279b2560ae1ca2a1d37cc12fe7c3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 7 Jun 2017 20:13:24 +0200 Subject: cpufreq: exynos5440: Fix inconsistent indenting Fix inconsistent indenting and unneeded white space in assignment. Signed-off-by: Krzysztof Kozlowski Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/exynos5440-cpufreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c index 9180d34cc9fc..b6b369c22272 100644 --- a/drivers/cpufreq/exynos5440-cpufreq.c +++ b/drivers/cpufreq/exynos5440-cpufreq.c @@ -173,12 +173,12 @@ static void exynos_enable_dvfs(unsigned int cur_frequency) /* Enable PSTATE Change Event */ tmp = __raw_readl(dvfs_info->base + XMU_PMUEVTEN); tmp |= (1 << PSTATE_CHANGED_EVTEN_SHIFT); - __raw_writel(tmp, dvfs_info->base + XMU_PMUEVTEN); + __raw_writel(tmp, dvfs_info->base + XMU_PMUEVTEN); /* Enable PSTATE Change IRQ */ tmp = __raw_readl(dvfs_info->base + XMU_PMUIRQEN); tmp |= (1 << PSTATE_CHANGED_IRQEN_SHIFT); - __raw_writel(tmp, dvfs_info->base + XMU_PMUIRQEN); + __raw_writel(tmp, dvfs_info->base + XMU_PMUIRQEN); /* Set initial performance index */ cpufreq_for_each_entry(pos, freq_table) @@ -330,7 +330,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) struct resource res; unsigned int cur_frequency; - np = pdev->dev.of_node; + np = pdev->dev.of_node; if (!np) return -ENODEV; -- cgit v1.2.3 From 0370f0f975e5561c658aa86e2c372079e6a425eb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 15 Jun 2017 10:55:59 +0100 Subject: cpufreq: sfi: make freq_table static pointer freq_table can be made static as it does not need to be in global scope. Cleans up sparse warning: "symbol 'freq_table' was not declared. Should it be static?" Signed-off-by: Colin Ian King Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/sfi-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c index 992ce6f9abec..3779742f86e3 100644 --- a/drivers/cpufreq/sfi-cpufreq.c +++ b/drivers/cpufreq/sfi-cpufreq.c @@ -24,7 +24,7 @@ #include -struct cpufreq_frequency_table *freq_table; +static struct cpufreq_frequency_table *freq_table; static struct sfi_freq_table_entry *sfi_cpufreq_array; static int num_freq_table_entries; -- cgit v1.2.3 From 51204e0639c49ada02fd823782ad673b6326d748 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 16 Jun 2017 20:03:11 -0700 Subject: x86: do not use cpufreq_quick_get() for /proc/cpuinfo "cpu MHz" cpufreq_quick_get() allows cpufreq drivers to over-ride cpu_khz that is otherwise reported in x86 /proc/cpuinfo "cpu MHz". There are four problems with this scheme, any of them is sufficient justification to delete it. 1. Depending on which cpufreq driver is loaded, the behavior of this field is different. 2. Distros complain that they have to explain to users why and how this field changes. Distros have requested a constant. 3. The two major providers of this information, acpi_cpufreq and intel_pstate, both "get it wrong" in different ways. acpi_cpufreq lies to the user by telling them that they are running at whatever frequency was last requested by software. intel_pstate lies to the user by telling them that they are running at the average frequency computed over an undefined measurement. But an average computed over an undefined interval, is itself, undefined... 4. On modern processors, user space utilities, such as turbostat(1), are more accurate and more precise, while supporing concurrent measurement over arbitrary intervals. Users who have been consulting /proc/cpuinfo to track changing CPU frequency will be dissapointed that it no longer wiggles -- perhaps being unaware of the limitations of the information they have been consuming. Yes, they can change their scripts to look in sysfs cpufreq/scaling_cur_frequency. Here they will find the same data of dubious quality here removed from /proc/cpuinfo. The value in sysfs will be addressed in a subsequent patch to address issues 1-3, above. Issue 4 will remain -- users that really care about accurate frequency information should not be using either proc or sysfs kernel interfaces. They should be using using turbostat(8), or a similar purpose-built analysis tool. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/cpu/proc.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 6df621ae62a7..218f79825b3c 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -2,7 +2,6 @@ #include #include #include -#include /* * Get CPU information for use by the procfs. @@ -76,14 +75,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (c->microcode) seq_printf(m, "microcode\t: 0x%x\n", c->microcode); - if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get(cpu); - - if (!freq) - freq = cpu_khz; + if (cpu_has(c, X86_FEATURE_TSC)) seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - freq / 1000, (freq % 1000)); - } + cpu_khz / 1000, (cpu_khz % 1000)); /* Cache size */ if (c->x86_cache_size >= 0) -- cgit v1.2.3 From 1a4fe38add8be45eb3873ad756561b9baf6bcaef Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 12 Jun 2017 16:30:27 -0700 Subject: cpufreq: intel_pstate: Remove max/min fractions to limit performance In the current model the max/min perf limits are a fraction of current user space limits to the allowed max_freq or 100% for global limits. This results in wrong ratio limits calculation because of rounding issues for some user space limits. Initially we tried to solve this issue by issue by having more shift bits to increase precision. Still there are isolated cases where we still have error. This can be avoided by using ratios all together. Since the way we get cpuinfo.max_freq is by multiplying scaling factor to max ratio, we can easily keep the max/min ratios in terms of ratios and not fractions. For example: if the max ratio = 36 cpuinfo.max_freq = 36 * 100000 = 3600000 Suppose user space sets a limit of 1200000, then we can calculate max ratio limit as = 36 * 1200000 / 3600000 = 12 This will be correct for any user limits. The other advantage is that, we don't need to do any calculation in the fast path as ratio limit is already calculated via set_policy() callback. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 114 +++++++++++++++++++++++------------------ 1 file changed, 63 insertions(+), 51 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index eb1158532de3..be0290a1a5e2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -231,10 +231,8 @@ struct global_params { * @prev_cummulative_iowait: IO Wait time difference from last and * current sample * @sample: Storage for storing last Sample data - * @min_perf: Minimum capacity limit as a fraction of the maximum - * turbo P-state capacity. - * @max_perf: Maximum capacity limit as a fraction of the maximum - * turbo P-state capacity. + * @min_perf_ratio: Minimum capacity in terms of PERF or HWP ratios + * @max_perf_ratio: Maximum capacity in terms of PERF or HWP ratios * @acpi_perf_data: Stores ACPI perf information read from _PSS * @valid_pss_table: Set to true for valid ACPI _PSS entries found * @epp_powersave: Last saved HWP energy performance preference @@ -266,8 +264,8 @@ struct cpudata { u64 prev_tsc; u64 prev_cummulative_iowait; struct sample sample; - int32_t min_perf; - int32_t max_perf; + int32_t min_perf_ratio; + int32_t max_perf_ratio; #ifdef CONFIG_ACPI struct acpi_processor_performance acpi_perf_data; bool valid_pss_table; @@ -794,25 +792,32 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; -static void intel_pstate_hwp_set(unsigned int cpu) +static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max, + int *current_max) { - struct cpudata *cpu_data = all_cpu_data[cpu]; - int min, hw_min, max, hw_max; - u64 value, cap; - s16 epp; + u64 cap; rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); - hw_min = HWP_LOWEST_PERF(cap); if (global.no_turbo) - hw_max = HWP_GUARANTEED_PERF(cap); + *current_max = HWP_GUARANTEED_PERF(cap); else - hw_max = HWP_HIGHEST_PERF(cap); + *current_max = HWP_HIGHEST_PERF(cap); + + *phy_max = HWP_HIGHEST_PERF(cap); +} + +static void intel_pstate_hwp_set(unsigned int cpu) +{ + struct cpudata *cpu_data = all_cpu_data[cpu]; + int max, min; + u64 value; + s16 epp; + + max = cpu_data->max_perf_ratio; + min = cpu_data->min_perf_ratio; - max = fp_ext_toint(hw_max * cpu_data->max_perf); if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) min = max; - else - min = fp_ext_toint(hw_max * cpu_data->min_perf); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); @@ -1528,8 +1533,7 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu) update_turbo_state(); pstate = intel_pstate_get_base_pstate(cpu); - pstate = max(cpu->pstate.min_pstate, - fp_ext_toint(pstate * cpu->max_perf)); + pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio); intel_pstate_set_pstate(cpu, pstate); } @@ -1695,9 +1699,8 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate) int max_pstate = intel_pstate_get_base_pstate(cpu); int min_pstate; - min_pstate = max(cpu->pstate.min_pstate, - fp_ext_toint(max_pstate * cpu->min_perf)); - max_pstate = max(min_pstate, fp_ext_toint(max_pstate * cpu->max_perf)); + min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio); + max_pstate = max(min_pstate, cpu->max_perf_ratio); return clamp_t(int, pstate, min_pstate, max_pstate); } @@ -1967,52 +1970,61 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, { int max_freq = intel_pstate_get_max_freq(cpu); int32_t max_policy_perf, min_policy_perf; + int max_state, turbo_max; - max_policy_perf = div_ext_fp(policy->max, max_freq); - max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1)); + /* + * HWP needs some special consideration, because on BDX the + * HWP_REQUEST uses abstract value to represent performance + * rather than pure ratios. + */ + if (hwp_active) { + intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state); + } else { + max_state = intel_pstate_get_base_pstate(cpu); + turbo_max = cpu->pstate.turbo_pstate; + } + + max_policy_perf = max_state * policy->max / max_freq; if (policy->max == policy->min) { min_policy_perf = max_policy_perf; } else { - min_policy_perf = div_ext_fp(policy->min, max_freq); + min_policy_perf = max_state * policy->min / max_freq; min_policy_perf = clamp_t(int32_t, min_policy_perf, 0, max_policy_perf); } + pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n", + policy->cpu, max_state, + min_policy_perf, max_policy_perf); + /* Normalize user input to [min_perf, max_perf] */ if (per_cpu_limits) { - cpu->min_perf = min_policy_perf; - cpu->max_perf = max_policy_perf; + cpu->min_perf_ratio = min_policy_perf; + cpu->max_perf_ratio = max_policy_perf; } else { int32_t global_min, global_max; /* Global limits are in percent of the maximum turbo P-state. */ - global_max = percent_ext_fp(global.max_perf_pct); - global_min = percent_ext_fp(global.min_perf_pct); - if (max_freq != cpu->pstate.turbo_freq) { - int32_t turbo_factor; - - turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate, - cpu->pstate.max_pstate); - global_min = mul_ext_fp(global_min, turbo_factor); - global_max = mul_ext_fp(global_max, turbo_factor); - } + global_max = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100); + global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100); global_min = clamp_t(int32_t, global_min, 0, global_max); - cpu->min_perf = max(min_policy_perf, global_min); - cpu->min_perf = min(cpu->min_perf, max_policy_perf); - cpu->max_perf = min(max_policy_perf, global_max); - cpu->max_perf = max(min_policy_perf, cpu->max_perf); + pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu, + global_min, global_max); - /* Make sure min_perf <= max_perf */ - cpu->min_perf = min(cpu->min_perf, cpu->max_perf); - } + cpu->min_perf_ratio = max(min_policy_perf, global_min); + cpu->min_perf_ratio = min(cpu->min_perf_ratio, max_policy_perf); + cpu->max_perf_ratio = min(max_policy_perf, global_max); + cpu->max_perf_ratio = max(min_policy_perf, cpu->max_perf_ratio); - cpu->max_perf = round_up(cpu->max_perf, EXT_FRAC_BITS); - cpu->min_perf = round_up(cpu->min_perf, EXT_FRAC_BITS); + /* Make sure min_perf <= max_perf */ + cpu->min_perf_ratio = min(cpu->min_perf_ratio, + cpu->max_perf_ratio); - pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, - fp_ext_toint(cpu->max_perf * 100), - fp_ext_toint(cpu->min_perf * 100)); + } + pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu, + cpu->max_perf_ratio, + cpu->min_perf_ratio); } static int intel_pstate_set_policy(struct cpufreq_policy *policy) @@ -2115,8 +2127,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - cpu->max_perf = int_ext_tofp(1); - cpu->min_perf = 0; + cpu->max_perf_ratio = 0xFF; + cpu->min_perf_ratio = 0; policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; -- cgit v1.2.3 From d50a7d8acd780f54c48703ab1069c2e64a24e4a0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 12 Jun 2017 17:55:10 +0200 Subject: ARM: cpuidle: Support asymmetric idle definition Some hardware have clusters with different idle states. The current code does not support this and fails as it expects all the idle states to be identical. Because of this, the Mediatek mtk8173 had to create the same idle state for a big.Little system and now the Hisilicon 960 is facing the same situation. Solve this by simply assuming the multiple driver will be needed for all the platforms using the ARM generic cpuidle driver which makes sense because of the different topologies we can support with a single kernel for ARM32 or ARM64. Every CPU has its own driver, so every single CPU can specify in the DT the idle states. This simple approach allows to support the future dynamIQ system, current SMP and HMP. Tested on: - 96boards: Hikey 620 - 96boards: Hikey 960 - 96boards: dragonboard410c - Mediatek 8173 Tested-by: Leo Yan Signed-off-by: Daniel Lezcano Acked-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/Kconfig.arm | 1 + drivers/cpuidle/cpuidle-arm.c | 62 ++++++++++++++++++++++++++----------------- 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index 21340e0be73e..f52144808455 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -4,6 +4,7 @@ config ARM_CPUIDLE bool "Generic ARM/ARM64 CPU idle Driver" select DT_IDLE_STATES + select CPU_IDLE_MULTIPLE_DRIVERS help Select this to enable generic cpuidle driver for ARM. It provides a generic idle driver whose idle states are configured diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index f440d385ed34..7080c384ad5d 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -44,7 +45,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev, return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx); } -static struct cpuidle_driver arm_idle_driver = { +static struct cpuidle_driver arm_idle_driver __initdata = { .name = "arm_idle", .owner = THIS_MODULE, /* @@ -80,30 +81,42 @@ static const struct of_device_id arm_idle_state_match[] __initconst = { static int __init arm_idle_init(void) { int cpu, ret; - struct cpuidle_driver *drv = &arm_idle_driver; + struct cpuidle_driver *drv; struct cpuidle_device *dev; - /* - * Initialize idle states data, starting at index 1. - * This driver is DT only, if no DT idle states are detected (ret == 0) - * let the driver initialization fail accordingly since there is no - * reason to initialize the idle driver if only wfi is supported. - */ - ret = dt_init_idle_driver(drv, arm_idle_state_match, 1); - if (ret <= 0) - return ret ? : -ENODEV; - - ret = cpuidle_register_driver(drv); - if (ret) { - pr_err("Failed to register cpuidle driver\n"); - return ret; - } - - /* - * Call arch CPU operations in order to initialize - * idle states suspend back-end specific data - */ for_each_possible_cpu(cpu) { + + drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL); + if (!drv) { + ret = -ENOMEM; + goto out_fail; + } + + drv->cpumask = (struct cpumask *)cpumask_of(cpu); + + /* + * Initialize idle states data, starting at index 1. This + * driver is DT only, if no DT idle states are detected (ret + * == 0) let the driver initialization fail accordingly since + * there is no reason to initialize the idle driver if only + * wfi is supported. + */ + ret = dt_init_idle_driver(drv, arm_idle_state_match, 1); + if (ret <= 0) { + ret = ret ? : -ENODEV; + goto out_fail; + } + + ret = cpuidle_register_driver(drv); + if (ret) { + pr_err("Failed to register cpuidle driver\n"); + goto out_fail; + } + + /* + * Call arch CPU operations in order to initialize + * idle states suspend back-end specific data + */ ret = arm_cpuidle_init(cpu); /* @@ -141,10 +154,11 @@ out_fail: dev = per_cpu(cpuidle_devices, cpu); cpuidle_unregister_device(dev); kfree(dev); + drv = cpuidle_get_driver(); + cpuidle_unregister_driver(drv); + kfree(drv); } - cpuidle_unregister_driver(drv); - return ret; } device_initcall(arm_idle_init); -- cgit v1.2.3 From f8475cef90082bf0902ddab106112de130d90395 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 23 Jun 2017 22:11:52 -0700 Subject: x86: use common aperfmperf_khz_on_cpu() to calculate KHz using APERF/MPERF The goal of this change is to give users a uniform and meaningful result when they read /sys/...cpufreq/scaling_cur_freq on modern x86 hardware, as compared to what they get today. Modern x86 processors include the hardware needed to accurately calculate frequency over an interval -- APERF, MPERF, and the TSC. Here we provide an x86 routine to make this calculation on supported hardware, and use it in preference to any driver driver-specific cpufreq_driver.get() routine. MHz is computed like so: MHz = base_MHz * delta_APERF / delta_MPERF MHz is the average frequency of the busy processor over a measurement interval. The interval is defined to be the time between successive invocations of aperfmperf_khz_on_cpu(), which are expected to to happen on-demand when users read sysfs attribute cpufreq/scaling_cur_freq. As with previous methods of calculating MHz, idle time is excluded. base_MHz above is from TSC calibration global "cpu_khz". This x86 native method to calculate MHz returns a meaningful result no matter if P-states are controlled by hardware or firmware and/or if the Linux cpufreq sub-system is or is-not installed. When this routine is invoked more frequently, the measurement interval becomes shorter. However, the code limits re-computation to 10ms intervals so that average frequency remains meaningful. Discerning users are encouraged to take advantage of the turbostat(8) utility, which can gracefully handle concurrent measurement intervals of arbitrary length. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/aperfmperf.c | 79 ++++++++++++++++++++++++++++++++++++++++ drivers/cpufreq/cpufreq.c | 12 +++++- include/linux/cpufreq.h | 2 + 4 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/aperfmperf.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 52000010c62e..cdf82492b770 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -21,6 +21,7 @@ obj-y += common.o obj-y += rdrand.o obj-y += match.o obj-y += bugs.o +obj-$(CONFIG_CPU_FREQ) += aperfmperf.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c new file mode 100644 index 000000000000..d869c8671e36 --- /dev/null +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -0,0 +1,79 @@ +/* + * x86 APERF/MPERF KHz calculation for + * /sys/.../cpufreq/scaling_cur_freq + * + * Copyright (C) 2017 Intel Corp. + * Author: Len Brown + * + * This file is licensed under GPLv2. + */ + +#include +#include +#include +#include + +struct aperfmperf_sample { + unsigned int khz; + unsigned long jiffies; + u64 aperf; + u64 mperf; +}; + +static DEFINE_PER_CPU(struct aperfmperf_sample, samples); + +/* + * aperfmperf_snapshot_khz() + * On the current CPU, snapshot APERF, MPERF, and jiffies + * unless we already did it within 10ms + * calculate kHz, save snapshot + */ +static void aperfmperf_snapshot_khz(void *dummy) +{ + u64 aperf, aperf_delta; + u64 mperf, mperf_delta; + struct aperfmperf_sample *s = this_cpu_ptr(&samples); + + /* Don't bother re-computing within 10 ms */ + if (time_before(jiffies, s->jiffies + HZ/100)) + return; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + + aperf_delta = aperf - s->aperf; + mperf_delta = mperf - s->mperf; + + /* + * There is no architectural guarantee that MPERF + * increments faster than we can read it. + */ + if (mperf_delta == 0) + return; + + /* + * if (cpu_khz * aperf_delta) fits into ULLONG_MAX, then + * khz = (cpu_khz * aperf_delta) / mperf_delta + */ + if (div64_u64(ULLONG_MAX, cpu_khz) > aperf_delta) + s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); + else /* khz = aperf_delta / (mperf_delta / cpu_khz) */ + s->khz = div64_u64(aperf_delta, + div64_u64(mperf_delta, cpu_khz)); + s->jiffies = jiffies; + s->aperf = aperf; + s->mperf = mperf; +} + +unsigned int arch_freq_get_on_cpu(int cpu) +{ + if (!cpu_khz) + return 0; + + if (!static_cpu_has(X86_FEATURE_APERFMPERF)) + return 0; + + smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); + + return per_cpu(samples.khz, cpu); +} diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 26b643d57847..6e7424d12de9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -632,11 +632,21 @@ show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); +__weak unsigned int arch_freq_get_on_cpu(int cpu) +{ + return 0; +} + static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf) { ssize_t ret; + unsigned int freq; - if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) + freq = arch_freq_get_on_cpu(policy->cpu); + if (freq) + ret = sprintf(buf, "%u\n", freq); + else if (cpufreq_driver && cpufreq_driver->setpolicy && + cpufreq_driver->get) ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu)); else ret = sprintf(buf, "%u\n", policy->cur); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a5ce0bbeadb5..905117bd5012 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -883,6 +883,8 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) } #endif +extern unsigned int arch_freq_get_on_cpu(int cpu); + /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; -- cgit v1.2.3 From 62611cb912f7cb08ef7815780bcc20a09abedd20 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 23 Jun 2017 22:11:53 -0700 Subject: intel_pstate: delete scheduler hook in HWP mode The cpufreq/scaling_cur_freq sysfs attribute is now provided by shared x86 cpufreq code on modern x86 systems, including all systems supported by the intel_pstate driver. In HWP mode, maintaining that value was the sole purpose of the scheduler hook, intel_pstate_update_util_hwp(), so it can now be removed. Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index be0290a1a5e2..b00a45595c89 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1736,16 +1736,6 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate) fp_toint(cpu->iowait_boost * 100)); } -static void intel_pstate_update_util_hwp(struct update_util_data *data, - u64 time, unsigned int flags) -{ - struct cpudata *cpu = container_of(data, struct cpudata, update_util); - u64 delta_ns = time - cpu->sample.time; - - if ((s64)delta_ns >= INTEL_PSTATE_HWP_SAMPLING_INTERVAL) - intel_pstate_sample(cpu, time); -} - static void intel_pstate_update_util_pid(struct update_util_data *data, u64 time, unsigned int flags) { @@ -1937,6 +1927,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) { struct cpudata *cpu = all_cpu_data[cpu_num]; + if (hwp_active) + return; + if (cpu->update_util_set) return; @@ -2570,7 +2563,6 @@ static int __init intel_pstate_init(void) } else { hwp_active++; intel_pstate.attr = hwp_cpufreq_attrs; - pstate_funcs.update_util = intel_pstate_update_util_hwp; goto hwp_cpu_matched; } } else { -- cgit v1.2.3 From 82b4e03e01bc8def546b52707962809f04ae5c7a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 23 Jun 2017 22:11:54 -0700 Subject: intel_pstate: skip scheduler hook when in "performance" mode When the governor is set to "performance", intel_pstate does not need the scheduler hook for doing any calculations. Under these conditions, its only purpose is to continue to maintain cpufreq/scaling_cur_freq. The cpufreq/scaling_cur_freq sysfs attribute is now provided by shared x86 cpufreq code on modern x86 systems, including all systems supported by the intel_pstate driver. So in "performance" governor mode, the scheduler hook can be skipped. This applies to both in Software and Hardware P-state control modes. Suggested-by: Srinivas Pandruvada Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b00a45595c89..1772d309bea9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2044,10 +2044,10 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) */ intel_pstate_clear_update_util_hook(policy->cpu); intel_pstate_max_within_limits(cpu); + } else { + intel_pstate_set_update_util_hook(policy->cpu); } - intel_pstate_set_update_util_hook(policy->cpu); - if (hwp_active) intel_pstate_hwp_set(policy->cpu); -- cgit v1.2.3 From 5209654a46ee71137ad9b06da99d4ef2794475af Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 7 Jun 2017 10:19:46 -0500 Subject: x86/ACPI/cstate: Allow ACPI C1 FFH MWAIT use on AMD systems AMD systems support the Monitor/Mwait instructions and these can be used for ACPI C1 in the same way as on Intel systems. Three things are needed: 1) This patch. 2) BIOS that declares a C1 state in _CST to use FFH, with correct values. 3) CPUID_Fn00000005_EDX is non-zero on the system. The BIOS on AMD systems have historically not defined a C1 state in _CST, so the acpi_idle driver uses HALT for ACPI C1. Currently released systems have CPUID_Fn00000005_EDX as reserved/RAZ. If a BIOS is released for these systems that requests a C1 state with FFH, the FFH implementation in Linux will fail since CPUID_Fn00000005_EDX is 0. The acpi_idle driver will then fallback to using HALT for ACPI C1. Future systems are expected to have non-zero CPUID_Fn00000005_EDX and BIOS support for using FFH for ACPI C1. Allow ffh_cstate_init() to succeed on AMD systems. Tested on Fam15h and Fam17h systems. Signed-off-by: Yazen Ghannam Acked-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/cstate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8233a630280f..dde437f5d14f 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -167,7 +167,8 @@ static int __init ffh_cstate_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; - if (c->x86_vendor != X86_VENDOR_INTEL) + if (c->x86_vendor != X86_VENDOR_INTEL && + c->x86_vendor != X86_VENDOR_AMD) return -1; cpu_cstate_entry = alloc_percpu(struct cstate_entry); -- cgit v1.2.3 From 73808d0fd26b3d3f0f44cc7c469ad1d3c1b570b8 Mon Sep 17 00:00:00 2001 From: "Prakash, Prashanth" Date: Thu, 11 May 2017 16:39:44 -0600 Subject: cpufreq / CPPC: Initialize policy->min to lowest nonlinear performance Description of Lowest Perfomance in ACPI 6.1 specification states: "Lowest Performance is the absolute lowest performance level of the platform. Selecting a performance level lower than the lowest nonlinear performance level may actually cause an efficiency penalty, but should reduce the instantaneous power consumption of the processor. In traditional terms, this represents the T-state range of performance levels." Set the default value of policy->min to Lowest Nonlinear Performance to avoid any potential efficiency penalty. Signed-off-by: Prashanth Prakash Acked-by: Viresh Kumar Acked-by: Alexey Klimov Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index e82bb3c30b92..10be285c9055 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -144,10 +144,23 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) cppc_dmi_max_khz = cppc_get_dmi_max_khz(); - policy->min = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / cpu->perf_caps.highest_perf; + /* + * Set min to lowest nonlinear perf to avoid any efficiency penalty (see + * Section 8.4.7.1.1.5 of ACPI 6.1 spec) + */ + policy->min = cpu->perf_caps.lowest_nonlinear_perf * cppc_dmi_max_khz / + cpu->perf_caps.highest_perf; policy->max = cppc_dmi_max_khz; - policy->cpuinfo.min_freq = policy->min; - policy->cpuinfo.max_freq = policy->max; + + /* + * Set cpuinfo.min_freq to Lowest to make the full range of performance + * available if userspace wants to use any perf between lowest & lowest + * nonlinear perf + */ + policy->cpuinfo.min_freq = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / + cpu->perf_caps.highest_perf; + policy->cpuinfo.max_freq = cppc_dmi_max_khz; + policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num); policy->shared_type = cpu->shared_type; -- cgit v1.2.3 From 654d08a42a5660514df5a5a6ed4dc1b0c978a0a3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 9 Jun 2017 12:29:20 -0700 Subject: intel_idle: Use more common logging style Remove #define PREFIX and add #define pr_fmt to use more common logging. Miscellanea: o Add missing newline to format o Convert a single printk without KERN_ to pr_info Signed-off-by: Joe Perches Acked-by: Jacob Pan Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 216d7ec88c0c..c2ae819a871c 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -51,6 +51,8 @@ /* un-comment DEBUG to enable pr_debug() statements */ #define DEBUG +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -65,7 +67,6 @@ #include #define INTEL_IDLE_VERSION "0.4.1" -#define PREFIX "intel_idle: " static struct cpuidle_driver intel_idle_driver = { .name = "intel_idle", @@ -1111,7 +1112,7 @@ static int __init intel_idle_probe(void) const struct x86_cpu_id *id; if (max_cstate == 0) { - pr_debug(PREFIX "disabled\n"); + pr_debug("disabled\n"); return -EPERM; } @@ -1119,8 +1120,8 @@ static int __init intel_idle_probe(void) if (!id) { if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 6) - pr_debug(PREFIX "does not run on family %d model %d\n", - boot_cpu_data.x86, boot_cpu_data.x86_model); + pr_debug("does not run on family %d model %d\n", + boot_cpu_data.x86, boot_cpu_data.x86_model); return -ENODEV; } @@ -1134,13 +1135,13 @@ static int __init intel_idle_probe(void) !mwait_substates) return -ENODEV; - pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates); + pr_debug("MWAIT substates: 0x%x\n", mwait_substates); icpu = (const struct idle_cpu *)id->driver_data; cpuidle_state_table = icpu->state_table; - pr_debug(PREFIX "v" INTEL_IDLE_VERSION - " model 0x%X\n", boot_cpu_data.x86_model); + pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", + boot_cpu_data.x86_model); return 0; } @@ -1340,8 +1341,7 @@ static void __init intel_idle_cpuidle_driver_init(void) break; if (cstate + 1 > max_cstate) { - printk(PREFIX "max_cstate %d reached\n", - max_cstate); + pr_info("max_cstate %d reached\n", max_cstate); break; } @@ -1358,8 +1358,8 @@ static void __init intel_idle_cpuidle_driver_init(void) /* if state marked as disabled, skip it */ if (cpuidle_state_table[cstate].disabled != 0) { - pr_debug(PREFIX "state %s is disabled", - cpuidle_state_table[cstate].name); + pr_debug("state %s is disabled\n", + cpuidle_state_table[cstate].name); continue; } @@ -1395,7 +1395,7 @@ static int intel_idle_cpu_init(unsigned int cpu) dev->cpu = cpu; if (cpuidle_register_device(dev)) { - pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu); + pr_debug("cpuidle_register_device %d failed!\n", cpu); return -EIO; } @@ -1447,8 +1447,8 @@ static int __init intel_idle_init(void) retval = cpuidle_register_driver(&intel_idle_driver); if (retval) { struct cpuidle_driver *drv = cpuidle_get_driver(); - printk(KERN_DEBUG PREFIX "intel_idle yielding to %s", - drv ? drv->name : "none"); + printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"), + drv ? drv->name : "none"); goto init_driver_fail; } @@ -1460,8 +1460,8 @@ static int __init intel_idle_init(void) if (retval < 0) goto hp_setup_fail; - pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n", - lapic_timer_reliable_states); + pr_debug("lapic_timer_reliable_states 0x%x\n", + lapic_timer_reliable_states); return 0; -- cgit v1.2.3 From 3ed09c94580de9d5b18cc35d1f97e9f24cd9233b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 26 Jun 2017 15:38:15 +1000 Subject: cpuidle: menu: allow state 0 to be disabled The menu driver does not allow state0 to be disabled completely. If it is disabled but other enabled states don't meet latency requirements, it is still used. Fix this by starting with the first enabled idle state. Fall back to state 0 if no idle states are enabled (arguably this should be -EINVAL if it is attempted, but this is the minimal fix). Acked-by: Gautham R. Shenoy Signed-off-by: Nicholas Piggin Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b2330fd69e34..61b64c2b2cb8 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -286,6 +286,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) struct device *device = get_cpu_device(dev->cpu); int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; + int first_idx; + int idx; unsigned int interactivity_req; unsigned int expected_interval; unsigned long nr_iowaiters, cpu_load; @@ -335,11 +337,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (data->next_timer_us > polling_threshold && latency_req > s->exit_latency && !s->disabled && !dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable) - data->last_state_idx = CPUIDLE_DRIVER_STATE_START; + first_idx = CPUIDLE_DRIVER_STATE_START; else - data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1; + first_idx = CPUIDLE_DRIVER_STATE_START - 1; } else { - data->last_state_idx = CPUIDLE_DRIVER_STATE_START; + first_idx = 0; } /* @@ -359,20 +361,28 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * Find the idle state with the lowest power while satisfying * our constraints. */ - for (i = data->last_state_idx + 1; i < drv->state_count; i++) { + idx = -1; + for (i = first_idx; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; struct cpuidle_state_usage *su = &dev->states_usage[i]; if (s->disabled || su->disable) continue; + if (idx == -1) + idx = i; /* first enabled state */ if (s->target_residency > data->predicted_us) break; if (s->exit_latency > latency_req) break; - data->last_state_idx = i; + idx = i; } + if (idx == -1) + idx = 0; /* No states enabled. Must use 0. */ + + data->last_state_idx = idx; + return data->last_state_idx; } -- cgit v1.2.3 From fab24dcc395637557a7988a867e7b3a5823917a9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 29 Jun 2017 01:47:56 +0200 Subject: cpufreq: intel_pstate: Clean up after performance governor changes After commit 82b4e03e01bc (intel_pstate: skip scheduler hook when in "performance" mode) get_target_pstate_use_performance() and get_target_pstate_use_cpu_load() are never called if scaling_governor is "performance", so drop the CPUFREQ_POLICY_PERFORMANCE checks from them as they will never trigger anyway. Moreover, the documentation needs to be updated to reflect the change made by the above commit, so do that too. Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada --- Documentation/admin-guide/pm/intel_pstate.rst | 6 ++---- drivers/cpufreq/intel_pstate.c | 6 ------ 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 33d703989ea8..1d6249825efc 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -157,10 +157,8 @@ Without HWP, this P-state selection algorithm is always the same regardless of the processor model and platform configuration. It selects the maximum P-state it is allowed to use, subject to limits set via -``sysfs``, every time the P-state selection computations are carried out by the -driver's utilization update callback for the given CPU (that does not happen -more often than every 10 ms), but the hardware configuration will not be changed -if the new P-state is the same as the current one. +``sysfs``, every time the driver configuration for the given CPU is updated +(e.g. via ``sysfs``). This is the default P-state selection algorithm if the :c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 1772d309bea9..756483251832 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1620,9 +1620,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) int32_t busy_frac, boost; int target, avg_pstate; - if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) - return cpu->pstate.turbo_pstate; - busy_frac = div_fp(sample->mperf, sample->tsc); boost = cpu->iowait_boost; @@ -1659,9 +1656,6 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) int32_t perf_scaled, max_pstate, current_pstate, sample_ratio; u64 duration_ns; - if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) - return cpu->pstate.turbo_pstate; - /* * perf_scaled is the ratio of the average P-state during the last * sampling period to the P-state requested last time (in percent). -- cgit v1.2.3 From 8183003e48cbb9d6d276b6330eb5edecf95830f3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 29 Jun 2017 01:49:44 +0200 Subject: cpufreq: Update scaling_cur_freq documentation Commit f8475cef9008 "x86: use common aperfmperf_khz_on_cpu() to calculate KHz using APERF/MPERF" modified the way the scaling_cur_freq cpufreq policy attribute in sysfs is handled on contemporary Intel-based x86 systems, so update the documentation to reflect that change. Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/cpufreq.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 09aa2e949787..463cf7e73db8 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -269,16 +269,16 @@ are the following: ``scaling_cur_freq`` Current frequency of all of the CPUs belonging to this policy (in kHz). - For the majority of scaling drivers, this is the frequency of the last - P-state requested by the driver from the hardware using the scaling + In the majority of cases, this is the frequency of the last P-state + requested by the scaling driver from the hardware using the scaling interface provided by it, which may or may not reflect the frequency the CPU is actually running at (due to hardware design and other limitations). - Some scaling drivers (e.g. |intel_pstate|) attempt to provide - information more precisely reflecting the current CPU frequency through - this attribute, but that still may not be the exact current CPU - frequency as seen by the hardware at the moment. + Some architectures (e.g. ``x86``) may attempt to provide information + more precisely reflecting the current CPU frequency through this + attribute, but that still may not be the exact current CPU frequency as + seen by the hardware at the moment. ``scaling_driver`` The scaling driver currently in use. -- cgit v1.2.3