From a0df77348ab37687c9513bc2dd772cbc6eadbe63 Mon Sep 17 00:00:00 2001
From: Tao Wang <kevin.wangtao@hisilicon.com>
Date: Tue, 23 May 2017 16:13:18 +0800
Subject: cpufreq: dt: Add support for hi3660

Add the compatible string for supporting the generic device tree cpufreq-dt
driver on Hisilicon's 3660 SoC.

Signed-off-by: Tao Wang <kevin.wangtao@hisilicon.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq-dt-platdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 921b4a6c3d16..1c262923fe58 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -31,6 +31,7 @@ static const struct of_device_id machines[] __initconst = {
 	{ .compatible = "arm,integrator-ap", },
 	{ .compatible = "arm,integrator-cp", },
 
+	{ .compatible = "hisilicon,hi3660", },
 	{ .compatible = "hisilicon,hi6220", },
 
 	{ .compatible = "fsl,imx27", },
-- 
cgit v1.2.3


From 3fafb4e77279ed375077e95f54ee687b481c24d2 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <octavian.purdila@nxp.com>
Date: Tue, 30 May 2017 18:57:18 +0300
Subject: cpufreq: imx6q: imx6ull should use the same flow as imx6ul

This fixes an issue with imx6ull where setting the frequency to 528Mhz
would actually set the ARM clock to 324Mhz.

Signed-off-by: Octavian Purdila <octavian.purdila@nxp.com>
Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/imx6q-cpufreq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 9c13f097fd8c..b6edd3ccaa55 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -101,7 +101,8 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
 	 *  - Reprogram pll1_sys_clk and reparent pll1_sw_clk back to it
 	 *  - Disable pll2_pfd2_396m_clk
 	 */
-	if (of_machine_is_compatible("fsl,imx6ul")) {
+	if (of_machine_is_compatible("fsl,imx6ul") ||
+	    of_machine_is_compatible("fsl,imx6ull")) {
 		/*
 		 * When changing pll1_sw_clk's parent to pll1_sys_clk,
 		 * CPU may run at higher than 528MHz, this will lead to
@@ -215,7 +216,8 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 		goto put_clk;
 	}
 
-	if (of_machine_is_compatible("fsl,imx6ul")) {
+	if (of_machine_is_compatible("fsl,imx6ul") ||
+	    of_machine_is_compatible("fsl,imx6ull")) {
 		pll2_bus_clk = clk_get(cpu_dev, "pll2_bus");
 		secondary_sel_clk = clk_get(cpu_dev, "secondary_sel");
 		if (IS_ERR(pll2_bus_clk) || IS_ERR(secondary_sel_clk)) {
-- 
cgit v1.2.3


From e773f5c7e8c6279b2560ae1ca2a1d37cc12fe7c3 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Wed, 7 Jun 2017 20:13:24 +0200
Subject: cpufreq: exynos5440: Fix inconsistent indenting

Fix inconsistent indenting and unneeded white space in assignment.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/exynos5440-cpufreq.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/exynos5440-cpufreq.c b/drivers/cpufreq/exynos5440-cpufreq.c
index 9180d34cc9fc..b6b369c22272 100644
--- a/drivers/cpufreq/exynos5440-cpufreq.c
+++ b/drivers/cpufreq/exynos5440-cpufreq.c
@@ -173,12 +173,12 @@ static void exynos_enable_dvfs(unsigned int cur_frequency)
 	/* Enable PSTATE Change Event */
 	tmp = __raw_readl(dvfs_info->base + XMU_PMUEVTEN);
 	tmp |= (1 << PSTATE_CHANGED_EVTEN_SHIFT);
-	 __raw_writel(tmp, dvfs_info->base + XMU_PMUEVTEN);
+	__raw_writel(tmp, dvfs_info->base + XMU_PMUEVTEN);
 
 	/* Enable PSTATE Change IRQ */
 	tmp = __raw_readl(dvfs_info->base + XMU_PMUIRQEN);
 	tmp |= (1 << PSTATE_CHANGED_IRQEN_SHIFT);
-	 __raw_writel(tmp, dvfs_info->base + XMU_PMUIRQEN);
+	__raw_writel(tmp, dvfs_info->base + XMU_PMUIRQEN);
 
 	/* Set initial performance index */
 	cpufreq_for_each_entry(pos, freq_table)
@@ -330,7 +330,7 @@ static int exynos_cpufreq_probe(struct platform_device *pdev)
 	struct resource res;
 	unsigned int cur_frequency;
 
-	np =  pdev->dev.of_node;
+	np = pdev->dev.of_node;
 	if (!np)
 		return -ENODEV;
 
-- 
cgit v1.2.3


From 0370f0f975e5561c658aa86e2c372079e6a425eb Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 15 Jun 2017 10:55:59 +0100
Subject: cpufreq: sfi: make freq_table static

pointer freq_table can be made static as it does not need to be in
global scope.

Cleans up sparse warning:
"symbol 'freq_table' was not declared. Should it be static?"

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/sfi-cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c
index 992ce6f9abec..3779742f86e3 100644
--- a/drivers/cpufreq/sfi-cpufreq.c
+++ b/drivers/cpufreq/sfi-cpufreq.c
@@ -24,7 +24,7 @@
 
 #include <asm/msr.h>
 
-struct cpufreq_frequency_table *freq_table;
+static struct cpufreq_frequency_table *freq_table;
 static struct sfi_freq_table_entry *sfi_cpufreq_array;
 static int num_freq_table_entries;
 
-- 
cgit v1.2.3


From 51204e0639c49ada02fd823782ad673b6326d748 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 16 Jun 2017 20:03:11 -0700
Subject: x86: do not use cpufreq_quick_get() for /proc/cpuinfo "cpu MHz"

cpufreq_quick_get() allows cpufreq drivers to over-ride cpu_khz
that is otherwise reported in x86 /proc/cpuinfo "cpu MHz".

There are four problems with this scheme,
any of them is sufficient justification to delete it.

 1. Depending on which cpufreq driver is loaded, the behavior
    of this field is different.

 2. Distros complain that they have to explain to users
    why and how this field changes.  Distros have requested a constant.

 3. The two major providers of this information, acpi_cpufreq
    and intel_pstate, both "get it wrong" in different ways.

    acpi_cpufreq lies to the user by telling them that
    they are running at whatever frequency was last
    requested by software.

    intel_pstate lies to the user by telling them that
    they are running at the average frequency computed
    over an undefined measurement.  But an average computed
    over an undefined interval, is itself, undefined...

 4. On modern processors, user space utilities, such as
    turbostat(1), are more accurate and more precise, while
    supporing concurrent measurement over arbitrary intervals.

Users who have been consulting /proc/cpuinfo to
track changing CPU frequency will be dissapointed that
it no longer wiggles -- perhaps being unaware of the
limitations of the information they have been consuming.

Yes, they can change their scripts to look in sysfs
cpufreq/scaling_cur_frequency.  Here they will find the same
data of dubious quality here removed from /proc/cpuinfo.
The value in sysfs will be addressed in a subsequent patch
to address issues 1-3, above.

Issue 4 will remain -- users that really care about
accurate frequency information should not be using either
proc or sysfs kernel interfaces.
They should be using using turbostat(8), or a similar
purpose-built analysis tool.

Signed-off-by: Len Brown <len.brown@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/cpu/proc.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 6df621ae62a7..218f79825b3c 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -2,7 +2,6 @@
 #include <linux/timex.h>
 #include <linux/string.h>
 #include <linux/seq_file.h>
-#include <linux/cpufreq.h>
 
 /*
  *	Get CPU information for use by the procfs.
@@ -76,14 +75,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	if (c->microcode)
 		seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
 
-	if (cpu_has(c, X86_FEATURE_TSC)) {
-		unsigned int freq = cpufreq_quick_get(cpu);
-
-		if (!freq)
-			freq = cpu_khz;
+	if (cpu_has(c, X86_FEATURE_TSC))
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
-			   freq / 1000, (freq % 1000));
-	}
+			   cpu_khz / 1000, (cpu_khz % 1000));
 
 	/* Cache size */
 	if (c->x86_cache_size >= 0)
-- 
cgit v1.2.3


From 1a4fe38add8be45eb3873ad756561b9baf6bcaef Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 12 Jun 2017 16:30:27 -0700
Subject: cpufreq: intel_pstate: Remove max/min fractions to limit performance

In the current model the max/min perf limits are a fraction of current
user space limits to the allowed max_freq or 100% for global limits.
This results in wrong ratio limits calculation because of rounding
issues for some user space limits.

Initially we tried to solve this issue by issue by having more shift
bits to increase precision. Still there are isolated cases where we still
have error.

This can be avoided by using ratios all together. Since the way we get
cpuinfo.max_freq is by multiplying scaling factor to max ratio, we can
easily keep the max/min ratios in terms of ratios and not fractions.

For example:
if the max ratio = 36
cpuinfo.max_freq = 36 * 100000 = 3600000

Suppose user space sets a limit of 1200000, then we can calculate
max ratio limit as
= 36 * 1200000 / 3600000
= 12
This will be correct for any user limits.

The other advantage is that, we don't need to do any calculation in the
fast path as ratio limit is already calculated via set_policy() callback.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 114 +++++++++++++++++++++++------------------
 1 file changed, 63 insertions(+), 51 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index eb1158532de3..be0290a1a5e2 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -231,10 +231,8 @@ struct global_params {
  * @prev_cummulative_iowait: IO Wait time difference from last and
  *			current sample
  * @sample:		Storage for storing last Sample data
- * @min_perf:		Minimum capacity limit as a fraction of the maximum
- *			turbo P-state capacity.
- * @max_perf:		Maximum capacity limit as a fraction of the maximum
- *			turbo P-state capacity.
+ * @min_perf_ratio:	Minimum capacity in terms of PERF or HWP ratios
+ * @max_perf_ratio:	Maximum capacity in terms of PERF or HWP ratios
  * @acpi_perf_data:	Stores ACPI perf information read from _PSS
  * @valid_pss_table:	Set to true for valid ACPI _PSS entries found
  * @epp_powersave:	Last saved HWP energy performance preference
@@ -266,8 +264,8 @@ struct cpudata {
 	u64	prev_tsc;
 	u64	prev_cummulative_iowait;
 	struct sample sample;
-	int32_t	min_perf;
-	int32_t	max_perf;
+	int32_t	min_perf_ratio;
+	int32_t	max_perf_ratio;
 #ifdef CONFIG_ACPI
 	struct acpi_processor_performance acpi_perf_data;
 	bool valid_pss_table;
@@ -794,25 +792,32 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
 	NULL,
 };
 
-static void intel_pstate_hwp_set(unsigned int cpu)
+static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
+				     int *current_max)
 {
-	struct cpudata *cpu_data = all_cpu_data[cpu];
-	int min, hw_min, max, hw_max;
-	u64 value, cap;
-	s16 epp;
+	u64 cap;
 
 	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
-	hw_min = HWP_LOWEST_PERF(cap);
 	if (global.no_turbo)
-		hw_max = HWP_GUARANTEED_PERF(cap);
+		*current_max = HWP_GUARANTEED_PERF(cap);
 	else
-		hw_max = HWP_HIGHEST_PERF(cap);
+		*current_max = HWP_HIGHEST_PERF(cap);
+
+	*phy_max = HWP_HIGHEST_PERF(cap);
+}
+
+static void intel_pstate_hwp_set(unsigned int cpu)
+{
+	struct cpudata *cpu_data = all_cpu_data[cpu];
+	int max, min;
+	u64 value;
+	s16 epp;
+
+	max = cpu_data->max_perf_ratio;
+	min = cpu_data->min_perf_ratio;
 
-	max = fp_ext_toint(hw_max * cpu_data->max_perf);
 	if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
 		min = max;
-	else
-		min = fp_ext_toint(hw_max * cpu_data->min_perf);
 
 	rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 
@@ -1528,8 +1533,7 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu)
 
 	update_turbo_state();
 	pstate = intel_pstate_get_base_pstate(cpu);
-	pstate = max(cpu->pstate.min_pstate,
-		     fp_ext_toint(pstate * cpu->max_perf));
+	pstate = max(cpu->pstate.min_pstate, cpu->max_perf_ratio);
 	intel_pstate_set_pstate(cpu, pstate);
 }
 
@@ -1695,9 +1699,8 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
 	int max_pstate = intel_pstate_get_base_pstate(cpu);
 	int min_pstate;
 
-	min_pstate = max(cpu->pstate.min_pstate,
-			 fp_ext_toint(max_pstate * cpu->min_perf));
-	max_pstate = max(min_pstate, fp_ext_toint(max_pstate * cpu->max_perf));
+	min_pstate = max(cpu->pstate.min_pstate, cpu->min_perf_ratio);
+	max_pstate = max(min_pstate, cpu->max_perf_ratio);
 	return clamp_t(int, pstate, min_pstate, max_pstate);
 }
 
@@ -1967,52 +1970,61 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
 {
 	int max_freq = intel_pstate_get_max_freq(cpu);
 	int32_t max_policy_perf, min_policy_perf;
+	int max_state, turbo_max;
 
-	max_policy_perf = div_ext_fp(policy->max, max_freq);
-	max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1));
+	/*
+	 * HWP needs some special consideration, because on BDX the
+	 * HWP_REQUEST uses abstract value to represent performance
+	 * rather than pure ratios.
+	 */
+	if (hwp_active) {
+		intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
+	} else {
+		max_state = intel_pstate_get_base_pstate(cpu);
+		turbo_max = cpu->pstate.turbo_pstate;
+	}
+
+	max_policy_perf = max_state * policy->max / max_freq;
 	if (policy->max == policy->min) {
 		min_policy_perf = max_policy_perf;
 	} else {
-		min_policy_perf = div_ext_fp(policy->min, max_freq);
+		min_policy_perf = max_state * policy->min / max_freq;
 		min_policy_perf = clamp_t(int32_t, min_policy_perf,
 					  0, max_policy_perf);
 	}
 
+	pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n",
+		 policy->cpu, max_state,
+		 min_policy_perf, max_policy_perf);
+
 	/* Normalize user input to [min_perf, max_perf] */
 	if (per_cpu_limits) {
-		cpu->min_perf = min_policy_perf;
-		cpu->max_perf = max_policy_perf;
+		cpu->min_perf_ratio = min_policy_perf;
+		cpu->max_perf_ratio = max_policy_perf;
 	} else {
 		int32_t global_min, global_max;
 
 		/* Global limits are in percent of the maximum turbo P-state. */
-		global_max = percent_ext_fp(global.max_perf_pct);
-		global_min = percent_ext_fp(global.min_perf_pct);
-		if (max_freq != cpu->pstate.turbo_freq) {
-			int32_t turbo_factor;
-
-			turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate,
-						  cpu->pstate.max_pstate);
-			global_min = mul_ext_fp(global_min, turbo_factor);
-			global_max = mul_ext_fp(global_max, turbo_factor);
-		}
+		global_max = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
+		global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
 		global_min = clamp_t(int32_t, global_min, 0, global_max);
 
-		cpu->min_perf = max(min_policy_perf, global_min);
-		cpu->min_perf = min(cpu->min_perf, max_policy_perf);
-		cpu->max_perf = min(max_policy_perf, global_max);
-		cpu->max_perf = max(min_policy_perf, cpu->max_perf);
+		pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu,
+			 global_min, global_max);
 
-		/* Make sure min_perf <= max_perf */
-		cpu->min_perf = min(cpu->min_perf, cpu->max_perf);
-	}
+		cpu->min_perf_ratio = max(min_policy_perf, global_min);
+		cpu->min_perf_ratio = min(cpu->min_perf_ratio, max_policy_perf);
+		cpu->max_perf_ratio = min(max_policy_perf, global_max);
+		cpu->max_perf_ratio = max(min_policy_perf, cpu->max_perf_ratio);
 
-	cpu->max_perf = round_up(cpu->max_perf, EXT_FRAC_BITS);
-	cpu->min_perf = round_up(cpu->min_perf, EXT_FRAC_BITS);
+		/* Make sure min_perf <= max_perf */
+		cpu->min_perf_ratio = min(cpu->min_perf_ratio,
+					  cpu->max_perf_ratio);
 
-	pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
-		 fp_ext_toint(cpu->max_perf * 100),
-		 fp_ext_toint(cpu->min_perf * 100));
+	}
+	pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu,
+		 cpu->max_perf_ratio,
+		 cpu->min_perf_ratio);
 }
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
@@ -2115,8 +2127,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	cpu = all_cpu_data[policy->cpu];
 
-	cpu->max_perf = int_ext_tofp(1);
-	cpu->min_perf = 0;
+	cpu->max_perf_ratio = 0xFF;
+	cpu->min_perf_ratio = 0;
 
 	policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
-- 
cgit v1.2.3


From d50a7d8acd780f54c48703ab1069c2e64a24e4a0 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Mon, 12 Jun 2017 17:55:10 +0200
Subject: ARM: cpuidle: Support asymmetric idle definition

Some hardware have clusters with different idle states. The current code does
not support this and fails as it expects all the idle states to be identical.

Because of this, the Mediatek mtk8173 had to create the same idle state for a
big.Little system and now the Hisilicon 960 is facing the same situation.

Solve this by simply assuming the multiple driver will be needed for all the
platforms using the ARM generic cpuidle driver which makes sense because of the
different topologies we can support with a single kernel for ARM32 or ARM64.

Every CPU has its own driver, so every single CPU can specify in the DT the
idle states.

This simple approach allows to support the future dynamIQ system, current SMP
and HMP.

Tested on:
 - 96boards: Hikey 620
 - 96boards: Hikey 960
 - 96boards: dragonboard410c
 - Mediatek 8173

Tested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/Kconfig.arm   |  1 +
 drivers/cpuidle/cpuidle-arm.c | 62 ++++++++++++++++++++++++++-----------------
 2 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index 21340e0be73e..f52144808455 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -4,6 +4,7 @@
 config ARM_CPUIDLE
         bool "Generic ARM/ARM64 CPU idle Driver"
         select DT_IDLE_STATES
+	select CPU_IDLE_MULTIPLE_DRIVERS
         help
           Select this to enable generic cpuidle driver for ARM.
           It provides a generic idle driver whose idle states are configured
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index f440d385ed34..7080c384ad5d 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/slab.h>
+#include <linux/topology.h>
 
 #include <asm/cpuidle.h>
 
@@ -44,7 +45,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev,
 	return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);
 }
 
-static struct cpuidle_driver arm_idle_driver = {
+static struct cpuidle_driver arm_idle_driver __initdata = {
 	.name = "arm_idle",
 	.owner = THIS_MODULE,
 	/*
@@ -80,30 +81,42 @@ static const struct of_device_id arm_idle_state_match[] __initconst = {
 static int __init arm_idle_init(void)
 {
 	int cpu, ret;
-	struct cpuidle_driver *drv = &arm_idle_driver;
+	struct cpuidle_driver *drv;
 	struct cpuidle_device *dev;
 
-	/*
-	 * Initialize idle states data, starting at index 1.
-	 * This driver is DT only, if no DT idle states are detected (ret == 0)
-	 * let the driver initialization fail accordingly since there is no
-	 * reason to initialize the idle driver if only wfi is supported.
-	 */
-	ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);
-	if (ret <= 0)
-		return ret ? : -ENODEV;
-
-	ret = cpuidle_register_driver(drv);
-	if (ret) {
-		pr_err("Failed to register cpuidle driver\n");
-		return ret;
-	}
-
-	/*
-	 * Call arch CPU operations in order to initialize
-	 * idle states suspend back-end specific data
-	 */
 	for_each_possible_cpu(cpu) {
+
+		drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL);
+		if (!drv) {
+			ret = -ENOMEM;
+			goto out_fail;
+		}
+
+		drv->cpumask = (struct cpumask *)cpumask_of(cpu);
+
+		/*
+		 * Initialize idle states data, starting at index 1.  This
+		 * driver is DT only, if no DT idle states are detected (ret
+		 * == 0) let the driver initialization fail accordingly since
+		 * there is no reason to initialize the idle driver if only
+		 * wfi is supported.
+		 */
+		ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);
+		if (ret <= 0) {
+			ret = ret ? : -ENODEV;
+			goto out_fail;
+		}
+
+		ret = cpuidle_register_driver(drv);
+		if (ret) {
+			pr_err("Failed to register cpuidle driver\n");
+			goto out_fail;
+		}
+
+		/*
+		 * Call arch CPU operations in order to initialize
+		 * idle states suspend back-end specific data
+		 */
 		ret = arm_cpuidle_init(cpu);
 
 		/*
@@ -141,10 +154,11 @@ out_fail:
 		dev = per_cpu(cpuidle_devices, cpu);
 		cpuidle_unregister_device(dev);
 		kfree(dev);
+		drv = cpuidle_get_driver();
+		cpuidle_unregister_driver(drv);
+		kfree(drv);
 	}
 
-	cpuidle_unregister_driver(drv);
-
 	return ret;
 }
 device_initcall(arm_idle_init);
-- 
cgit v1.2.3


From f8475cef90082bf0902ddab106112de130d90395 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 23 Jun 2017 22:11:52 -0700
Subject: x86: use common aperfmperf_khz_on_cpu() to calculate KHz using
 APERF/MPERF

The goal of this change is to give users a uniform and meaningful
result when they read /sys/...cpufreq/scaling_cur_freq
on modern x86 hardware, as compared to what they get today.

Modern x86 processors include the hardware needed
to accurately calculate frequency over an interval --
APERF, MPERF, and the TSC.

Here we provide an x86 routine to make this calculation
on supported hardware, and use it in preference to any
driver driver-specific cpufreq_driver.get() routine.

MHz is computed like so:

MHz = base_MHz * delta_APERF / delta_MPERF

MHz is the average frequency of the busy processor
over a measurement interval.  The interval is
defined to be the time between successive invocations
of aperfmperf_khz_on_cpu(), which are expected to to
happen on-demand when users read sysfs attribute
cpufreq/scaling_cur_freq.

As with previous methods of calculating MHz,
idle time is excluded.

base_MHz above is from TSC calibration global "cpu_khz".

This x86 native method to calculate MHz returns a meaningful result
no matter if P-states are controlled by hardware or firmware
and/or if the Linux cpufreq sub-system is or is-not installed.

When this routine is invoked more frequently, the measurement
interval becomes shorter.  However, the code limits re-computation
to 10ms intervals so that average frequency remains meaningful.

Discerning users are encouraged to take advantage of
the turbostat(8) utility, which can gracefully handle
concurrent measurement intervals of arbitrary length.

Signed-off-by: Len Brown <len.brown@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/cpu/Makefile     |  1 +
 arch/x86/kernel/cpu/aperfmperf.c | 79 ++++++++++++++++++++++++++++++++++++++++
 drivers/cpufreq/cpufreq.c        | 12 +++++-
 include/linux/cpufreq.h          |  2 +
 4 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/aperfmperf.c

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 52000010c62e..cdf82492b770 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -21,6 +21,7 @@ obj-y			+= common.o
 obj-y			+= rdrand.o
 obj-y			+= match.o
 obj-y			+= bugs.o
+obj-$(CONFIG_CPU_FREQ)	+= aperfmperf.o
 
 obj-$(CONFIG_PROC_FS)	+= proc.o
 obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
new file mode 100644
index 000000000000..d869c8671e36
--- /dev/null
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -0,0 +1,79 @@
+/*
+ * x86 APERF/MPERF KHz calculation for
+ * /sys/.../cpufreq/scaling_cur_freq
+ *
+ * Copyright (C) 2017 Intel Corp.
+ * Author: Len Brown <len.brown@intel.com>
+ *
+ * This file is licensed under GPLv2.
+ */
+
+#include <linux/jiffies.h>
+#include <linux/math64.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+
+struct aperfmperf_sample {
+	unsigned int	khz;
+	unsigned long	jiffies;
+	u64	aperf;
+	u64	mperf;
+};
+
+static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
+
+/*
+ * aperfmperf_snapshot_khz()
+ * On the current CPU, snapshot APERF, MPERF, and jiffies
+ * unless we already did it within 10ms
+ * calculate kHz, save snapshot
+ */
+static void aperfmperf_snapshot_khz(void *dummy)
+{
+	u64 aperf, aperf_delta;
+	u64 mperf, mperf_delta;
+	struct aperfmperf_sample *s = this_cpu_ptr(&samples);
+
+	/* Don't bother re-computing within 10 ms */
+	if (time_before(jiffies, s->jiffies + HZ/100))
+		return;
+
+	rdmsrl(MSR_IA32_APERF, aperf);
+	rdmsrl(MSR_IA32_MPERF, mperf);
+
+	aperf_delta = aperf - s->aperf;
+	mperf_delta = mperf - s->mperf;
+
+	/*
+	 * There is no architectural guarantee that MPERF
+	 * increments faster than we can read it.
+	 */
+	if (mperf_delta == 0)
+		return;
+
+	/*
+	 * if (cpu_khz * aperf_delta) fits into ULLONG_MAX, then
+	 *	khz = (cpu_khz * aperf_delta) / mperf_delta
+	 */
+	if (div64_u64(ULLONG_MAX, cpu_khz) > aperf_delta)
+		s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
+	else	/* khz = aperf_delta / (mperf_delta / cpu_khz) */
+		s->khz = div64_u64(aperf_delta,
+			div64_u64(mperf_delta, cpu_khz));
+	s->jiffies = jiffies;
+	s->aperf = aperf;
+	s->mperf = mperf;
+}
+
+unsigned int arch_freq_get_on_cpu(int cpu)
+{
+	if (!cpu_khz)
+		return 0;
+
+	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+		return 0;
+
+	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
+
+	return per_cpu(samples.khz, cpu);
+}
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 26b643d57847..6e7424d12de9 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -632,11 +632,21 @@ show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
 show_one(scaling_min_freq, min);
 show_one(scaling_max_freq, max);
 
+__weak unsigned int arch_freq_get_on_cpu(int cpu)
+{
+	return 0;
+}
+
 static ssize_t show_scaling_cur_freq(struct cpufreq_policy *policy, char *buf)
 {
 	ssize_t ret;
+	unsigned int freq;
 
-	if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
+	freq = arch_freq_get_on_cpu(policy->cpu);
+	if (freq)
+		ret = sprintf(buf, "%u\n", freq);
+	else if (cpufreq_driver && cpufreq_driver->setpolicy &&
+			cpufreq_driver->get)
 		ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu));
 	else
 		ret = sprintf(buf, "%u\n", policy->cur);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index a5ce0bbeadb5..905117bd5012 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -883,6 +883,8 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy)
 }
 #endif
 
+extern unsigned int arch_freq_get_on_cpu(int cpu);
+
 /* the following are really really optional */
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
 extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs;
-- 
cgit v1.2.3


From 62611cb912f7cb08ef7815780bcc20a09abedd20 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 23 Jun 2017 22:11:53 -0700
Subject: intel_pstate: delete scheduler hook in HWP mode

The cpufreq/scaling_cur_freq sysfs attribute is now provided by
shared x86 cpufreq code on modern x86 systems, including
all systems supported by the intel_pstate driver.

In HWP mode, maintaining that value was the sole purpose of
the scheduler hook, intel_pstate_update_util_hwp(),
so it can now be removed.

Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index be0290a1a5e2..b00a45595c89 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1736,16 +1736,6 @@ static void intel_pstate_adjust_pstate(struct cpudata *cpu, int target_pstate)
 		fp_toint(cpu->iowait_boost * 100));
 }
 
-static void intel_pstate_update_util_hwp(struct update_util_data *data,
-					 u64 time, unsigned int flags)
-{
-	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
-	u64 delta_ns = time - cpu->sample.time;
-
-	if ((s64)delta_ns >= INTEL_PSTATE_HWP_SAMPLING_INTERVAL)
-		intel_pstate_sample(cpu, time);
-}
-
 static void intel_pstate_update_util_pid(struct update_util_data *data,
 					 u64 time, unsigned int flags)
 {
@@ -1937,6 +1927,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
 	struct cpudata *cpu = all_cpu_data[cpu_num];
 
+	if (hwp_active)
+		return;
+
 	if (cpu->update_util_set)
 		return;
 
@@ -2570,7 +2563,6 @@ static int __init intel_pstate_init(void)
 		} else {
 			hwp_active++;
 			intel_pstate.attr = hwp_cpufreq_attrs;
-			pstate_funcs.update_util = intel_pstate_update_util_hwp;
 			goto hwp_cpu_matched;
 		}
 	} else {
-- 
cgit v1.2.3


From 82b4e03e01bc8def546b52707962809f04ae5c7a Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 23 Jun 2017 22:11:54 -0700
Subject: intel_pstate: skip scheduler hook when in "performance" mode

When the governor is set to "performance", intel_pstate does not
need the scheduler hook for doing any calculations.  Under these
conditions, its only purpose is to continue to maintain
cpufreq/scaling_cur_freq.

The cpufreq/scaling_cur_freq sysfs attribute is now provided by
shared x86 cpufreq code on modern x86 systems, including
all systems supported by the intel_pstate driver.

So in "performance" governor mode, the scheduler hook can be skipped.
This applies to both in Software and Hardware P-state control modes.

Suggested-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b00a45595c89..1772d309bea9 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2044,10 +2044,10 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		 */
 		intel_pstate_clear_update_util_hook(policy->cpu);
 		intel_pstate_max_within_limits(cpu);
+	} else {
+		intel_pstate_set_update_util_hook(policy->cpu);
 	}
 
-	intel_pstate_set_update_util_hook(policy->cpu);
-
 	if (hwp_active)
 		intel_pstate_hwp_set(policy->cpu);
 
-- 
cgit v1.2.3


From 5209654a46ee71137ad9b06da99d4ef2794475af Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Wed, 7 Jun 2017 10:19:46 -0500
Subject: x86/ACPI/cstate: Allow ACPI C1 FFH MWAIT use on AMD systems

AMD systems support the Monitor/Mwait instructions and these can be used
for ACPI C1 in the same way as on Intel systems.

Three things are needed:
 1) This patch.
 2) BIOS that declares a C1 state in _CST to use FFH, with correct values.
 3) CPUID_Fn00000005_EDX is non-zero on the system.

The BIOS on AMD systems have historically not defined a C1 state in _CST,
so the acpi_idle driver uses HALT for ACPI C1.

Currently released systems have CPUID_Fn00000005_EDX as reserved/RAZ. If a
BIOS is released for these systems that requests a C1 state with FFH, the
FFH implementation in Linux will fail since CPUID_Fn00000005_EDX is 0. The
acpi_idle driver will then fallback to using HALT for ACPI C1.

Future systems are expected to have non-zero CPUID_Fn00000005_EDX and BIOS
support for using FFH for ACPI C1.

Allow ffh_cstate_init() to succeed on AMD systems.

Tested on Fam15h and Fam17h systems.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/acpi/cstate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 8233a630280f..dde437f5d14f 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -167,7 +167,8 @@ static int __init ffh_cstate_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 
-	if (c->x86_vendor != X86_VENDOR_INTEL)
+	if (c->x86_vendor != X86_VENDOR_INTEL &&
+	    c->x86_vendor != X86_VENDOR_AMD)
 		return -1;
 
 	cpu_cstate_entry = alloc_percpu(struct cstate_entry);
-- 
cgit v1.2.3


From 73808d0fd26b3d3f0f44cc7c469ad1d3c1b570b8 Mon Sep 17 00:00:00 2001
From: "Prakash, Prashanth" <pprakash@codeaurora.org>
Date: Thu, 11 May 2017 16:39:44 -0600
Subject: cpufreq / CPPC: Initialize policy->min to lowest nonlinear
 performance

Description of Lowest Perfomance in ACPI 6.1 specification states:
"Lowest Performance is the absolute lowest performance level of
the platform. Selecting a performance level lower than the lowest
nonlinear performance level may actually cause an efficiency penalty,
but should reduce the instantaneous power consumption of the processor.
In traditional terms, this represents the T-state range of performance
levels."

Set the default value of policy->min to Lowest Nonlinear Performance
to avoid any potential efficiency penalty.

Signed-off-by: Prashanth Prakash <pprakash@codeaurora.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Alexey Klimov <alexey.klimov@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cppc_cpufreq.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index e82bb3c30b92..10be285c9055 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -144,10 +144,23 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	cppc_dmi_max_khz = cppc_get_dmi_max_khz();
 
-	policy->min = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / cpu->perf_caps.highest_perf;
+	/*
+	 * Set min to lowest nonlinear perf to avoid any efficiency penalty (see
+	 * Section 8.4.7.1.1.5 of ACPI 6.1 spec)
+	 */
+	policy->min = cpu->perf_caps.lowest_nonlinear_perf * cppc_dmi_max_khz /
+		cpu->perf_caps.highest_perf;
 	policy->max = cppc_dmi_max_khz;
-	policy->cpuinfo.min_freq = policy->min;
-	policy->cpuinfo.max_freq = policy->max;
+
+	/*
+	 * Set cpuinfo.min_freq to Lowest to make the full range of performance
+	 * available if userspace wants to use any perf between lowest & lowest
+	 * nonlinear perf
+	 */
+	policy->cpuinfo.min_freq = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz /
+		cpu->perf_caps.highest_perf;
+	policy->cpuinfo.max_freq = cppc_dmi_max_khz;
+
 	policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num);
 	policy->shared_type = cpu->shared_type;
 
-- 
cgit v1.2.3


From 654d08a42a5660514df5a5a6ed4dc1b0c978a0a3 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 9 Jun 2017 12:29:20 -0700
Subject: intel_idle: Use more common logging style

Remove #define PREFIX and add #define pr_fmt to use more common logging.

Miscellanea:

o Add missing newline to format
o Convert a single printk without KERN_<LEVEL> to pr_info

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 216d7ec88c0c..c2ae819a871c 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -51,6 +51,8 @@
 /* un-comment DEBUG to enable pr_debug() statements */
 #define DEBUG
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
 #include <linux/tick.h>
@@ -65,7 +67,6 @@
 #include <asm/msr.h>
 
 #define INTEL_IDLE_VERSION "0.4.1"
-#define PREFIX "intel_idle: "
 
 static struct cpuidle_driver intel_idle_driver = {
 	.name = "intel_idle",
@@ -1111,7 +1112,7 @@ static int __init intel_idle_probe(void)
 	const struct x86_cpu_id *id;
 
 	if (max_cstate == 0) {
-		pr_debug(PREFIX "disabled\n");
+		pr_debug("disabled\n");
 		return -EPERM;
 	}
 
@@ -1119,8 +1120,8 @@ static int __init intel_idle_probe(void)
 	if (!id) {
 		if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
 		    boot_cpu_data.x86 == 6)
-			pr_debug(PREFIX "does not run on family %d model %d\n",
-				boot_cpu_data.x86, boot_cpu_data.x86_model);
+			pr_debug("does not run on family %d model %d\n",
+				 boot_cpu_data.x86, boot_cpu_data.x86_model);
 		return -ENODEV;
 	}
 
@@ -1134,13 +1135,13 @@ static int __init intel_idle_probe(void)
 	    !mwait_substates)
 			return -ENODEV;
 
-	pr_debug(PREFIX "MWAIT substates: 0x%x\n", mwait_substates);
+	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
 
 	icpu = (const struct idle_cpu *)id->driver_data;
 	cpuidle_state_table = icpu->state_table;
 
-	pr_debug(PREFIX "v" INTEL_IDLE_VERSION
-		" model 0x%X\n", boot_cpu_data.x86_model);
+	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
+		 boot_cpu_data.x86_model);
 
 	return 0;
 }
@@ -1340,8 +1341,7 @@ static void __init intel_idle_cpuidle_driver_init(void)
 			break;
 
 		if (cstate + 1 > max_cstate) {
-			printk(PREFIX "max_cstate %d reached\n",
-				max_cstate);
+			pr_info("max_cstate %d reached\n", max_cstate);
 			break;
 		}
 
@@ -1358,8 +1358,8 @@ static void __init intel_idle_cpuidle_driver_init(void)
 
 		/* if state marked as disabled, skip it */
 		if (cpuidle_state_table[cstate].disabled != 0) {
-			pr_debug(PREFIX "state %s is disabled",
-				cpuidle_state_table[cstate].name);
+			pr_debug("state %s is disabled\n",
+				 cpuidle_state_table[cstate].name);
 			continue;
 		}
 
@@ -1395,7 +1395,7 @@ static int intel_idle_cpu_init(unsigned int cpu)
 	dev->cpu = cpu;
 
 	if (cpuidle_register_device(dev)) {
-		pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu);
+		pr_debug("cpuidle_register_device %d failed!\n", cpu);
 		return -EIO;
 	}
 
@@ -1447,8 +1447,8 @@ static int __init intel_idle_init(void)
 	retval = cpuidle_register_driver(&intel_idle_driver);
 	if (retval) {
 		struct cpuidle_driver *drv = cpuidle_get_driver();
-		printk(KERN_DEBUG PREFIX "intel_idle yielding to %s",
-			drv ? drv->name : "none");
+		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
+		       drv ? drv->name : "none");
 		goto init_driver_fail;
 	}
 
@@ -1460,8 +1460,8 @@ static int __init intel_idle_init(void)
 	if (retval < 0)
 		goto hp_setup_fail;
 
-	pr_debug(PREFIX "lapic_timer_reliable_states 0x%x\n",
-		lapic_timer_reliable_states);
+	pr_debug("lapic_timer_reliable_states 0x%x\n",
+		 lapic_timer_reliable_states);
 
 	return 0;
 
-- 
cgit v1.2.3


From 3ed09c94580de9d5b18cc35d1f97e9f24cd9233b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 26 Jun 2017 15:38:15 +1000
Subject: cpuidle: menu: allow state 0 to be disabled

The menu driver does not allow state0 to be disabled completely.
If it is disabled but other enabled states don't meet latency
requirements, it is still used.

Fix this by starting with the first enabled idle state. Fall back
to state 0 if no idle states are enabled (arguably this should be
-EINVAL if it is attempted, but this is the minimal fix).

Acked-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/menu.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index b2330fd69e34..61b64c2b2cb8 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -286,6 +286,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	struct device *device = get_cpu_device(dev->cpu);
 	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 	int i;
+	int first_idx;
+	int idx;
 	unsigned int interactivity_req;
 	unsigned int expected_interval;
 	unsigned long nr_iowaiters, cpu_load;
@@ -335,11 +337,11 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		if (data->next_timer_us > polling_threshold &&
 		    latency_req > s->exit_latency && !s->disabled &&
 		    !dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable)
-			data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
+			first_idx = CPUIDLE_DRIVER_STATE_START;
 		else
-			data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
+			first_idx = CPUIDLE_DRIVER_STATE_START - 1;
 	} else {
-		data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
+		first_idx = 0;
 	}
 
 	/*
@@ -359,20 +361,28 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * Find the idle state with the lowest power while satisfying
 	 * our constraints.
 	 */
-	for (i = data->last_state_idx + 1; i < drv->state_count; i++) {
+	idx = -1;
+	for (i = first_idx; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
 		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
 		if (s->disabled || su->disable)
 			continue;
+		if (idx == -1)
+			idx = i; /* first enabled state */
 		if (s->target_residency > data->predicted_us)
 			break;
 		if (s->exit_latency > latency_req)
 			break;
 
-		data->last_state_idx = i;
+		idx = i;
 	}
 
+	if (idx == -1)
+		idx = 0; /* No states enabled. Must use 0. */
+
+	data->last_state_idx = idx;
+
 	return data->last_state_idx;
 }
 
-- 
cgit v1.2.3


From fab24dcc395637557a7988a867e7b3a5823917a9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 29 Jun 2017 01:47:56 +0200
Subject: cpufreq: intel_pstate: Clean up after performance governor changes

After commit 82b4e03e01bc (intel_pstate: skip scheduler hook when in
"performance" mode) get_target_pstate_use_performance() and
get_target_pstate_use_cpu_load() are never called if scaling_governor
is "performance", so drop the CPUFREQ_POLICY_PERFORMANCE checks from
them as they will never trigger anyway.

Moreover, the documentation needs to be updated to reflect the change
made by the above commit, so do that too.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 Documentation/admin-guide/pm/intel_pstate.rst | 6 ++----
 drivers/cpufreq/intel_pstate.c                | 6 ------
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index 33d703989ea8..1d6249825efc 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -157,10 +157,8 @@ Without HWP, this P-state selection algorithm is always the same regardless of
 the processor model and platform configuration.
 
 It selects the maximum P-state it is allowed to use, subject to limits set via
-``sysfs``, every time the P-state selection computations are carried out by the
-driver's utilization update callback for the given CPU (that does not happen
-more often than every 10 ms), but the hardware configuration will not be changed
-if the new P-state is the same as the current one.
+``sysfs``, every time the driver configuration for the given CPU is updated
+(e.g. via ``sysfs``).
 
 This is the default P-state selection algorithm if the
 :c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 1772d309bea9..756483251832 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1620,9 +1620,6 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 	int32_t busy_frac, boost;
 	int target, avg_pstate;
 
-	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE)
-		return cpu->pstate.turbo_pstate;
-
 	busy_frac = div_fp(sample->mperf, sample->tsc);
 
 	boost = cpu->iowait_boost;
@@ -1659,9 +1656,6 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 	int32_t perf_scaled, max_pstate, current_pstate, sample_ratio;
 	u64 duration_ns;
 
-	if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE)
-		return cpu->pstate.turbo_pstate;
-
 	/*
 	 * perf_scaled is the ratio of the average P-state during the last
 	 * sampling period to the P-state requested last time (in percent).
-- 
cgit v1.2.3


From 8183003e48cbb9d6d276b6330eb5edecf95830f3 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 29 Jun 2017 01:49:44 +0200
Subject: cpufreq: Update scaling_cur_freq documentation

Commit f8475cef9008 "x86: use common aperfmperf_khz_on_cpu() to
calculate KHz using APERF/MPERF" modified the way the scaling_cur_freq
cpufreq policy attribute in sysfs is handled on contemporary
Intel-based x86 systems, so update the documentation to reflect
that change.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/cpufreq.rst | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 09aa2e949787..463cf7e73db8 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -269,16 +269,16 @@ are the following:
 ``scaling_cur_freq``
 	Current frequency of all of the CPUs belonging to this policy (in kHz).
 
-	For the majority of scaling drivers, this is the frequency of the last
-	P-state requested by the driver from the hardware using the scaling
+	In the majority of cases, this is the frequency of the last P-state
+	requested by the scaling driver from the hardware using the scaling
 	interface provided by it, which may or may not reflect the frequency
 	the CPU is actually running at (due to hardware design and other
 	limitations).
 
-	Some scaling drivers (e.g. |intel_pstate|) attempt to provide
-	information more precisely reflecting the current CPU frequency through
-	this attribute, but that still may not be the exact current CPU
-	frequency as seen by the hardware at the moment.
+	Some architectures (e.g. ``x86``) may attempt to provide information
+	more precisely reflecting the current CPU frequency through this
+	attribute, but that still may not be the exact current CPU frequency as
+	seen by the hardware at the moment.
 
 ``scaling_driver``
 	The scaling driver currently in use.
-- 
cgit v1.2.3