diff options
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 37 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 10 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 36 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_performance.c | 18 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_powersave.c | 10 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_userspace.c | 10 | ||||
-rw-r--r-- | drivers/cpufreq/powernv-cpufreq.c | 116 | ||||
-rw-r--r-- | include/linux/cpufreq.h | 25 | ||||
-rw-r--r-- | include/trace/events/power.h | 22 | ||||
-rw-r--r-- | kernel/trace/power-traces.c | 1 |
10 files changed, 162 insertions, 123 deletions
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e979ec78b695..34b17447e0d1 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -959,6 +959,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) return cpufreq_add_dev_symlink(policy); } +__weak struct cpufreq_governor *cpufreq_default_governor(void) +{ + return NULL; +} + static int cpufreq_init_policy(struct cpufreq_policy *policy) { struct cpufreq_governor *gov = NULL; @@ -968,11 +973,14 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) /* Update governor of new_policy to the governor used before hotplug */ gov = find_governor(policy->last_governor); - if (gov) + if (gov) { pr_debug("Restoring governor %s for cpu %d\n", policy->governor->name, policy->cpu); - else - gov = CPUFREQ_DEFAULT_GOVERNOR; + } else { + gov = cpufreq_default_governor(); + if (!gov) + return -ENODATA; + } new_policy.governor = gov; @@ -1920,21 +1928,16 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_driver_target); +__weak struct cpufreq_governor *cpufreq_fallback_governor(void) +{ + return NULL; +} + static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) { int ret; - /* Only must be defined when default governor is known to have latency - restrictions, like e.g. conservative or ondemand. - That this is the case is already ensured in Kconfig - */ -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE - struct cpufreq_governor *gov = &cpufreq_gov_performance; -#else - struct cpufreq_governor *gov = NULL; -#endif - /* Don't start any governor operations if we are entering suspend */ if (cpufreq_suspended) return 0; @@ -1948,12 +1951,14 @@ static int __cpufreq_governor(struct cpufreq_policy *policy, if (policy->governor->max_transition_latency && policy->cpuinfo.transition_latency > policy->governor->max_transition_latency) { - if (!gov) - return -EINVAL; - else { + struct cpufreq_governor *gov = cpufreq_fallback_governor(); + + if (gov) { pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", policy->governor->name, gov->name); policy->governor = gov; + } else { + return -EINVAL; } } diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 606ad74abe6e..8504a70a4785 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -26,10 +26,7 @@ static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE -static -#endif -struct cpufreq_governor cpufreq_gov_conservative = { +static struct cpufreq_governor cpufreq_gov_conservative = { .name = "conservative", .governor = cs_cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, @@ -399,6 +396,11 @@ MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_conservative; +} + fs_initcall(cpufreq_gov_dbs_init); #else module_init(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index eae51070c034..929e193ac1c1 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -31,9 +31,7 @@ static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); static struct od_ops od_ops; -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND static struct cpufreq_governor cpufreq_gov_ondemand; -#endif static unsigned int default_powersave_bias; @@ -554,6 +552,19 @@ static struct common_dbs_data od_dbs_cdata = { .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), }; +static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); +} + +static struct cpufreq_governor cpufreq_gov_ondemand = { + .name = "ondemand", + .governor = od_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + static void od_set_powersave_bias(unsigned int powersave_bias) { struct cpufreq_policy *policy; @@ -605,22 +616,6 @@ void od_unregister_powersave_bias_handler(void) } EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); -static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event) -{ - return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); -} - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND -static -#endif -struct cpufreq_governor cpufreq_gov_ondemand = { - .name = "ondemand", - .governor = od_cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static int __init cpufreq_gov_dbs_init(void) { return cpufreq_register_governor(&cpufreq_gov_ondemand); @@ -638,6 +633,11 @@ MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_ondemand; +} + fs_initcall(cpufreq_gov_dbs_init); #else module_init(cpufreq_gov_dbs_init); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index cf117deb39b1..af9f4b96f5a8 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -33,10 +33,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy, return 0; } -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE -static -#endif -struct cpufreq_governor cpufreq_gov_performance = { +static struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", .governor = cpufreq_governor_performance, .owner = THIS_MODULE, @@ -52,6 +49,19 @@ static void __exit cpufreq_gov_performance_exit(void) cpufreq_unregister_governor(&cpufreq_gov_performance); } +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_performance; +} +#endif +#ifndef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE +struct cpufreq_governor *cpufreq_fallback_governor(void) +{ + return &cpufreq_gov_performance; +} +#endif + MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index e3b874c235ea..b8b400232a74 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -33,10 +33,7 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, return 0; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE -static -#endif -struct cpufreq_governor cpufreq_gov_powersave = { +static struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .governor = cpufreq_governor_powersave, .owner = THIS_MODULE, @@ -57,6 +54,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_powersave; +} + fs_initcall(cpufreq_gov_powersave_init); #else module_init(cpufreq_gov_powersave_init); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 4dbf1db16aca..4d16f45ee1da 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -89,10 +89,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy, return rc; } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE -static -#endif -struct cpufreq_governor cpufreq_gov_userspace = { +static struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", .governor = cpufreq_governor_userspace, .store_setspeed = cpufreq_set, @@ -116,6 +113,11 @@ MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); MODULE_LICENSE("GPL"); #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &cpufreq_gov_userspace; +} + fs_initcall(cpufreq_gov_userspace_init); #else module_init(cpufreq_gov_userspace_init); diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 547890fd9572..1bbc10a54c59 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -28,6 +28,8 @@ #include <linux/of.h> #include <linux/reboot.h> #include <linux/slab.h> +#include <linux/cpu.h> +#include <trace/events/power.h> #include <asm/cputhreads.h> #include <asm/firmware.h> @@ -42,13 +44,24 @@ static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; static bool rebooting, throttled, occ_reset; +static unsigned int *core_to_chip_map; + +static const char * const throttle_reason[] = { + "No throttling", + "Power Cap", + "Processor Over Temperature", + "Power Supply Failure", + "Over Current", + "OCC Reset" +}; static struct chip { unsigned int id; bool throttled; + bool restore; + u8 throttle_reason; cpumask_t mask; struct work_struct throttle; - bool restore; } *chips; static int nr_chips; @@ -312,13 +325,14 @@ static inline unsigned int get_nominal_index(void) static void powernv_cpufreq_throttle_check(void *data) { unsigned int cpu = smp_processor_id(); + unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)]; unsigned long pmsr; int pmsr_pmax, i; pmsr = get_pmspr(SPRN_PMSR); for (i = 0; i < nr_chips; i++) - if (chips[i].id == cpu_to_chip_id(cpu)) + if (chips[i].id == chip_id) break; /* Check for Pmax Capping */ @@ -328,17 +342,17 @@ static void powernv_cpufreq_throttle_check(void *data) goto next; chips[i].throttled = true; if (pmsr_pmax < powernv_pstate_info.nominal) - pr_crit("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", - cpu, chips[i].id, pmsr_pmax, - powernv_pstate_info.nominal); - else - pr_info("CPU %d on Chip %u has Pmax reduced below turbo frequency (%d < %d)\n", - cpu, chips[i].id, pmsr_pmax, - powernv_pstate_info.max); + pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", + cpu, chips[i].id, pmsr_pmax, + powernv_pstate_info.nominal); + trace_powernv_throttle(chips[i].id, + throttle_reason[chips[i].throttle_reason], + pmsr_pmax); } else if (chips[i].throttled) { chips[i].throttled = false; - pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, - chips[i].id, pmsr_pmax); + trace_powernv_throttle(chips[i].id, + throttle_reason[chips[i].throttle_reason], + pmsr_pmax); } /* Check if Psafe_mode_active is set in PMSR. */ @@ -356,7 +370,7 @@ next: if (throttled) { pr_info("PMSR = %16lx\n", pmsr); - pr_crit("CPU Frequency could be throttled\n"); + pr_warn("CPU Frequency could be throttled\n"); } } @@ -423,18 +437,19 @@ void powernv_cpufreq_work_fn(struct work_struct *work) { struct chip *chip = container_of(work, struct chip, throttle); unsigned int cpu; - cpumask_var_t mask; + cpumask_t mask; - smp_call_function_any(&chip->mask, + get_online_cpus(); + cpumask_and(&mask, &chip->mask, cpu_online_mask); + smp_call_function_any(&mask, powernv_cpufreq_throttle_check, NULL, 0); if (!chip->restore) - return; + goto out; chip->restore = false; - cpumask_copy(mask, &chip->mask); - for_each_cpu_and(cpu, mask, cpu_online_mask) { - int index, tcpu; + for_each_cpu(cpu, &mask) { + int index; struct cpufreq_policy policy; cpufreq_get_policy(&policy, cpu); @@ -442,20 +457,12 @@ void powernv_cpufreq_work_fn(struct work_struct *work) policy.cur, CPUFREQ_RELATION_C, &index); powernv_cpufreq_target_index(&policy, index); - for_each_cpu(tcpu, policy.cpus) - cpumask_clear_cpu(tcpu, mask); + cpumask_andnot(&mask, &mask, policy.cpus); } +out: + put_online_cpus(); } -static char throttle_reason[][30] = { - "No throttling", - "Power Cap", - "Processor Over Temperature", - "Power Supply Failure", - "Over Current", - "OCC Reset" - }; - static int powernv_cpufreq_occ_msg(struct notifier_block *nb, unsigned long msg_type, void *_msg) { @@ -481,7 +488,7 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, */ if (!throttled) { throttled = true; - pr_crit("CPU frequency is throttled for duration\n"); + pr_warn("CPU frequency is throttled for duration\n"); } break; @@ -505,23 +512,18 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb, return 0; } - if (omsg.throttle_status && + for (i = 0; i < nr_chips; i++) + if (chips[i].id == omsg.chip) + break; + + if (omsg.throttle_status >= 0 && omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) - pr_info("OCC: Chip %u Pmax reduced due to %s\n", - (unsigned int)omsg.chip, - throttle_reason[omsg.throttle_status]); - else if (!omsg.throttle_status) - pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip, - throttle_reason[omsg.throttle_status]); - else - return 0; + chips[i].throttle_reason = omsg.throttle_status; - for (i = 0; i < nr_chips; i++) - if (chips[i].id == omsg.chip) { - if (!omsg.throttle_status) - chips[i].restore = true; - schedule_work(&chips[i].throttle); - } + if (!omsg.throttle_status) + chips[i].restore = true; + + schedule_work(&chips[i].throttle); } return 0; } @@ -556,29 +558,41 @@ static int init_chip_info(void) unsigned int chip[256]; unsigned int cpu, i; unsigned int prev_chip_id = UINT_MAX; + cpumask_t cpu_mask; + int ret = -ENOMEM; + + core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int), + GFP_KERNEL); + if (!core_to_chip_map) + goto out; - for_each_possible_cpu(cpu) { + cpumask_copy(&cpu_mask, cpu_possible_mask); + for_each_cpu(cpu, &cpu_mask) { unsigned int id = cpu_to_chip_id(cpu); if (prev_chip_id != id) { prev_chip_id = id; chip[nr_chips++] = id; } + core_to_chip_map[cpu_core_index_of_thread(cpu)] = id; + cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu)); } - chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL); + chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); if (!chips) - return -ENOMEM; + goto free_chip_map; for (i = 0; i < nr_chips; i++) { chips[i].id = chip[i]; - chips[i].throttled = false; cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); - chips[i].restore = false; } return 0; +free_chip_map: + kfree(core_to_chip_map); +out: + return ret; } static int __init powernv_cpufreq_init(void) @@ -612,6 +626,8 @@ static void __exit powernv_cpufreq_exit(void) unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); opal_message_notifier_unregister(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); + kfree(chips); + kfree(core_to_chip_map); cpufreq_unregister_driver(&powernv_cpufreq_driver); } module_exit(powernv_cpufreq_exit); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 88a4215125bc..d0bf555b6bbf 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -464,29 +464,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); -/* CPUFREQ DEFAULT GOVERNOR */ -/* - * Performance governor is fallback governor if any other gov failed to auto - * load due latency restrictions - */ -#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE -extern struct cpufreq_governor cpufreq_gov_performance; -#endif -#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE) -extern struct cpufreq_governor cpufreq_gov_powersave; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE) -extern struct cpufreq_governor cpufreq_gov_userspace; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND) -extern struct cpufreq_governor cpufreq_gov_ondemand; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_ondemand) -#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE) -extern struct cpufreq_governor cpufreq_gov_conservative; -#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative) -#endif +struct cpufreq_governor *cpufreq_default_governor(void); +struct cpufreq_governor *cpufreq_fallback_governor(void); /********************************************************************* * FREQUENCY TABLE HELPERS * diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 284244ebfe8d..19e50300ce7d 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -38,6 +38,28 @@ DEFINE_EVENT(cpu, cpu_idle, TP_ARGS(state, cpu_id) ); +TRACE_EVENT(powernv_throttle, + + TP_PROTO(int chip_id, const char *reason, int pmax), + + TP_ARGS(chip_id, reason, pmax), + + TP_STRUCT__entry( + __field(int, chip_id) + __string(reason, reason) + __field(int, pmax) + ), + + TP_fast_assign( + __entry->chip_id = chip_id; + __assign_str(reason, reason); + __entry->pmax = pmax; + ), + + TP_printk("Chip %d Pmax %d %s", __entry->chip_id, + __entry->pmax, __get_str(reason)) +); + TRACE_EVENT(pstate_sample, TP_PROTO(u32 core_busy, diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index eb4220a132ec..81b87451c0ea 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -15,4 +15,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume); EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); +EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle); |