diff options
Diffstat (limited to 'drivers/idle/intel_idle.c')
-rw-r--r-- | drivers/idle/intel_idle.c | 411 |
1 files changed, 222 insertions, 189 deletions
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 7833e650789f..f4495841bf68 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -2,8 +2,9 @@ /* * intel_idle.c - native hardware idle loop for modern Intel processors * - * Copyright (c) 2013, Intel Corporation. + * Copyright (c) 2013 - 2020, Intel Corporation. * Len Brown <len.brown@intel.com> + * Rafael J. Wysocki <rafael.j.wysocki@intel.com> */ /* @@ -25,11 +26,6 @@ /* * Known limitations * - * The driver currently initializes for_each_online_cpu() upon modprobe. - * It it unaware of subsequent processors hot-added to the system. - * This means that if you boot with maxcpus=n and later online - * processors above n, those processors will use C1 only. - * * ACPI has a .suspend hack to turn off deep c-statees during suspend * to avoid complications with the lapic timer workaround. * Have not seen issues with suspend, but may need same workaround here. @@ -55,7 +51,7 @@ #include <asm/mwait.h> #include <asm/msr.h> -#define INTEL_IDLE_VERSION "0.4.1" +#define INTEL_IDLE_VERSION "0.5.1" static struct cpuidle_driver intel_idle_driver = { .name = "intel_idle", @@ -63,12 +59,14 @@ static struct cpuidle_driver intel_idle_driver = { }; /* intel_idle.max_cstate=0 disables driver */ static int max_cstate = CPUIDLE_STATE_MAX - 1; +static unsigned int disabled_states_mask; + +static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; -static unsigned int mwait_substates; +static unsigned long auto_demotion_disable_flags; +static bool disable_promotion_to_c1e; -#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF -/* Reliable LAPIC Timer States, bit 1 for C1 etc. */ -static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */ +static bool lapic_timer_always_reliable; struct idle_cpu { struct cpuidle_state *state_table; @@ -83,13 +81,10 @@ struct idle_cpu { bool use_acpi; }; -static const struct idle_cpu *icpu; -static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; -static int intel_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index); -static void intel_idle_s2idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index); -static struct cpuidle_state *cpuidle_state_table; +static const struct idle_cpu *icpu __initdata; +static struct cpuidle_state *cpuidle_state_table __initdata; + +static unsigned int mwait_substates __initdata; /* * Enable this state by default even if the ACPI _CST does not list it. @@ -102,7 +97,7 @@ static struct cpuidle_state *cpuidle_state_table; * If this flag is set, SW flushes the TLB, so even if the * HW doesn't do the flushing, this flag is safe to use. */ -#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 +#define CPUIDLE_FLAG_TLB_FLUSHED BIT(16) /* * MWAIT takes an 8-bit "hint" in EAX "suggesting" @@ -114,12 +109,87 @@ static struct cpuidle_state *cpuidle_state_table; #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) #define MWAIT2flg(eax) ((eax & 0xFF) << 24) +/** + * intel_idle - Ask the processor to enter the given idle state. + * @dev: cpuidle device of the target CPU. + * @drv: cpuidle driver (assumed to point to intel_idle_driver). + * @index: Target idle state index. + * + * Use the MWAIT instruction to notify the processor that the CPU represented by + * @dev is idle and it can try to enter the idle state corresponding to @index. + * + * If the local APIC timer is not known to be reliable in the target idle state, + * enable one-shot tick broadcasting for the target CPU before executing MWAIT. + * + * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to + * flushing user TLBs. + * + * Must be called under local_irq_disable(). + */ +static __cpuidle int intel_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + struct cpuidle_state *state = &drv->states[index]; + unsigned long eax = flg2MWAIT(state->flags); + unsigned long ecx = 1; /* break on interrupt flag */ + bool uninitialized_var(tick); + int cpu = smp_processor_id(); + + /* + * leave_mm() to avoid costly and often unnecessary wakeups + * for flushing the user TLB's associated with the active mm. + */ + if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) + leave_mm(cpu); + + if (!static_cpu_has(X86_FEATURE_ARAT) && !lapic_timer_always_reliable) { + /* + * Switch over to one-shot tick broadcast if the target C-state + * is deeper than C1. + */ + if ((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) { + tick = true; + tick_broadcast_enter(); + } else { + tick = false; + } + } + + mwait_idle_with_hints(eax, ecx); + + if (!static_cpu_has(X86_FEATURE_ARAT) && tick) + tick_broadcast_exit(); + + return index; +} + +/** + * intel_idle_s2idle - Ask the processor to enter the given idle state. + * @dev: cpuidle device of the target CPU. + * @drv: cpuidle driver (assumed to point to intel_idle_driver). + * @index: Target idle state index. + * + * Use the MWAIT instruction to notify the processor that the CPU represented by + * @dev is idle and it can try to enter the idle state corresponding to @index. + * + * Invoked as a suspend-to-idle callback routine with frozen user space, frozen + * scheduler tick and suspended scheduler clock on the target CPU. + */ +static __cpuidle void intel_idle_s2idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + unsigned long eax = flg2MWAIT(drv->states[index].flags); + unsigned long ecx = 1; /* break on interrupt flag */ + + mwait_idle_with_hints(eax, ecx); +} + /* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. * Thus C0 is a dummy. */ -static struct cpuidle_state nehalem_cstates[] = { +static struct cpuidle_state nehalem_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -156,7 +226,7 @@ static struct cpuidle_state nehalem_cstates[] = { .enter = NULL } }; -static struct cpuidle_state snb_cstates[] = { +static struct cpuidle_state snb_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -201,7 +271,7 @@ static struct cpuidle_state snb_cstates[] = { .enter = NULL } }; -static struct cpuidle_state byt_cstates[] = { +static struct cpuidle_state byt_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -246,7 +316,7 @@ static struct cpuidle_state byt_cstates[] = { .enter = NULL } }; -static struct cpuidle_state cht_cstates[] = { +static struct cpuidle_state cht_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -291,7 +361,7 @@ static struct cpuidle_state cht_cstates[] = { .enter = NULL } }; -static struct cpuidle_state ivb_cstates[] = { +static struct cpuidle_state ivb_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -336,7 +406,7 @@ static struct cpuidle_state ivb_cstates[] = { .enter = NULL } }; -static struct cpuidle_state ivt_cstates[] = { +static struct cpuidle_state ivt_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -373,7 +443,7 @@ static struct cpuidle_state ivt_cstates[] = { .enter = NULL } }; -static struct cpuidle_state ivt_cstates_4s[] = { +static struct cpuidle_state ivt_cstates_4s[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -410,7 +480,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { .enter = NULL } }; -static struct cpuidle_state ivt_cstates_8s[] = { +static struct cpuidle_state ivt_cstates_8s[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -447,7 +517,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { .enter = NULL } }; -static struct cpuidle_state hsw_cstates[] = { +static struct cpuidle_state hsw_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -515,7 +585,7 @@ static struct cpuidle_state hsw_cstates[] = { { .enter = NULL } }; -static struct cpuidle_state bdw_cstates[] = { +static struct cpuidle_state bdw_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -584,7 +654,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = NULL } }; -static struct cpuidle_state skl_cstates[] = { +static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -653,7 +723,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = NULL } }; -static struct cpuidle_state skx_cstates[] = { +static struct cpuidle_state skx_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -682,7 +752,7 @@ static struct cpuidle_state skx_cstates[] = { .enter = NULL } }; -static struct cpuidle_state atom_cstates[] = { +static struct cpuidle_state atom_cstates[] __initdata = { { .name = "C1E", .desc = "MWAIT 0x00", @@ -718,7 +788,7 @@ static struct cpuidle_state atom_cstates[] = { { .enter = NULL } }; -static struct cpuidle_state tangier_cstates[] = { +static struct cpuidle_state tangier_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -762,7 +832,7 @@ static struct cpuidle_state tangier_cstates[] = { { .enter = NULL } }; -static struct cpuidle_state avn_cstates[] = { +static struct cpuidle_state avn_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -782,7 +852,7 @@ static struct cpuidle_state avn_cstates[] = { { .enter = NULL } }; -static struct cpuidle_state knl_cstates[] = { +static struct cpuidle_state knl_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -803,7 +873,7 @@ static struct cpuidle_state knl_cstates[] = { .enter = NULL } }; -static struct cpuidle_state bxt_cstates[] = { +static struct cpuidle_state bxt_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -864,7 +934,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = NULL } }; -static struct cpuidle_state dnv_cstates[] = { +static struct cpuidle_state dnv_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", @@ -893,225 +963,164 @@ static struct cpuidle_state dnv_cstates[] = { .enter = NULL } }; -/** - * intel_idle - * @dev: cpuidle_device - * @drv: cpuidle driver - * @index: index of cpuidle state - * - * Must be called under local_irq_disable(). - */ -static __cpuidle int intel_idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) -{ - unsigned long ecx = 1; /* break on interrupt flag */ - struct cpuidle_state *state = &drv->states[index]; - unsigned long eax = flg2MWAIT(state->flags); - unsigned int cstate; - bool uninitialized_var(tick); - int cpu = smp_processor_id(); - - /* - * leave_mm() to avoid costly and often unnecessary wakeups - * for flushing the user TLB's associated with the active mm. - */ - if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED) - leave_mm(cpu); - - if (!static_cpu_has(X86_FEATURE_ARAT)) { - cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & - MWAIT_CSTATE_MASK) + 1; - tick = false; - if (!(lapic_timer_reliable_states & (1 << (cstate)))) { - tick = true; - tick_broadcast_enter(); - } - } - - mwait_idle_with_hints(eax, ecx); - - if (!static_cpu_has(X86_FEATURE_ARAT) && tick) - tick_broadcast_exit(); - - return index; -} - -/** - * intel_idle_s2idle - simplified "enter" callback routine for suspend-to-idle - * @dev: cpuidle_device - * @drv: cpuidle driver - * @index: state index - */ -static void intel_idle_s2idle(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int index) -{ - unsigned long ecx = 1; /* break on interrupt flag */ - unsigned long eax = flg2MWAIT(drv->states[index].flags); - - mwait_idle_with_hints(eax, ecx); -} - -static const struct idle_cpu idle_cpu_nehalem = { +static const struct idle_cpu idle_cpu_nehalem __initconst = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, .disable_promotion_to_c1e = true, }; -static const struct idle_cpu idle_cpu_nhx = { +static const struct idle_cpu idle_cpu_nhx __initconst = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, .disable_promotion_to_c1e = true, .use_acpi = true, }; -static const struct idle_cpu idle_cpu_atom = { +static const struct idle_cpu idle_cpu_atom __initconst = { .state_table = atom_cstates, }; -static const struct idle_cpu idle_cpu_tangier = { +static const struct idle_cpu idle_cpu_tangier __initconst = { .state_table = tangier_cstates, }; -static const struct idle_cpu idle_cpu_lincroft = { +static const struct idle_cpu idle_cpu_lincroft __initconst = { .state_table = atom_cstates, .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, }; -static const struct idle_cpu idle_cpu_snb = { +static const struct idle_cpu idle_cpu_snb __initconst = { .state_table = snb_cstates, .disable_promotion_to_c1e = true, }; -static const struct idle_cpu idle_cpu_snx = { +static const struct idle_cpu idle_cpu_snx __initconst = { .state_table = snb_cstates, .disable_promotion_to_c1e = true, .use_acpi = true, }; -static const struct idle_cpu idle_cpu_byt = { +static const struct idle_cpu idle_cpu_byt __initconst = { .state_table = byt_cstates, .disable_promotion_to_c1e = true, .byt_auto_demotion_disable_flag = true, }; -static const struct idle_cpu idle_cpu_cht = { +static const struct idle_cpu idle_cpu_cht __initconst = { .state_table = cht_cstates, .disable_promotion_to_c1e = true, .byt_auto_demotion_disable_flag = true, }; -static const struct idle_cpu idle_cpu_ivb = { +static const struct idle_cpu idle_cpu_ivb __initconst = { .state_table = ivb_cstates, .disable_promotion_to_c1e = true, }; -static const struct idle_cpu idle_cpu_ivt = { +static const struct idle_cpu idle_cpu_ivt __initconst = { .state_table = ivt_cstates, .disable_promotion_to_c1e = true, .use_acpi = true, }; -static const struct idle_cpu idle_cpu_hsw = { +static const struct idle_cpu idle_cpu_hsw __initconst = { .state_table = hsw_cstates, .disable_promotion_to_c1e = true, }; -static const struct idle_cpu idle_cpu_hsx = { +static const struct idle_cpu idle_cpu_hsx __initconst = { .state_table = hsw_cstates, .disable_promotion_to_c1e = true, .use_acpi = true, }; -static const struct idle_cpu idle_cpu_bdw = { +static const struct idle_cpu idle_cpu_bdw __initconst = { .state_table = bdw_cstates, .disable_promotion_to_c1e = true, }; -static const struct idle_cpu idle_cpu_bdx = { +static const struct idle_cpu idle_cpu_bdx __initconst = { .state_table = bdw_cstates, .disable_promotion_to_c1e = true, .use_acpi = true, }; -static const struct idle_cpu idle_cpu_skl = { +static const struct idle_cpu idle_cpu_skl __initconst = { .state_table = skl_cstates, .disable_promotion_to_c1e = true, }; -static const struct idle_cpu idle_cpu_skx = { +static const struct idle_cpu idle_cpu_skx __initconst = { .state_table = skx_cstates, .disable_promotion_to_c1e = true, .use_acpi = true, }; -static const struct idle_cpu idle_cpu_avn = { +static const struct idle_cpu idle_cpu_avn __initconst = { .state_table = avn_cstates, .disable_promotion_to_c1e = true, .use_acpi = true, }; -static const struct idle_cpu idle_cpu_knl = { +static const struct idle_cpu idle_cpu_knl __initconst = { .state_table = knl_cstates, .use_acpi = true, }; -static const struct idle_cpu idle_cpu_bxt = { +static const struct idle_cpu idle_cpu_bxt __initconst = { .state_table = bxt_cstates, .disable_promotion_to_c1e = true, }; -static const struct idle_cpu idle_cpu_dnv = { +static const struct idle_cpu idle_cpu_dnv __initconst = { .state_table = dnv_cstates, .disable_promotion_to_c1e = true, .use_acpi = true, }; static const struct x86_cpu_id intel_idle_ids[] __initconst = { - INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nhx), - INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem), - INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem), - INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem), - INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nhx), - INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nhx), - INTEL_CPU_FAM6(ATOM_BONNELL, idle_cpu_atom), - INTEL_CPU_FAM6(ATOM_BONNELL_MID, idle_cpu_lincroft), - INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nhx), - INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb), - INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snx), - INTEL_CPU_FAM6(ATOM_SALTWELL, idle_cpu_atom), - INTEL_CPU_FAM6(ATOM_SILVERMONT, idle_cpu_byt), - INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, idle_cpu_tangier), - INTEL_CPU_FAM6(ATOM_AIRMONT, idle_cpu_cht), - INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb), - INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt), - INTEL_CPU_FAM6(HASWELL, idle_cpu_hsw), - INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsx), - INTEL_CPU_FAM6(HASWELL_L, idle_cpu_hsw), - INTEL_CPU_FAM6(HASWELL_G, idle_cpu_hsw), - INTEL_CPU_FAM6(ATOM_SILVERMONT_D, idle_cpu_avn), - INTEL_CPU_FAM6(BROADWELL, idle_cpu_bdw), - INTEL_CPU_FAM6(BROADWELL_G, idle_cpu_bdw), - INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdx), - INTEL_CPU_FAM6(BROADWELL_D, idle_cpu_bdx), - INTEL_CPU_FAM6(SKYLAKE_L, idle_cpu_skl), - INTEL_CPU_FAM6(SKYLAKE, idle_cpu_skl), - INTEL_CPU_FAM6(KABYLAKE_L, idle_cpu_skl), - INTEL_CPU_FAM6(KABYLAKE, idle_cpu_skl), - INTEL_CPU_FAM6(SKYLAKE_X, idle_cpu_skx), - INTEL_CPU_FAM6(XEON_PHI_KNL, idle_cpu_knl), - INTEL_CPU_FAM6(XEON_PHI_KNM, idle_cpu_knl), - INTEL_CPU_FAM6(ATOM_GOLDMONT, idle_cpu_bxt), - INTEL_CPU_FAM6(ATOM_GOLDMONT_PLUS, idle_cpu_bxt), - INTEL_CPU_FAM6(ATOM_GOLDMONT_D, idle_cpu_dnv), - INTEL_CPU_FAM6(ATOM_TREMONT_D, idle_cpu_dnv), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), + X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), + X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), + X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &idle_cpu_skl), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &idle_cpu_skl), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv), {} }; -#define INTEL_CPU_FAM6_MWAIT \ - { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT, 0 } - static const struct x86_cpu_id intel_mwait_ids[] __initconst = { - INTEL_CPU_FAM6_MWAIT, + X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL), {} }; @@ -1131,6 +1140,10 @@ static bool no_acpi __read_mostly; module_param(no_acpi, bool, 0444); MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); +static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ +module_param_named(use_acpi, force_use_acpi, bool, 0444); +MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); + static struct acpi_processor_power acpi_state_table __initdata; /** @@ -1230,6 +1243,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) if (cx->type > ACPI_STATE_C2) state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; + if (disabled_states_mask & BIT(cstate)) + state->flags |= CPUIDLE_FLAG_OFF; + state->enter = intel_idle; state->enter_s2idle = intel_idle_s2idle; } @@ -1258,16 +1274,18 @@ static bool __init intel_idle_off_by_default(u32 mwait_hint) return true; } #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ +#define force_use_acpi (false) + static inline bool intel_idle_acpi_cst_extract(void) { return false; } static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ -/* - * ivt_idle_state_table_update(void) +/** + * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. * - * Tune IVT multi-socket targets - * Assumption: num_sockets == (max_package_num + 1) + * Tune IVT multi-socket targets. + * Assumption: num_sockets == (max_package_num + 1). */ static void __init ivt_idle_state_table_update(void) { @@ -1313,11 +1331,11 @@ static unsigned long long __init irtl_2_usec(unsigned long long irtl) return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); } -/* - * bxt_idle_state_table_update(void) +/** + * bxt_idle_state_table_update - Fix up the Broxton idle states table. * - * On BXT, we trust the IRTL to show the definitive maximum latency - * We use the same value for target_residency. + * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the + * definitive maximum latency and use the same value for target_residency. */ static void __init bxt_idle_state_table_update(void) { @@ -1360,11 +1378,11 @@ static void __init bxt_idle_state_table_update(void) } } -/* - * sklh_idle_state_table_update(void) + +/** + * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. * - * On SKL-H (model 0x5e) disable C8 and C9 if: - * C10 is enabled and SGX disabled + * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. */ static void __init sklh_idle_state_table_update(void) { @@ -1460,8 +1478,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) /* Structure copy. */ drv->states[drv->state_count] = cpuidle_state_table[cstate]; - if (icpu->use_acpi && intel_idle_off_by_default(mwait_hint) && - !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)) + if ((disabled_states_mask & BIT(drv->state_count)) || + ((icpu->use_acpi || force_use_acpi) && + intel_idle_off_by_default(mwait_hint) && + !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; drv->state_count++; @@ -1473,13 +1493,17 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) } } -/* - * intel_idle_cpuidle_driver_init() - * allocate, initialize cpuidle_states +/** + * intel_idle_cpuidle_driver_init - Create the list of available idle states. + * @drv: cpuidle driver structure to initialize. */ static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) { cpuidle_poll_state_init(drv); + + if (disabled_states_mask & BIT(0)) + drv->states[0].flags |= CPUIDLE_FLAG_OFF; + drv->state_count = 1; if (icpu) @@ -1493,7 +1517,7 @@ static void auto_demotion_disable(void) unsigned long long msr_bits; rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); - msr_bits &= ~(icpu->auto_demotion_disable_flags); + msr_bits &= ~auto_demotion_disable_flags; wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); } @@ -1506,10 +1530,12 @@ static void c1e_promotion_disable(void) wrmsrl(MSR_IA32_POWER_CTL, msr_bits); } -/* - * intel_idle_cpu_init() - * allocate, initialize, register cpuidle_devices - * @cpu: cpu/core to initialize +/** + * intel_idle_cpu_init - Register the target CPU with the cpuidle core. + * @cpu: CPU to initialize. + * + * Register a cpuidle device object for @cpu and update its MSRs in accordance + * with the processor model flags. */ static int intel_idle_cpu_init(unsigned int cpu) { @@ -1523,13 +1549,10 @@ static int intel_idle_cpu_init(unsigned int cpu) return -EIO; } - if (!icpu) - return 0; - - if (icpu->auto_demotion_disable_flags) + if (auto_demotion_disable_flags) auto_demotion_disable(); - if (icpu->disable_promotion_to_c1e) + if (disable_promotion_to_c1e) c1e_promotion_disable(); return 0; @@ -1539,7 +1562,7 @@ static int intel_idle_cpu_online(unsigned int cpu) { struct cpuidle_device *dev; - if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) + if (!lapic_timer_always_reliable) tick_broadcast_enable(); /* @@ -1607,7 +1630,9 @@ static int __init intel_idle_init(void) icpu = (const struct idle_cpu *)id->driver_data; if (icpu) { cpuidle_state_table = icpu->state_table; - if (icpu->use_acpi) + auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; + disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; + if (icpu->use_acpi || force_use_acpi) intel_idle_acpi_cst_extract(); } else if (!intel_idle_acpi_cst_extract()) { return -ENODEV; @@ -1631,15 +1656,15 @@ static int __init intel_idle_init(void) } if (boot_cpu_has(X86_FEATURE_ARAT)) /* Always Reliable APIC Timer */ - lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE; + lapic_timer_always_reliable = true; retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online", intel_idle_cpu_online, NULL); if (retval < 0) goto hp_setup_fail; - pr_debug("lapic_timer_reliable_states 0x%x\n", - lapic_timer_reliable_states); + pr_debug("Local APIC timer is reliable in %s\n", + lapic_timer_always_reliable ? "all C-states" : "C1"); return 0; @@ -1660,3 +1685,11 @@ device_initcall(intel_idle_init); * is the easiest way (currently) to continue doing that. */ module_param(max_cstate, int, 0444); +/* + * The positions of the bits that are set in this number are the indices of the + * idle states to be disabled by default (as reflected by the names of the + * corresponding idle state directories in sysfs, "state0", "state1" ... + * "state<i>" ..., where <i> is the index of the given state). + */ +module_param_named(states_off, disabled_states_mask, uint, 0444); +MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); |