diff options
-rw-r--r-- | arch/arm/mach-davinci/cpuidle.c | 84 | ||||
-rw-r--r-- | arch/x86/include/asm/mwait.h | 3 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/msr-index.h | 3 | ||||
-rw-r--r-- | drivers/acpi/processor_idle.c | 47 | ||||
-rw-r--r-- | drivers/idle/intel_idle.c | 278 | ||||
-rw-r--r-- | include/linux/cpuidle.h | 22 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 36 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 48 |
8 files changed, 271 insertions, 250 deletions
diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c index 9107691adbdb..5ac9e9384b15 100644 --- a/arch/arm/mach-davinci/cpuidle.c +++ b/arch/arm/mach-davinci/cpuidle.c @@ -25,35 +25,44 @@ #define DAVINCI_CPUIDLE_MAX_STATES 2 -struct davinci_ops { - void (*enter) (u32 flags); - void (*exit) (u32 flags); - u32 flags; -}; +static DEFINE_PER_CPU(struct cpuidle_device, davinci_cpuidle_device); +static void __iomem *ddr2_reg_base; +static bool ddr2_pdown; + +static void davinci_save_ddr_power(int enter, bool pdown) +{ + u32 val; + + val = __raw_readl(ddr2_reg_base + DDR2_SDRCR_OFFSET); + + if (enter) { + if (pdown) + val |= DDR2_SRPD_BIT; + else + val &= ~DDR2_SRPD_BIT; + val |= DDR2_LPMODEN_BIT; + } else { + val &= ~(DDR2_SRPD_BIT | DDR2_LPMODEN_BIT); + } + + __raw_writel(val, ddr2_reg_base + DDR2_SDRCR_OFFSET); +} /* Actual code that puts the SoC in different idle states */ static int davinci_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - struct davinci_ops *ops = cpuidle_get_statedata(state_usage); - - if (ops && ops->enter) - ops->enter(ops->flags); + davinci_save_ddr_power(1, ddr2_pdown); index = cpuidle_wrap_enter(dev, drv, index, arm_cpuidle_simple_enter); - if (ops && ops->exit) - ops->exit(ops->flags); + davinci_save_ddr_power(0, ddr2_pdown); return index; } -/* fields in davinci_ops.flags */ -#define DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN BIT(0) - static struct cpuidle_driver davinci_idle_driver = { .name = "cpuidle-davinci", .owner = THIS_MODULE, @@ -70,45 +79,6 @@ static struct cpuidle_driver davinci_idle_driver = { .state_count = DAVINCI_CPUIDLE_MAX_STATES, }; -static DEFINE_PER_CPU(struct cpuidle_device, davinci_cpuidle_device); -static void __iomem *ddr2_reg_base; - -static void davinci_save_ddr_power(int enter, bool pdown) -{ - u32 val; - - val = __raw_readl(ddr2_reg_base + DDR2_SDRCR_OFFSET); - - if (enter) { - if (pdown) - val |= DDR2_SRPD_BIT; - else - val &= ~DDR2_SRPD_BIT; - val |= DDR2_LPMODEN_BIT; - } else { - val &= ~(DDR2_SRPD_BIT | DDR2_LPMODEN_BIT); - } - - __raw_writel(val, ddr2_reg_base + DDR2_SDRCR_OFFSET); -} - -static void davinci_c2state_enter(u32 flags) -{ - davinci_save_ddr_power(1, !!(flags & DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN)); -} - -static void davinci_c2state_exit(u32 flags) -{ - davinci_save_ddr_power(0, !!(flags & DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN)); -} - -static struct davinci_ops davinci_states[DAVINCI_CPUIDLE_MAX_STATES] = { - [1] = { - .enter = davinci_c2state_enter, - .exit = davinci_c2state_exit, - }, -}; - static int __init davinci_cpuidle_probe(struct platform_device *pdev) { int ret; @@ -124,11 +94,7 @@ static int __init davinci_cpuidle_probe(struct platform_device *pdev) ddr2_reg_base = pdata->ddr2_ctlr_base; - if (pdata->ddr2_pdown) - davinci_states[1].flags |= DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN; - cpuidle_set_statedata(&device->states_usage[1], &davinci_states[1]); - - device->state_count = DAVINCI_CPUIDLE_MAX_STATES; + ddr2_pdown = pdata->ddr2_pdown; ret = cpuidle_register_driver(&davinci_idle_driver); if (ret) { diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index bcdff997668c..2f366d0ac6b4 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -4,7 +4,8 @@ #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 -#define MWAIT_MAX_NUM_CSTATES 8 +#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) +#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK) #define CPUID_MWAIT_LEAF 5 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 433a59fb1a74..8d013f5153bc 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -103,6 +103,8 @@ #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define MSR_IA32_POWER_CTL 0x000001fc + #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 @@ -272,6 +274,7 @@ #define MSR_IA32_PLATFORM_ID 0x00000017 #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_EBC_FREQUENCY_ID 0x0000002c +#define MSR_SMI_COUNT 0x00000034 #define MSR_IA32_FEATURE_CONTROL 0x0000003a #define MSR_IA32_TSC_ADJUST 0x0000003b diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index ed9a1cc690be..8b433cb08a33 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -28,19 +28,12 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -#include <linux/kernel.h> #include <linux/module.h> -#include <linux/init.h> -#include <linux/cpufreq.h> -#include <linux/slab.h> #include <linux/acpi.h> #include <linux/dmi.h> -#include <linux/moduleparam.h> -#include <linux/sched.h> /* need_resched() */ -#include <linux/pm_qos.h> +#include <linux/sched.h> /* need_resched() */ #include <linux/clockchips.h> #include <linux/cpuidle.h> -#include <linux/irqflags.h> /* * Include the apic definitions for x86 to have the APIC timer related defines @@ -52,22 +45,14 @@ #include <asm/apic.h> #endif -#include <asm/io.h> -#include <asm/uaccess.h> - #include <acpi/acpi_bus.h> #include <acpi/processor.h> -#include <asm/processor.h> #define PREFIX "ACPI: " #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_idle"); -#define PM_TIMER_TICK_NS (1000000000ULL/PM_TIMER_FREQUENCY) -#define C2_OVERHEAD 1 /* 1us */ -#define C3_OVERHEAD 1 /* 1us */ -#define PM_TIMER_TICKS_TO_US(p) (((p) * 1000)/(PM_TIMER_FREQUENCY/1000)) static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER; module_param(max_cstate, uint, 0000); @@ -81,6 +66,8 @@ module_param(latency_factor, uint, 0644); static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device); +static struct acpi_processor_cx *acpi_cstate[CPUIDLE_STATE_MAX]; + static int disabled_by_idle_boot_param(void) { return boot_option_idle_override == IDLE_POLL || @@ -736,8 +723,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + struct acpi_processor_cx *cx = acpi_cstate[index]; pr = __this_cpu_read(processors); @@ -760,8 +746,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, */ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) { - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + struct acpi_processor_cx *cx = acpi_cstate[index]; ACPI_FLUSH_CPU_CACHE(); @@ -791,8 +776,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + struct acpi_processor_cx *cx = acpi_cstate[index]; pr = __this_cpu_read(processors); @@ -850,8 +834,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); + struct acpi_processor_cx *cx = acpi_cstate[index]; pr = __this_cpu_read(processors); @@ -943,13 +926,13 @@ struct cpuidle_driver acpi_idle_driver = { * device i.e. per-cpu data * * @pr: the ACPI processor + * @dev : the cpuidle device */ -static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr) +static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr, + struct cpuidle_device *dev) { int i, count = CPUIDLE_DRIVER_STATE_START; struct acpi_processor_cx *cx; - struct cpuidle_state_usage *state_usage; - struct cpuidle_device *dev = per_cpu(acpi_cpuidle_device, pr->id); if (!pr->flags.power_setup_done) return -EINVAL; @@ -968,7 +951,6 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr) for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) { cx = &pr->power.states[i]; - state_usage = &dev->states_usage[count]; if (!cx->valid) continue; @@ -979,8 +961,7 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr) !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) continue; #endif - - cpuidle_set_statedata(state_usage, cx); + acpi_cstate[count] = cx; count++; if (count == CPUIDLE_STATE_MAX) @@ -1104,7 +1085,7 @@ int acpi_processor_hotplug(struct acpi_processor *pr) cpuidle_disable_device(dev); acpi_processor_get_power_info(pr); if (pr->flags.power) { - acpi_processor_setup_cpuidle_cx(pr); + acpi_processor_setup_cpuidle_cx(pr, dev); ret = cpuidle_enable_device(dev); } cpuidle_resume_and_unlock(); @@ -1162,8 +1143,8 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) continue; acpi_processor_get_power_info(_pr); if (_pr->flags.power) { - acpi_processor_setup_cpuidle_cx(_pr); dev = per_cpu(acpi_cpuidle_device, cpu); + acpi_processor_setup_cpuidle_cx(_pr, dev); cpuidle_enable_device(dev); } } @@ -1232,7 +1213,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr) return -ENOMEM; per_cpu(acpi_cpuidle_device, pr->id) = dev; - acpi_processor_setup_cpuidle_cx(pr); + acpi_processor_setup_cpuidle_cx(pr, dev); /* Register per-cpu cpuidle_device. Cpuidle driver * must already be registered before registering device diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 2df9414a72f7..5d6675013864 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -74,7 +74,7 @@ static struct cpuidle_driver intel_idle_driver = { .en_core_tk_irqen = 1, }; /* intel_idle.max_cstate=0 disables driver */ -static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; +static int max_cstate = CPUIDLE_STATE_MAX - 1; static unsigned int mwait_substates; @@ -90,6 +90,7 @@ struct idle_cpu { * Indicate which enable bits to clear here. */ unsigned long auto_demotion_disable_flags; + bool disable_promotion_to_c1e; }; static const struct idle_cpu *icpu; @@ -109,162 +110,206 @@ static struct cpuidle_state *cpuidle_state_table; #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 /* + * MWAIT takes an 8-bit "hint" in EAX "suggesting" + * the C-state (top nibble) and sub-state (bottom nibble) + * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. + * + * We store the hint at the top of our "flags" for each state. + */ +#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) +#define MWAIT2flg(eax) ((eax & 0xFF) << 24) + +/* * States are indexed by the cstate number, * which is also the index into the MWAIT hint array. * Thus C0 is a dummy. */ -static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ +static struct cpuidle_state nehalem_cstates[CPUIDLE_STATE_MAX] = { + { .name = "C1-NHM", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 3, .target_residency = 6, .enter = &intel_idle }, - { /* MWAIT C2 */ + { + .name = "C1E-NHM", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, + { .name = "C3-NHM", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .target_residency = 80, .enter = &intel_idle }, - { /* MWAIT C3 */ + { .name = "C6-NHM", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .target_residency = 800, .enter = &intel_idle }, + { + .enter = NULL } }; -static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ +static struct cpuidle_state snb_cstates[CPUIDLE_STATE_MAX] = { + { .name = "C1-SNB", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 1, - .target_residency = 1, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 2, + .target_residency = 2, .enter = &intel_idle }, - { /* MWAIT C2 */ + { + .name = "C1E-SNB", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, + { .name = "C3-SNB", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 211, .enter = &intel_idle }, - { /* MWAIT C3 */ + { .name = "C6-SNB", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, .target_residency = 345, .enter = &intel_idle }, - { /* MWAIT C4 */ + { .name = "C7-SNB", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, .target_residency = 345, .enter = &intel_idle }, + { + .enter = NULL } }; -static struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ +static struct cpuidle_state ivb_cstates[CPUIDLE_STATE_MAX] = { + { .name = "C1-IVB", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 1, .target_residency = 1, .enter = &intel_idle }, - { /* MWAIT C2 */ + { + .name = "C1E-IVB", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, + { .name = "C3-IVB", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 156, .enter = &intel_idle }, - { /* MWAIT C3 */ + { .name = "C6-IVB", .desc = "MWAIT 0x20", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 300, .enter = &intel_idle }, - { /* MWAIT C4 */ + { .name = "C7-IVB", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 87, .target_residency = 300, .enter = &intel_idle }, + { + .enter = NULL } }; -static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { - { /* MWAIT C0 */ }, - { /* MWAIT C1 */ - .name = "C1-ATM", +static struct cpuidle_state hsw_cstates[CPUIDLE_STATE_MAX] = { + { + .name = "C1-HSW", .desc = "MWAIT 0x00", - .flags = CPUIDLE_FLAG_TIME_VALID, - .exit_latency = 1, - .target_residency = 4, + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle }, + { + .name = "C1E-HSW", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle }, + { + .name = "C3-HSW", + .desc = "MWAIT 0x10", + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 33, + .target_residency = 100, + .enter = &intel_idle }, + { + .name = "C6-HSW", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 133, + .target_residency = 400, + .enter = &intel_idle }, + { + .name = "C7s-HSW", + .desc = "MWAIT 0x32", + .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 166, + .target_residency = 500, + .enter = &intel_idle }, + { + .enter = NULL } +}; + +static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = { + { + .name = "C1E-ATM", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 10, + .target_residency = 20, .enter = &intel_idle }, - { /* MWAIT C2 */ + { .name = "C2-ATM", .desc = "MWAIT 0x10", - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID, .exit_latency = 20, .target_residency = 80, .enter = &intel_idle }, - { /* MWAIT C3 */ }, - { /* MWAIT C4 */ + { .name = "C4-ATM", .desc = "MWAIT 0x30", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .target_residency = 400, .enter = &intel_idle }, - { /* MWAIT C5 */ }, - { /* MWAIT C6 */ + { .name = "C6-ATM", .desc = "MWAIT 0x52", - .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 560, .enter = &intel_idle }, + { + .enter = NULL } }; -static long get_driver_data(int cstate) -{ - int driver_data; - switch (cstate) { - - case 1: /* MWAIT C1 */ - driver_data = 0x00; - break; - case 2: /* MWAIT C2 */ - driver_data = 0x10; - break; - case 3: /* MWAIT C3 */ - driver_data = 0x20; - break; - case 4: /* MWAIT C4 */ - driver_data = 0x30; - break; - case 5: /* MWAIT C5 */ - driver_data = 0x40; - break; - case 6: /* MWAIT C6 */ - driver_data = 0x52; - break; - default: - driver_data = 0x00; - } - return driver_data; -} - /** * intel_idle * @dev: cpuidle_device @@ -278,8 +323,7 @@ static int intel_idle(struct cpuidle_device *dev, { unsigned long ecx = 1; /* break on interrupt flag */ struct cpuidle_state *state = &drv->states[index]; - struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; - unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage); + unsigned long eax = flg2MWAIT(state->flags); unsigned int cstate; int cpu = smp_processor_id(); @@ -362,10 +406,19 @@ static void auto_demotion_disable(void *dummy) msr_bits &= ~(icpu->auto_demotion_disable_flags); wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); } +static void c1e_promotion_disable(void *dummy) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + msr_bits &= ~0x2; + wrmsrl(MSR_IA32_POWER_CTL, msr_bits); +} static const struct idle_cpu idle_cpu_nehalem = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, + .disable_promotion_to_c1e = true, }; static const struct idle_cpu idle_cpu_atom = { @@ -379,10 +432,17 @@ static const struct idle_cpu idle_cpu_lincroft = { static const struct idle_cpu idle_cpu_snb = { .state_table = snb_cstates, + .disable_promotion_to_c1e = true, }; static const struct idle_cpu idle_cpu_ivb = { .state_table = ivb_cstates, + .disable_promotion_to_c1e = true, +}; + +static const struct idle_cpu idle_cpu_hsw = { + .state_table = hsw_cstates, + .disable_promotion_to_c1e = true, }; #define ICPU(model, cpu) \ @@ -402,6 +462,9 @@ static const struct x86_cpu_id intel_idle_ids[] = { ICPU(0x2d, idle_cpu_snb), ICPU(0x3a, idle_cpu_ivb), ICPU(0x3e, idle_cpu_ivb), + ICPU(0x3c, idle_cpu_hsw), + ICPU(0x3f, idle_cpu_hsw), + ICPU(0x45, idle_cpu_hsw), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); @@ -484,32 +547,31 @@ static int intel_idle_cpuidle_driver_init(void) drv->state_count = 1; - for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { - int num_substates; + for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { + int num_substates, mwait_hint, mwait_cstate, mwait_substate; - if (cstate > max_cstate) { + if (cpuidle_state_table[cstate].enter == NULL) + break; + + if (cstate + 1 > max_cstate) { printk(PREFIX "max_cstate %d reached\n", max_cstate); break; } + mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); + mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); + mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint); + /* does the state exist in CPUID.MWAIT? */ - num_substates = (mwait_substates >> ((cstate) * 4)) + num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) & MWAIT_SUBSTATE_MASK; - if (num_substates == 0) - continue; - /* is the state not enabled? */ - if (cpuidle_state_table[cstate].enter == NULL) { - /* does the driver not know about the state? */ - if (*cpuidle_state_table[cstate].name == '\0') - pr_debug(PREFIX "unaware of model 0x%x" - " MWAIT %d please" - " contact lenb@kernel.org\n", - boot_cpu_data.x86_model, cstate); + + /* if sub-state in table is not enumerated by CPUID */ + if ((mwait_substate + 1) > num_substates) continue; - } - if ((cstate > 2) && + if (((mwait_cstate + 1) > 2) && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halts in idle" " states deeper than C2"); @@ -523,6 +585,9 @@ static int intel_idle_cpuidle_driver_init(void) if (icpu->auto_demotion_disable_flags) on_each_cpu(auto_demotion_disable, NULL, 1); + if (icpu->disable_promotion_to_c1e) /* each-cpu is redundant */ + on_each_cpu(c1e_promotion_disable, NULL, 1); + return 0; } @@ -541,25 +606,28 @@ static int intel_idle_cpu_init(int cpu) dev->state_count = 1; - for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { - int num_substates; + for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { + int num_substates, mwait_hint, mwait_cstate, mwait_substate; + + if (cpuidle_state_table[cstate].enter == NULL) + continue; - if (cstate > max_cstate) { + if (cstate + 1 > max_cstate) { printk(PREFIX "max_cstate %d reached\n", max_cstate); break; } + mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); + mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); + mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint); + /* does the state exist in CPUID.MWAIT? */ - num_substates = (mwait_substates >> ((cstate) * 4)) - & MWAIT_SUBSTATE_MASK; - if (num_substates == 0) - continue; - /* is the state not enabled? */ - if (cpuidle_state_table[cstate].enter == NULL) - continue; + num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) + & MWAIT_SUBSTATE_MASK; - dev->states_usage[dev->state_count].driver_data = - (void *)get_driver_data(cstate); + /* if sub-state in table is not enumerated by CPUID */ + if ((mwait_substate + 1) > num_substates) + continue; dev->state_count += 1; } diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 24cd1037b6d6..480c14dc1ddd 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -32,8 +32,6 @@ struct cpuidle_driver; ****************************/ struct cpuidle_state_usage { - void *driver_data; - unsigned long long disable; unsigned long long usage; unsigned long long time; /* in US */ @@ -62,26 +60,6 @@ struct cpuidle_state { #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) -/** - * cpuidle_get_statedata - retrieves private driver state data - * @st_usage: the state usage statistics - */ -static inline void *cpuidle_get_statedata(struct cpuidle_state_usage *st_usage) -{ - return st_usage->driver_data; -} - -/** - * cpuidle_set_statedata - stores private driver state data - * @st_usage: the state usage statistics - * @data: the private data - */ -static inline void -cpuidle_set_statedata(struct cpuidle_state_usage *st_usage, void *data) -{ - st_usage->driver_data = data; -} - struct cpuidle_device { unsigned int registered:1; unsigned int enabled:1; diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 0d7dc2cfefb5..b4ddb748356c 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -31,8 +31,6 @@ The \fB-S\fP option limits output to a 1-line System Summary for each interval. .PP The \fB-v\fP option increases verbosity. .PP -The \fB-s\fP option prints the SMI counter, equivalent to "-c 0x34" -.PP The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter. .PP The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter. @@ -186,26 +184,24 @@ This is a weighted average, where the weight is %c0. ie. it is the total number un-halted cycles elapsed per time divided by the number of CPUs. .SH SMI COUNTING EXAMPLE On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter. -Using the -m option, you can display how many SMIs have fired since reset, or if there -are SMIs during the measurement interval, you can display the delta using the -d option. +This counter is shown by default under the "SMI" column. .nf -[root@x980 ~]# turbostat -m 0x34 -cor CPU %c0 GHz TSC MSR 0x034 %c1 %c3 %c6 %pc3 %pc6 - 1.41 1.82 3.38 0x00000000 8.92 37.82 51.85 17.37 0.55 - 0 0 3.73 2.03 3.38 0x00000055 1.72 48.25 46.31 17.38 0.55 - 0 6 0.14 1.63 3.38 0x00000056 5.30 - 1 2 2.51 1.80 3.38 0x00000056 15.65 29.33 52.52 - 1 8 0.10 1.65 3.38 0x00000056 18.05 - 2 4 1.16 1.68 3.38 0x00000056 5.87 24.47 68.50 - 2 10 0.10 1.63 3.38 0x00000056 6.93 - 8 1 3.84 1.91 3.38 0x00000056 1.36 50.65 44.16 - 8 7 0.08 1.64 3.38 0x00000056 5.12 - 9 3 1.82 1.73 3.38 0x00000056 7.59 24.21 66.38 - 9 9 0.09 1.68 3.38 0x00000056 9.32 - 10 5 1.66 1.65 3.38 0x00000056 15.10 50.00 33.23 - 10 11 1.72 1.65 3.38 0x00000056 15.05 +[root@x980 ~]# turbostat +cor CPU %c0 GHz TSC SMI %c1 %c3 %c6 CTMP %pc3 %pc6 + 0.11 1.91 3.38 0 1.84 0.26 97.79 29 0.82 83.87 + 0 0 0.40 1.63 3.38 0 10.27 0.12 89.20 20 0.82 83.88 + 0 6 0.06 1.63 3.38 0 10.61 + 1 2 0.37 2.63 3.38 0 0.02 0.10 99.51 22 + 1 8 0.01 1.62 3.38 0 0.39 + 2 4 0.07 1.62 3.38 0 0.04 0.07 99.82 23 + 2 10 0.02 1.62 3.38 0 0.09 + 8 1 0.23 1.64 3.38 0 0.10 1.07 98.60 24 + 8 7 0.02 1.64 3.38 0 0.31 + 9 3 0.03 1.62 3.38 0 0.03 0.05 99.89 29 + 9 9 0.02 1.62 3.38 0 0.05 + 10 5 0.07 1.62 3.38 0 0.08 0.12 99.73 27 + 10 11 0.03 1.62 3.38 0 0.13 ^C -[root@x980 ~]# .fi .SH NOTES diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ce6d46038f74..6f3214ed4444 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -58,6 +58,7 @@ unsigned int extra_msr_offset32; unsigned int extra_msr_offset64; unsigned int extra_delta_offset32; unsigned int extra_delta_offset64; +int do_smi; double bclk; unsigned int show_pkg; unsigned int show_core; @@ -99,6 +100,7 @@ struct thread_data { unsigned long long extra_delta64; unsigned long long extra_msr32; unsigned long long extra_delta32; + unsigned int smi_count; unsigned int cpu_id; unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 @@ -248,6 +250,8 @@ void print_header(void) if (has_aperf) outp += sprintf(outp, " GHz"); outp += sprintf(outp, " TSC"); + if (do_smi) + outp += sprintf(outp, " SMI"); if (extra_delta_offset32) outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); if (extra_delta_offset64) @@ -314,6 +318,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, extra_msr_offset32, t->extra_msr32); fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset64, t->extra_msr64); + if (do_smi) + fprintf(stderr, "SMI: %08X\n", t->smi_count); } if (c) { @@ -352,6 +358,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, * RAM_W: %5.2 * GHz: "GHz" 3 columns %3.2 * TSC: "TSC" 3 columns %3.2 + * SMI: "SMI" 4 columns %4d * percentage " %pc3" %6.2 * Perf Status percentage: %5.2 * "CTMP" 4 columns %4d @@ -431,6 +438,10 @@ int format_counters(struct thread_data *t, struct core_data *c, /* TSC */ outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); + /* SMI */ + if (do_smi) + outp += sprintf(outp, "%4d", t->smi_count); + /* delta */ if (extra_delta_offset32) outp += sprintf(outp, " %11llu", t->extra_delta32); @@ -645,6 +656,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, */ old->extra_msr32 = new->extra_msr32; old->extra_msr64 = new->extra_msr64; + + if (do_smi) + old->smi_count = new->smi_count - old->smi_count; } int delta_cpu(struct thread_data *t, struct core_data *c, @@ -672,6 +686,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->mperf = 0; t->c1 = 0; + t->smi_count = 0; t->extra_delta32 = 0; t->extra_delta64 = 0; @@ -802,6 +817,11 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -4; } + if (do_smi) { + if (get_msr(cpu, MSR_SMI_COUNT, &msr)) + return -5; + t->smi_count = msr & 0xFFFFFFFF; + } if (extra_delta_offset32) { if (get_msr(cpu, extra_delta_offset32, &msr)) return -5; @@ -908,8 +928,7 @@ void print_verbose_header(void) get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); - if (verbose) - fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); + fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", @@ -919,13 +938,16 @@ void print_verbose_header(void) fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", ratio, bclk, ratio * bclk); + get_msr(0, MSR_IA32_POWER_CTL, &msr); + fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n", + msr, msr & 0x2 ? "EN" : "DIS"); + if (!do_ivt_turbo_ratio_limit) goto print_nhm_turbo_ratio_limits; get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); - if (verbose) - fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1016,8 +1038,7 @@ print_nhm_turbo_ratio_limits: get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); - if (verbose) - fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1397,6 +1418,9 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x2D: /* SNB Xeon */ case 0x3A: /* IVB */ case 0x3E: /* IVB Xeon */ + case 0x3C: /* HSW */ + case 0x3F: /* HSW */ + case 0x45: /* HSW */ return 1; case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ @@ -1488,6 +1512,9 @@ void rapl_probe(unsigned int family, unsigned int model) switch (model) { case 0x2A: case 0x3A: + case 0x3C: /* HSW */ + case 0x3F: /* HSW */ + case 0x45: /* HSW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; break; case 0x2D: @@ -1724,6 +1751,9 @@ int is_snb(unsigned int family, unsigned int model) case 0x2D: case 0x3A: /* IVB */ case 0x3E: /* IVB Xeon */ + case 0x3C: /* HSW */ + case 0x3F: /* HSW */ + case 0x45: /* HSW */ return 1; } return 0; @@ -1883,6 +1913,7 @@ void check_cpuid() do_nehalem_platform_info = genuine_intel && has_invariant_tsc; do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ + do_smi = do_nhm_cstates; do_snb_cstates = is_snb(family, model); bclk = discover_bclk(family, model); @@ -2219,9 +2250,6 @@ void cmdline(int argc, char **argv) case 'c': sscanf(optarg, "%x", &extra_delta_offset32); break; - case 's': - extra_delta_offset32 = 0x34; /* SMI counter */ - break; case 'C': sscanf(optarg, "%x", &extra_delta_offset64); break; @@ -2248,7 +2276,7 @@ int main(int argc, char **argv) cmdline(argc, argv); if (verbose) - fprintf(stderr, "turbostat v3.0 November 23, 2012" + fprintf(stderr, "turbostat v3.2 February 11, 2013" " - Len Brown <lenb@kernel.org>\n"); turbostat_init(); |