diff options
64 files changed, 709 insertions, 239 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a0c5c5f4fce6..5e22c3f1f8bd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -315,8 +315,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. CPU-intensive style benchmark, and it can vary highly in a microbenchmark depending on workload and compiler. - 1: only for 32-bit processes - 2: only for 64-bit processes + 32: only for 32-bit processes + 64: only for 64-bit processes on: enable for both 32- and 64-bit processes off: disable for both 32- and 64-bit processes diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index c475379199b1..8e9c98edc068 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -353,15 +353,15 @@ validate_group(struct perf_event *event) fake_pmu.used_mask = fake_used_mask; if (!validate_event(&fake_pmu, leader)) - return -ENOSPC; + return -EINVAL; list_for_each_entry(sibling, &leader->sibling_list, group_entry) { if (!validate_event(&fake_pmu, sibling)) - return -ENOSPC; + return -EINVAL; } if (!validate_event(&fake_pmu, event)) - return -ENOSPC; + return -EINVAL; return 0; } diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 4f2971bcf8e5..315fc0b250f8 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -623,7 +623,7 @@ static int mipspmu_event_init(struct perf_event *event) if (!atomic_inc_not_zero(&active_events)) { if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) { atomic_dec(&active_events); - return -ENOSPC; + return -EINVAL; } mutex_lock(&pmu_reserve_mutex); @@ -732,15 +732,15 @@ static int validate_group(struct perf_event *event) memset(&fake_cpuc, 0, sizeof(fake_cpuc)); if (!validate_event(&fake_cpuc, leader)) - return -ENOSPC; + return -EINVAL; list_for_each_entry(sibling, &leader->sibling_list, group_entry) { if (!validate_event(&fake_cpuc, sibling)) - return -ENOSPC; + return -EINVAL; } if (!validate_event(&fake_cpuc, event)) - return -ENOSPC; + return -EINVAL; return 0; } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 524d23b8610c..4f289ff0b7fe 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -599,10 +599,10 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) skey = page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Clear page changed & referenced bit in the storage key */ - if (bits) { - skey ^= bits; - page_set_storage_key(address, skey, 1); - } + if (bits & _PAGE_CHANGED) + page_set_storage_key(address, skey ^ bits, 1); + else if (bits) + page_reset_referenced(address); /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ /* Get host changed & referenced bits from pgste */ diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 450931a45b68..573bc29551ef 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -296,13 +296,6 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; - if (addr == (addr_t) &dummy->regs.psw.addr) - /* - * The debugger changed the instruction address, - * reset system call restart, see signal.c:do_signal - */ - task_thread_info(child)->system_call = 0; - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -614,11 +607,6 @@ static int __poke_user_compat(struct task_struct *child, /* Transfer 31 bit amode bit to psw mask. */ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); - /* - * The debugger changed the instruction address, - * reset system call restart, see signal.c:do_signal - */ - task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; @@ -905,6 +893,14 @@ static int s390_last_break_get(struct task_struct *target, return 0; } +static int s390_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + #endif static int s390_system_call_get(struct task_struct *target, @@ -951,6 +947,7 @@ static const struct user_regset s390_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_last_break_get, + .set = s390_last_break_set, }, #endif [REGSET_SYSTEM_CALL] = { @@ -1116,6 +1113,14 @@ static int s390_compat_last_break_get(struct task_struct *target, return 0; } +static int s390_compat_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + static const struct user_regset s390_compat_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -1139,6 +1144,7 @@ static const struct user_regset s390_compat_regsets[] = { .size = sizeof(long), .align = sizeof(long), .get = s390_compat_last_break_get, + .set = s390_compat_last_break_set, }, [REGSET_SYSTEM_CALL] = { .core_note_type = NT_S390_SYSTEM_CALL, diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e58a462949b1..e54c4ff8abaa 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -579,7 +579,7 @@ static unsigned long __init find_crash_base(unsigned long crash_size, *msg = "first memory chunk must be at least crashkernel size"; return 0; } - if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE)) + if (OLDMEM_BASE && crash_size == OLDMEM_SIZE) return OLDMEM_BASE; for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 05a85bc14c98..7f6f9f354545 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -460,9 +460,9 @@ void do_signal(struct pt_regs *regs) regs->svc_code >> 16); break; } - /* No longer in a system call */ - clear_thread_flag(TIF_SYSCALL); } + /* No longer in a system call */ + clear_thread_flag(TIF_SYSCALL); if ((is_compat_task() ? handle_signal32(signr, &ka, &info, oldset, regs) : @@ -486,6 +486,7 @@ void do_signal(struct pt_regs *regs) } /* No handlers present - check for system call restart */ + clear_thread_flag(TIF_SYSCALL); if (current_thread_info()->system_call) { regs->svc_code = current_thread_info()->system_call; switch (regs->gprs[2]) { @@ -500,9 +501,6 @@ void do_signal(struct pt_regs *regs) regs->gprs[2] = regs->orig_gpr2; set_thread_flag(TIF_SYSCALL); break; - default: - clear_thread_flag(TIF_SYSCALL); - break; } } diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 4420993acc47..925b605eb5c6 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -3,11 +3,15 @@ #include <linux/notifier.h> -#define IPCMSG_VRTC 0xFA /* Set vRTC device */ - -/* Command id associated with message IPCMSG_VRTC */ -#define IPC_CMD_VRTC_SETTIME 1 /* Set time */ -#define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ +#define IPCMSG_WARM_RESET 0xF0 +#define IPCMSG_COLD_RESET 0xF1 +#define IPCMSG_SOFT_RESET 0xF2 +#define IPCMSG_COLD_BOOT 0xF3 + +#define IPCMSG_VRTC 0xFA /* Set vRTC device */ + /* Command id associated with message IPCMSG_VRTC */ + #define IPC_CMD_VRTC_SETTIME 1 /* Set time */ + #define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ /* Read single register */ int intel_scu_ipc_ioread8(u16 addr, u8 *data); diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h index e6283129c821..93f79094c224 100644 --- a/arch/x86/include/asm/mrst.h +++ b/arch/x86/include/asm/mrst.h @@ -31,11 +31,20 @@ enum mrst_cpu_type { }; extern enum mrst_cpu_type __mrst_cpu_chip; + +#ifdef CONFIG_X86_INTEL_MID + static inline enum mrst_cpu_type mrst_identify_cpu(void) { return __mrst_cpu_chip; } +#else /* !CONFIG_X86_INTEL_MID */ + +#define mrst_identify_cpu() (0) + +#endif /* !CONFIG_X86_INTEL_MID */ + enum mrst_timer_options { MRST_TIMER_DEFAULT, MRST_TIMER_APBT_ONLY, diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 084ef95274cd..95203d40ffdd 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -169,7 +169,14 @@ static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high) return native_write_msr_safe(msr, low, high); } -/* rdmsr with exception handling */ +/* + * rdmsr with exception handling. + * + * Please note that the exception handling works only after we've + * switched to the "smart" #GP handler in trap_init() which knows about + * exception tables - using this macro earlier than that causes machine + * hangs on boxes which do not implement the @msr in the first argument. + */ #define rdmsr_safe(msr, p1, p2) \ ({ \ int __err; \ diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index fa7b9176b76c..431793e5d484 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -32,6 +32,22 @@ extern int no_timer_check; * (mathieu.desnoyers@polymtl.ca) * * -johnstul@us.ibm.com "math is hard, lets go shopping!" + * + * In: + * + * ns = cycles * cyc2ns_scale / SC + * + * Although we may still have enough bits to store the value of ns, + * in some cases, we may not have enough bits to store cycles * cyc2ns_scale, + * leading to an incorrect result. + * + * To avoid this, we can decompose 'cycles' into quotient and remainder + * of division by SC. Then, + * + * ns = (quot * SC + rem) * cyc2ns_scale / SC + * = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC + * + * - sqazi@google.com */ DECLARE_PER_CPU(unsigned long, cyc2ns); @@ -41,9 +57,14 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); static inline unsigned long long __cycles_2_ns(unsigned long long cyc) { + unsigned long long quot; + unsigned long long rem; int cpu = smp_processor_id(); unsigned long long ns = per_cpu(cyc2ns_offset, cpu); - ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR; + quot = (cyc >> CYC2NS_SCALE_FACTOR); + rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1); + ns += quot * per_cpu(cyc2ns, cpu) + + ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR); return ns; } diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 10474fb1185d..cf1d73643f60 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -57,6 +57,7 @@ #define UV1_HUB_PART_NUMBER 0x88a5 #define UV2_HUB_PART_NUMBER 0x8eb8 +#define UV2_HUB_PART_NUMBER_X 0x1111 /* Compat: if this #define is present, UV headers support UV2 */ #define UV2_HUB_IS_SUPPORTED 1 diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 62ae3001ae02..9d59bbacd4e3 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -93,6 +93,8 @@ static int __init early_get_pnodeid(void) if (node_id.s.part_number == UV2_HUB_PART_NUMBER) uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; + if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X) + uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; uv_hub_info->hub_revision = uv_min_hub_revision_id; pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index a71efcdbb092..97b26356e9ee 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -547,6 +547,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, if (tmp != mask_lo) { printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND); mask_lo = tmp; } } @@ -693,6 +694,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) /* Disable MTRRs, and set the default type to uncached */ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); + wbinvd(); } static void post_set(void) __releases(set_atomicity_lock) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 640891014b2a..2bda212a0010 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -312,12 +312,8 @@ int x86_setup_perfctr(struct perf_event *event) return -EOPNOTSUPP; } - /* - * Do not allow config1 (extended registers) to propagate, - * there's no sane user-space generalization yet: - */ if (attr->type == PERF_TYPE_RAW) - return 0; + return x86_pmu_extra_regs(event->attr.config, event); if (attr->type == PERF_TYPE_HW_CACHE) return set_ext_hw_attr(hwc, event); @@ -588,7 +584,7 @@ done: x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); } } - return num ? -ENOSPC : 0; + return num ? -EINVAL : 0; } /* @@ -607,7 +603,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, if (is_x86_event(leader)) { if (n >= max_count) - return -ENOSPC; + return -EINVAL; cpuc->event_list[n] = leader; n++; } @@ -620,7 +616,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, continue; if (n >= max_count) - return -ENOSPC; + return -EINVAL; cpuc->event_list[n] = event; n++; @@ -1316,7 +1312,7 @@ static int validate_event(struct perf_event *event) c = x86_pmu.get_event_constraints(fake_cpuc, event); if (!c || !c->weight) - ret = -ENOSPC; + ret = -EINVAL; if (x86_pmu.put_event_constraints) x86_pmu.put_event_constraints(fake_cpuc, event); @@ -1341,7 +1337,7 @@ static int validate_group(struct perf_event *event) { struct perf_event *leader = event->group_leader; struct cpu_hw_events *fake_cpuc; - int ret = -ENOSPC, n; + int ret = -EINVAL, n; fake_cpuc = allocate_fake_cpuc(); if (IS_ERR(fake_cpuc)) diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index ab6343d21825..3b8a2d30d14e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -199,8 +199,7 @@ static int force_ibs_eilvt_setup(void) goto out; } - pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); - pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); + pr_info("IBS: LVT offset %d assigned\n", offset); return 0; out: @@ -265,19 +264,23 @@ perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *h static __init int amd_ibs_init(void) { u32 caps; - int ret; + int ret = -EINVAL; caps = __get_ibs_caps(); if (!caps) return -ENODEV; /* ibs not supported by the cpu */ - if (!ibs_eilvt_valid()) { - ret = force_ibs_eilvt_setup(); - if (ret) { - pr_err("Failed to setup IBS, %d\n", ret); - return ret; - } - } + /* + * Force LVT offset assignment for family 10h: The offsets are + * not assigned by the BIOS for this family, so the OS is + * responsible for doing it. If the OS assignment fails, fall + * back to BIOS settings and try to setup this. + */ + if (boot_cpu_data.x86 == 0x10) + force_ibs_eilvt_setup(); + + if (!ibs_eilvt_valid()) + goto out; get_online_cpus(); ibs_caps = caps; @@ -287,7 +290,11 @@ static __init int amd_ibs_init(void) smp_call_function(setup_APIC_ibs, NULL, 1); put_online_cpus(); - return perf_event_ibs_init(); + ret = perf_event_ibs_init(); +out: + if (ret) + pr_err("Failed to setup IBS, %d\n", ret); + return ret; } /* Since we need the pci subsystem to init ibs we can't do this earlier: */ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2be5ebe99872..8d601b18bf9f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1545,6 +1545,13 @@ static void intel_clovertown_quirks(void) x86_pmu.pebs_constraints = NULL; } +static void intel_sandybridge_quirks(void) +{ + printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); + x86_pmu.pebs = 0; + x86_pmu.pebs_constraints = NULL; +} + __init int intel_pmu_init(void) { union cpuid10_edx edx; @@ -1694,6 +1701,7 @@ __init int intel_pmu_init(void) break; case 42: /* SandyBridge */ + x86_pmu.quirks = intel_sandybridge_quirks; case 45: /* SandyBridge, "Romely-EP" */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index c0d238f49db8..73da6b64f5b7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -493,6 +493,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) unsigned long from = cpuc->lbr_entries[0].from; unsigned long old_to, to = cpuc->lbr_entries[0].to; unsigned long ip = regs->ip; + int is_64bit = 0; /* * We don't need to fixup if the PEBS assist is fault like @@ -544,7 +545,10 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) } else kaddr = (void *)to; - kernel_insn_init(&insn, kaddr); +#ifdef CONFIG_X86_64 + is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); +#endif + insn_init(&insn, kaddr, is_64bit); insn_get_length(&insn); to += insn.length; } while (to < ip); diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 492bf1358a7c..ef484d9d0a25 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -1268,7 +1268,7 @@ reserve: } done: - return num ? -ENOSPC : 0; + return num ? -EINVAL : 0; } static __initconst const struct x86_pmu p4_pmu = { diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index acf8fbf8fbda..69bca468c47a 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -38,6 +38,9 @@ static inline void stack_overflow_check(struct pt_regs *regs) #ifdef CONFIG_DEBUG_STACKOVERFLOW u64 curbase = (u64)task_stack_page(current); + if (user_mode_vm(regs)) + return; + WARN_ONCE(regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && regs->sp < curbase + sizeof(struct thread_info) + diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index f2d2a664e797..9d46f5e43b51 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -256,7 +256,7 @@ static int __init microcode_dev_init(void) return 0; } -static void microcode_dev_exit(void) +static void __exit microcode_dev_exit(void) { misc_deregister(µcode_dev); } @@ -519,10 +519,8 @@ static int __init microcode_init(void) microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); + if (IS_ERR(microcode_pdev)) return PTR_ERR(microcode_pdev); - } get_online_cpus(); mutex_lock(µcode_mutex); @@ -532,14 +530,12 @@ static int __init microcode_init(void) mutex_unlock(µcode_mutex); put_online_cpus(); - if (error) { - platform_device_unregister(microcode_pdev); - return error; - } + if (error) + goto out_pdev; error = microcode_dev_init(); if (error) - return error; + goto out_sysdev_driver; register_syscore_ops(&mc_syscore_ops); register_hotcpu_notifier(&mc_cpu_notifier); @@ -548,6 +544,20 @@ static int __init microcode_init(void) " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n"); return 0; + +out_sysdev_driver: + get_online_cpus(); + mutex_lock(µcode_mutex); + + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + + mutex_unlock(µcode_mutex); + put_online_cpus(); + +out_pdev: + platform_device_unregister(microcode_pdev); + return error; + } module_init(microcode_init); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 9103b89c145a..0741b062a304 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_bus *m) } #endif + set_bit(m->busid, mp_bus_not_pci); if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { - set_bit(m->busid, mp_bus_not_pci); #if defined(CONFIG_EISA) || defined(CONFIG_MCA) mp_bus_id_to_type[m->busid] = MP_BUS_ISA; #endif diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index b78643d0f9a5..03920a15a632 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -553,4 +553,17 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC, quirk_amd_nb_node); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK, quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F0, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F1, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F2, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5, + quirk_amd_nb_node); + #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e334be1182b9..37a458b521a6 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -124,7 +124,7 @@ __setup("reboot=", reboot_setup); */ /* - * Some machines require the "reboot=b" commandline option, + * Some machines require the "reboot=b" or "reboot=k" commandline options, * this quirk makes that automatic. */ static int __init set_bios_reboot(const struct dmi_system_id *d) @@ -136,6 +136,15 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) return 0; } +static int __init set_kbd_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_KBD) { + reboot_type = BOOT_KBD; + printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident); + } + return 0; +} + static struct dmi_system_id __initdata reboot_dmi_table[] = { { /* Handle problems with rebooting on Dell E520's */ .callback = set_bios_reboot, @@ -295,7 +304,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, }, { /* Handle reboot issue on Acer Aspire one */ - .callback = set_bios_reboot, + .callback = set_kbd_reboot, .ident = "Acer Aspire One A110", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Acer"), @@ -443,6 +452,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), }, }, + { /* Handle problems with rebooting on the OptiPlex 990. */ + .callback = set_pci_reboot, + .ident = "Dell OptiPlex 990", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), + }, + }, { } }; diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 348ce016a835..af6db6ec5b2a 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -12,6 +12,7 @@ #include <asm/vsyscall.h> #include <asm/x86_init.h> #include <asm/time.h> +#include <asm/mrst.h> #ifdef CONFIG_X86_32 /* @@ -242,6 +243,10 @@ static __init int add_rtc_cmos(void) if (of_have_populated_dt()) return 0; + /* Intel MID platforms don't have ioport rtc */ + if (mrst_identify_cpu()) + return -ENODEV; + platform_device_register(&rtc_device); dev_info(&rtc_device.dev, "registered platform RTC device (no PNP device found)\n"); diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index b49962662101..f4f29b19fac5 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -45,6 +45,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); set_pte(kmap_pte-idx, mk_pte(page, prot)); + arch_flush_lazy_mmu_mode(); return (void *)vaddr; } @@ -88,6 +89,7 @@ void __kunmap_atomic(void *kvaddr) */ kpte_clear_flush(kmap_pte-idx, vaddr); kmap_atomic_idx_pop(); + arch_flush_lazy_mmu_mode(); } #ifdef CONFIG_DEBUG_HIGHMEM else { diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index cdfe4c54deca..f148cf652678 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c @@ -21,6 +21,7 @@ extern int op_nmi_timer_init(struct oprofile_operations *ops); extern void op_nmi_exit(void); extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); +static int nmi_timer; int __init oprofile_arch_init(struct oprofile_operations *ops) { @@ -31,8 +32,9 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) #ifdef CONFIG_X86_LOCAL_APIC ret = op_nmi_init(ops); #endif + nmi_timer = (ret != 0); #ifdef CONFIG_X86_IO_APIC - if (ret < 0) + if (nmi_timer) ret = op_nmi_timer_init(ops); #endif ops->backtrace = x86_backtrace; @@ -44,6 +46,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) void oprofile_arch_exit(void) { #ifdef CONFIG_X86_LOCAL_APIC - op_nmi_exit(); + if (!nmi_timer) + op_nmi_exit(); #endif } diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index b1489a06a49d..ad4ec1cb097e 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -76,6 +76,20 @@ struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; EXPORT_SYMBOL_GPL(sfi_mrtc_array); int sfi_mrtc_num; +static void mrst_power_off(void) +{ + if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) + intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 1); +} + +static void mrst_reboot(void) +{ + if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) + intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0); + else + intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0); +} + /* parse all the mtimer info to a static mtimer array */ static int __init sfi_parse_mtmr(struct sfi_table_header *table) { @@ -265,17 +279,6 @@ static int mrst_i8042_detect(void) return 0; } -/* Reboot and power off are handled by the SCU on a MID device */ -static void mrst_power_off(void) -{ - intel_scu_ipc_simple_command(0xf1, 1); -} - -static void mrst_reboot(void) -{ - intel_scu_ipc_simple_command(0xf1, 0); -} - /* * Moorestown does not have external NMI source nor port 0x61 to report * NMI status. The possible NMI sources are from pmu as a result of NMI @@ -484,6 +487,46 @@ static void __init *max7315_platform_data(void *info) return max7315; } +static void *tca6416_platform_data(void *info) +{ + static struct pca953x_platform_data tca6416; + struct i2c_board_info *i2c_info = info; + int gpio_base, intr; + char base_pin_name[SFI_NAME_LEN + 1]; + char intr_pin_name[SFI_NAME_LEN + 1]; + + strcpy(i2c_info->type, "tca6416"); + strcpy(base_pin_name, "tca6416_base"); + strcpy(intr_pin_name, "tca6416_int"); + + gpio_base = get_gpio_by_name(base_pin_name); + intr = get_gpio_by_name(intr_pin_name); + + if (gpio_base == -1) + return NULL; + tca6416.gpio_base = gpio_base; + if (intr != -1) { + i2c_info->irq = intr + MRST_IRQ_OFFSET; + tca6416.irq_base = gpio_base + MRST_IRQ_OFFSET; + } else { + i2c_info->irq = -1; + tca6416.irq_base = -1; + } + return &tca6416; +} + +static void *mpu3050_platform_data(void *info) +{ + struct i2c_board_info *i2c_info = info; + int intr = get_gpio_by_name("mpu3050_int"); + + if (intr == -1) + return NULL; + + i2c_info->irq = intr + MRST_IRQ_OFFSET; + return NULL; +} + static void __init *emc1403_platform_data(void *info) { static short intr2nd_pdata; @@ -646,12 +689,15 @@ static void *msic_ocd_platform_data(void *info) static const struct devs_id __initconst device_ids[] = { {"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data}, {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data}, + {"pmic_gpio", SFI_DEV_TYPE_IPC, 1, &pmic_gpio_platform_data}, {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data}, {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data}, + {"tca6416", SFI_DEV_TYPE_I2C, 1, &tca6416_platform_data}, {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data}, {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data}, {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data}, + {"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data}, /* MSIC subdevices */ {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data}, diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index dbcb0bcfd8da..4e018d6a7639 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_ARCH_DAVINCI) += gpio-davinci.o obj-$(CONFIG_GPIO_EP93XX) += gpio-ep93xx.o obj-$(CONFIG_GPIO_IT8761E) += gpio-it8761e.o obj-$(CONFIG_GPIO_JANZ_TTL) += gpio-janz-ttl.o -obj-$(CONFIG_MACH_KS8695) += gpio-ks8695.o +obj-$(CONFIG_ARCH_KS8695) += gpio-ks8695.o obj-$(CONFIG_GPIO_LANGWELL) += gpio-langwell.o obj-$(CONFIG_ARCH_LPC32XX) += gpio-lpc32xx.o obj-$(CONFIG_GPIO_MAX730X) += gpio-max730x.o diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c0c7820d4c46..a004c3945c67 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3524,7 +3524,7 @@ found: return 0; } -int dmar_parse_rmrr_atsr_dev(void) +int __init dmar_parse_rmrr_atsr_dev(void) { struct dmar_rmrr_unit *rmrr, *rmrr_n; struct dmar_atsr_unit *atsr, *atsr_n; diff --git a/drivers/iommu/intr_remapping.c b/drivers/iommu/intr_remapping.c index 07c9f189f314..6777ca049471 100644 --- a/drivers/iommu/intr_remapping.c +++ b/drivers/iommu/intr_remapping.c @@ -773,7 +773,7 @@ int __init parse_ioapics_under_ir(void) return ir_supported; } -int ir_dev_scope_init(void) +int __init ir_dev_scope_init(void) { if (!intr_remapping_enabled) return 0; diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index dccd8636095c..f8c752e408a6 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -239,26 +239,45 @@ int oprofile_set_ulong(unsigned long *addr, unsigned long val) return err; } +static int timer_mode; + static int __init oprofile_init(void) { int err; + /* always init architecture to setup backtrace support */ err = oprofile_arch_init(&oprofile_ops); - if (err < 0 || timer) { - printk(KERN_INFO "oprofile: using timer interrupt.\n"); + + timer_mode = err || timer; /* fall back to timer mode on errors */ + if (timer_mode) { + if (!err) + oprofile_arch_exit(); err = oprofile_timer_init(&oprofile_ops); if (err) return err; } - return oprofilefs_register(); + + err = oprofilefs_register(); + if (!err) + return 0; + + /* failed */ + if (timer_mode) + oprofile_timer_exit(); + else + oprofile_arch_exit(); + + return err; } static void __exit oprofile_exit(void) { - oprofile_timer_exit(); oprofilefs_unregister(); - oprofile_arch_exit(); + if (timer_mode) + oprofile_timer_exit(); + else + oprofile_arch_exit(); } diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 3ef44624f510..878fba126582 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -110,6 +110,7 @@ int oprofile_timer_init(struct oprofile_operations *ops) ops->start = oprofile_hrtimer_start; ops->stop = oprofile_hrtimer_stop; ops->cpu_type = "timer"; + printk(KERN_INFO "oprofile: using timer interrupt.\n"); return 0; } diff --git a/drivers/power/intel_mid_battery.c b/drivers/power/intel_mid_battery.c index cffcb7c00b00..01fa671ec97f 100644 --- a/drivers/power/intel_mid_battery.c +++ b/drivers/power/intel_mid_battery.c @@ -61,7 +61,8 @@ MODULE_PARM_DESC(debug, "Flag to enable PMIC Battery debug messages."); #define PMIC_BATT_CHR_SBATDET_MASK (1 << 5) #define PMIC_BATT_CHR_SDCLMT_MASK (1 << 6) #define PMIC_BATT_CHR_SUSBOVP_MASK (1 << 7) -#define PMIC_BATT_CHR_EXCPT_MASK 0xC6 +#define PMIC_BATT_CHR_EXCPT_MASK 0x86 + #define PMIC_BATT_ADC_ACCCHRG_MASK (1 << 31) #define PMIC_BATT_ADC_ACCCHRGVAL_MASK 0x7FFFFFFF @@ -304,11 +305,6 @@ static void pmic_battery_read_status(struct pmic_power_module_info *pbi) pbi->batt_status = POWER_SUPPLY_STATUS_NOT_CHARGING; pmic_battery_log_event(BATT_EVENT_BATOVP_EXCPT); batt_exception = 1; - } else if (r8 & PMIC_BATT_CHR_SDCLMT_MASK) { - pbi->batt_health = POWER_SUPPLY_HEALTH_OVERVOLTAGE; - pbi->batt_status = POWER_SUPPLY_STATUS_NOT_CHARGING; - pmic_battery_log_event(BATT_EVENT_DCLMT_EXCPT); - batt_exception = 1; } else if (r8 & PMIC_BATT_CHR_STEMP_MASK) { pbi->batt_health = POWER_SUPPLY_HEALTH_OVERHEAT; pbi->batt_status = POWER_SUPPLY_STATUS_NOT_CHARGING; @@ -316,6 +312,10 @@ static void pmic_battery_read_status(struct pmic_power_module_info *pbi) batt_exception = 1; } else { pbi->batt_health = POWER_SUPPLY_HEALTH_GOOD; + if (r8 & PMIC_BATT_CHR_SDCLMT_MASK) { + /* PMIC will change charging current automatically */ + pmic_battery_log_event(BATT_EVENT_DCLMT_EXCPT); + } } } diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index e8326f26fa2f..dc4c2748bbc3 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -63,7 +63,7 @@ static int rtc_suspend(struct device *dev, pm_message_t mesg) */ delta = timespec_sub(old_system, old_rtc); delta_delta = timespec_sub(delta, old_delta); - if (abs(delta_delta.tv_sec) >= 2) { + if (delta_delta.tv_sec < -2 || delta_delta.tv_sec >= 2) { /* * if delta_delta is too large, assume time correction * has occured and set old_delta to the current delta. @@ -97,9 +97,8 @@ static int rtc_resume(struct device *dev) rtc_tm_to_time(&tm, &new_rtc.tv_sec); new_rtc.tv_nsec = 0; - if (new_rtc.tv_sec <= old_rtc.tv_sec) { - if (new_rtc.tv_sec < old_rtc.tv_sec) - pr_debug("%s: time travel!\n", dev_name(&rtc->dev)); + if (new_rtc.tv_sec < old_rtc.tv_sec) { + pr_debug("%s: time travel!\n", dev_name(&rtc->dev)); return 0; } @@ -116,7 +115,8 @@ static int rtc_resume(struct device *dev) sleep_time = timespec_sub(sleep_time, timespec_sub(new_system, old_system)); - timekeeping_inject_sleeptime(&sleep_time); + if (sleep_time.tv_sec >= 0) + timekeeping_inject_sleeptime(&sleep_time); return 0; } diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 8e286259a007..fa4d9f324189 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -319,6 +319,20 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) } EXPORT_SYMBOL_GPL(rtc_read_alarm); +static int ___rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) +{ + int err; + + if (!rtc->ops) + err = -ENODEV; + else if (!rtc->ops->set_alarm) + err = -EINVAL; + else + err = rtc->ops->set_alarm(rtc->dev.parent, alarm); + + return err; +} + static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { struct rtc_time tm; @@ -342,14 +356,7 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) * over right here, before we set the alarm. */ - if (!rtc->ops) - err = -ENODEV; - else if (!rtc->ops->set_alarm) - err = -EINVAL; - else - err = rtc->ops->set_alarm(rtc->dev.parent, alarm); - - return err; + return ___rtc_set_alarm(rtc, alarm); } int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) @@ -763,6 +770,20 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) return 0; } +static void rtc_alarm_disable(struct rtc_device *rtc) +{ + struct rtc_wkalrm alarm; + struct rtc_time tm; + + __rtc_read_time(rtc, &tm); + + alarm.time = rtc_ktime_to_tm(ktime_add(rtc_tm_to_ktime(tm), + ktime_set(300, 0))); + alarm.enabled = 0; + + ___rtc_set_alarm(rtc, &alarm); +} + /** * rtc_timer_remove - Removes a rtc_timer from the rtc_device timerqueue * @rtc rtc device @@ -784,8 +805,10 @@ static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer) struct rtc_wkalrm alarm; int err; next = timerqueue_getnext(&rtc->timerqueue); - if (!next) + if (!next) { + rtc_alarm_disable(rtc); return; + } alarm.time = rtc_ktime_to_tm(next->expires); alarm.enabled = 1; err = __rtc_set_alarm(rtc, &alarm); @@ -847,7 +870,8 @@ again: err = __rtc_set_alarm(rtc, &alarm); if (err == -ETIME) goto again; - } + } else + rtc_alarm_disable(rtc); mutex_unlock(&rtc->ops_lock); } diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 75c3f1f8fd43..a84631a7391d 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -529,10 +529,7 @@ __s390_vary_chpid_on(struct subchannel_id schid, void *data) int chsc_chp_vary(struct chp_id chpid, int on) { struct channel_path *chp = chpid_to_chp(chpid); - struct chp_link link; - memset(&link, 0, sizeof(struct chp_link)); - link.chpid = chpid; /* Wait until previous actions have settled. */ css_wait_for_slow_path(); /* @@ -542,10 +539,10 @@ int chsc_chp_vary(struct chp_id chpid, int on) /* Try to update the channel path descritor. */ chsc_determine_base_channel_path_desc(chpid, &chp->desc); for_each_subchannel_staged(s390_subchannel_vary_chpid_on, - __s390_vary_chpid_on, &link); + __s390_vary_chpid_on, &chpid); } else for_each_subchannel_staged(s390_subchannel_vary_chpid_off, - NULL, &link); + NULL, &chpid); return 0; } diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 155a82bcb9e5..4a1ff5c2eb88 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -68,8 +68,13 @@ struct schib { __u8 mda[4]; /* model dependent area */ } __attribute__ ((packed,aligned(4))); +/* + * When rescheduled, todo's with higher values will overwrite those + * with lower values. + */ enum sch_todo { SCH_TODO_NOTHING, + SCH_TODO_EVAL, SCH_TODO_UNREG, }; diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 92d7324acb1c..21908e67bf67 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -195,51 +195,6 @@ void css_sch_device_unregister(struct subchannel *sch) } EXPORT_SYMBOL_GPL(css_sch_device_unregister); -static void css_sch_todo(struct work_struct *work) -{ - struct subchannel *sch; - enum sch_todo todo; - - sch = container_of(work, struct subchannel, todo_work); - /* Find out todo. */ - spin_lock_irq(sch->lock); - todo = sch->todo; - CIO_MSG_EVENT(4, "sch_todo: sch=0.%x.%04x, todo=%d\n", sch->schid.ssid, - sch->schid.sch_no, todo); - sch->todo = SCH_TODO_NOTHING; - spin_unlock_irq(sch->lock); - /* Perform todo. */ - if (todo == SCH_TODO_UNREG) - css_sch_device_unregister(sch); - /* Release workqueue ref. */ - put_device(&sch->dev); -} - -/** - * css_sched_sch_todo - schedule a subchannel operation - * @sch: subchannel - * @todo: todo - * - * Schedule the operation identified by @todo to be performed on the slow path - * workqueue. Do nothing if another operation with higher priority is already - * scheduled. Needs to be called with subchannel lock held. - */ -void css_sched_sch_todo(struct subchannel *sch, enum sch_todo todo) -{ - CIO_MSG_EVENT(4, "sch_todo: sched sch=0.%x.%04x todo=%d\n", - sch->schid.ssid, sch->schid.sch_no, todo); - if (sch->todo >= todo) - return; - /* Get workqueue ref. */ - if (!get_device(&sch->dev)) - return; - sch->todo = todo; - if (!queue_work(cio_work_q, &sch->todo_work)) { - /* Already queued, release workqueue ref. */ - put_device(&sch->dev); - } -} - static void ssd_from_pmcw(struct chsc_ssd_info *ssd, struct pmcw *pmcw) { int i; @@ -466,6 +421,65 @@ static void css_evaluate_subchannel(struct subchannel_id schid, int slow) css_schedule_eval(schid); } +/** + * css_sched_sch_todo - schedule a subchannel operation + * @sch: subchannel + * @todo: todo + * + * Schedule the operation identified by @todo to be performed on the slow path + * workqueue. Do nothing if another operation with higher priority is already + * scheduled. Needs to be called with subchannel lock held. + */ +void css_sched_sch_todo(struct subchannel *sch, enum sch_todo todo) +{ + CIO_MSG_EVENT(4, "sch_todo: sched sch=0.%x.%04x todo=%d\n", + sch->schid.ssid, sch->schid.sch_no, todo); + if (sch->todo >= todo) + return; + /* Get workqueue ref. */ + if (!get_device(&sch->dev)) + return; + sch->todo = todo; + if (!queue_work(cio_work_q, &sch->todo_work)) { + /* Already queued, release workqueue ref. */ + put_device(&sch->dev); + } +} + +static void css_sch_todo(struct work_struct *work) +{ + struct subchannel *sch; + enum sch_todo todo; + int ret; + + sch = container_of(work, struct subchannel, todo_work); + /* Find out todo. */ + spin_lock_irq(sch->lock); + todo = sch->todo; + CIO_MSG_EVENT(4, "sch_todo: sch=0.%x.%04x, todo=%d\n", sch->schid.ssid, + sch->schid.sch_no, todo); + sch->todo = SCH_TODO_NOTHING; + spin_unlock_irq(sch->lock); + /* Perform todo. */ + switch (todo) { + case SCH_TODO_NOTHING: + break; + case SCH_TODO_EVAL: + ret = css_evaluate_known_subchannel(sch, 1); + if (ret == -EAGAIN) { + spin_lock_irq(sch->lock); + css_sched_sch_todo(sch, todo); + spin_unlock_irq(sch->lock); + } + break; + case SCH_TODO_UNREG: + css_sch_device_unregister(sch); + break; + } + /* Release workqueue ref. */ + put_device(&sch->dev); +} + static struct idset *slow_subchannel_set; static spinlock_t slow_subchannel_lock; static wait_queue_head_t css_eval_wq; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index d734f4a0ecac..47269858ecb6 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1868,9 +1868,9 @@ static void __ccw_device_pm_restore(struct ccw_device *cdev) */ cdev->private->flags.resuming = 1; cdev->private->path_new_mask = LPM_ANYPATH; - css_schedule_eval(sch->schid); + css_sched_sch_todo(sch, SCH_TODO_EVAL); spin_unlock_irq(sch->lock); - css_complete_work(); + css_wait_for_slow_path(); /* cdev may have been moved to a different subchannel. */ sch = to_subchannel(cdev->dev.parent); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 52c233fa2b12..1b853513c891 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -496,8 +496,26 @@ static void ccw_device_reset_path_events(struct ccw_device *cdev) cdev->private->pgid_reset_mask = 0; } -void -ccw_device_verify_done(struct ccw_device *cdev, int err) +static void create_fake_irb(struct irb *irb, int type) +{ + memset(irb, 0, sizeof(*irb)); + if (type == FAKE_CMD_IRB) { + struct cmd_scsw *scsw = &irb->scsw.cmd; + scsw->cc = 1; + scsw->fctl = SCSW_FCTL_START_FUNC; + scsw->actl = SCSW_ACTL_START_PEND; + scsw->stctl = SCSW_STCTL_STATUS_PEND; + } else if (type == FAKE_TM_IRB) { + struct tm_scsw *scsw = &irb->scsw.tm; + scsw->x = 1; + scsw->cc = 1; + scsw->fctl = SCSW_FCTL_START_FUNC; + scsw->actl = SCSW_ACTL_START_PEND; + scsw->stctl = SCSW_STCTL_STATUS_PEND; + } +} + +void ccw_device_verify_done(struct ccw_device *cdev, int err) { struct subchannel *sch; @@ -520,12 +538,8 @@ callback: ccw_device_done(cdev, DEV_STATE_ONLINE); /* Deliver fake irb to device driver, if needed. */ if (cdev->private->flags.fake_irb) { - memset(&cdev->private->irb, 0, sizeof(struct irb)); - cdev->private->irb.scsw.cmd.cc = 1; - cdev->private->irb.scsw.cmd.fctl = SCSW_FCTL_START_FUNC; - cdev->private->irb.scsw.cmd.actl = SCSW_ACTL_START_PEND; - cdev->private->irb.scsw.cmd.stctl = - SCSW_STCTL_STATUS_PEND; + create_fake_irb(&cdev->private->irb, + cdev->private->flags.fake_irb); cdev->private->flags.fake_irb = 0; if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index f98698d5735e..ec7fb6d3b479 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -198,7 +198,7 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, if (cdev->private->state == DEV_STATE_VERIFY) { /* Remember to fake irb when finished. */ if (!cdev->private->flags.fake_irb) { - cdev->private->flags.fake_irb = 1; + cdev->private->flags.fake_irb = FAKE_CMD_IRB; cdev->private->intparm = intparm; return 0; } else @@ -213,9 +213,9 @@ int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, ret = cio_set_options (sch, flags); if (ret) return ret; - /* Adjust requested path mask to excluded varied off paths. */ + /* Adjust requested path mask to exclude unusable paths. */ if (lpm) { - lpm &= sch->opm; + lpm &= sch->lpm; if (lpm == 0) return -EACCES; } @@ -605,11 +605,21 @@ int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, sch = to_subchannel(cdev->dev.parent); if (!sch->schib.pmcw.ena) return -EINVAL; + if (cdev->private->state == DEV_STATE_VERIFY) { + /* Remember to fake irb when finished. */ + if (!cdev->private->flags.fake_irb) { + cdev->private->flags.fake_irb = FAKE_TM_IRB; + cdev->private->intparm = intparm; + return 0; + } else + /* There's already a fake I/O around. */ + return -EBUSY; + } if (cdev->private->state != DEV_STATE_ONLINE) return -EIO; - /* Adjust requested path mask to excluded varied off paths. */ + /* Adjust requested path mask to exclude unusable paths. */ if (lpm) { - lpm &= sch->opm; + lpm &= sch->lpm; if (lpm == 0) return -EACCES; } diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index 2ebb492a5c17..76253dfcc1be 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -111,6 +111,9 @@ enum cdev_todo { CDEV_TODO_UNREG_EVAL, }; +#define FAKE_CMD_IRB 1 +#define FAKE_TM_IRB 2 + struct ccw_device_private { struct ccw_device *cdev; struct subchannel *sch; @@ -138,7 +141,7 @@ struct ccw_device_private { unsigned int doverify:1; /* delayed path verification */ unsigned int donotify:1; /* call notify function */ unsigned int recog_done:1; /* dev. recog. complete */ - unsigned int fake_irb:1; /* deliver faked irb */ + unsigned int fake_irb:2; /* deliver faked irb */ unsigned int resuming:1; /* recognition while resume */ unsigned int pgroup:1; /* pathgroup is set up */ unsigned int mpath:1; /* multipathing is set up */ diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index ec94f049e995..96bbe9d12a79 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1552,6 +1552,8 @@ static void ap_reset(struct ap_device *ap_dev) rc = ap_init_queue(ap_dev->qid); if (rc == -ENODEV) ap_dev->unregistered = 1; + else + __ap_schedule_poll_timer(); } static int __ap_poll_device(struct ap_device *ap_dev, unsigned long *flags) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 94b1e356c02a..32574eef9394 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -126,6 +126,8 @@ extern struct cred init_cred; # define INIT_PERF_EVENTS(tsk) #endif +#define INIT_TASK_COMM "swapper" + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -162,7 +164,7 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .comm = "swapper", \ + .comm = INIT_TASK_COMM, \ .thread = INIT_THREAD, \ .fs = &init_fs, \ .files = &init_files, \ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 172ba70306d1..2aaee0ca9da8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -517,8 +517,12 @@ #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 +#define PCI_DEVICE_ID_AMD_15H_NB_F0 0x1600 +#define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 +#define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 #define PCI_DEVICE_ID_AMD_15H_NB_F3 0x1603 #define PCI_DEVICE_ID_AMD_15H_NB_F4 0x1604 +#define PCI_DEVICE_ID_AMD_15H_NB_F5 0x1605 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1e9ebe5e0091..b1f89122bf6a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -822,6 +822,7 @@ struct perf_event { int mmap_locked; struct user_struct *mmap_user; struct ring_buffer *rb; + struct list_head rb_entry; /* poll related */ wait_queue_head_t waitq; diff --git a/kernel/events/core.c b/kernel/events/core.c index 0e8457da6f95..600c1629b64d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -185,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, static void update_context_time(struct perf_event_context *ctx); static u64 perf_event_time(struct perf_event *event); +static void ring_buffer_attach(struct perf_event *event, + struct ring_buffer *rb); + void __weak perf_event_print_debug(void) { } extern __weak const char *perf_pmu_name(void) @@ -2173,7 +2176,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, perf_event_sched_in(cpuctx, ctx, task); - cpuctx->task_ctx = ctx; + if (ctx->nr_events) + cpuctx->task_ctx = ctx; perf_pmu_enable(ctx->pmu); perf_ctx_unlock(cpuctx, ctx); @@ -3190,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) struct ring_buffer *rb; unsigned int events = POLL_HUP; + /* + * Race between perf_event_set_output() and perf_poll(): perf_poll() + * grabs the rb reference but perf_event_set_output() overrides it. + * Here is the timeline for two threads T1, T2: + * t0: T1, rb = rcu_dereference(event->rb) + * t1: T2, old_rb = event->rb + * t2: T2, event->rb = new rb + * t3: T2, ring_buffer_detach(old_rb) + * t4: T1, ring_buffer_attach(rb1) + * t5: T1, poll_wait(event->waitq) + * + * To avoid this problem, we grab mmap_mutex in perf_poll() + * thereby ensuring that the assignment of the new ring buffer + * and the detachment of the old buffer appear atomic to perf_poll() + */ + mutex_lock(&event->mmap_mutex); + rcu_read_lock(); rb = rcu_dereference(event->rb); - if (rb) + if (rb) { + ring_buffer_attach(event, rb); events = atomic_xchg(&rb->poll, 0); + } rcu_read_unlock(); + mutex_unlock(&event->mmap_mutex); + poll_wait(file, &event->waitq, wait); return events; @@ -3496,6 +3521,49 @@ unlock: return ret; } +static void ring_buffer_attach(struct perf_event *event, + struct ring_buffer *rb) +{ + unsigned long flags; + + if (!list_empty(&event->rb_entry)) + return; + + spin_lock_irqsave(&rb->event_lock, flags); + if (!list_empty(&event->rb_entry)) + goto unlock; + + list_add(&event->rb_entry, &rb->event_list); +unlock: + spin_unlock_irqrestore(&rb->event_lock, flags); +} + +static void ring_buffer_detach(struct perf_event *event, + struct ring_buffer *rb) +{ + unsigned long flags; + + if (list_empty(&event->rb_entry)) + return; + + spin_lock_irqsave(&rb->event_lock, flags); + list_del_init(&event->rb_entry); + wake_up_all(&event->waitq); + spin_unlock_irqrestore(&rb->event_lock, flags); +} + +static void ring_buffer_wakeup(struct perf_event *event) +{ + struct ring_buffer *rb; + + rcu_read_lock(); + rb = rcu_dereference(event->rb); + list_for_each_entry_rcu(event, &rb->event_list, rb_entry) { + wake_up_all(&event->waitq); + } + rcu_read_unlock(); +} + static void rb_free_rcu(struct rcu_head *rcu_head) { struct ring_buffer *rb; @@ -3521,9 +3589,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event) static void ring_buffer_put(struct ring_buffer *rb) { + struct perf_event *event, *n; + unsigned long flags; + if (!atomic_dec_and_test(&rb->refcount)) return; + spin_lock_irqsave(&rb->event_lock, flags); + list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) { + list_del_init(&event->rb_entry); + wake_up_all(&event->waitq); + } + spin_unlock_irqrestore(&rb->event_lock, flags); + call_rcu(&rb->rcu_head, rb_free_rcu); } @@ -3546,6 +3624,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); vma->vm_mm->pinned_vm -= event->mmap_locked; rcu_assign_pointer(event->rb, NULL); + ring_buffer_detach(event, rb); mutex_unlock(&event->mmap_mutex); ring_buffer_put(rb); @@ -3700,7 +3779,7 @@ static const struct file_operations perf_fops = { void perf_event_wakeup(struct perf_event *event) { - wake_up_all(&event->waitq); + ring_buffer_wakeup(event); if (event->pending_kill) { kill_fasync(&event->fasync, SIGIO, event->pending_kill); @@ -5822,6 +5901,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, INIT_LIST_HEAD(&event->group_entry); INIT_LIST_HEAD(&event->event_entry); INIT_LIST_HEAD(&event->sibling_list); + INIT_LIST_HEAD(&event->rb_entry); + init_waitqueue_head(&event->waitq); init_irq_work(&event->pending, perf_pending_event); @@ -6028,6 +6109,8 @@ set: old_rb = event->rb; rcu_assign_pointer(event->rb, rb); + if (old_rb) + ring_buffer_detach(event, old_rb); ret = 0; unlock: mutex_unlock(&event->mmap_mutex); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 09097dd8116c..64568a699375 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -22,6 +22,9 @@ struct ring_buffer { local_t lost; /* nr records lost */ long watermark; /* wakeup watermark */ + /* poll crap */ + spinlock_t event_lock; + struct list_head event_list; struct perf_event_mmap_page *user_page; void *data_pages[0]; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index a2a29205cc0f..7f3011c6b57f 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -209,6 +209,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) rb->writable = 1; atomic_set(&rb->refcount, 1); + + INIT_LIST_HEAD(&rb->event_list); + spin_lock_init(&rb->event_lock); } #ifndef CONFIG_PERF_USE_VMALLOC diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0e2b179bc7b3..1da999f5e746 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -623,8 +623,9 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) static int irq_wait_for_interrupt(struct irqaction *action) { + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags)) { @@ -632,7 +633,9 @@ static int irq_wait_for_interrupt(struct irqaction *action) return 0; } schedule(); + set_current_state(TASK_INTERRUPTIBLE); } + __set_current_state(TASK_RUNNING); return -1; } diff --git a/kernel/sched.c b/kernel/sched.c index 0e9344a71be3..d6b149ccf925 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -71,6 +71,7 @@ #include <linux/ctype.h> #include <linux/ftrace.h> #include <linux/slab.h> +#include <linux/init_task.h> #include <asm/tlb.h> #include <asm/irq_regs.h> @@ -4810,6 +4811,9 @@ EXPORT_SYMBOL(wait_for_completion); * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. The timeout is in jiffies. It is not * interruptible. + * + * The return value is 0 if timed out, and positive (at least 1, or number of + * jiffies left till timeout) if completed. */ unsigned long __sched wait_for_completion_timeout(struct completion *x, unsigned long timeout) @@ -4824,6 +4828,8 @@ EXPORT_SYMBOL(wait_for_completion_timeout); * * This waits for completion of a specific task to be signaled. It is * interruptible. + * + * The return value is -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_interruptible(struct completion *x) { @@ -4841,6 +4847,9 @@ EXPORT_SYMBOL(wait_for_completion_interruptible); * * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. It is interruptible. The timeout is in jiffies. + * + * The return value is -ERESTARTSYS if interrupted, 0 if timed out, + * positive (at least 1, or number of jiffies left till timeout) if completed. */ long __sched wait_for_completion_interruptible_timeout(struct completion *x, @@ -4856,6 +4865,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); * * This waits to be signaled for completion of a specific task. It can be * interrupted by a kill signal. + * + * The return value is -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_killable(struct completion *x) { @@ -4874,6 +4885,9 @@ EXPORT_SYMBOL(wait_for_completion_killable); * This waits for either a completion of a specific task to be * signaled or for a specified timeout to expire. It can be * interrupted by a kill signal. The timeout is in jiffies. + * + * The return value is -ERESTARTSYS if interrupted, 0 if timed out, + * positive (at least 1, or number of jiffies left till timeout) if completed. */ long __sched wait_for_completion_killable_timeout(struct completion *x, @@ -6099,6 +6113,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) */ idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); +#if defined(CONFIG_SMP) + sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); +#endif } /* diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5c9e67923b7c..a78ed2736ba7 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) list_del_leaf_cfs_rq(cfs_rq); } +static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) +{ + long tg_weight; + + /* + * Use this CPU's actual weight instead of the last load_contribution + * to gain a more accurate current total weight. See + * update_cfs_rq_load_contribution(). + */ + tg_weight = atomic_read(&tg->load_weight); + tg_weight -= cfs_rq->load_contribution; + tg_weight += cfs_rq->load.weight; + + return tg_weight; +} + static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) { - long load_weight, load, shares; + long tg_weight, load, shares; + tg_weight = calc_tg_weight(tg, cfs_rq); load = cfs_rq->load.weight; - load_weight = atomic_read(&tg->load_weight); - load_weight += load; - load_weight -= cfs_rq->load_contribution; - shares = (tg->shares * load); - if (load_weight) - shares /= load_weight; + if (tg_weight) + shares /= tg_weight; if (shares < MIN_SHARES) shares = MIN_SHARES; @@ -1743,7 +1756,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) { - if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running) + if (!cfs_rq->runtime_enabled || cfs_rq->nr_running) return; __return_cfs_rq_runtime(cfs_rq); @@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p) * Adding load to a group doesn't make a group heavier, but can cause movement * of group shares between cpus. Assuming the shares were perfectly aligned one * can calculate the shift in shares. + * + * Calculate the effective load difference if @wl is added (subtracted) to @tg + * on this @cpu and results in a total addition (subtraction) of @wg to the + * total group weight. + * + * Given a runqueue weight distribution (rw_i) we can compute a shares + * distribution (s_i) using: + * + * s_i = rw_i / \Sum rw_j (1) + * + * Suppose we have 4 CPUs and our @tg is a direct child of the root group and + * has 7 equal weight tasks, distributed as below (rw_i), with the resulting + * shares distribution (s_i): + * + * rw_i = { 2, 4, 1, 0 } + * s_i = { 2/7, 4/7, 1/7, 0 } + * + * As per wake_affine() we're interested in the load of two CPUs (the CPU the + * task used to run on and the CPU the waker is running on), we need to + * compute the effect of waking a task on either CPU and, in case of a sync + * wakeup, compute the effect of the current task going to sleep. + * + * So for a change of @wl to the local @cpu with an overall group weight change + * of @wl we can compute the new shares distribution (s'_i) using: + * + * s'_i = (rw_i + @wl) / (@wg + \Sum rw_j) (2) + * + * Suppose we're interested in CPUs 0 and 1, and want to compute the load + * differences in waking a task to CPU 0. The additional task changes the + * weight and shares distributions like: + * + * rw'_i = { 3, 4, 1, 0 } + * s'_i = { 3/8, 4/8, 1/8, 0 } + * + * We can then compute the difference in effective weight by using: + * + * dw_i = S * (s'_i - s_i) (3) + * + * Where 'S' is the group weight as seen by its parent. + * + * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7) + * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 - + * 4/7) times the weight of the group. */ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) { struct sched_entity *se = tg->se[cpu]; - if (!tg->parent) + if (!tg->parent) /* the trivial, non-cgroup case */ return wl; for_each_sched_entity(se) { - long lw, w; + long w, W; tg = se->my_q->tg; - w = se->my_q->load.weight; - /* use this cpu's instantaneous contribution */ - lw = atomic_read(&tg->load_weight); - lw -= se->my_q->load_contribution; - lw += w + wg; + /* + * W = @wg + \Sum rw_j + */ + W = wg + calc_tg_weight(tg, se->my_q); - wl += w; + /* + * w = rw_i + @wl + */ + w = se->my_q->load.weight + wl; - if (lw > 0 && wl < lw) - wl = (wl * tg->shares) / lw; + /* + * wl = S * s'_i; see (2) + */ + if (W > 0 && w < W) + wl = (w * tg->shares) / W; else wl = tg->shares; - /* zero point is MIN_SHARES */ + /* + * Per the above, wl is the new se->load.weight value; since + * those are clipped to [MIN_SHARES, ...) do so now. See + * calc_cfs_shares(). + */ if (wl < MIN_SHARES) wl = MIN_SHARES; + + /* + * wl = dw_i = S * (s'_i - s_i); see (3) + */ wl -= se->load.weight; + + /* + * Recursively apply this logic to all parent groups to compute + * the final effective load change on the root group. Since + * only the @tg group gets extra weight, all parent groups can + * only redistribute existing shares. @wl is the shift in shares + * resulting from this level per the above. + */ wg = 0; } @@ -2249,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target) int cpu = smp_processor_id(); int prev_cpu = task_cpu(p); struct sched_domain *sd; - int i; + struct sched_group *sg; + int i, smt = 0; /* * If the task is going to be woken-up on this cpu and if it is @@ -2269,25 +2347,38 @@ static int select_idle_sibling(struct task_struct *p, int target) * Otherwise, iterate the domains and find an elegible idle cpu. */ rcu_read_lock(); +again: for_each_domain(target, sd) { - if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) - break; + if (!smt && (sd->flags & SD_SHARE_CPUPOWER)) + continue; - for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) { - if (idle_cpu(i)) { - target = i; - break; + if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) { + if (!smt) { + smt = 1; + goto again; } + break; } - /* - * Lets stop looking for an idle sibling when we reached - * the domain that spans the current cpu and prev_cpu. - */ - if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && - cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) - break; + sg = sd->groups; + do { + if (!cpumask_intersects(sched_group_cpus(sg), + tsk_cpus_allowed(p))) + goto next; + + for_each_cpu(i, sched_group_cpus(sg)) { + if (!idle_cpu(i)) + goto next; + } + + target = cpumask_first_and(sched_group_cpus(sg), + tsk_cpus_allowed(p)); + goto done; +next: + sg = sg->next; + } while (sg != sd->groups); } +done: rcu_read_unlock(); return target; @@ -3511,7 +3602,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd, } /** - * update_sd_lb_stats - Update sched_group's statistics for load balancing. + * update_sd_lb_stats - Update sched_domain's statistics for load balancing. * @sd: sched_domain whose statistics are to be updated. * @this_cpu: Cpu for which load balance is currently performed. * @idle: Idle status of this_cpu diff --git a/kernel/sched_features.h b/kernel/sched_features.h index efa0a7b75dde..84802245abd2 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -67,3 +67,4 @@ SCHED_FEAT(NONTASK_POWER, 1) SCHED_FEAT(TTWU_QUEUE, 1) SCHED_FEAT(FORCE_SD_OVERLAP, 0) +SCHED_FEAT(RT_RUNTIME_SHARE, 1) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 056cbd2e2a27..583a1368afe6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -560,6 +560,9 @@ static int balance_runtime(struct rt_rq *rt_rq) { int more = 0; + if (!sched_feat(RT_RUNTIME_SHARE)) + return more; + if (rt_rq->rt_time > rt_rq->rt_runtime) { raw_spin_unlock(&rt_rq->rt_runtime_lock); more = do_balance_runtime(rt_rq); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 1ecd6ba36d6c..c4eb71c8b2ea 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -387,6 +387,7 @@ void clockevents_exchange_device(struct clock_event_device *old, * released list and do a notify add later. */ if (old) { + old->event_handler = clockevents_handle_noop; clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); list_del(&old->list); list_add(&old->list, &clockevents_released); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index cfc65e1eb9fb..da2f760e780c 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -548,7 +548,7 @@ static u64 clocksource_max_deferment(struct clocksource *cs) * note a margin of 12.5% is used because this can be computed with * a shift, versus say 10% which would require division. */ - return max_nsecs - (max_nsecs >> 5); + return max_nsecs - (max_nsecs >> 3); } #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET @@ -669,7 +669,7 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) * ~ 0.06ppm granularity for NTP. We apply the same 12.5% * margin as we do in clocksource_max_deferment() */ - sec = (cs->mask - (cs->mask >> 5)); + sec = (cs->mask - (cs->mask >> 3)); do_div(sec, freq); do_div(sec, scale); if (!sec) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f954282d9a82..fd4a7b1625a2 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -71,7 +71,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev) (dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; - clockevents_exchange_device(NULL, dev); + clockevents_exchange_device(tick_broadcast_device.evtdev, dev); tick_broadcast_device.evtdev = dev; if (!cpumask_empty(tick_get_broadcast_mask())) tick_broadcast_start_periodic(dev); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 816d3d074979..d6e7926dcd26 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1686,7 +1686,7 @@ static int replace_system_preds(struct event_subsystem *system, * replace the filter for the call. */ filter = call->filter; - call->filter = filter_item->filter; + rcu_assign_pointer(call->filter, filter_item->filter); filter_item->filter = filter; fail = false; @@ -1741,7 +1741,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) filter = call->filter; if (!filter) goto out_unlock; - call->filter = NULL; + RCU_INIT_POINTER(call->filter, NULL); /* Make sure the filter is not being used */ synchronize_sched(); __free_filter(filter); @@ -1782,7 +1782,7 @@ out: * string */ tmp = call->filter; - call->filter = filter; + rcu_assign_pointer(call->filter, filter); if (tmp) { /* Make sure the call is done with the filter */ synchronize_sched(); diff --git a/mm/slab.c b/mm/slab.c index 708efe886154..83311c9aaf9d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -595,6 +595,7 @@ static enum { PARTIAL_AC, PARTIAL_L3, EARLY, + LATE, FULL } g_cpucache_up; @@ -671,7 +672,7 @@ static void init_node_lock_keys(int q) { struct cache_sizes *s = malloc_sizes; - if (g_cpucache_up != FULL) + if (g_cpucache_up < LATE) return; for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { @@ -1666,6 +1667,8 @@ void __init kmem_cache_init_late(void) { struct kmem_cache *cachep; + g_cpucache_up = LATE; + /* Annotate slab for lockdep -- annotate the malloc caches */ init_lock_keys(); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index e42626422587..d7915d4e77cb 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -34,6 +34,16 @@ int __perf_evsel__sample_size(u64 sample_type) return size; } +static void hists__init(struct hists *hists) +{ + memset(hists, 0, sizeof(*hists)); + hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; + hists->entries_in = &hists->entries_in_array[0]; + hists->entries_collapsed = RB_ROOT; + hists->entries = RB_ROOT; + pthread_mutex_init(&hists->lock, NULL); +} + void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) { diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index a36a3fa81ffb..abef2703cd24 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1211,13 +1211,3 @@ size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) return ret; } - -void hists__init(struct hists *hists) -{ - memset(hists, 0, sizeof(*hists)); - hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; - hists->entries_in = &hists->entries_in_array[0]; - hists->entries_collapsed = RB_ROOT; - hists->entries = RB_ROOT; - pthread_mutex_init(&hists->lock, NULL); -} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index c86c1d27bd1e..89289c8e935e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -63,8 +63,6 @@ struct hists { struct callchain_cursor callchain_cursor; }; -void hists__init(struct hists *hists); - struct hist_entry *__hists__add_entry(struct hists *self, struct addr_location *al, struct symbol *parent, u64 period); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 85c1e6b76f0a..0f4555ce9063 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1333,6 +1333,10 @@ int perf_session__cpu_bitmap(struct perf_session *session, } map = cpu_map__new(cpu_list); + if (map == NULL) { + pr_err("Invalid cpu_list\n"); + return -1; + } for (i = 0; i < map->nr; i++) { int cpu = map->map[i]; |