diff options
48 files changed, 614 insertions, 424 deletions
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d7ee0d4c072d..1a347f481e5e 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -197,7 +197,7 @@ void cpu_idle(void) cpu_relax(); } else { stop_critical_timings(); - if (cpuidle_call_idle()) + if (cpuidle_idle_call()) pm_idle(); start_critical_timings(); /* diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 6fc03aff046c..c38d22e5e902 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -156,7 +156,7 @@ prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, \ #define STUB_SET_VARIABLE(prefix, adjust_arg) \ static efi_status_t \ prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, \ - unsigned long attr, unsigned long data_size, \ + u32 attr, unsigned long data_size, \ void *data) \ { \ struct ia64_fpreg fr[6]; \ diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index b1dc71f5534e..4054b31e0fa9 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -258,10 +258,10 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) #define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) -static __inline__ int +static __inline__ s64 __atomic64_add_return(s64 i, atomic64_t *v) { - int ret; + s64 ret; unsigned long flags; _atomic_spin_lock_irqsave(v, flags); diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h index 67a33cc27ef2..2388bdb32832 100644 --- a/arch/parisc/include/asm/futex.h +++ b/arch/parisc/include/asm/futex.h @@ -5,11 +5,14 @@ #include <linux/futex.h> #include <linux/uaccess.h> +#include <asm/atomic.h> #include <asm/errno.h> static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) { + unsigned long int flags; + u32 val; int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; @@ -18,21 +21,58 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr))) return -EFAULT; pagefault_disable(); + _atomic_spin_lock_irqsave(uaddr, flags); + switch (op) { case FUTEX_OP_SET: + /* *(int *)UADDR2 = OPARG; */ + ret = get_user(oldval, uaddr); + if (!ret) + ret = put_user(oparg, uaddr); + break; case FUTEX_OP_ADD: + /* *(int *)UADDR2 += OPARG; */ + ret = get_user(oldval, uaddr); + if (!ret) { + val = oldval + oparg; + ret = put_user(val, uaddr); + } + break; case FUTEX_OP_OR: + /* *(int *)UADDR2 |= OPARG; */ + ret = get_user(oldval, uaddr); + if (!ret) { + val = oldval | oparg; + ret = put_user(val, uaddr); + } + break; case FUTEX_OP_ANDN: + /* *(int *)UADDR2 &= ~OPARG; */ + ret = get_user(oldval, uaddr); + if (!ret) { + val = oldval & ~oparg; + ret = put_user(val, uaddr); + } + break; case FUTEX_OP_XOR: + /* *(int *)UADDR2 ^= OPARG; */ + ret = get_user(oldval, uaddr); + if (!ret) { + val = oldval ^ oparg; + ret = put_user(val, uaddr); + } + break; default: ret = -ENOSYS; } + _atomic_spin_unlock_irqrestore(uaddr, flags); + pagefault_enable(); if (!ret) { @@ -54,7 +94,9 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + int ret; u32 val; + unsigned long flags; /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is * our gateway page, and causes no end of trouble... @@ -65,12 +107,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - if (get_user(val, uaddr)) - return -EFAULT; - if (val == oldval && put_user(newval, uaddr)) - return -EFAULT; + /* HPPA has no cmpxchg in hardware and therefore the + * best we can do here is use an array of locks. The + * lock selected is based on a hash of the userspace + * address. This should scale to a couple of CPUs. + */ + + _atomic_spin_lock_irqsave(uaddr, flags); + + ret = get_user(val, uaddr); + + if (!ret && val == oldval) + ret = put_user(newval, uaddr); + *uval = val; - return 0; + + _atomic_spin_unlock_irqrestore(uaddr, flags); + + return ret; } #endif /*__KERNEL__*/ diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 3392de3e7be0..d61de64f990a 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -821,8 +821,9 @@ #define __NR_open_by_handle_at (__NR_Linux + 326) #define __NR_syncfs (__NR_Linux + 327) #define __NR_setns (__NR_Linux + 328) +#define __NR_sendmmsg (__NR_Linux + 329) -#define __NR_Linux_syscalls (__NR_setns + 1) +#define __NR_Linux_syscalls (__NR_sendmmsg + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 34a4f5a2fffb..e66366fd2abc 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -427,6 +427,7 @@ ENTRY_COMP(open_by_handle_at) ENTRY_SAME(syncfs) ENTRY_SAME(setns) + ENTRY_COMP(sendmmsg) /* Nothing yet */ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0f98bbddade5..ed5cb5af5281 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -274,11 +274,11 @@ config MARCH_Z10 on older machines. config MARCH_Z196 - bool "IBM zEnterprise 196" + bool "IBM zEnterprise 114 and 196" help - Select this to enable optimizations for IBM zEnterprise 196 - (2817 series). The kernel will be slightly faster but will not work - on older machines. + Select this to enable optimizations for IBM zEnterprise 114 and 196 + (2818 and 2817 series). The kernel will be slightly faster but will + not work on older machines. endchoice diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 5e95d95450b3..97cc4403fabf 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -167,5 +167,6 @@ enum diag308_rc { }; extern int diag308(unsigned long subcode, void *addr); +extern void diag308_reset(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index f26280d9e88d..e85c911aabf0 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -18,6 +18,7 @@ void system_call(void); void pgm_check_handler(void); void mcck_int_handler(void); void io_int_handler(void); +void psw_restart_int_handler(void); #ifdef CONFIG_32BIT @@ -150,7 +151,10 @@ struct _lowcore { */ __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ - __u8 pad_0x0e08[0x0f00-0x0e08]; /* 0x0e08 */ + + /* 64 bit save area */ + __u64 save_area_64; /* 0x0e08 */ + __u8 pad_0x0e10[0x0f00-0x0e10]; /* 0x0e10 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -286,7 +290,10 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ - __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ + + /* 64 bit save area */ + __u64 save_area_64; /* 0x0e0c */ + __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 55dfcc8bdc0d..a4b6229e5d4b 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -119,14 +119,12 @@ struct stack_frame { * Do necessary setup to start up a new thread. */ #define start_thread(regs, new_psw, new_stackp) do { \ - set_fs(USER_DS); \ regs->psw.mask = psw_user_bits; \ regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ regs->gprs[15] = new_stackp; \ } while (0) #define start_thread31(regs, new_psw, new_stackp) do { \ - set_fs(USER_DS); \ regs->psw.mask = psw_user32_bits; \ regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ regs->gprs[15] = new_stackp; \ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index d382629a0172..6582f69f2389 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -113,6 +113,7 @@ extern void pfault_fini(void); extern void cmma_init(void); extern int memcpy_real(void *, void *, size_t); +extern void copy_to_absolute_zero(void *dest, void *src, size_t count); #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 05d8f38734ec..532fd4322156 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -27,12 +27,9 @@ int main(void) BLANK(); DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); BLANK(); - DEFINE(__THREAD_per_cause, - offsetof(struct task_struct, thread.per_event.cause)); - DEFINE(__THREAD_per_address, - offsetof(struct task_struct, thread.per_event.address)); - DEFINE(__THREAD_per_paid, - offsetof(struct task_struct, thread.per_event.paid)); + DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause)); + DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address)); + DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid)); BLANK(); DEFINE(__TI_task, offsetof(struct thread_info, task)); DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); @@ -142,6 +139,7 @@ int main(void) DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); + DEFINE(__LC_SAVE_AREA_64, offsetof(struct _lowcore, save_area_64)); #ifdef CONFIG_32BIT DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); #else /* CONFIG_32BIT */ diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index 209938c1dfc8..255435663bf8 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -76,6 +76,42 @@ s390_base_pgm_handler_fn: .quad 0 .previous +# +# Calls diag 308 subcode 1 and continues execution +# +# The following conditions must be ensured before calling this function: +# * Prefix register = 0 +# * Lowcore protection is disabled +# +ENTRY(diag308_reset) + larl %r4,.Lctlregs # Save control registers + stctg %c0,%c15,0(%r4) + larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 + lghi %r3,0 + lg %r4,0(%r4) # Save PSW + sturg %r4,%r3 # Use sturg, because of large pages + lghi %r1,1 + diag %r1,%r1,0x308 +.Lrestart_part2: + lhi %r0,0 # Load r0 with zero + lhi %r1,2 # Use mode 2 = ESAME (dump) + sigp %r1,%r0,0x12 # Switch to ESAME mode + sam64 # Switch to 64 bit addressing mode + larl %r4,.Lctlregs # Restore control registers + lctlg %c0,%c15,0(%r4) + br %r14 +.align 16 +.Lrestart_psw: + .long 0x00080000,0x80000000 + .Lrestart_part2 + + .section .bss +.align 8 +.Lctlregs: + .rept 16 + .quad 0 + .endr + .previous + #else /* CONFIG_64BIT */ ENTRY(s390_base_mcck_handler) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index eee999853a7c..a9a285b8c4ad 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -380,20 +380,13 @@ asmlinkage long sys32_sigreturn(void) goto badframe; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; if (restore_sigregs_gprs_high(regs, frame->gprs_high)) goto badframe; - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -413,31 +406,22 @@ asmlinkage long sys32_rt_sigreturn(void) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; if (restore_sigregs_gprs_high(regs, frame->gprs_high)) goto badframe; - err = __get_user(ss_sp, &frame->uc.uc_stack.ss_sp); st.ss_sp = compat_ptr(ss_sp); err |= __get_user(st.ss_size, &frame->uc.uc_stack.ss_size); err |= __get_user(st.ss_flags, &frame->uc.uc_stack.ss_flags); if (err) goto badframe; - set_fs (KERNEL_DS); do_sigaltstack((stack_t __force __user *)&st, NULL, regs->gprs[15]); set_fs (old_fs); - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -605,10 +589,10 @@ give_sigsegv: * OK, we're invoking a handler */ -int -handle_signal32(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +int handle_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { + sigset_t blocked; int ret; /* Set up the stack frame */ @@ -616,15 +600,12 @@ handle_signal32(unsigned long sig, struct k_sigaction *ka, ret = setup_rt_frame32(sig, ka, info, oldset, regs); else ret = setup_frame32(sig, ka, oldset, regs); - - if (ret == 0) { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - return ret; + if (ret) + return ret; + sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&blocked, sig); + set_current_blocked(&blocked); + return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3eab7cfab07c..02ec8fe7d03f 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -849,6 +849,34 @@ restart_crash: restart_go: #endif +# +# PSW restart interrupt handler +# +ENTRY(psw_restart_int_handler) + st %r15,__LC_SAVE_AREA_64(%r0) # save r15 + basr %r15,0 +0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack + l %r15,0(%r15) + ahi %r15,-SP_SIZE # make room for pt_regs + stm %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack + mvc SP_R15(4,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw + xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 + basr %r14,0 +1: l %r14,.Ldo_restart-1b(%r14) + basr %r14,%r14 + + basr %r14,0 # load disabled wait PSW if +2: lpsw restart_psw_crash-2b(%r14) # do_restart returns + .align 4 +.Ldo_restart: + .long do_restart +.Lrestart_stack: + .long restart_stack + .align 8 +restart_psw_crash: + .long 0x000a0000,0x00000000 + restart_psw_crash + .section .kprobes.text, "ax" #ifdef CONFIG_CHECK_STACK diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 7a0fd426ca92..5f729d627cef 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -865,6 +865,26 @@ restart_crash: restart_go: #endif +# +# PSW restart interrupt handler +# +ENTRY(psw_restart_int_handler) + stg %r15,__LC_SAVE_AREA_64(%r0) # save r15 + larl %r15,restart_stack # load restart stack + lg %r15,0(%r15) + aghi %r15,-SP_SIZE # make room for pt_regs + stmg %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack + mvc SP_R15(8,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 + brasl %r14,do_restart + + larl %r14,restart_psw_crash # load disabled wait PSW if + lpswe 0(%r14) # do_restart returns + .align 8 +restart_psw_crash: + .quad 0x0002000080000000,0x0000000000000000 + restart_psw_crash + .section .kprobes.text, "ax" #ifdef CONFIG_CHECK_STACK diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index a689070be287..04361d5a4279 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -45,11 +45,13 @@ * - halt * - power off * - reipl + * - restart */ #define ON_PANIC_STR "on_panic" #define ON_HALT_STR "on_halt" #define ON_POFF_STR "on_poff" #define ON_REIPL_STR "on_reboot" +#define ON_RESTART_STR "on_restart" struct shutdown_action; struct shutdown_trigger { @@ -1544,17 +1546,20 @@ static char vmcmd_on_reboot[128]; static char vmcmd_on_panic[128]; static char vmcmd_on_halt[128]; static char vmcmd_on_poff[128]; +static char vmcmd_on_restart[128]; DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt); DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart); static struct attribute *vmcmd_attrs[] = { &sys_vmcmd_on_reboot_attr.attr, &sys_vmcmd_on_panic_attr.attr, &sys_vmcmd_on_halt_attr.attr, &sys_vmcmd_on_poff_attr.attr, + &sys_vmcmd_on_restart_attr.attr, NULL, }; @@ -1576,6 +1581,8 @@ static void vmcmd_run(struct shutdown_trigger *trigger) cmd = vmcmd_on_halt; else if (strcmp(trigger->name, ON_POFF_STR) == 0) cmd = vmcmd_on_poff; + else if (strcmp(trigger->name, ON_RESTART_STR) == 0) + cmd = vmcmd_on_restart; else return; @@ -1707,6 +1714,34 @@ static void do_panic(void) stop_run(&on_panic_trigger); } +/* on restart */ + +static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, + &reipl_action}; + +static ssize_t on_restart_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_restart_trigger.action->name); +} + +static ssize_t on_restart_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_restart_trigger, len); +} + +static struct kobj_attribute on_restart_attr = + __ATTR(on_restart, 0644, on_restart_show, on_restart_store); + +void do_restart(void) +{ + smp_send_stop(); + on_restart_trigger.action->fn(&on_restart_trigger); + stop_run(&on_restart_trigger); +} + /* on halt */ static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; @@ -1783,7 +1818,9 @@ static void __init shutdown_triggers_init(void) if (sysfs_create_file(&shutdown_actions_kset->kobj, &on_poff_attr.attr)) goto fail; - + if (sysfs_create_file(&shutdown_actions_kset->kobj, + &on_restart_attr.attr)) + goto fail; return; fail: panic("shutdown_triggers_init failed\n"); @@ -1959,6 +1996,12 @@ static void do_reset_calls(void) { struct reset_call *reset; +#ifdef CONFIG_64BIT + if (diag308_set_works) { + diag308_reset(); + return; + } +#endif list_for_each_entry(reset, &rcall, list) reset->fn(); } diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index 78eb7cfbd3d1..e690975403f4 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp 2000,2009 + * Copyright IBM Corp 2000,2011 * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, * Denis Joseph Barrow, */ @@ -8,6 +8,64 @@ #include <asm/asm-offsets.h> # +# store_status +# +# Prerequisites to run this function: +# - Prefix register is set to zero +# - Original prefix register is stored in "dump_prefix_page" +# - Lowcore protection is off +# +ENTRY(store_status) + /* Save register one and load save area base */ + stg %r1,__LC_SAVE_AREA_64(%r0) + lghi %r1,SAVE_AREA_BASE + /* General purpose registers */ + stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + lg %r2,__LC_SAVE_AREA_64(%r0) + stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) + /* Control registers */ + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Access registers */ + stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point registers */ + std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Floating point control register */ + stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* CPU timer */ + stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1) + /* Saved prefix register */ + larl %r2,dump_prefix_page + mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2) + /* Clock comparator - seven bytes */ + larl %r2,.Lclkcmp + stckc 0(%r2) + mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2) + /* Program status word */ + epsw %r2,%r3 + st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1) + st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1) + larl %r2,store_status + stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) + br %r14 +.align 8 +.Lclkcmp: .quad 0x0000000000000000 + +# # do_reipl_asm # Parameter: r2 = schid of reipl device # @@ -15,22 +73,7 @@ ENTRY(do_reipl_asm) basr %r13,0 .Lpg0: lpswe .Lnewpsw-.Lpg0(%r13) -.Lpg1: # do store status of all registers - - stg %r1,.Lregsave-.Lpg0(%r13) - lghi %r1,0x1000 - stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-0x1000(%r1) - lg %r0,.Lregsave-.Lpg0(%r13) - stg %r0,__LC_GPREGS_SAVE_AREA-0x1000+8(%r1) - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-0x1000(%r1) - stam %a0,%a15,__LC_AREGS_SAVE_AREA-0x1000(%r1) - lg %r10,.Ldump_pfx-.Lpg0(%r13) - mvc __LC_PREFIX_SAVE_AREA-0x1000(4,%r1),0(%r10) - stfpc __LC_FP_CREG_SAVE_AREA-0x1000(%r1) - stckc .Lclkcmp-.Lpg0(%r13) - mvc __LC_CLOCK_COMP_SAVE_AREA-0x1000(7,%r1),.Lclkcmp-.Lpg0(%r13) - stpt __LC_CPU_TIMER_SAVE_AREA-0x1000(%r1) - stg %r13, __LC_PSW_SAVE_AREA-0x1000+8(%r1) +.Lpg1: brasl %r14,store_status lctlg %c6,%c6,.Lall-.Lpg0(%r13) lgr %r1,%r2 @@ -67,10 +110,7 @@ ENTRY(do_reipl_asm) st %r14,.Ldispsw+12-.Lpg0(%r13) lpswe .Ldispsw-.Lpg0(%r13) .align 8 -.Lclkcmp: .quad 0x0000000000000000 .Lall: .quad 0x00000000ff000000 -.Ldump_pfx: .quad dump_prefix_page -.Lregsave: .quad 0x0000000000000000 .align 16 /* * These addresses have to be 31 bit otherwise diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0c35dee10b00..7b371c37061d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -346,7 +346,7 @@ setup_lowcore(void) lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; lc->restart_psw.addr = - PSW_ADDR_AMODE | (unsigned long) restart_int_handler; + PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; if (user_mode != HOME_SPACE_MODE) lc->restart_psw.mask |= PSW_ASC_HOME; lc->external_new_psw.mask = psw_kernel_bits; @@ -529,6 +529,27 @@ static void __init setup_memory_end(void) memory_end = memory_size; } +void *restart_stack __attribute__((__section__(".data"))); + +/* + * Setup new PSW and allocate stack for PSW restart interrupt + */ +static void __init setup_restart_psw(void) +{ + psw_t psw; + + restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); + restart_stack += ASYNC_SIZE; + + /* + * Setup restart PSW for absolute zero lowcore. This is necesary + * if PSW restart is done on an offline CPU that has lowcore zero + */ + psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; + copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); +} + static void __init setup_memory(void) { @@ -731,6 +752,7 @@ static void __init setup_hwcaps(void) strcpy(elf_platform, "z10"); break; case 0x2817: + case 0x2818: strcpy(elf_platform, "z196"); break; } @@ -792,6 +814,7 @@ setup_arch(char **cmdline_p) setup_addressing_mode(); setup_memory(); setup_resources(); + setup_restart_psw(); setup_lowcore(); cpu_init(); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index abbb3c3c7aab..9a40e1cc5ec3 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -57,17 +57,15 @@ typedef struct */ SYSCALL_DEFINE3(sigsuspend, int, history0, int, history1, old_sigset_t, mask) { - mask &= _BLOCKABLE; - spin_lock_irq(¤t->sighand->siglock); - current->saved_sigmask = current->blocked; - siginitset(¤t->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + sigset_t blocked; + current->saved_sigmask = current->blocked; + mask &= _BLOCKABLE; + siginitset(&blocked, mask); + set_current_blocked(&blocked); set_current_state(TASK_INTERRUPTIBLE); schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); - + set_restore_sigmask(); return -ERESTARTNOHAND; } @@ -172,18 +170,11 @@ SYSCALL_DEFINE0(sigreturn) goto badframe; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs(regs, &frame->sregs)) goto badframe; - return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -199,21 +190,14 @@ SYSCALL_DEFINE0(rt_sigreturn) goto badframe; if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - + set_current_blocked(&set); if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; - if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->gprs[15]) == -EFAULT) goto badframe; return regs->gprs[2]; - badframe: force_sig(SIGSEGV, current); return 0; @@ -385,14 +369,11 @@ give_sigsegv: return -EFAULT; } -/* - * OK, we're invoking a handler - */ - -static int -handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) +static int handle_signal(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, + struct pt_regs *regs) { + sigset_t blocked; int ret; /* Set up the stack frame */ @@ -400,17 +381,13 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, ret = setup_rt_frame(sig, ka, info, oldset, regs); else ret = setup_frame(sig, ka, oldset, regs); - - if (ret == 0) { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - - return ret; + if (ret) + return ret; + sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&blocked, sig); + set_current_blocked(&blocked); + return 0; } /* diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index a6d85c0a7f20..6ab16ac64d29 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -452,23 +452,27 @@ out: */ int __cpuinit start_secondary(void *cpuvoid) { - /* Setup the cpu */ cpu_init(); preempt_disable(); - /* Enable TOD clock interrupts on the secondary cpu. */ init_cpu_timer(); - /* Enable cpu timer interrupts on the secondary cpu. */ init_cpu_vtimer(); - /* Enable pfault pseudo page faults on this cpu. */ pfault_init(); - /* call cpu notifiers */ notify_cpu_starting(smp_processor_id()); - /* Mark this cpu as online */ ipi_call_lock(); set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); - /* Switch on interrupts */ + __ctl_clear_bit(0, 28); /* Disable lowcore protection */ + S390_lowcore.restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + S390_lowcore.restart_psw.addr = + PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; + __ctl_set_bit(0, 28); /* Enable lowcore protection */ + /* + * Wait until the cpu which brought this one up marked it + * active before enabling interrupts. + */ + while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask)) + cpu_relax(); local_irq_enable(); /* cpu_idle will call schedule for us */ cpu_idle(); @@ -507,7 +511,11 @@ static int __cpuinit smp_alloc_lowcore(int cpu) memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); lowcore->async_stack = async_stack + ASYNC_SIZE; lowcore->panic_stack = panic_stack + PAGE_SIZE; - + lowcore->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lowcore->restart_psw.addr = + PSW_ADDR_AMODE | (unsigned long) restart_int_handler; + if (user_mode != HOME_SPACE_MODE) + lowcore->restart_psw.mask |= PSW_ASC_HOME; #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { unsigned long save_area; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 51e5cd9b906a..5dbbaa6e594c 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -85,3 +85,19 @@ int memcpy_real(void *dest, void *src, size_t count) arch_local_irq_restore(flags); return rc; } + +/* + * Copy memory to absolute zero + */ +void copy_to_absolute_zero(void *dest, void *src, size_t count) +{ + unsigned long cr0; + + BUG_ON((unsigned long) dest + count >= sizeof(struct _lowcore)); + preempt_disable(); + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); /* disable lowcore protection */ + memcpy_real(dest + store_prefix(), src, count); + __ctl_load(cr0, 0, 0); + preempt_enable(); +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 2adb23938a7f..4d1f2bce87b3 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -528,6 +528,7 @@ static inline void page_table_free_pgste(unsigned long *table) static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, unsigned long vmaddr) { + return NULL; } static inline void page_table_free_pgste(unsigned long *table) diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 3c45de1db716..32114e0941ae 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -101,7 +101,7 @@ void cpu_idle(void) local_irq_disable(); /* Don't trace irqs off for idle */ stop_critical_timings(); - if (cpuidle_call_idle()) + if (cpuidle_idle_call()) pm_idle(); /* * Sanity check to ensure that pm_idle() returns diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 26374b2a55a2..b48967b499da 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -62,9 +62,9 @@ #include <linux/slab.h> static DEFINE_MUTEX(dma_list_mutex); +static DEFINE_IDR(dma_idr); static LIST_HEAD(dma_device_list); static long dmaengine_ref_count; -static struct idr dma_idr; /* --- sysfs implementation --- */ @@ -1050,8 +1050,6 @@ EXPORT_SYMBOL_GPL(dma_run_dependencies); static int __init dma_bus_init(void) { - idr_init(&dma_idr); - mutex_init(&dma_list_mutex); return class_register(&dma_devclass); } arch_initcall(dma_bus_init); diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index d845dc4b7103..f519c93a61e7 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -73,10 +73,10 @@ /* provide a lookup table for setting the source address in the base or * extended descriptor of an xor or pq descriptor */ -static const u8 xor_idx_to_desc __read_mostly = 0xd0; -static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; -static const u8 pq_idx_to_desc __read_mostly = 0xf8; -static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 xor_idx_to_desc = 0xe0; +static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) { diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index fab37d1cf48d..5e3a40f79945 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -72,6 +72,17 @@ static struct pci_device_id ioat_pci_tbl[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, + { 0, } }; MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c index 30da70d06a6d..cdae207028a7 100644 --- a/drivers/eisa/pci_eisa.c +++ b/drivers/eisa/pci_eisa.c @@ -45,13 +45,13 @@ static int __init pci_eisa_init(struct pci_dev *pdev, return 0; } -static struct pci_device_id __initdata pci_eisa_pci_tbl[] = { +static struct pci_device_id pci_eisa_pci_tbl[] = { { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_BRIDGE_EISA << 8, 0xffff00, 0 }, { 0, } }; -static struct pci_driver __initdata pci_eisa_driver = { +static struct pci_driver __refdata pci_eisa_driver = { .name = "pci_eisa", .id_table = pci_eisa_pci_tbl, .probe = pci_eisa_init, diff --git a/drivers/of/base.c b/drivers/of/base.c index fb28b5af733b..3ff22e32b602 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -17,39 +17,14 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <linux/ctype.h> #include <linux/module.h> #include <linux/of.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/proc_fs.h> -/** - * struct alias_prop - Alias property in 'aliases' node - * @link: List node to link the structure in aliases_lookup list - * @alias: Alias property name - * @np: Pointer to device_node that the alias stands for - * @id: Index value from end of alias name - * @stem: Alias string without the index - * - * The structure represents one alias property of 'aliases' node as - * an entry in aliases_lookup list. - */ -struct alias_prop { - struct list_head link; - const char *alias; - struct device_node *np; - int id; - char stem[0]; -}; - -static LIST_HEAD(aliases_lookup); - struct device_node *allnodes; struct device_node *of_chosen; -struct device_node *of_aliases; - -static DEFINE_MUTEX(of_aliases_mutex); /* use when traversing tree through the allnext, child, sibling, * or parent members of struct device_node. @@ -1013,108 +988,3 @@ out_unlock: } #endif /* defined(CONFIG_OF_DYNAMIC) */ -static void of_alias_add(struct alias_prop *ap, struct device_node *np, - int id, const char *stem, int stem_len) -{ - ap->id = id; - ap->np = np; - strncpy(ap->stem, stem, stem_len); - ap->stem[stem_len] = 0; - list_add_tail(&ap->link, &aliases_lookup); - pr_debug("adding DT alias:%s: stem=%s id=%i node=%s\n", - ap->alias, ap->stem, ap->id, np ? np->full_name : NULL); -} - -/** - * of_alias_scan() - Scan all properties of 'aliases' node - * - * The function scans all the properties of 'aliases' node and populate - * the global lookup table with the properties. It returns the - * number of alias_prop found, or error code in error case. - */ -__init void of_alias_scan(void) -{ - struct property *pp; - - if (!of_aliases) - return; - - for_each_property(pp, of_aliases->properties) { - const char *start = pp->name; - const char *end = start + strlen(start); - struct device_node *np; - struct alias_prop *ap; - int id, len; - - /* Skip those we do not want to proceed */ - if (!strcmp(pp->name, "name") || - !strcmp(pp->name, "phandle") || - !strcmp(pp->name, "linux,phandle")) - continue; - - np = of_find_node_by_path(pp->value); - if (!np) - continue; - - /* walk alias backwards to extract the id and 'stem' string */ - while (isdigit(*(end-1)) && end > start) - end--; - len = end - start; - id = strlen(end) ? simple_strtoul(end, NULL, 10) : -1; - - /* Allocate an alias_prop with enough space for the stem */ - ap = early_init_dt_alloc_memory_arch(sizeof(*ap) + len + 1, 4); - if (!ap) - continue; - ap->alias = start; - of_alias_add(ap, np, id, start, len); - } -} - -/** - * of_alias_get_id() - Get alias id for the given device_node - * @np: Pointer to the given device_node - * @stem: Alias stem of the given device_node - * - * The function travels the lookup table to get alias id for the given - * device_node and alias stem. It returns the alias id if find it. - * If not, dynamically creates one in the lookup table and returns it, - * or returns error code if fail to create. - */ -int of_alias_get_id(struct device_node *np, const char *stem) -{ - struct alias_prop *app; - int id = 0; - bool found = false; - - mutex_lock(&of_aliases_mutex); - list_for_each_entry(app, &aliases_lookup, link) { - if (strcmp(app->stem, stem) != 0) - continue; - - if (np == app->np) { - found = true; - id = app->id; - break; - } - - if (id <= app->id) - id = app->id + 1; - } - - /* If an id is not found, then allocate a new one */ - if (!found) { - app = kzalloc(sizeof(*app) + strlen(stem) + 1, 4); - if (!app) { - id = -ENODEV; - goto out; - } - of_alias_add(app, np, id, stem, strlen(stem)); - } - - out: - mutex_unlock(&of_aliases_mutex); - - return id; -} -EXPORT_SYMBOL_GPL(of_alias_get_id); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 13d6d3a96b31..65200af29c52 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -707,12 +707,10 @@ void __init unflatten_device_tree(void) __unflatten_device_tree(initial_boot_params, &allnodes, early_init_dt_alloc_memory_arch); - /* Get pointer to "/chosen" and "/aliasas" nodes for use everywhere */ + /* Get pointer to OF "/chosen" node for use everywhere */ of_chosen = of_find_node_by_path("/chosen"); if (of_chosen == NULL) of_chosen = of_find_node_by_path("/chosen@0"); - of_aliases = of_find_node_by_path("/aliases"); - of_alias_scan(); } #endif /* CONFIG_OF_EARLY_FLATTREE */ diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 432444af7ee4..a1d3ddba99cc 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -24,6 +24,7 @@ #include <linux/mutex.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/vmalloc.h> #include <asm/ccwdev.h> #include <asm/ebcdic.h> @@ -888,11 +889,11 @@ char *dasd_get_user_string(const char __user *user_buf, size_t user_len) { char *buffer; - buffer = kmalloc(user_len + 1, GFP_KERNEL); + buffer = vmalloc(user_len + 1); if (buffer == NULL) return ERR_PTR(-ENOMEM); if (copy_from_user(buffer, user_buf, user_len) != 0) { - kfree(buffer); + vfree(buffer); return ERR_PTR(-EFAULT); } /* got the string, now strip linefeed. */ @@ -930,7 +931,7 @@ static ssize_t dasd_stats_write(struct file *file, dasd_profile_off(prof); } else rc = -EINVAL; - kfree(buffer); + vfree(buffer); return rc; } @@ -1042,7 +1043,7 @@ static ssize_t dasd_stats_global_write(struct file *file, dasd_global_profile_level = DASD_PROFILE_OFF; } else rc = -EINVAL; - kfree(buffer); + vfree(buffer); return rc; } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 30fb979d684d..6e835c9fdfcb 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1461,6 +1461,15 @@ dasd_eckd_check_characteristics(struct dasd_device *device) "Read device characteristic failed, rc=%d", rc); goto out_err3; } + + if ((device->features & DASD_FEATURE_USERAW) && + !(private->rdc_data.facilities.RT_in_LR)) { + dev_err(&device->cdev->dev, "The storage server does not " + "support raw-track access\n"); + rc = -EINVAL; + goto out_err3; + } + /* find the valid cylinder size */ if (private->rdc_data.no_cyl == LV_COMPAT_CYL && private->rdc_data.long_no_cyl) diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 6c3c5364d082..e12989fff4ff 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -312,14 +312,14 @@ static ssize_t dasd_stats_proc_write(struct file *file, pr_info("The statistics have been reset\n"); } else goto out_parse_error; - kfree(buffer); + vfree(buffer); return user_len; out_parse_error: rc = -EINVAL; pr_warning("%s is not a supported value for /proc/dasd/statistics\n", str); out_error: - kfree(buffer); + vfree(buffer); return rc; #else pr_warning("/proc/dasd/statistics: is not activated in this kernel\n"); diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c index 7ad30e72f868..5f9f929e891c 100644 --- a/drivers/s390/char/sclp_async.c +++ b/drivers/s390/char/sclp_async.c @@ -82,12 +82,9 @@ static int proc_handler_callhome(struct ctl_table *ctl, int write, return -EFAULT; } else { len = *count; - rc = copy_from_user(buf, buffer, sizeof(buf)); - if (rc != 0) - return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; - if (strict_strtoul(buf, 0, &val) != 0) - return -EINVAL; + rc = kstrtoul_from_user(buffer, len, 0, &val); + if (rc) + return rc; if (val != 0 && val != 1) return -EINVAL; callhome_enabled = val; diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 7bc643f3f5ab..e5c966462c5a 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -14,6 +14,8 @@ #include "chsc.h" #define QDIO_BUSY_BIT_PATIENCE (100 << 12) /* 100 microseconds */ +#define QDIO_BUSY_BIT_RETRY_DELAY 10 /* 10 milliseconds */ +#define QDIO_BUSY_BIT_RETRIES 1000 /* = 10s retry time */ #define QDIO_INPUT_THRESHOLD (500 << 12) /* 500 microseconds */ /* diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index f8b03a636e49..0e615cb912d0 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -188,19 +188,13 @@ static ssize_t qperf_seq_write(struct file *file, const char __user *ubuf, struct qdio_irq *irq_ptr = seq->private; struct qdio_q *q; unsigned long val; - char buf[8]; int ret, i; if (!irq_ptr) return 0; - if (count >= sizeof(buf)) - return -EINVAL; - if (copy_from_user(&buf, ubuf, count)) - return -EFAULT; - buf[count] = 0; - - ret = strict_strtoul(buf, 10, &val); - if (ret < 0) + + ret = kstrtoul_from_user(ubuf, count, 10, &val); + if (ret) return ret; switch (val) { diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index e58169c32474..288c9140290e 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -313,7 +313,7 @@ static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit) unsigned long schid = *((u32 *) &q->irq_ptr->schid); unsigned int fc = QDIO_SIGA_WRITE; u64 start_time = 0; - int cc; + int retries = 0, cc; if (is_qebsm(q)) { schid = q->irq_ptr->sch_token; @@ -325,6 +325,7 @@ again: /* hipersocket busy condition */ if (unlikely(*busy_bit)) { WARN_ON(queue_type(q) != QDIO_IQDIO_QFMT || cc != 2); + retries++; if (!start_time) { start_time = get_clock(); @@ -333,6 +334,11 @@ again: if ((get_clock() - start_time) < QDIO_BUSY_BIT_PATIENCE) goto again; } + if (retries) { + DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, + "%4x cc2 BB1:%1d", SCH_NO(q), q->nr); + DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "count:%u", retries); + } return cc; } @@ -728,13 +734,14 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q) static int qdio_kick_outbound_q(struct qdio_q *q) { + int retries = 0, cc; unsigned int busy_bit; - int cc; if (!need_siga_out(q)) return 0; DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w:%1d", q->nr); +retry: qperf_inc(q, siga_write); cc = qdio_siga_output(q, &busy_bit); @@ -743,7 +750,11 @@ static int qdio_kick_outbound_q(struct qdio_q *q) break; case 2: if (busy_bit) { - DBF_ERROR("%4x cc2 REP:%1d", SCH_NO(q), q->nr); + while (++retries < QDIO_BUSY_BIT_RETRIES) { + mdelay(QDIO_BUSY_BIT_RETRY_DELAY); + goto retry; + } + DBF_ERROR("%4x cc2 BBC:%1d", SCH_NO(q), q->nr); cc |= QDIO_ERROR_SIGA_BUSY; } else DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w cc2:%1d", q->nr); @@ -753,6 +764,10 @@ static int qdio_kick_outbound_q(struct qdio_q *q) DBF_ERROR("%4x SIGA-W:%1d", SCH_NO(q), cc); break; } + if (retries) { + DBF_ERROR("%4x cc2 BB2:%1d", SCH_NO(q), q->nr); + DBF_ERROR("count:%u", retries); + } return cc; } diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 827db7654594..7e91b3d368cd 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1286,22 +1286,17 @@ static int serial_imx_resume(struct platform_device *dev) static int serial_imx_probe_dt(struct imx_port *sport, struct platform_device *pdev) { + static int portnum = 0; struct device_node *np = pdev->dev.of_node; const struct of_device_id *of_id = of_match_device(imx_uart_dt_ids, &pdev->dev); - int ret; if (!np) return -ENODEV; - ret = of_alias_get_id(np, "serial"); - if (ret < 0) { - pr_err("%s: failed to get alias id, errno %d\n", - __func__, ret); - return -ENODEV; - } else { - sport->port.line = ret; - } + sport->port.line = portnum++; + if (sport->port.line >= UART_NR) + return -EINVAL; if (of_get_property(np, "fsl,uart-has-rtscts", NULL)) sport->have_rtscts = 1; diff --git a/fs/block_dev.c b/fs/block_dev.c index f28680553288..ff77262e887c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -387,6 +387,10 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) struct inode *bd_inode = filp->f_mapping->host; struct block_device *bdev = I_BDEV(bd_inode); int error; + + error = filemap_write_and_wait_range(filp->f_mapping, start, end); + if (error) + return error; /* * There is no need to serialise calls to blkdev_issue_flush with diff --git a/fs/namei.c b/fs/namei.c index 445fd5da11fa..3d607bd80e09 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -179,19 +179,14 @@ static int check_acl(struct inode *inode, int mask) #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *acl; - /* - * Under RCU walk, we cannot even do a "get_cached_acl()", - * because that involves locking and getting a refcount on - * a cached ACL. - * - * So the only case we handle during RCU walking is the - * case of a cached "no ACL at all", which needs no locks - * or refcounts. - */ if (mask & MAY_NOT_BLOCK) { - if (negative_cached_acl(inode, ACL_TYPE_ACCESS)) + acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS); + if (!acl) return -EAGAIN; - return -ECHILD; + /* no ->get_acl() calls in RCU mode... */ + if (acl == ACL_NOT_CACHED) + return -ECHILD; + return posix_acl_permission(inode, acl, mask); } acl = get_cached_acl(inode, ACL_TYPE_ACCESS); diff --git a/include/linux/of.h b/include/linux/of.h index bc3dc6399547..0085bb01c041 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -68,7 +68,6 @@ struct device_node { /* Pointer for first entry in chain of all nodes. */ extern struct device_node *allnodes; extern struct device_node *of_chosen; -extern struct device_node *of_aliases; extern rwlock_t devtree_lock; static inline bool of_have_populated_dt(void) @@ -210,9 +209,6 @@ extern int of_device_is_available(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, const char *name, int *lenp); -#define for_each_property(pp, properties) \ - for (pp = properties; pp != NULL; pp = pp->next) - extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( @@ -225,10 +221,6 @@ extern int of_parse_phandles_with_args(struct device_node *np, const char *list_name, const char *cells_name, int index, struct device_node **out_node, const void **out_args); -extern void *early_init_dt_alloc_memory_arch(u64 size, u64 align); -extern void of_alias_scan(void); -extern int of_alias_get_id(struct device_node *np, const char *stem); - extern int of_machine_is_compatible(const char *compat); extern int prom_add_property(struct device_node* np, struct property* prop); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index b74b74ffe0e7..c84d900fbbb3 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -97,6 +97,7 @@ extern void early_init_dt_check_for_initrd(unsigned long node); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); extern void early_init_dt_add_memory_arch(u64 base, u64 size); +extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); extern u64 dt_mem_next_cell(int s, __be32 **cellp); /* diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b00c4ec5056e..ae96bbe54518 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2709,6 +2709,16 @@ #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f +#define PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB3 0x3c23 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB4 0x3c24 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB5 0x3c25 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB6 0x3c26 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27 +#define PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e +#define PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 951bba82d50d..b7681102a4b9 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -9,6 +9,7 @@ #define __LINUX_POSIX_ACL_H #include <linux/slab.h> +#include <linux/rcupdate.h> #define ACL_UNDEFINED_ID (-1) @@ -38,7 +39,10 @@ struct posix_acl_entry { }; struct posix_acl { - atomic_t a_refcount; + union { + atomic_t a_refcount; + struct rcu_head a_rcu; + }; unsigned int a_count; struct posix_acl_entry a_entries[0]; }; @@ -65,7 +69,7 @@ static inline void posix_acl_release(struct posix_acl *acl) { if (acl && atomic_dec_and_test(&acl->a_refcount)) - kfree(acl); + kfree_rcu(acl, a_rcu); } @@ -84,20 +88,22 @@ extern struct posix_acl *get_posix_acl(struct inode *, int); extern int set_posix_acl(struct inode *, int, struct posix_acl *); #ifdef CONFIG_FS_POSIX_ACL -static inline struct posix_acl *get_cached_acl(struct inode *inode, int type) +static inline struct posix_acl **acl_by_type(struct inode *inode, int type) { - struct posix_acl **p, *acl; switch (type) { case ACL_TYPE_ACCESS: - p = &inode->i_acl; - break; + return &inode->i_acl; case ACL_TYPE_DEFAULT: - p = &inode->i_default_acl; - break; + return &inode->i_default_acl; default: - return ERR_PTR(-EINVAL); + BUG(); } - acl = ACCESS_ONCE(*p); +} + +static inline struct posix_acl *get_cached_acl(struct inode *inode, int type) +{ + struct posix_acl **p = acl_by_type(inode, type); + struct posix_acl *acl = ACCESS_ONCE(*p); if (acl) { spin_lock(&inode->i_lock); acl = *p; @@ -108,41 +114,20 @@ static inline struct posix_acl *get_cached_acl(struct inode *inode, int type) return acl; } -static inline int negative_cached_acl(struct inode *inode, int type) +static inline struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type) { - struct posix_acl **p, *acl; - switch (type) { - case ACL_TYPE_ACCESS: - p = &inode->i_acl; - break; - case ACL_TYPE_DEFAULT: - p = &inode->i_default_acl; - break; - default: - BUG(); - } - acl = ACCESS_ONCE(*p); - if (acl) - return 0; - return 1; + return rcu_dereference(*acl_by_type(inode, type)); } static inline void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) { - struct posix_acl *old = NULL; + struct posix_acl **p = acl_by_type(inode, type); + struct posix_acl *old; spin_lock(&inode->i_lock); - switch (type) { - case ACL_TYPE_ACCESS: - old = inode->i_acl; - inode->i_acl = posix_acl_dup(acl); - break; - case ACL_TYPE_DEFAULT: - old = inode->i_default_acl; - inode->i_default_acl = posix_acl_dup(acl); - break; - } + old = *p; + rcu_assign_pointer(*p, posix_acl_dup(acl)); spin_unlock(&inode->i_lock); if (old != ACL_NOT_CACHED) posix_acl_release(old); @@ -150,18 +135,11 @@ static inline void set_cached_acl(struct inode *inode, static inline void forget_cached_acl(struct inode *inode, int type) { - struct posix_acl *old = NULL; + struct posix_acl **p = acl_by_type(inode, type); + struct posix_acl *old; spin_lock(&inode->i_lock); - switch (type) { - case ACL_TYPE_ACCESS: - old = inode->i_acl; - inode->i_acl = ACL_NOT_CACHED; - break; - case ACL_TYPE_DEFAULT: - old = inode->i_default_acl; - inode->i_default_acl = ACL_NOT_CACHED; - break; - } + old = *p; + *p = ACL_NOT_CACHED; spin_unlock(&inode->i_lock); if (old != ACL_NOT_CACHED) posix_acl_release(old); diff --git a/ipc/shm.c b/ipc/shm.c index b5bae9d945b6..02ecf2c078fc 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -105,9 +105,16 @@ void shm_exit_ns(struct ipc_namespace *ns) } #endif -void __init shm_init (void) +static int __init ipc_ns_init(void) { shm_init_ns(&init_ipc_ns); + return 0; +} + +pure_initcall(ipc_ns_init); + +void __init shm_init (void) +{ ipc_init_proc_interface("sysvipc/shm", #if BITS_PER_LONG <= 32 " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", diff --git a/kernel/futex.c b/kernel/futex.c index 0a308970c24a..11cbe052b2e8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -218,6 +218,8 @@ static void drop_futex_key_refs(union futex_key *key) * @uaddr: virtual address of the futex * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED * @key: address where result is stored. + * @rw: mapping needs to be read/write (values: VERIFY_READ, + * VERIFY_WRITE) * * Returns a negative error code or 0 * The key words are stored in *key on success. @@ -229,12 +231,12 @@ static void drop_futex_key_refs(union futex_key *key) * lock_page() might sleep, the caller should not hold a spinlock. */ static int -get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) +get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; struct page *page, *page_head; - int err; + int err, ro = 0; /* * The futex address must be "naturally" aligned. @@ -262,8 +264,18 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) again: err = get_user_pages_fast(address, 1, 1, &page); + /* + * If write access is not required (eg. FUTEX_WAIT), try + * and get read-only access. + */ + if (err == -EFAULT && rw == VERIFY_READ) { + err = get_user_pages_fast(address, 1, 0, &page); + ro = 1; + } if (err < 0) return err; + else + err = 0; #ifdef CONFIG_TRANSPARENT_HUGEPAGE page_head = page; @@ -305,6 +317,13 @@ again: if (!page_head->mapping) { unlock_page(page_head); put_page(page_head); + /* + * ZERO_PAGE pages don't have a mapping. Avoid a busy loop + * trying to find one. RW mapping would have COW'd (and thus + * have a mapping) so this page is RO and won't ever change. + */ + if ((page_head == ZERO_PAGE(address))) + return -EFAULT; goto again; } @@ -316,6 +335,15 @@ again: * the object not the particular process. */ if (PageAnon(page_head)) { + /* + * A RO anonymous page will never change and thus doesn't make + * sense for futex operations. + */ + if (ro) { + err = -EFAULT; + goto out; + } + key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; @@ -327,9 +355,10 @@ again: get_futex_key_refs(key); +out: unlock_page(page_head); put_page(page_head); - return 0; + return err; } static inline void put_futex_key(union futex_key *key) @@ -940,7 +969,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_READ); if (unlikely(ret != 0)) goto out; @@ -986,10 +1015,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int ret, op_ret; retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out_put_key1; @@ -1243,10 +1272,11 @@ retry: pi_state = NULL; } - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1); + ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, + requeue_pi ? VERIFY_WRITE : VERIFY_READ); if (unlikely(ret != 0)) goto out_put_key1; @@ -1790,7 +1820,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, * while the syscall executes. */ retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, VERIFY_READ); if (unlikely(ret != 0)) return ret; @@ -1941,7 +1971,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect, } retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; @@ -2060,7 +2090,7 @@ retry: if ((uval & FUTEX_TID_MASK) != vpid) return -EPERM; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); + ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; @@ -2249,7 +2279,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, debug_rt_mutex_init_waiter(&rt_waiter); rt_waiter.task = NULL; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); + ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 3956f5149e25..8c24294e477f 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2468,7 +2468,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) BUG_ON(usage_bit >= LOCK_USAGE_STATES); - if (hlock_class(hlock)->key == &__lockdep_no_validate__) + if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) continue; if (!mark_lock(curr, hlock, usage_bit)) @@ -2485,23 +2485,9 @@ static void __trace_hardirqs_on_caller(unsigned long ip) { struct task_struct *curr = current; - if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) - return; - - if (unlikely(curr->hardirqs_enabled)) { - /* - * Neither irq nor preemption are disabled here - * so this is racy by nature but losing one hit - * in a stat is not a big deal. - */ - __debug_atomic_inc(redundant_hardirqs_on); - return; - } /* we'll do an OFF -> ON transition: */ curr->hardirqs_enabled = 1; - if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) - return; /* * We are going to turn hardirqs on, so set the * usage bit for all held locks: @@ -2529,9 +2515,25 @@ void trace_hardirqs_on_caller(unsigned long ip) if (unlikely(!debug_locks || current->lockdep_recursion)) return; + if (unlikely(current->hardirqs_enabled)) { + /* + * Neither irq nor preemption are disabled here + * so this is racy by nature but losing one hit + * in a stat is not a big deal. + */ + __debug_atomic_inc(redundant_hardirqs_on); + return; + } + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; + if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) + return; + + if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + return; + current->lockdep_recursion = 1; __trace_hardirqs_on_caller(ip); current->lockdep_recursion = 0; @@ -2872,10 +2874,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass) { - int i; - - for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) - lock->class_cache[i] = NULL; + memset(lock, 0, sizeof(*lock)); #ifdef CONFIG_LOCK_STAT lock->cpu = raw_smp_processor_id(); diff --git a/mm/slab.c b/mm/slab.c index 95947400702b..6d90a091fdca 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -622,6 +622,51 @@ int slab_is_available(void) static struct lock_class_key on_slab_l3_key; static struct lock_class_key on_slab_alc_key; +static struct lock_class_key debugobj_l3_key; +static struct lock_class_key debugobj_alc_key; + +static void slab_set_lock_classes(struct kmem_cache *cachep, + struct lock_class_key *l3_key, struct lock_class_key *alc_key, + int q) +{ + struct array_cache **alc; + struct kmem_list3 *l3; + int r; + + l3 = cachep->nodelists[q]; + if (!l3) + return; + + lockdep_set_class(&l3->list_lock, l3_key); + alc = l3->alien; + /* + * FIXME: This check for BAD_ALIEN_MAGIC + * should go away when common slab code is taught to + * work even without alien caches. + * Currently, non NUMA code returns BAD_ALIEN_MAGIC + * for alloc_alien_cache, + */ + if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) + return; + for_each_node(r) { + if (alc[r]) + lockdep_set_class(&alc[r]->lock, alc_key); + } +} + +static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) +{ + slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node); +} + +static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) +{ + int node; + + for_each_online_node(node) + slab_set_debugobj_lock_classes_node(cachep, node); +} + static void init_node_lock_keys(int q) { struct cache_sizes *s = malloc_sizes; @@ -630,29 +675,14 @@ static void init_node_lock_keys(int q) return; for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { - struct array_cache **alc; struct kmem_list3 *l3; - int r; l3 = s->cs_cachep->nodelists[q]; if (!l3 || OFF_SLAB(s->cs_cachep)) continue; - lockdep_set_class(&l3->list_lock, &on_slab_l3_key); - alc = l3->alien; - /* - * FIXME: This check for BAD_ALIEN_MAGIC - * should go away when common slab code is taught to - * work even without alien caches. - * Currently, non NUMA code returns BAD_ALIEN_MAGIC - * for alloc_alien_cache, - */ - if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) - continue; - for_each_node(r) { - if (alc[r]) - lockdep_set_class(&alc[r]->lock, - &on_slab_alc_key); - } + + slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key, + &on_slab_alc_key, q); } } @@ -671,6 +701,14 @@ static void init_node_lock_keys(int q) static inline void init_lock_keys(void) { } + +static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node) +{ +} + +static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) +{ +} #endif /* @@ -1264,6 +1302,8 @@ static int __cpuinit cpuup_prepare(long cpu) spin_unlock_irq(&l3->list_lock); kfree(shared); free_alien_cache(alien); + if (cachep->flags & SLAB_DEBUG_OBJECTS) + slab_set_debugobj_lock_classes_node(cachep, node); } init_node_lock_keys(node); @@ -1626,6 +1666,9 @@ void __init kmem_cache_init_late(void) { struct kmem_cache *cachep; + /* Annotate slab for lockdep -- annotate the malloc caches */ + init_lock_keys(); + /* 6) resize the head arrays to their final sizes */ mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) @@ -1636,9 +1679,6 @@ void __init kmem_cache_init_late(void) /* Done! */ g_cpucache_up = FULL; - /* Annotate slab for lockdep -- annotate the malloc caches */ - init_lock_keys(); - /* * Register a cpu startup notifier callback that initializes * cpu_cache_get for all new cpus @@ -2426,6 +2466,16 @@ kmem_cache_create (const char *name, size_t size, size_t align, goto oops; } + if (flags & SLAB_DEBUG_OBJECTS) { + /* + * Would deadlock through slab_destroy()->call_rcu()-> + * debug_object_activate()->kmem_cache_alloc(). + */ + WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU); + + slab_set_debugobj_lock_classes(cachep); + } + /* cache setup completed, link it into the list */ list_add(&cachep->next, &cache_chain); oops: |