163 files changed, 3699 insertions, 947 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4d5a4fe22703..2d31d811a52d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -22,11 +22,13 @@ default: 0 acpi_backlight= [HW,ACPI] - acpi_backlight=vendor - acpi_backlight=video - If set to vendor, prefer vendor specific driver + { vendor | video | native | none } + If set to vendor, prefer vendor-specific driver (e.g. thinkpad_acpi, sony_acpi, etc.) instead of the ACPI video.ko driver. + If set to video, use the ACPI video.ko driver. + If set to native, use the device's native backlight mode. + If set to none, disable the ACPI backlight interface. acpi_force_32bit_fadt_addr force FADT to use 32 bit addresses rather than the @@ -3720,6 +3722,9 @@ Override pmtimer IOPort with a hex value. e.g. pmtmr=0x508 + pm_debug_messages [SUSPEND,KNL] + Enable suspend/resume debug messages during boot up. + pnp.debug=1 [PNP] Enable PNP debug messages (depends on the CONFIG_PNP_DEBUG_MESSAGES option). Change at run-time diff --git a/Documentation/admin-guide/pm/suspend-flows.rst b/Documentation/admin-guide/pm/suspend-flows.rst new file mode 100644 index 000000000000..c479d7462647 --- /dev/null +++ b/Documentation/admin-guide/pm/suspend-flows.rst @@ -0,0 +1,270 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: <isonum.txt> + +========================= +System Suspend Code Flows +========================= + +:Copyright: |copy| 2020 Intel Corporation + +:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + +At least one global system-wide transition needs to be carried out for the +system to get from the working state into one of the supported +:doc:`sleep states <sleep-states>`. Hibernation requires more than one +transition to occur for this purpose, but the other sleep states, commonly +referred to as *system-wide suspend* (or simply *system suspend*) states, need +only one. + +For those sleep states, the transition from the working state of the system into +the target sleep state is referred to as *system suspend* too (in the majority +of cases, whether this means a transition or a sleep state of the system should +be clear from the context) and the transition back from the sleep state into the +working state is referred to as *system resume*. + +The kernel code flows associated with the suspend and resume transitions for +different sleep states of the system are quite similar, but there are some +significant differences between the :ref:`suspend-to-idle <s2idle>` code flows +and the code flows related to the :ref:`suspend-to-RAM <s2ram>` and +:ref:`standby <standby>` sleep states. + +The :ref:`suspend-to-RAM <s2ram>` and :ref:`standby <standby>` sleep states +cannot be implemented without platform support and the difference between them +boils down to the platform-specific actions carried out by the suspend and +resume hooks that need to be provided by the platform driver to make them +available. Apart from that, the suspend and resume code flows for these sleep +states are mostly identical, so they both together will be referred to as +*platform-dependent suspend* states in what follows. + + +.. _s2idle_suspend: + +Suspend-to-idle Suspend Code Flow +================================= + +The following steps are taken in order to transition the system from the working +state to the :ref:`suspend-to-idle <s2idle>` sleep state: + + 1. Invoking system-wide suspend notifiers. 
+ + Kernel subsystems can register callbacks to be invoked when the suspend + transition is about to occur and when the resume transition has finished. + + That allows them to prepare for the change of the system state and to clean + up after getting back to the working state. + + 2. Freezing tasks. + + Tasks are frozen primarily in order to avoid unchecked hardware accesses + from user space through MMIO regions or I/O registers exposed directly to + it and to prevent user space from entering the kernel while the next step + of the transition is in progress (which might have been problematic for + various reasons). + + All user space tasks are intercepted as though they were sent a signal and + put into uninterruptible sleep until the end of the subsequent system resume + transition. + + The kernel threads that choose to be frozen during system suspend for + specific reasons are frozen subsequently, but they are not intercepted. + Instead, they are expected to periodically check whether or not they need + to be frozen and to put themselves into uninterruptible sleep if so. [Note, + however, that kernel threads can use locking and other concurrency controls + available in kernel space to synchronize themselves with system suspend and + resume, which can be much more precise than the freezing, so the latter is + not a recommended option for kernel threads.] + + 3. Suspending devices and reconfiguring IRQs. + + Devices are suspended in four phases called *prepare*, *suspend*, + *late suspend* and *noirq suspend* (see :ref:`driverapi_pm_devices` for more + information on what exactly happens in each phase). + + Every device is visited in each phase, but typically it is not physically + accessed in more than two of them. + + The runtime PM API is disabled for every device during the *late* suspend + phase and high-level ("action") interrupt handlers are prevented from being + invoked before the *noirq* suspend phase. + + Interrupts are still handled after that, but they are only acknowledged to + interrupt controllers without performing any device-specific actions that + would be triggered in the working state of the system (those actions are + deferred till the subsequent system resume transition as described + `below <s2idle_resume_>`_). + + IRQs associated with system wakeup devices are "armed" so that the resume + transition of the system is started when one of them signals an event. + + 4. Freezing the scheduler tick and suspending timekeeping. + + When all devices have been suspended, CPUs enter the idle loop and are put + into the deepest available idle state. While doing that, each of them + "freezes" its own scheduler tick so that the timer events associated with + the tick do not occur until the CPU is woken up by another interrupt source. + + The last CPU to enter the idle state also stops the timekeeping which + (among other things) prevents high resolution timers from triggering going + forward until the first CPU that is woken up restarts the timekeeping. + That allows the CPUs to stay in the deep idle state relatively long in one + go. + + From this point on, the CPUs can only be woken up by non-timer hardware + interrupts. If that happens, they go back to the idle state unless the + interrupt that woke up one of them comes from an IRQ that has been armed for + system wakeup, in which case the system resume transition is started. + + +.. 
_s2idle_resume: + +Suspend-to-idle Resume Code Flow +================================ + +The following steps are taken in order to transition the system from the +:ref:`suspend-to-idle <s2idle>` sleep state into the working state: + + 1. Resuming timekeeping and unfreezing the scheduler tick. + + When one of the CPUs is woken up (by a non-timer hardware interrupt), it + leaves the idle state entered in the last step of the preceding suspend + transition, restarts the timekeeping (unless it has been restarted already + by another CPU that woke up earlier) and the scheduler tick on that CPU is + unfrozen. + + If the interrupt that has woken up the CPU was armed for system wakeup, + the system resume transition begins. + + 2. Resuming devices and restoring the working-state configuration of IRQs. + + Devices are resumed in four phases called *noirq resume*, *early resume*, + *resume* and *complete* (see :ref:`driverapi_pm_devices` for more + information on what exactly happens in each phase). + + Every device is visited in each phase, but typically it is not physically + accessed in more than two of them. + + The working-state configuration of IRQs is restored after the *noirq* resume + phase and the runtime PM API is re-enabled for every device whose driver + supports it during the *early* resume phase. + + 3. Thawing tasks. + + Tasks frozen in step 2 of the preceding `suspend <s2idle_suspend_>`_ + transition are "thawed", which means that they are woken up from the + uninterruptible sleep that they went into at that time and user space tasks + are allowed to exit the kernel. + + 4. Invoking system-wide resume notifiers. + + This is analogous to step 1 of the `suspend <s2idle_suspend_>`_ transition + and the same set of callbacks is invoked at this point, but a different + "notification type" parameter value is passed to them. + + +Platform-dependent Suspend Code Flow +==================================== + +The following steps are taken in order to transition the system from the working +state to a platform-dependent suspend state: + + 1. Invoking system-wide suspend notifiers. + + This step is the same as step 1 of the suspend-to-idle suspend transition + described `above <s2idle_suspend_>`_. + + 2. Freezing tasks. + + This step is the same as step 2 of the suspend-to-idle suspend transition + described `above <s2idle_suspend_>`_. + + 3. Suspending devices and reconfiguring IRQs. + + This step is analogous to step 3 of the suspend-to-idle suspend transition + described `above <s2idle_suspend_>`_, but the arming of IRQs for system + wakeup generally does not have any effect on the platform. + + There are platforms that can go into a very deep low-power state internally + when all CPUs in them are in sufficiently deep idle states and all I/O + devices have been put into low-power states. On those platforms, + suspend-to-idle can reduce system power very effectively. + + On the other platforms, however, low-level components (like interrupt + controllers) need to be turned off in a platform-specific way (implemented + in the hooks provided by the platform driver) to achieve comparable power + reduction. + + That usually prevents in-band hardware interrupts from waking up the system, + so waking it up must then be arranged in a special platform-dependent way. + Then, the configuration of system wakeup sources usually starts when system + wakeup devices are suspended and is finalized by the platform suspend hooks + later on. + + 4. Disabling non-boot CPUs. 
+ + On some platforms the suspend hooks mentioned above must run in a one-CPU + configuration of the system (in particular, the hardware cannot be accessed + by any code running in parallel with the platform suspend hooks that may, + and often do, trap into the platform firmware in order to finalize the + suspend transition). + + For this reason, the CPU offline/online (CPU hotplug) framework is used + to take all of the CPUs in the system, except for one (the boot CPU), + offline (typically, the CPUs that have been taken offline go into deep idle + states). + + This means that all tasks are migrated away from those CPUs and all IRQs are + rerouted to the only CPU that remains online. + + 5. Suspending core system components. + + This prepares the core system components for (possibly) losing power going + forward and suspends the timekeeping. + + 6. Platform-specific power removal. + + This is expected to remove power from all of the system components except + for the memory controller and RAM (in order to preserve the contents of the + latter) and some devices designated for system wakeup. + + In many cases control is passed to the platform firmware, which is expected + to finalize the suspend transition as needed. + + +Platform-dependent Resume Code Flow +=================================== + +The following steps are taken in order to transition the system from a +platform-dependent suspend state into the working state: + + 1. Platform-specific system wakeup. + + The platform is woken up by a signal from one of the designated system + wakeup devices (which need not be an in-band hardware interrupt) and + control is passed back to the kernel (the working configuration of the + platform may need to be restored by the platform firmware before the + kernel gets control again). + + 2. Resuming core system components. + + The suspend-time configuration of the core system components is restored and + the timekeeping is resumed. + + 3. Re-enabling non-boot CPUs. + + The CPUs disabled in step 4 of the preceding suspend transition are taken + back online and their suspend-time configuration is restored. + + 4. Resuming devices and restoring the working-state configuration of IRQs. + + This step is the same as step 2 of the suspend-to-idle resume transition + described `above <s2idle_resume_>`_. + + 5. Thawing tasks. + + This step is the same as step 3 of the suspend-to-idle resume transition + described `above <s2idle_resume_>`_. + + 6. Invoking system-wide resume notifiers. + + This step is the same as step 4 of the suspend-to-idle resume transition + described `above <s2idle_resume_>`_. 
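As an illustration of the suspend and resume notifier steps described above, a kernel subsystem can hook both transitions with register_pm_notifier() from <linux/suspend.h>. The following is a minimal sketch only, not part of the patch set below; the "demo" module name and messages are placeholders::

    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <linux/suspend.h>

    /* @action carries the "notification type" the flows above refer to. */
    static int demo_pm_notify(struct notifier_block *nb,
                              unsigned long action, void *data)
    {
        switch (action) {
        case PM_SUSPEND_PREPARE:  /* suspend step 1: transition about to occur */
            pr_info("demo: preparing for system suspend\n");
            break;
        case PM_POST_SUSPEND:     /* resume final step: transition has finished */
            pr_info("demo: cleaning up after system resume\n");
            break;
        }
        return NOTIFY_OK;
    }

    static struct notifier_block demo_pm_nb = {
        .notifier_call = demo_pm_notify,
    };

    static int __init demo_init(void)
    {
        return register_pm_notifier(&demo_pm_nb);
    }

    static void __exit demo_exit(void)
    {
        unregister_pm_notifier(&demo_pm_nb);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");

Note that PM_SUSPEND_PREPARE is delivered before tasks are frozen (step 2 of the suspend flows), so the callback still runs in process context and may sleep; PM_POST_SUSPEND is delivered after tasks have been thawed.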
diff --git a/Documentation/admin-guide/pm/system-wide.rst b/Documentation/admin-guide/pm/system-wide.rst index 2b1f987b34f0..1a1924d71006 100644 --- a/Documentation/admin-guide/pm/system-wide.rst +++ b/Documentation/admin-guide/pm/system-wide.rst @@ -8,3 +8,4 @@ System-Wide Power Management :maxdepth: 2 sleep-states + suspend-flows diff --git a/Documentation/devicetree/bindings/net/marvell,mvusb.yaml b/Documentation/devicetree/bindings/net/marvell,mvusb.yaml index 9458f6659be1..68573762294b 100644 --- a/Documentation/devicetree/bindings/net/marvell,mvusb.yaml +++ b/Documentation/devicetree/bindings/net/marvell,mvusb.yaml @@ -38,28 +38,27 @@ required: examples: - | /* USB host controller */ - &usb1 { - mvusb: mdio@1 { + usb { + #address-cells = <1>; + #size-cells = <0>; + + mdio@1 { compatible = "usb1286,1fa4"; reg = <1>; #address-cells = <1>; #size-cells = <0>; - }; - }; - /* MV88E6390X devboard */ - &mvusb { - switch@0 { - compatible = "marvell,mv88e6190"; - status = "ok"; - reg = <0x0>; + switch@0 { + compatible = "marvell,mv88e6190"; + reg = <0x0>; - ports { - /* Port definitions */ - }; + ports { + /* Port definitions */ + }; - mdio { - /* PHY definitions */ + mdio { + /* PHY definitions */ + }; }; }; }; diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 047427f71d83..94545d50d40f 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -3,6 +3,7 @@ config CSKY def_bool y select ARCH_32BIT_OFF_T select ARCH_HAS_DMA_PREP_COHERENT + select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_USE_BUILTIN_BSWAP @@ -38,16 +39,22 @@ config CSKY select HAVE_ARCH_AUDITSYSCALL select HAVE_COPY_THREAD_TLS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FTRACE_MCOUNT_RECORD select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO select HAVE_KERNEL_LZMA + select HAVE_KPROBES if !CPU_CK610 + select HAVE_KPROBES_ON_FTRACE if !CPU_CK610 + select HAVE_KRETPROBES if !CPU_CK610 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_DMA_CONTIGUOUS + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select MAY_HAVE_SPARSE_IRQ @@ -65,6 +72,12 @@ config CSKY select PCI_SYSCALL if PCI select PCI_MSI if PCI +config LOCKDEP_SUPPORT + def_bool y + +config ARCH_SUPPORTS_UPROBES + def_bool y if !CPU_CK610 + config CPU_HAS_CACHEV2 bool diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h index f35a9f3315ee..5056ebb902d1 100644 --- a/arch/csky/abiv1/inc/abi/entry.h +++ b/arch/csky/abiv1/inc/abi/entry.h @@ -172,10 +172,7 @@ addi r6, 0xe cpwcr r6, cpcr30 - lsri r6, 28 - addi r6, 2 - lsli r6, 28 - addi r6, 0xe + movi r6, 0 cpwcr r6, cpcr31 .endm diff --git a/arch/csky/abiv2/fpu.c b/arch/csky/abiv2/fpu.c index 86d187d4e5af..5acc5c2e544e 100644 --- a/arch/csky/abiv2/fpu.c +++ b/arch/csky/abiv2/fpu.c @@ -10,11 +10,6 @@ #define MTCR_DIST 0xC0006420 #define MFCR_DIST 0xC0006020 -void __init init_fpu(void) -{ - mtcr("cr<1, 2>", 0); -} - /* * fpu_libc_helper() is to help libc to execute: * - mfcr %a, cr<1, 2> diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h index 94a7a58765df..a99aff555a0a 100644 --- a/arch/csky/abiv2/inc/abi/entry.h +++ b/arch/csky/abiv2/inc/abi/entry.h @@ -100,6 +100,66 @@ rte .endm +.macro SAVE_REGS_FTRACE + subi sp, 152 + stw tls, (sp, 0) + stw lr, (sp, 4) + + mfcr lr, psr + stw lr, (sp, 12) + + addi lr, sp, 
152 + stw lr, (sp, 16) + + stw a0, (sp, 20) + stw a0, (sp, 24) + stw a1, (sp, 28) + stw a2, (sp, 32) + stw a3, (sp, 36) + + addi sp, 40 + stm r4-r13, (sp) + + addi sp, 40 + stm r16-r30, (sp) +#ifdef CONFIG_CPU_HAS_HILO + mfhi lr + stw lr, (sp, 60) + mflo lr + stw lr, (sp, 64) + mfcr lr, cr14 + stw lr, (sp, 68) +#endif + subi sp, 80 +.endm + +.macro RESTORE_REGS_FTRACE + ldw tls, (sp, 0) + ldw a0, (sp, 16) + mtcr a0, ss0 + +#ifdef CONFIG_CPU_HAS_HILO + ldw a0, (sp, 140) + mthi a0 + ldw a0, (sp, 144) + mtlo a0 + ldw a0, (sp, 148) + mtcr a0, cr14 +#endif + + ldw a0, (sp, 24) + ldw a1, (sp, 28) + ldw a2, (sp, 32) + ldw a3, (sp, 36) + + addi sp, 40 + ldm r4-r13, (sp) + addi sp, 40 + ldm r16-r30, (sp) + addi sp, 72 + mfcr sp, ss0 +.endm + .macro SAVE_SWITCH_STACK subi sp, 64 stm r4-r11, (sp) @@ -230,11 +290,8 @@ addi r6, 0x1ce mtcr r6, cr<30, 15> /* Set MSA0 */ - lsri r6, 28 - addi r6, 2 - lsli r6, 28 - addi r6, 0x1ce - mtcr r6, cr<31, 15> /* Set MSA1 */ + movi r6, 0 + mtcr r6, cr<31, 15> /* Clr MSA1 */ /* enable MMU */ mfcr r6, cr18 diff --git a/arch/csky/abiv2/inc/abi/fpu.h b/arch/csky/abiv2/inc/abi/fpu.h index 22ca3cf2794a..09e2700a3693 100644 --- a/arch/csky/abiv2/inc/abi/fpu.h +++ b/arch/csky/abiv2/inc/abi/fpu.h @@ -9,7 +9,8 @@ int fpu_libc_helper(struct pt_regs *regs); void fpu_fpe(struct pt_regs *regs); -void __init init_fpu(void); + +static inline void init_fpu(void) { mtcr("cr<1, 2>", 0); } void save_to_user_fp(struct user_fp *user_fp); void restore_from_user_fp(struct user_fp *user_fp); diff --git a/arch/csky/abiv2/mcount.S b/arch/csky/abiv2/mcount.S index 326402e65f9e..9331c7ed5958 100644 --- a/arch/csky/abiv2/mcount.S +++ b/arch/csky/abiv2/mcount.S @@ -3,6 +3,8 @@ #include <linux/linkage.h> #include <asm/ftrace.h> +#include <abi/entry.h> +#include <asm/asm-offsets.h> /* * csky-gcc with -pg will put the following asm after prologue: @@ -44,6 +46,22 @@ jmp t1 .endm +.macro mcount_enter_regs + subi sp, 8 + stw lr, (sp, 0) + stw r8, (sp, 4) + SAVE_REGS_FTRACE +.endm + +.macro mcount_exit_regs + RESTORE_REGS_FTRACE + ldw t1, (sp, 0) + ldw r8, (sp, 4) + ldw lr, (sp, 8) + addi sp, 12 + jmp t1 +.endm + .macro save_return_regs subi sp, 16 stw a0, (sp, 0) @@ -122,6 +140,8 @@ ENTRY(ftrace_caller) ldw a0, (sp, 16) subi a0, 4 ldw a1, (sp, 24) + lrw a2, function_trace_op + ldw a2, (a2, 0) nop GLOBAL(ftrace_call) @@ -157,3 +177,31 @@ ENTRY(return_to_handler) jmp lr END(return_to_handler) #endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +ENTRY(ftrace_regs_caller) + mcount_enter_regs + + lrw t1, PT_FRAME_SIZE + add t1, sp + + ldw a0, (t1, 0) + subi a0, 4 + ldw a1, (t1, 8) + lrw a2, function_trace_op + ldw a2, (a2, 0) + mov a3, sp + + nop +GLOBAL(ftrace_regs_call) + nop32_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + nop +GLOBAL(ftrace_graph_regs_call) + nop32_stub +#endif + + mcount_exit_regs +ENDPROC(ftrace_regs_caller) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ diff --git a/arch/csky/include/asm/ftrace.h b/arch/csky/include/asm/ftrace.h index ba35d93ecda2..fae72b0b1374 100644 --- a/arch/csky/include/asm/ftrace.h +++ b/arch/csky/include/asm/ftrace.h @@ -10,6 +10,8 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +#define ARCH_SUPPORTS_FTRACE_OPS 1 + #define MCOUNT_ADDR ((unsigned long)_mcount) #ifndef __ASSEMBLY__ diff --git a/arch/csky/include/asm/kprobes.h b/arch/csky/include/asm/kprobes.h new file mode 100644 index 000000000000..b647bbde4d6d --- /dev/null +++ b/arch/csky/include/asm/kprobes.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_CSKY_KPROBES_H +#define 
__ASM_CSKY_KPROBES_H + +#include <asm-generic/kprobes.h> + +#ifdef CONFIG_KPROBES +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 1 + +#define flush_insn_slot(p) do { } while (0) +#define kretprobe_blacklist_size 0 + +#include <asm/probes.h> + +struct prev_kprobe { + struct kprobe *kp; + unsigned int status; +}; + +/* Single step context for kprobe */ +struct kprobe_step_ctx { + unsigned long ss_pending; + unsigned long match_addr; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned int kprobe_status; + unsigned long saved_sr; + struct prev_kprobe prev_kprobe; + struct kprobe_step_ctx ss_ctx; +}; + +void arch_remove_kprobe(struct kprobe *p); +int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr); +int kprobe_breakpoint_handler(struct pt_regs *regs); +int kprobe_single_step_handler(struct pt_regs *regs); +void kretprobe_trampoline(void); +void __kprobes *trampoline_probe_handler(struct pt_regs *regs); + +#endif /* CONFIG_KPROBES */ +#endif /* __ASM_CSKY_KPROBES_H */ diff --git a/arch/csky/include/asm/probes.h b/arch/csky/include/asm/probes.h new file mode 100644 index 000000000000..5e526334e6f9 --- /dev/null +++ b/arch/csky/include/asm/probes.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_CSKY_PROBES_H +#define __ASM_CSKY_PROBES_H + +typedef u32 probe_opcode_t; +typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *); + +/* architecture specific copy of original instruction */ +struct arch_probe_insn { + probe_opcode_t *insn; + probes_handler_t *handler; + /* restore address after simulation */ + unsigned long restore; +}; + +#ifdef CONFIG_KPROBES +typedef u32 kprobe_opcode_t; +struct arch_specific_insn { + struct arch_probe_insn api; +}; +#endif + +#endif /* __ASM_CSKY_PROBES_H */ diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h index 21e0bd5293dd..c6bcd7f7c720 100644 --- a/arch/csky/include/asm/processor.h +++ b/arch/csky/include/asm/processor.h @@ -43,6 +43,7 @@ extern struct cpuinfo_csky cpu_data[]; struct thread_struct { unsigned long ksp; /* kernel stack pointer */ unsigned long sr; /* saved status register */ + unsigned long trap_no; /* saved trap number */ /* FPU regs */ struct user_fp __aligned(16) user_fp; diff --git a/arch/csky/include/asm/ptrace.h b/arch/csky/include/asm/ptrace.h index d0aba7b32417..aae5aa96cf54 100644 --- a/arch/csky/include/asm/ptrace.h +++ b/arch/csky/include/asm/ptrace.h @@ -7,11 +7,14 @@ #include <uapi/asm/ptrace.h> #include <asm/traps.h> #include <linux/types.h> +#include <linux/compiler.h> #ifndef __ASSEMBLY__ #define PS_S 0x80000000 /* Supervisor Mode */ +#define USR_BKPT 0x1464 + #define arch_has_single_step() (1) #define current_pt_regs() \ ({ (struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1; }) @@ -22,6 +25,18 @@ #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->pc = val; +} + +#if defined(__CSKYABIV2__) +#define MAX_REG_OFFSET offsetof(struct pt_regs, dcsr) +#else +#define MAX_REG_OFFSET offsetof(struct pt_regs, regs[9]) +#endif + static inline bool in_syscall(struct pt_regs const *regs) { return ((regs->sr >> 16) & 0xff) == VEC_TRAP0; @@ -37,5 +52,33 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->a0; } +/* Valid only for Kernel mode 
traps. */ +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->usp; +} + +extern int regs_query_register_offset(const char *name); +extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n); + +/* + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten + * @offset: offset of the register. + * + * regs_get_register returns the value of the register whose offset from + * @regs is @offset. The @offset is the offset of the register in struct + * pt_regs. If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + + return *(unsigned long *)((unsigned long)regs + offset); +} + #endif /* __ASSEMBLY__ */ #endif /* __ASM_CSKY_PTRACE_H */ diff --git a/arch/csky/include/asm/thread_info.h b/arch/csky/include/asm/thread_info.h index 0b546a55a8bf..442fedad0260 100644 --- a/arch/csky/include/asm/thread_info.h +++ b/arch/csky/include/asm/thread_info.h @@ -57,6 +57,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 3 /* syscall trace active */ #define TIF_SYSCALL_TRACEPOINT 4 /* syscall tracepoint instrumentation */ #define TIF_SYSCALL_AUDIT 5 /* syscall auditing */ +#define TIF_UPROBE 6 /* uprobe breakpoint or singlestep */ #define TIF_POLLING_NRFLAG 16 /* poll_idle() is TIF_NEED_RESCHED */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */ @@ -68,6 +69,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) diff --git a/arch/csky/include/asm/uprobes.h b/arch/csky/include/asm/uprobes.h new file mode 100644 index 000000000000..600388eb93c6 --- /dev/null +++ b/arch/csky/include/asm/uprobes.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_CSKY_UPROBES_H +#define __ASM_CSKY_UPROBES_H + +#include <asm/probes.h> + +#define MAX_UINSN_BYTES 4 + +#define UPROBE_SWBP_INSN USR_BKPT +#define UPROBE_SWBP_INSN_SIZE 2 +#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES + +typedef u32 uprobe_opcode_t; + +struct arch_uprobe_task { + unsigned long saved_trap_no; +}; + +struct arch_uprobe { + union { + u8 insn[MAX_UINSN_BYTES]; + u8 ixol[MAX_UINSN_BYTES]; + }; + struct arch_probe_insn api; + unsigned long insn_size; + bool simulate; +}; + +int uprobe_breakpoint_handler(struct pt_regs *regs); +int uprobe_single_step_handler(struct pt_regs *regs); + +#endif /* __ASM_CSKY_UPROBES_H */ diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile index 071d659f37b7..fd6d9dc8b7f3 100644 --- a/arch/csky/kernel/Makefile +++ b/arch/csky/kernel/Makefile @@ -4,6 +4,7 @@ extra-y := head.o vmlinux.lds obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o obj-y += power.o syscall.o syscall_table.o setup.o obj-y += process.o cpu-probe.o ptrace.o dumpstack.o +obj-y += probes/ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/csky/kernel/asm-offsets.c b/arch/csky/kernel/asm-offsets.c index 9b48b1b1a61b..f8be348df9e4 --- 
a/arch/csky/kernel/asm-offsets.c +++ b/arch/csky/kernel/asm-offsets.c @@ -72,6 +72,7 @@ int main(void) DEFINE(PT_RLO, offsetof(struct pt_regs, rlo)); #endif DEFINE(PT_USP, offsetof(struct pt_regs, usp)); + DEFINE(PT_FRAME_SIZE, sizeof(struct pt_regs)); /* offsets into the irq_cpustat_t struct */ DEFINE(CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index 007706328000..364819536f2e 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -128,7 +128,10 @@ tlbop_end 1 ENTRY(csky_systemcall) SAVE_ALL TRAP0_SIZE zero_fp - +#ifdef CONFIG_RSEQ_DEBUG + mov a0, sp + jbsr rseq_syscall +#endif psrset ee, ie lrw r11, __NR_syscalls @@ -218,10 +221,17 @@ ret_from_exception: andn r9, r10 ldw r12, (r9, TINFO_FLAGS) - andi r12, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) + andi r12, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | _TIF_UPROBE) cmpnei r12, 0 bt exit_work 1: +#ifdef CONFIG_TRACE_IRQFLAGS + ld r10, (sp, LSAVE_PSR) + btsti r10, 6 + bf 2f + jbsr trace_hardirqs_on +2: +#endif RESTORE_ALL exit_work: @@ -277,6 +287,10 @@ ENTRY(csky_irq) zero_fp psrset ee +#ifdef CONFIG_TRACE_IRQFLAGS + jbsr trace_hardirqs_off +#endif + #ifdef CONFIG_PREEMPTION mov r9, sp /* Get current stack pointer */ bmaski r10, THREAD_SHIFT diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c index 44f4880179b7..44628e3f7fa6 100644 --- a/arch/csky/kernel/ftrace.c +++ b/arch/csky/kernel/ftrace.c @@ -3,6 +3,7 @@ #include <linux/ftrace.h> #include <linux/uaccess.h> +#include <linux/stop_machine.h> #include <asm/cacheflush.h> #ifdef CONFIG_DYNAMIC_FTRACE @@ -126,6 +127,9 @@ int ftrace_update_ftrace_func(ftrace_func_t func) { int ret = ftrace_modify_code((unsigned long)&ftrace_call, (unsigned long)func, true, true); + if (!ret) + ret = ftrace_modify_code((unsigned long)&ftrace_regs_call, + (unsigned long)func, true, true); return ret; } @@ -135,6 +139,14 @@ int __init ftrace_dyn_arch_init(void) } #endif /* CONFIG_DYNAMIC_FTRACE */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return ftrace_modify_code(rec->ip, addr, true, true); +} +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long frame_pointer) @@ -190,5 +202,35 @@ int ftrace_disable_ftrace_graph_caller(void) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#ifndef CONFIG_CPU_HAS_ICACHE_INS +struct ftrace_modify_param { + int command; + atomic_t cpu_count; +}; + +static int __ftrace_modify_code(void *data) +{ + struct ftrace_modify_param *param = data; + + if (atomic_inc_return(&param->cpu_count) == 1) { + ftrace_modify_all_code(param->command); + atomic_inc(&param->cpu_count); + } else { + while (atomic_read(&param->cpu_count) <= num_online_cpus()) + cpu_relax(); + local_icache_inv_all(NULL); + } + + return 0; +} + +void arch_ftrace_update_code(int command) +{ + struct ftrace_modify_param param = { command, ATOMIC_INIT(0) }; + + stop_machine(__ftrace_modify_code, &param, cpu_online_mask); +} +#endif + /* _mcount is defined in abi's mcount.S */ EXPORT_SYMBOL(_mcount); diff --git a/arch/csky/kernel/head.S b/arch/csky/kernel/head.S index 61989f9241c0..17ed9d250480 100644 --- a/arch/csky/kernel/head.S +++ b/arch/csky/kernel/head.S @@ -21,6 +21,11 @@ END(_start) ENTRY(_start_smp_secondary) SETUP_MMU + /* copy msa1 from CPU0 */ + lrw r6, secondary_msa1 + ld.w r6, (r6, 0) + mtcr 
r6, cr<31, 15> + /* set stack pointer */ lrw r6, secondary_stack ld.w r6, (r6, 0) diff --git a/arch/csky/kernel/probes/Makefile b/arch/csky/kernel/probes/Makefile new file mode 100644 index 000000000000..1c7c6e6cb25b --- /dev/null +++ b/arch/csky/kernel/probes/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o +obj-$(CONFIG_KPROBES) += kprobes_trampoline.o +obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o +obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o + +CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) diff --git a/arch/csky/kernel/probes/decode-insn.c b/arch/csky/kernel/probes/decode-insn.c new file mode 100644 index 000000000000..bbc4edc25dc9 --- /dev/null +++ b/arch/csky/kernel/probes/decode-insn.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <asm/sections.h> + +#include "decode-insn.h" +#include "simulate-insn.h" + +/* Return: + * INSN_REJECTED If instruction is one not allowed to be kprobed, + * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. + */ +enum probe_insn __kprobes +csky_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api) +{ + probe_opcode_t insn = le32_to_cpu(*addr); + + CSKY_INSN_SET_SIMULATE(br16, insn); + CSKY_INSN_SET_SIMULATE(bt16, insn); + CSKY_INSN_SET_SIMULATE(bf16, insn); + CSKY_INSN_SET_SIMULATE(jmp16, insn); + CSKY_INSN_SET_SIMULATE(jsr16, insn); + CSKY_INSN_SET_SIMULATE(lrw16, insn); + CSKY_INSN_SET_SIMULATE(pop16, insn); + + CSKY_INSN_SET_SIMULATE(br32, insn); + CSKY_INSN_SET_SIMULATE(bt32, insn); + CSKY_INSN_SET_SIMULATE(bf32, insn); + CSKY_INSN_SET_SIMULATE(jmp32, insn); + CSKY_INSN_SET_SIMULATE(jsr32, insn); + CSKY_INSN_SET_SIMULATE(lrw32, insn); + CSKY_INSN_SET_SIMULATE(pop32, insn); + + CSKY_INSN_SET_SIMULATE(bez32, insn); + CSKY_INSN_SET_SIMULATE(bnez32, insn); + CSKY_INSN_SET_SIMULATE(bnezad32, insn); + CSKY_INSN_SET_SIMULATE(bhsz32, insn); + CSKY_INSN_SET_SIMULATE(bhz32, insn); + CSKY_INSN_SET_SIMULATE(blsz32, insn); + CSKY_INSN_SET_SIMULATE(blz32, insn); + CSKY_INSN_SET_SIMULATE(bsr32, insn); + CSKY_INSN_SET_SIMULATE(jmpi32, insn); + CSKY_INSN_SET_SIMULATE(jsri32, insn); + + return INSN_GOOD; +} diff --git a/arch/csky/kernel/probes/decode-insn.h b/arch/csky/kernel/probes/decode-insn.h new file mode 100644 index 000000000000..9c4ad48fee0d --- /dev/null +++ b/arch/csky/kernel/probes/decode-insn.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __CSKY_KERNEL_KPROBES_DECODE_INSN_H +#define __CSKY_KERNEL_KPROBES_DECODE_INSN_H + +#include <asm/sections.h> +#include <asm/kprobes.h> + +enum probe_insn { + INSN_REJECTED, + INSN_GOOD_NO_SLOT, + INSN_GOOD, +}; + +#define is_insn32(insn) ((insn & 0xc000) == 0xc000) + +enum probe_insn __kprobes +csky_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *asi); + +#endif /* __CSKY_KERNEL_KPROBES_DECODE_INSN_H */ diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c new file mode 100644 index 000000000000..5264763d05be --- /dev/null +++ b/arch/csky/kernel/probes/ftrace.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/kprobes.h> + +int arch_check_ftrace_location(struct kprobe *p) +{ + if (ftrace_location((unsigned long)p->addr)) + p->flags |= KPROBE_FLAG_FTRACE; + return 0; +} + +/* Ftrace callback handler for kprobes -- called with preemption disabled */ +void 
kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + bool lr_saver = false; + struct kprobe *p; + struct kprobe_ctlblk *kcb; + + /* Preempt is disabled by ftrace */ + p = get_kprobe((kprobe_opcode_t *)ip); + if (!p) { + p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE)); + if (unlikely(!p) || kprobe_disabled(p)) + return; + lr_saver = true; + } + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + unsigned long orig_ip = instruction_pointer(regs); + + if (lr_saver) + ip -= MCOUNT_INSN_SIZE; + instruction_pointer_set(regs, ip); + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) { + /* + * Emulate singlestep (and also recover regs->pc) + * as if there is a nop + */ + instruction_pointer_set(regs, + (unsigned long)p->addr + MCOUNT_INSN_SIZE); + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + instruction_pointer_set(regs, orig_ip); + } + /* + * If pre_handler returns !0, it changes regs->pc. We have to + * skip emulating post_handler. + */ + __this_cpu_write(current_kprobe, NULL); + } +} +NOKPROBE_SYMBOL(kprobe_ftrace_handler); + +int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.api.insn = NULL; + return 0; +} diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c new file mode 100644 index 000000000000..f0f733b7ac5a --- /dev/null +++ b/arch/csky/kernel/probes/kprobes.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/kprobes.h> +#include <linux/extable.h> +#include <linux/slab.h> +#include <linux/stop_machine.h> +#include <asm/ptrace.h> +#include <linux/uaccess.h> +#include <asm/sections.h> +#include <asm/cacheflush.h> + +#include "decode-insn.h" + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); + +struct csky_insn_patch { + kprobe_opcode_t *addr; + u32 opcode; + atomic_t cpu_count; +}; + +static int __kprobes patch_text_cb(void *priv) +{ + struct csky_insn_patch *param = priv; + unsigned int addr = (unsigned int)param->addr; + + if (atomic_inc_return(&param->cpu_count) == 1) { + *(u16 *) addr = cpu_to_le16(param->opcode); + dcache_wb_range(addr, addr + 2); + atomic_inc(&param->cpu_count); + } else { + while (atomic_read(&param->cpu_count) <= num_online_cpus()) + cpu_relax(); + } + + icache_inv_range(addr, addr + 2); + + return 0; +} + +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +{ + struct csky_insn_patch param = { addr, opcode, ATOMIC_INIT(0) }; + + return stop_machine_cpuslocked(patch_text_cb, &param, cpu_online_mask); +} + +static void __kprobes arch_prepare_ss_slot(struct kprobe *p) +{ + unsigned long offset = is_insn32(p->opcode) ? 
4 : 2; + + p->ainsn.api.restore = (unsigned long)p->addr + offset; + + patch_text(p->ainsn.api.insn, p->opcode); +} + +static void __kprobes arch_prepare_simulate(struct kprobe *p) +{ + p->ainsn.api.restore = 0; +} + +static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (p->ainsn.api.handler) + p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs); + + post_kprobe_handler(kcb, regs); +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long probe_addr = (unsigned long)p->addr; + + if (probe_addr & 0x1) { + pr_warn("Address not aligned.\n"); + return -EINVAL; + } + + /* copy instruction */ + p->opcode = le32_to_cpu(*p->addr); + + /* decode instruction */ + switch (csky_probe_decode_insn(p->addr, &p->ainsn.api)) { + case INSN_REJECTED: /* insn not supported */ + return -EINVAL; + + case INSN_GOOD_NO_SLOT: /* insn need simulation */ + p->ainsn.api.insn = NULL; + break; + + case INSN_GOOD: /* instruction uses slot */ + p->ainsn.api.insn = get_insn_slot(); + if (!p->ainsn.api.insn) + return -ENOMEM; + break; + } + + /* prepare the instruction */ + if (p->ainsn.api.insn) + arch_prepare_ss_slot(p); + else + arch_prepare_simulate(p); + + return 0; +} + +/* install breakpoint in text */ +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, USR_BKPT); +} + +/* remove breakpoint from text */ +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, p->opcode); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + +/* + * Interrupts need to be disabled before single-step mode is set, and not + * re-enabled until after single-step mode ends. Without disabling interrupts + * on the local CPU, there is a chance that an interrupt arrives between the + * exception return and the start of out-of-line single-stepping, which would + * result in wrongly single-stepping into the interrupt handler. + */ +static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + kcb->saved_sr = regs->sr; + regs->sr &= ~BIT(6); +} + +static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + regs->sr = kcb->saved_sr; +} + +static void __kprobes +set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr, struct kprobe *p) +{ + unsigned long offset = is_insn32(p->opcode) ? 
4 : 2; + + kcb->ss_ctx.ss_pending = true; + kcb->ss_ctx.match_addr = addr + offset; +} + +static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb) +{ + kcb->ss_ctx.ss_pending = false; + kcb->ss_ctx.match_addr = 0; +} + +#define TRACE_MODE_SI BIT(14) +#define TRACE_MODE_MASK ~(0x3 << 14) +#define TRACE_MODE_RUN 0 + +static void __kprobes setup_singlestep(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) +{ + unsigned long slot; + + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + kcb->kprobe_status = KPROBE_HIT_SS; + } + + if (p->ainsn.api.insn) { + /* prepare for single stepping */ + slot = (unsigned long)p->ainsn.api.insn; + + set_ss_context(kcb, slot, p); /* mark pending ss */ + + /* IRQs and single stepping do not mix well. */ + kprobes_save_local_irqflag(kcb, regs); + regs->sr = (regs->sr & TRACE_MODE_MASK) | TRACE_MODE_SI; + instruction_pointer_set(regs, slot); + } else { + /* insn simulation */ + arch_simulate_insn(p, regs); + } +} + +static int __kprobes reenter_kprobe(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_warn("Unrecoverable kprobe detected.\n"); + dump_kprobe(p); + BUG(); + break; + default: + WARN_ON(1); + return 0; + } + + return 1; +} + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + + if (!cur) + return; + + /* return addr restore if non-branching insn */ + if (cur->ainsn.api.restore != 0) + regs->pc = cur->ainsn.api.restore; + + /* restore back original saved kprobe variables and continue */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + return; + } + + /* call post handler */ + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (cur->post_handler) { + /* post_handler can hit breakpoint and single step + * again, so we enable D-flag for recursive exception. + */ + cur->post_handler(cur, regs, 0); + } + + reset_current_kprobe(); +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + switch (kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->pc = (unsigned long) cur->addr; + if (!instruction_pointer(regs)) + BUG(); + + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. 
+ */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + } + return 0; +} + +int __kprobes +kprobe_breakpoint_handler(struct pt_regs *regs) +{ + struct kprobe *p, *cur_kprobe; + struct kprobe_ctlblk *kcb; + unsigned long addr = instruction_pointer(regs); + + kcb = get_kprobe_ctlblk(); + cur_kprobe = kprobe_running(); + + p = get_kprobe((kprobe_opcode_t *) addr); + + if (p) { + if (cur_kprobe) { + if (reenter_kprobe(p, regs, kcb)) + return 1; + } else { + /* Probe hit */ + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, it will + * modify the execution path and no need to single + * stepping. Let's just reset current kprobe and exit. + * + * pre_handler can hit a breakpoint and can step thru + * before return. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) + setup_singlestep(p, regs, kcb, 0); + else + reset_current_kprobe(); + } + return 1; + } + + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + * Return back to original instruction, and continue. + */ + return 0; +} + +int __kprobes +kprobe_single_step_handler(struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if ((kcb->ss_ctx.ss_pending) + && (kcb->ss_ctx.match_addr == instruction_pointer(regs))) { + clear_ss_context(kcb); /* clear pending ss */ + + kprobes_restore_local_irqflag(kcb, regs); + regs->sr = (regs->sr & TRACE_MODE_MASK) | TRACE_MODE_RUN; + + post_kprobe_handler(kcb, regs); + return 1; + } + return 0; +} + +/* + * Provide a blacklist of symbols identifying ranges which cannot be kprobed. + * This blacklist is exposed to userspace via debugfs (kprobes/blacklist). + */ +int __init arch_populate_kprobe_blacklist(void) +{ + int ret; + + ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); + return ret; +} + +void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = + (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path have + * return probes installed on them, and/or more than one + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always pushed into the head of the list + * - when multiple return probes are registered for the same + * function, the (chronologically) first instance's ret_addr + * will be the real return address, and all the rest will + * point to kretprobe_trampoline. 
+ */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (ri->rp && ri->rp->handler) { + __this_cpu_write(current_kprobe, &ri->rp->kp); + get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + __this_cpu_write(current_kprobe, NULL); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_hash_unlock(current, &flags); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + return (void *)orig_ret_address; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->lr; + regs->lr = (unsigned long) &kretprobe_trampoline; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; +} + +int __init arch_init_kprobes(void) +{ + return 0; +} diff --git a/arch/csky/kernel/probes/kprobes_trampoline.S b/arch/csky/kernel/probes/kprobes_trampoline.S new file mode 100644 index 000000000000..b1fe3af24f03 --- /dev/null +++ b/arch/csky/kernel/probes/kprobes_trampoline.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#include <linux/linkage.h> + +#include <abi/entry.h> + +ENTRY(kretprobe_trampoline) + SAVE_REGS_FTRACE + + mov a0, sp /* pt_regs */ + + jbsr trampoline_probe_handler + + /* use the result as the return-address */ + mov lr, a0 + + RESTORE_REGS_FTRACE + rts +ENDPROC(kretprobe_trampoline) diff --git a/arch/csky/kernel/probes/simulate-insn.c b/arch/csky/kernel/probes/simulate-insn.c new file mode 100644 index 000000000000..4e464fed52ec --- /dev/null +++ b/arch/csky/kernel/probes/simulate-insn.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/kprobes.h> + +#include "decode-insn.h" +#include "simulate-insn.h" + +static inline bool csky_insn_reg_get_val(struct pt_regs *regs, + unsigned long index, + unsigned long *ptr) +{ + if (index < 14) + *ptr = *(®s->a0 + index); + + if (index > 15 && index < 31) + *ptr = *(®s->exregs[0] + index - 16); + + switch (index) { + case 14: + *ptr = regs->usp; + break; + case 15: + *ptr = regs->lr; + break; + case 31: + *ptr = regs->tls; + break; + default: + goto fail; + } + + return true; +fail: + return false; +} + +static inline bool csky_insn_reg_set_val(struct pt_regs *regs, + unsigned long index, + unsigned long val) +{ + if (index < 14) + *(®s->a0 + index) = val; + + if (index > 15 && index < 31) + *(®s->exregs[0] + index - 16) = val; + + switch (index) { + case 14: + regs->usp = val; + break; + case 15: + regs->lr = val; + break; + case 31: + regs->tls = val; + break; + default: + goto fail; + } + + return true; +fail: + 
return false; +} + +void __kprobes +simulate_br16(u32 opcode, long addr, struct pt_regs *regs) +{ + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0x3ff) << 1, 9)); +} + +void __kprobes +simulate_br32(u32 opcode, long addr, struct pt_regs *regs) +{ + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0xffff0000) >> 15, 15)); +} + +void __kprobes +simulate_bt16(u32 opcode, long addr, struct pt_regs *regs) +{ + if (regs->sr & 1) + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0x3ff) << 1, 9)); + else + instruction_pointer_set(regs, addr + 2); +} + +void __kprobes +simulate_bt32(u32 opcode, long addr, struct pt_regs *regs) +{ + if (regs->sr & 1) + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0xffff0000) >> 15, 15)); + else + instruction_pointer_set(regs, addr + 4); +} + +void __kprobes +simulate_bf16(u32 opcode, long addr, struct pt_regs *regs) +{ + if (!(regs->sr & 1)) + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0x3ff) << 1, 9)); + else + instruction_pointer_set(regs, addr + 2); +} + +void __kprobes +simulate_bf32(u32 opcode, long addr, struct pt_regs *regs) +{ + if (!(regs->sr & 1)) + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0xffff0000) >> 15, 15)); + else + instruction_pointer_set(regs, addr + 4); +} + +void __kprobes +simulate_jmp16(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = (opcode >> 2) & 0xf; + + csky_insn_reg_get_val(regs, tmp, &tmp); + + instruction_pointer_set(regs, tmp & 0xfffffffe); +} + +void __kprobes +simulate_jmp32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = opcode & 0x1f; + + csky_insn_reg_get_val(regs, tmp, &tmp); + + instruction_pointer_set(regs, tmp & 0xfffffffe); +} + +void __kprobes +simulate_jsr16(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = (opcode >> 2) & 0xf; + + csky_insn_reg_get_val(regs, tmp, &tmp); + + regs->lr = addr + 2; + + instruction_pointer_set(regs, tmp & 0xfffffffe); +} + +void __kprobes +simulate_jsr32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = opcode & 0x1f; + + csky_insn_reg_get_val(regs, tmp, &tmp); + + regs->lr = addr + 4; + + instruction_pointer_set(regs, tmp & 0xfffffffe); +} + +void __kprobes +simulate_lrw16(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long val; + unsigned long tmp = (opcode & 0x300) >> 3; + unsigned long offset = ((opcode & 0x1f) | tmp) << 2; + + tmp = (opcode & 0xe0) >> 5; + + val = *(unsigned int *)(instruction_pointer(regs) + offset); + + csky_insn_reg_set_val(regs, tmp, val); +} + +void __kprobes +simulate_lrw32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long val; + unsigned long offset = (opcode & 0xffff0000) >> 14; + unsigned long tmp = opcode & 0x0000001f; + + val = *(unsigned int *) + ((instruction_pointer(regs) + offset) & 0xfffffffc); + + csky_insn_reg_set_val(regs, tmp, val); +} + +void __kprobes +simulate_pop16(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long *tmp = (unsigned long *)regs->usp; + int i; + + for (i = 0; i < (opcode & 0xf); i++) { + csky_insn_reg_set_val(regs, i + 4, *tmp); + tmp += 1; + } + + if (opcode & 0x10) { + csky_insn_reg_set_val(regs, 15, *tmp); + tmp += 1; + } + + regs->usp = (unsigned long)tmp; + + instruction_pointer_set(regs, regs->lr); +} + +void __kprobes +simulate_pop32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long *tmp = (unsigned long *)regs->usp; + int i; + + for (i = 0; i < ((opcode & 0xf0000) >> 16); i++) { + 
csky_insn_reg_set_val(regs, i + 4, *tmp); + tmp += 1; + } + + if (opcode & 0x100000) { + csky_insn_reg_set_val(regs, 15, *tmp); + tmp += 1; + } + + for (i = 0; i < ((opcode & 0xe00000) >> 21); i++) { + csky_insn_reg_set_val(regs, i + 16, *tmp); + tmp += 1; + } + + if (opcode & 0x1000000) { + csky_insn_reg_set_val(regs, 29, *tmp); + tmp += 1; + } + + regs->usp = (unsigned long)tmp; + + instruction_pointer_set(regs, regs->lr); +} + +void __kprobes +simulate_bez32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = opcode & 0x1f; + + csky_insn_reg_get_val(regs, tmp, &tmp); + + if (tmp == 0) { + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0xffff0000) >> 15, 15)); + } else + instruction_pointer_set(regs, addr + 4); +} + +void __kprobes +simulate_bnez32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = opcode & 0x1f; + + csky_insn_reg_get_val(regs, tmp, &tmp); + + if (tmp != 0) { + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0xffff0000) >> 15, 15)); + } else + instruction_pointer_set(regs, addr + 4); +} + +void __kprobes +simulate_bnezad32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = opcode & 0x1f; + unsigned long val; + + csky_insn_reg_get_val(regs, tmp, &val); + + val -= 1; + + if (val > 0) { + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0xffff0000) >> 15, 15)); + } else + instruction_pointer_set(regs, addr + 4); + + csky_insn_reg_set_val(regs, tmp, val); +} + +void __kprobes +simulate_bhsz32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = opcode & 0x1f; + unsigned long val; + + csky_insn_reg_get_val(regs, tmp, &val); + + if (val >= 0) { + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0xffff0000) >> 15, 15)); + } else + instruction_pointer_set(regs, addr + 4); + + csky_insn_reg_set_val(regs, tmp, val); +} + +void __kprobes +simulate_bhz32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = opcode & 0x1f; + unsigned long val; + + csky_insn_reg_get_val(regs, tmp, &val); + + if (val > 0) { + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0xffff0000) >> 15, 15)); + } else + instruction_pointer_set(regs, addr + 4); + + csky_insn_reg_set_val(regs, tmp, val); +} + +void __kprobes +simulate_blsz32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = opcode & 0x1f; + unsigned long val; + + csky_insn_reg_get_val(regs, tmp, &val); + + if (val <= 0) { + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0xffff0000) >> 15, 15)); + } else + instruction_pointer_set(regs, addr + 4); + + csky_insn_reg_set_val(regs, tmp, val); +} + +void __kprobes +simulate_blz32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp = opcode & 0x1f; + unsigned long val; + + csky_insn_reg_get_val(regs, tmp, &val); + + if (val < 0) { + instruction_pointer_set(regs, + addr + sign_extend32((opcode & 0xffff0000) >> 15, 15)); + } else + instruction_pointer_set(regs, addr + 4); + + csky_insn_reg_set_val(regs, tmp, val); +} + +void __kprobes +simulate_bsr32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long tmp; + + tmp = (opcode & 0xffff) << 16; + tmp |= (opcode & 0xffff0000) >> 16; + + instruction_pointer_set(regs, + addr + sign_extend32((tmp & 0x3ffffff) << 1, 15)); + + regs->lr = addr + 4; +} + +void __kprobes +simulate_jmpi32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long val; + unsigned long offset = ((opcode & 0xffff0000) >> 14); + + val = *(unsigned int *) + 
((instruction_pointer(regs) + offset) & 0xfffffffc); + + instruction_pointer_set(regs, val); +} + +void __kprobes +simulate_jsri32(u32 opcode, long addr, struct pt_regs *regs) +{ + unsigned long val; + unsigned long offset = ((opcode & 0xffff0000) >> 14); + + val = *(unsigned int *) + ((instruction_pointer(regs) + offset) & 0xfffffffc); + + regs->lr = addr + 4; + + instruction_pointer_set(regs, val); +} diff --git a/arch/csky/kernel/probes/simulate-insn.h b/arch/csky/kernel/probes/simulate-insn.h new file mode 100644 index 000000000000..ba4cb7ef062e --- /dev/null +++ b/arch/csky/kernel/probes/simulate-insn.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __CSKY_KERNEL_PROBES_SIMULATE_INSN_H +#define __CSKY_KERNEL_PROBES_SIMULATE_INSN_H + +#define __CSKY_INSN_FUNCS(name, mask, val) \ +static __always_inline bool csky_insn_is_##name(probe_opcode_t code) \ +{ \ + BUILD_BUG_ON(~(mask) & (val)); \ + return (code & (mask)) == (val); \ +} \ +void simulate_##name(u32 opcode, long addr, struct pt_regs *regs); + +#define CSKY_INSN_SET_SIMULATE(name, code) \ + do { \ + if (csky_insn_is_##name(code)) { \ + api->handler = simulate_##name; \ + return INSN_GOOD_NO_SLOT; \ + } \ + } while (0) + +__CSKY_INSN_FUNCS(br16, 0xfc00, 0x0400) +__CSKY_INSN_FUNCS(bt16, 0xfc00, 0x0800) +__CSKY_INSN_FUNCS(bf16, 0xfc00, 0x0c00) +__CSKY_INSN_FUNCS(jmp16, 0xffc3, 0x7800) +__CSKY_INSN_FUNCS(jsr16, 0xffc3, 0x7801) +__CSKY_INSN_FUNCS(lrw16, 0xfc00, 0x1000) +__CSKY_INSN_FUNCS(pop16, 0xffe0, 0x1480) + +__CSKY_INSN_FUNCS(br32, 0x0000ffff, 0x0000e800) +__CSKY_INSN_FUNCS(bt32, 0x0000ffff, 0x0000e860) +__CSKY_INSN_FUNCS(bf32, 0x0000ffff, 0x0000e840) +__CSKY_INSN_FUNCS(jmp32, 0xffffffe0, 0x0000e8c0) +__CSKY_INSN_FUNCS(jsr32, 0xffffffe0, 0x0000e8e0) +__CSKY_INSN_FUNCS(lrw32, 0x0000ffe0, 0x0000ea80) +__CSKY_INSN_FUNCS(pop32, 0xfe00ffff, 0x0000ebc0) + +__CSKY_INSN_FUNCS(bez32, 0x0000ffe0, 0x0000e900) +__CSKY_INSN_FUNCS(bnez32, 0x0000ffe0, 0x0000e920) +__CSKY_INSN_FUNCS(bnezad32, 0x0000ffe0, 0x0000e820) +__CSKY_INSN_FUNCS(bhsz32, 0x0000ffe0, 0x0000e9a0) +__CSKY_INSN_FUNCS(bhz32, 0x0000ffe0, 0x0000e940) +__CSKY_INSN_FUNCS(blsz32, 0x0000ffe0, 0x0000e960) +__CSKY_INSN_FUNCS(blz32, 0x0000ffe0, 0x0000e980) +__CSKY_INSN_FUNCS(bsr32, 0x0000fc00, 0x0000e000) +__CSKY_INSN_FUNCS(jmpi32, 0x0000ffff, 0x0000eac0) +__CSKY_INSN_FUNCS(jsri32, 0x0000ffff, 0x0000eae0) + +#endif /* __CSKY_KERNEL_PROBES_SIMULATE_INSN_H */ diff --git a/arch/csky/kernel/probes/uprobes.c b/arch/csky/kernel/probes/uprobes.c new file mode 100644 index 000000000000..b3a56c260e3e --- /dev/null +++ b/arch/csky/kernel/probes/uprobes.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2014-2016 Pratyush Anand <panand@redhat.com> + */ +#include <linux/highmem.h> +#include <linux/ptrace.h> +#include <linux/uprobes.h> +#include <asm/cacheflush.h> + +#include "decode-insn.h" + +#define UPROBE_TRAP_NR UINT_MAX + +unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) +{ + return instruction_pointer(regs); +} + +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long addr) +{ + probe_opcode_t insn; + + insn = *(probe_opcode_t *)(&auprobe->insn[0]); + + auprobe->insn_size = is_insn32(insn) ? 
4 : 2; + + switch (csky_probe_decode_insn(&insn, &auprobe->api)) { + case INSN_REJECTED: + return -EINVAL; + + case INSN_GOOD_NO_SLOT: + auprobe->simulate = true; + break; + + default: + break; + } + + return 0; +} + +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + utask->autask.saved_trap_no = current->thread.trap_no; + current->thread.trap_no = UPROBE_TRAP_NR; + + instruction_pointer_set(regs, utask->xol_vaddr); + + user_enable_single_step(current); + + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + WARN_ON_ONCE(current->thread.trap_no != UPROBE_TRAP_NR); + + instruction_pointer_set(regs, utask->vaddr + auprobe->insn_size); + + user_disable_single_step(current); + + return 0; +} + +bool arch_uprobe_xol_was_trapped(struct task_struct *t) +{ + if (t->thread.trap_no != UPROBE_TRAP_NR) + return true; + + return false; +} + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + probe_opcode_t insn; + unsigned long addr; + + if (!auprobe->simulate) + return false; + + insn = *(probe_opcode_t *)(&auprobe->insn[0]); + addr = instruction_pointer(regs); + + if (auprobe->api.handler) + auprobe->api.handler(insn, addr, regs); + + return true; +} + +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + /* + * Task has received a fatal signal, so reset back to probed + * address. + */ + instruction_pointer_set(regs, utask->vaddr); + + user_disable_single_step(current); +} + +bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, + struct pt_regs *regs) +{ + if (ctx == RP_CHECK_CHAIN_CALL) + return regs->usp <= ret->stack; + else + return regs->usp < ret->stack; +} + +unsigned long +arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, + struct pt_regs *regs) +{ + unsigned long ra; + + ra = regs->lr; + + regs->lr = trampoline_vaddr; + + return ra; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +int uprobe_breakpoint_handler(struct pt_regs *regs) +{ + if (uprobe_pre_sstep_notifier(regs)) + return 1; + + return 0; +} + +int uprobe_single_step_handler(struct pt_regs *regs) +{ + if (uprobe_post_sstep_notifier(regs)) + return 1; + + return 0; +} diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index 313623a19ecb..21ac2608f205 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -193,6 +193,109 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) return &user_csky_view; } +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { + REG_OFFSET_NAME(tls), + REG_OFFSET_NAME(lr), + REG_OFFSET_NAME(pc), + REG_OFFSET_NAME(sr), + REG_OFFSET_NAME(usp), + REG_OFFSET_NAME(orig_a0), + REG_OFFSET_NAME(a0), + REG_OFFSET_NAME(a1), + REG_OFFSET_NAME(a2), + REG_OFFSET_NAME(a3), + REG_OFFSET_NAME(regs[0]), + REG_OFFSET_NAME(regs[1]), + REG_OFFSET_NAME(regs[2]), + REG_OFFSET_NAME(regs[3]), + REG_OFFSET_NAME(regs[4]), + REG_OFFSET_NAME(regs[5]), + REG_OFFSET_NAME(regs[6]), + REG_OFFSET_NAME(regs[7]), + REG_OFFSET_NAME(regs[8]), + REG_OFFSET_NAME(regs[9]), +#if
defined(__CSKYABIV2__) + REG_OFFSET_NAME(exregs[0]), + REG_OFFSET_NAME(exregs[1]), + REG_OFFSET_NAME(exregs[2]), + REG_OFFSET_NAME(exregs[3]), + REG_OFFSET_NAME(exregs[4]), + REG_OFFSET_NAME(exregs[5]), + REG_OFFSET_NAME(exregs[6]), + REG_OFFSET_NAME(exregs[7]), + REG_OFFSET_NAME(exregs[8]), + REG_OFFSET_NAME(exregs[9]), + REG_OFFSET_NAME(exregs[10]), + REG_OFFSET_NAME(exregs[11]), + REG_OFFSET_NAME(exregs[12]), + REG_OFFSET_NAME(exregs[13]), + REG_OFFSET_NAME(exregs[14]), + REG_OFFSET_NAME(rhi), + REG_OFFSET_NAME(rlo), + REG_OFFSET_NAME(dcsr), +#endif + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + return (addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. 
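+ * + * A minimal usage sketch (illustrative only): + * + *	unsigned long v = regs_get_kernel_stack_nth(regs, 2); + * + * A return of 0 is ambiguous here: it may be the stored value or the + * out-of-stack fallback, so callers should treat 0 as a safe default.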
+ */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + void ptrace_disable(struct task_struct *child) { singlestep_disable(child); diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index 3821e55742f4..819a9a7bf786 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -24,26 +24,9 @@ struct screen_info screen_info = { }; #endif -phys_addr_t __init_memblock memblock_end_of_REG0(void) -{ - return (memblock.memory.regions[0].base + - memblock.memory.regions[0].size); -} - -phys_addr_t __init_memblock memblock_start_of_REG1(void) -{ - return memblock.memory.regions[1].base; -} - -size_t __init_memblock memblock_size_of_REG1(void) -{ - return memblock.memory.regions[1].size; -} - static void __init csky_memblock_init(void) { unsigned long zone_size[MAX_NR_ZONES]; - unsigned long zhole_size[MAX_NR_ZONES]; signed long size; memblock_reserve(__pa(_stext), _end - _stext); @@ -54,54 +37,36 @@ static void __init csky_memblock_init(void) memblock_dump_all(); memset(zone_size, 0, sizeof(zone_size)); - memset(zhole_size, 0, sizeof(zhole_size)); min_low_pfn = PFN_UP(memblock_start_of_DRAM()); - max_pfn = PFN_DOWN(memblock_end_of_DRAM()); - - max_low_pfn = PFN_UP(memblock_end_of_REG0()); - if (max_low_pfn == 0) - max_low_pfn = max_pfn; + max_low_pfn = max_pfn = PFN_DOWN(memblock_end_of_DRAM()); size = max_pfn - min_low_pfn; - if (memblock.memory.cnt > 1) { - zone_size[ZONE_NORMAL] = - PFN_DOWN(memblock_start_of_REG1()) - min_low_pfn; - zhole_size[ZONE_NORMAL] = - PFN_DOWN(memblock_start_of_REG1()) - max_low_pfn; + if (size <= PFN_DOWN(SSEG_SIZE - PHYS_OFFSET_OFFSET)) + zone_size[ZONE_NORMAL] = size; + else if (size < PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET)) { + zone_size[ZONE_NORMAL] = + PFN_DOWN(SSEG_SIZE - PHYS_OFFSET_OFFSET); + max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL]; } else { - if (size <= PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET)) - zone_size[ZONE_NORMAL] = max_pfn - min_low_pfn; - else { - zone_size[ZONE_NORMAL] = + zone_size[ZONE_NORMAL] = PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); - max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL]; - } + max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL]; + write_mmu_msa1(read_mmu_msa0() + SSEG_SIZE); } #ifdef CONFIG_HIGHMEM - size = 0; - if (memblock.memory.cnt > 1) { - size = PFN_DOWN(memblock_size_of_REG1()); - highstart_pfn = PFN_DOWN(memblock_start_of_REG1()); - } else { - size = max_pfn - min_low_pfn - - PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); - highstart_pfn = min_low_pfn + - PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); - } - - if (size > 0) - zone_size[ZONE_HIGHMEM] = size; + zone_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn; - highend_pfn = max_pfn; + highstart_pfn = max_low_pfn; + highend_pfn = max_pfn; #endif memblock_set_current_limit(PFN_PHYS(max_low_pfn)); dma_contiguous_reserve(0); - free_area_init_node(0, zone_size, min_low_pfn, zhole_size); + free_area_init_node(0, zone_size, min_low_pfn, NULL); } void __init setup_arch(char **cmdline_p) diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 9b1b7c039ddf..9452d6570b7e 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -175,6 +175,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) sigset_t *oldset = sigmask_to_save(); int ret; + rseq_signal_deliver(ksig, regs); + /* 
Are we from a system call? */ if (in_syscall(regs)) { /* Avoid additional syscall restarting via ret_from_exception */ @@ -251,6 +253,9 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { + if (thread_info_flags & _TIF_UPROBE) + uprobe_notify_resume(regs); + /* Handle pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); @@ -258,5 +263,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } } diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index 0bb0954d5570..b5c5bc3afeb5 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -22,6 +22,9 @@ #include <asm/sections.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> +#ifdef CONFIG_CPU_HAS_FPU +#include <abi/fpu.h> +#endif struct ipi_data_struct { unsigned long bits ____cacheline_aligned; @@ -156,6 +159,8 @@ volatile unsigned int secondary_hint; volatile unsigned int secondary_ccr; volatile unsigned int secondary_stack; +unsigned long secondary_msa1; + int __cpu_up(unsigned int cpu, struct task_struct *tidle) { unsigned long mask = 1 << cpu; @@ -164,6 +169,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) (unsigned int) task_stack_page(tidle) + THREAD_SIZE - 8; secondary_hint = mfcr("cr31"); secondary_ccr = mfcr("cr18"); + secondary_msa1 = read_mmu_msa1(); /* * Because other CPUs are in reset status, we must flush data diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index b057480e7463..fcc3a69831ad 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -14,6 +14,7 @@ #include <linux/kallsyms.h> #include <linux/rtc.h> #include <linux/uaccess.h> +#include <linux/kprobes.h> #include <asm/setup.h> #include <asm/traps.h> @@ -109,14 +110,14 @@ void buserr(struct pt_regs *regs) force_sig_fault(SIGSEGV, 0, (void __user *)regs->pc); } -#define USR_BKPT 0x1464 asmlinkage void trap_c(struct pt_regs *regs) { int sig; unsigned long vector; siginfo_t info; + struct task_struct *tsk = current; - vector = (mfcr("psr") >> 16) & 0xff; + vector = (regs->sr >> 16) & 0xff; switch (vector) { case VEC_ZERODIV: @@ -125,10 +126,27 @@ asmlinkage void trap_c(struct pt_regs *regs) break; /* ptrace */ case VEC_TRACE: +#ifdef CONFIG_KPROBES + if (kprobe_single_step_handler(regs)) + return; +#endif +#ifdef CONFIG_UPROBES + if (uprobe_single_step_handler(regs)) + return; +#endif info.si_code = TRAP_TRACE; sig = SIGTRAP; break; case VEC_ILLEGAL: + tsk->thread.trap_no = vector; +#ifdef CONFIG_KPROBES + if (kprobe_breakpoint_handler(regs)) + return; +#endif +#ifdef CONFIG_UPROBES + if (uprobe_breakpoint_handler(regs)) + return; +#endif die_if_kernel("Kernel mode ILLEGAL", regs, vector); #ifndef CONFIG_CPU_NO_USER_BKPT if (*(uint16_t *)instruction_pointer(regs) != USR_BKPT) @@ -146,16 +164,20 @@ asmlinkage void trap_c(struct pt_regs *regs) sig = SIGTRAP; break; case VEC_ACCESS: + tsk->thread.trap_no = vector; return buserr(regs); #ifdef CONFIG_CPU_NEED_SOFTALIGN case VEC_ALIGN: + tsk->thread.trap_no = vector; return csky_alignment(regs); #endif #ifdef CONFIG_CPU_HAS_FPU case VEC_FPE: + tsk->thread.trap_no = vector; die_if_kernel("Kernel mode FPE", regs, vector); return fpu_fpe(regs); case VEC_PRIV: + tsk->thread.trap_no = vector; die_if_kernel("Kernel mode PRIV", regs, vector); if (fpu_libc_helper(regs)) return; @@ -164,5 +186,8 
@@ asmlinkage void trap_c(struct pt_regs *regs) sig = SIGSEGV; break; } + + tsk->thread.trap_no = vector; + send_sig(sig, current, 0); } diff --git a/arch/csky/mm/cachev2.c b/arch/csky/mm/cachev2.c index bc419f8039d3..7a9664adce43 100644 --- a/arch/csky/mm/cachev2.c +++ b/arch/csky/mm/cachev2.c @@ -7,8 +7,12 @@ #include <asm/cache.h> #include <asm/barrier.h> +/* for L1-cache */ #define INS_CACHE (1 << 0) +#define DATA_CACHE (1 << 1) #define CACHE_INV (1 << 4) +#define CACHE_CLR (1 << 5) +#define CACHE_OMS (1 << 6) void local_icache_inv_all(void *priv) { @@ -16,11 +20,6 @@ void local_icache_inv_all(void *priv) sync_is(); } -void icache_inv_all(void) -{ - on_each_cpu(local_icache_inv_all, NULL, 1); -} - #ifdef CONFIG_CPU_HAS_ICACHE_INS void icache_inv_range(unsigned long start, unsigned long end) { @@ -31,9 +30,43 @@ void icache_inv_range(unsigned long start, unsigned long end) sync_is(); } #else +struct cache_range { + unsigned long start; + unsigned long end; +}; + +static DEFINE_SPINLOCK(cache_lock); + +static inline void cache_op_line(unsigned long i, unsigned int val) +{ + mtcr("cr22", i); + mtcr("cr17", val); +} + +void local_icache_inv_range(void *priv) +{ + struct cache_range *param = priv; + unsigned long i = param->start & ~(L1_CACHE_BYTES - 1); + unsigned long flags; + + spin_lock_irqsave(&cache_lock, flags); + + for (; i < param->end; i += L1_CACHE_BYTES) + cache_op_line(i, INS_CACHE | CACHE_INV | CACHE_OMS); + + spin_unlock_irqrestore(&cache_lock, flags); + + sync_is(); +} + void icache_inv_range(unsigned long start, unsigned long end) { - icache_inv_all(); + struct cache_range param = { start, end }; + + if (irqs_disabled()) + local_icache_inv_range(¶m); + else + on_each_cpu(local_icache_inv_range, ¶m, 1); } #endif diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index f76618b630f9..d3c61b83e195 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -18,6 +18,7 @@ #include <linux/extable.h> #include <linux/uaccess.h> #include <linux/perf_event.h> +#include <linux/kprobes.h> #include <asm/hardirq.h> #include <asm/mmu_context.h> @@ -53,6 +54,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, int fault; unsigned long address = mmu_meh & PAGE_MASK; + if (kprobe_page_fault(regs, tsk->thread.trap_no)) + return; + si_code = SEGV_MAPERR; #ifndef CONFIG_CPU_HAS_TLBI @@ -179,11 +183,14 @@ bad_area: bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } no_context: + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; + /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) return; @@ -198,6 +205,8 @@ no_context: die_if_kernel("Oops", regs, write); out_of_memory: + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; + /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). @@ -206,6 +215,8 @@ out_of_memory: return; do_sigbus: + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; + up_read(&mm->mmap_sem); /* Kernel mode? 
Handle exceptions or die */ diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index ed32845bd2d2..2f051343612e 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -2,23 +2,12 @@ #ifndef ___ASM_SPARC_DMA_MAPPING_H #define ___ASM_SPARC_DMA_MAPPING_H -#include <asm/cpu_type.h> - extern const struct dma_map_ops *dma_ops; -extern struct bus_type pci_bus_type; - static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { -#ifdef CONFIG_SPARC_LEON - if (sparc_cpu_model == sparc_leon) - return NULL; -#endif -#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) - if (bus == &pci_bus_type) - return NULL; -#endif - return dma_ops; + /* sparc32 uses per-device dma_ops */ + return IS_ENABLED(CONFIG_SPARC64) ? dma_ops : NULL; } #endif diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index e59461d03b9a..d6874c9b639f 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -373,9 +373,6 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, dma_make_coherent(paddr, PAGE_ALIGN(size)); } -const struct dma_map_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - #ifdef CONFIG_PROC_FS static int sparc_io_proc_show(struct seq_file *m, void *v) diff --git a/arch/sparc/kernel/of_device_common.c b/arch/sparc/kernel/of_device_common.c index b32cc5610712..e717a56efc5d 100644 --- a/arch/sparc/kernel/of_device_common.c +++ b/arch/sparc/kernel/of_device_common.c @@ -67,6 +67,7 @@ void of_propagate_archdata(struct platform_device *bus) op->dev.archdata.stc = bus_sd->stc; op->dev.archdata.host_controller = bus_sd->host_controller; op->dev.archdata.numa_node = bus_sd->numa_node; + op->dev.dma_ops = bus->dev.dma_ops; if (dp->child) of_propagate_archdata(op); diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index 33a0facd9eb5..289276b99b01 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -38,6 +38,8 @@ #define IOPERM (IOUPTE_CACHE | IOUPTE_WRITE | IOUPTE_VALID) #define MKIOPTE(phys) __iopte((((phys)>>4) & IOUPTE_PAGE) | IOPERM) +static const struct dma_map_ops iounit_dma_ops; + static void __init iounit_iommu_init(struct platform_device *op) { struct iounit_struct *iounit; @@ -70,6 +72,8 @@ static void __init iounit_iommu_init(struct platform_device *op) xptend = iounit->page_table + (16 * PAGE_SIZE) / sizeof(iopte_t); for (; xpt < xptend; xpt++) sbus_writel(0, xpt); + + op->dev.dma_ops = &iounit_dma_ops; } static int __init iounit_init(void) @@ -288,8 +292,3 @@ static const struct dma_map_ops iounit_dma_ops = { .map_sg = iounit_map_sg, .unmap_sg = iounit_unmap_sg, }; - -void __init ld_mmu_iounit(void) -{ - dma_ops = &iounit_dma_ops; -} diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 4d3c6991f0ae..b00dde13681b 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -54,6 +54,9 @@ static pgprot_t dvma_prot; /* Consistent mapping pte flags */ #define IOPERM (IOPTE_CACHE | IOPTE_WRITE | IOPTE_VALID) #define MKIOPTE(pfn, perm) (((((pfn)<<8) & IOPTE_PAGE) | (perm)) & ~IOPTE_WAZ) +static const struct dma_map_ops sbus_iommu_dma_gflush_ops; +static const struct dma_map_ops sbus_iommu_dma_pflush_ops; + static void __init sbus_iommu_init(struct platform_device *op) { struct iommu_struct *iommu; @@ -129,6 +132,11 @@ static void __init sbus_iommu_init(struct platform_device *op) (int)(IOMMU_NPTES*sizeof(iopte_t)), (int)IOMMU_NPTES); op->dev.archdata.iommu = iommu; + + if (flush_page_for_dma_global) + op->dev.dma_ops = 
&sbus_iommu_dma_gflush_ops; + else + op->dev.dma_ops = &sbus_iommu_dma_pflush_ops; } static int __init iommu_init(void) @@ -445,13 +453,6 @@ static const struct dma_map_ops sbus_iommu_dma_pflush_ops = { void __init ld_mmu_iommu(void) { - if (flush_page_for_dma_global) { - /* flush_page_for_dma flushes everything, no matter of what page is it */ - dma_ops = &sbus_iommu_dma_gflush_ops; - } else { - dma_ops = &sbus_iommu_dma_pflush_ops; - } - if (viking_mxcc_present || srmmu_modtype == HyperSparc) { dvma_prot = __pgprot(SRMMU_CACHE | SRMMU_ET_PTE | SRMMU_PRIV); ioperm_noc = IOPTE_CACHE | IOPTE_WRITE | IOPTE_VALID; diff --git a/arch/sparc/mm/mm_32.h b/arch/sparc/mm/mm_32.h index 0d0b06e952a5..ce750a99eea9 100644 --- a/arch/sparc/mm/mm_32.h +++ b/arch/sparc/mm/mm_32.h @@ -20,6 +20,3 @@ void __init srmmu_paging_init(void); /* iommu.c */ void ld_mmu_iommu(void); - -/* io-unit.c */ -void ld_mmu_iounit(void); diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index f56c3c9a9793..b7c94de70cca 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -1865,9 +1865,7 @@ void __init load_mmu(void) &smp_cachetlb_ops; #endif - if (sparc_cpu_model == sun4d) - ld_mmu_iounit(); - else + if (sparc_cpu_model != sun4d) ld_mmu_iommu(); #ifdef CONFIG_SMP if (sparc_cpu_model == sun4d) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index caf2edccbad2..49ae4e1ac9cd 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -161,7 +161,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ - retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx); + retval = call_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx, + false); if (retval == 0) { /* Use the hint in CST */ percpu_entry->states[cx->index].eax = cx->address; diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index e618ddfab2fd..40f6a3c33a15 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -256,6 +256,8 @@ u32 acpi_ns_build_normalized_path(struct acpi_namespace_node *node, char *full_path, u32 path_size, u8 no_trailing); +void acpi_ns_normalize_pathname(char *original_path); + char *acpi_ns_get_normalized_pathname(struct acpi_namespace_node *node, u8 no_trailing); diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c index aa71f65395d2..ee6a1b77af3f 100644 --- a/drivers/acpi/acpica/dbinput.c +++ b/drivers/acpi/acpica/dbinput.c @@ -468,16 +468,14 @@ char *acpi_db_get_next_token(char *string, return (NULL); } - /* Remove any spaces at the beginning */ + /* Remove any spaces at the beginning, ignore blank lines */ - if (*string == ' ') { - while (*string && (*string == ' ')) { - string++; - } + while (*string && isspace(*string)) { + string++; + } - if (!(*string)) { - return (NULL); - } + if (!(*string)) { + return (NULL); } switch (*string) { @@ -570,7 +568,7 @@ char *acpi_db_get_next_token(char *string, /* Find end of token */ - while (*string && (*string != ' ')) { + while (*string && !isspace(*string)) { string++; } break; diff --git a/drivers/acpi/acpica/dbxface.c b/drivers/acpi/acpica/dbxface.c index 3eb45ea93e5e..9dfd693cda3e 100644 --- a/drivers/acpi/acpica/dbxface.c +++ b/drivers/acpi/acpica/dbxface.c @@ -409,6 +409,7 @@ acpi_status acpi_initialize_debugger(void) acpi_gbl_db_output_flags = ACPI_DB_CONSOLE_OUTPUT; acpi_gbl_db_opt_no_ini_methods = FALSE; + acpi_gbl_db_opt_no_region_support = FALSE; acpi_gbl_db_buffer = 
acpi_os_allocate(ACPI_DEBUG_BUFFER_SIZE); if (!acpi_gbl_db_buffer) { diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c index 5e81a1ae44cf..1d4f8c81028c 100644 --- a/drivers/acpi/acpica/dswexec.c +++ b/drivers/acpi/acpica/dswexec.c @@ -16,6 +16,9 @@ #include "acinterp.h" #include "acnamesp.h" #include "acdebug.h" +#ifdef ACPI_EXEC_APP +#include "aecommon.h" +#endif #define _COMPONENT ACPI_DISPATCHER ACPI_MODULE_NAME("dswexec") @@ -329,6 +332,10 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state) u32 op_class; union acpi_parse_object *next_op; union acpi_parse_object *first_arg; +#ifdef ACPI_EXEC_APP + char *namepath; + union acpi_operand_object *obj_desc; +#endif ACPI_FUNCTION_TRACE_PTR(ds_exec_end_op, walk_state); @@ -537,6 +544,32 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state) status = acpi_ds_eval_buffer_field_operands(walk_state, op); + if (ACPI_FAILURE(status)) { + break; + } +#ifdef ACPI_EXEC_APP + /* + * acpi_exec support for namespace initialization file (initialize + * buffer_fields in this code.) + */ + namepath = + acpi_ns_get_external_pathname(op->common.node); + status = ae_lookup_init_file_entry(namepath, &obj_desc); + if (ACPI_SUCCESS(status)) { + status = + acpi_ex_write_data_to_field(obj_desc, + op->common. + node->object, + NULL); + if ACPI_FAILURE + (status) { + ACPI_EXCEPTION((AE_INFO, status, + "While writing to buffer field")); + } + } + ACPI_FREE(namepath); + status = AE_OK; +#endif break; case AML_TYPE_CREATE_OBJECT: diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index 697974e37edf..27069325b6de 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -14,7 +14,6 @@ #include "acdispat.h" #include "acinterp.h" #include "acnamesp.h" - #ifdef ACPI_ASL_COMPILER #include "acdisasm.h" #endif @@ -399,7 +398,6 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state) union acpi_parse_object *op; acpi_object_type object_type; acpi_status status = AE_OK; - #ifdef ACPI_ASL_COMPILER u8 param_count; #endif diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c index b31457ca926c..edadbe146506 100644 --- a/drivers/acpi/acpica/dswload2.c +++ b/drivers/acpi/acpica/dswload2.c @@ -15,6 +15,9 @@ #include "acinterp.h" #include "acnamesp.h" #include "acevents.h" +#ifdef ACPI_EXEC_APP +#include "aecommon.h" +#endif #define _COMPONENT ACPI_DISPATCHER ACPI_MODULE_NAME("dswload2") @@ -373,6 +376,10 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state) struct acpi_namespace_node *new_node; u32 i; u8 region_space; +#ifdef ACPI_EXEC_APP + union acpi_operand_object *obj_desc; + char *namepath; +#endif ACPI_FUNCTION_TRACE(ds_load2_end_op); @@ -466,6 +473,11 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state) * be evaluated later during the execution phase */ status = acpi_ds_create_buffer_field(op, walk_state); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, + "CreateBufferField failure")); + goto cleanup; + } break; case AML_TYPE_NAMED_FIELD: @@ -604,6 +616,29 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state) case AML_NAME_OP: status = acpi_ds_create_node(walk_state, node, op); + if (ACPI_FAILURE(status)) { + goto cleanup; + } +#ifdef ACPI_EXEC_APP + /* + * acpi_exec support for namespace initialization file (initialize + * Name opcodes in this code.) 
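+ * If the init file supplies an object for this pathname, it replaces + * the object that the Name opcode just created.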
+ */ + namepath = acpi_ns_get_external_pathname(node); + status = ae_lookup_init_file_entry(namepath, &obj_desc); + if (ACPI_SUCCESS(status)) { + + /* Detach any existing object, attach new object */ + + if (node->object) { + acpi_ns_detach_object(node); + } + acpi_ns_attach_object(node, obj_desc, + obj_desc->common.type); + } + ACPI_FREE(namepath); + status = AE_OK; +#endif break; case AML_METHOD_OP: diff --git a/drivers/acpi/acpica/nsnames.c b/drivers/acpi/acpica/nsnames.c index d4d26147610e..d91153f65700 100644 --- a/drivers/acpi/acpica/nsnames.c +++ b/drivers/acpi/acpica/nsnames.c @@ -13,9 +13,6 @@ #define _COMPONENT ACPI_NAMESPACE ACPI_MODULE_NAME("nsnames") -/* Local Prototypes */ -static void acpi_ns_normalize_pathname(char *original_path); - /******************************************************************************* * * FUNCTION: acpi_ns_get_external_pathname @@ -30,7 +27,6 @@ static void acpi_ns_normalize_pathname(char *original_path); * for error and debug statements. * ******************************************************************************/ - char *acpi_ns_get_external_pathname(struct acpi_namespace_node *node) { char *name_buffer; @@ -411,7 +407,7 @@ cleanup: * ******************************************************************************/ -static void acpi_ns_normalize_pathname(char *original_path) +void acpi_ns_normalize_pathname(char *original_path) { char *input_path = original_path; char *new_path_buffer; diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c index befdd13b403b..177ab88d95de 100644 --- a/drivers/acpi/acpica/utdecode.c +++ b/drivers/acpi/acpica/utdecode.c @@ -78,7 +78,7 @@ const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS] = { "IPMI", /* 0x07 */ "GeneralPurposeIo", /* 0x08 */ "GenericSerialBus", /* 0x09 */ - "PlatformCommChannel" /* 0x0A */ + "PCC" /* 0x0A */ }; const char *acpi_ut_get_region_name(u8 space_id) diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c index eee263cb7beb..c365faf4e6cd 100644 --- a/drivers/acpi/acpica/utdelete.c +++ b/drivers/acpi/acpica/utdelete.c @@ -452,13 +452,13 @@ acpi_ut_update_ref_count(union acpi_operand_object *object, u32 action) * * FUNCTION: acpi_ut_update_object_reference * - * PARAMETERS: object - Increment ref count for this object - * and all sub-objects + * PARAMETERS: object - Increment or decrement the ref count for + * this object and all sub-objects * action - Either REF_INCREMENT or REF_DECREMENT * * RETURN: Status * - * DESCRIPTION: Increment the object reference count + * DESCRIPTION: Increment or decrement the object reference count * * Object references are incremented when: * 1) An object is attached to a Node (namespace object) @@ -492,7 +492,7 @@ acpi_ut_update_object_reference(union acpi_operand_object *object, u16 action) } /* - * All sub-objects must have their reference count incremented + * All sub-objects must have their reference count updated * also. Different object types have different subobjects. 
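+ * For example, updating a package object applies the same action to + * every element in package.elements as well as to the package itself.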
*/ switch (object->common.type) { @@ -559,6 +559,7 @@ acpi_ut_update_object_reference(union acpi_operand_object *object, u16 action) break; } } + next_object = NULL; break; diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c index a874dac7db5c..681c11f4af4e 100644 --- a/drivers/acpi/acpica/utprint.c +++ b/drivers/acpi/acpica/utprint.c @@ -332,7 +332,12 @@ int vsnprintf(char *string, acpi_size size, const char *format, va_list args) int i; pos = string; - end = string + size; + + if (size != ACPI_UINT32_MAX) { + end = string + size; + } else { + end = ACPI_CAST_PTR(char, ACPI_UINT32_MAX); + } for (; *format; ++format) { if (*format != '%') { diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index a1a858ad4d18..8b2e89c20c11 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -438,13 +438,10 @@ int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data) * domain info. */ for_each_possible_cpu(i) { - pr = all_cpu_data[i]; - if (!pr) - continue; - if (cpumask_test_cpu(i, covered_cpus)) continue; + pr = all_cpu_data[i]; cpc_ptr = per_cpu(cpc_desc_ptr, i); if (!cpc_ptr) { retval = -EFAULT; @@ -495,44 +492,28 @@ int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data) cpumask_set_cpu(j, pr->shared_cpu_map); } - for_each_possible_cpu(j) { + for_each_cpu(j, pr->shared_cpu_map) { if (i == j) continue; match_pr = all_cpu_data[j]; - if (!match_pr) - continue; - - match_cpc_ptr = per_cpu(cpc_desc_ptr, j); - if (!match_cpc_ptr) { - retval = -EFAULT; - goto err_ret; - } - - match_pdomain = &(match_cpc_ptr->domain_info); - if (match_pdomain->domain != pdomain->domain) - continue; - match_pr->shared_type = pr->shared_type; cpumask_copy(match_pr->shared_cpu_map, pr->shared_cpu_map); } } + goto out; err_ret: for_each_possible_cpu(i) { pr = all_cpu_data[i]; - if (!pr) - continue; /* Assume no coordination on any error parsing domain info */ - if (retval) { - cpumask_clear(pr->shared_cpu_map); - cpumask_set_cpu(i, pr->shared_cpu_map); - pr->shared_type = CPUFREQ_SHARED_TYPE_ALL; - } + cpumask_clear(pr->shared_cpu_map); + cpumask_set_cpu(i, pr->shared_cpu_map); + pr->shared_type = CPUFREQ_SHARED_TYPE_ALL; } - +out: free_cpumask_var(covered_cpus); return retval; } diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index b64c62bfcea5..b2263ec67b43 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1321,8 +1321,8 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on) */ static const struct acpi_device_id special_pm_ids[] = { {"PNP0C0B", }, /* Generic ACPI fan */ - {"INT1044", }, /* Fan for Tiger Lake generation */ {"INT3404", }, /* Fan */ + {"INTC1044", }, /* Fan for Tiger Lake generation */ {} }; struct acpi_device *adev = ACPI_COMPANION(dev); diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c index 387f27ef3368..e4e8b75d39f0 100644 --- a/drivers/acpi/dptf/dptf_power.c +++ b/drivers/acpi/dptf/dptf_power.c @@ -97,8 +97,8 @@ static int dptf_power_remove(struct platform_device *pdev) } static const struct acpi_device_id int3407_device_ids[] = { - {"INT1047", 0}, {"INT3407", 0}, + {"INTC1047", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, int3407_device_ids); diff --git a/drivers/acpi/dptf/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c index 1ec7b6900662..bc71a6a60334 100644 --- a/drivers/acpi/dptf/int340x_thermal.c +++ b/drivers/acpi/dptf/int340x_thermal.c @@ -13,10 +13,6 @@ #define INT3401_DEVICE 0X01 static const struct acpi_device_id int340x_thermal_device_ids[] = { - {"INT1040"}, - 
{"INT1043"}, - {"INT1044"}, - {"INT1047"}, {"INT3400"}, {"INT3401", INT3401_DEVICE}, {"INT3402"}, @@ -28,6 +24,10 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = { {"INT3409"}, {"INT340A"}, {"INT340B"}, + {"INTC1040"}, + {"INTC1043"}, + {"INTC1044"}, + {"INTC1047"}, {""}, }; diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 532a1ae3595a..a0bd56ece3ff 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -897,13 +897,6 @@ static long __acpi_processor_get_throttling(void *data) return pr->throttling.acpi_processor_get_throttling(pr); } -static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct) -{ - if (direct || (is_percpu_thread() && cpu == smp_processor_id())) - return fn(arg); - return work_on_cpu(cpu, fn, arg); -} - static int acpi_processor_get_throttling(struct acpi_processor *pr) { if (!pr) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index bb1ae400ec1f..4edc8a3ce40f 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -1009,6 +1009,10 @@ static bool acpi_s2idle_wake(void) if (acpi_any_fixed_event_status_set()) return true; + /* Check wakeups from drivers sharing the SCI. */ + if (acpi_check_wakeup_handlers()) + return true; + /* * If the status bit is set for any enabled GPE other than the * EC one, the wakeup is regarded as a genuine one. diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h index 41675d24a9bc..3d90480ce1b1 100644 --- a/drivers/acpi/sleep.h +++ b/drivers/acpi/sleep.h @@ -2,6 +2,7 @@ extern void acpi_enable_wakeup_devices(u8 sleep_state); extern void acpi_disable_wakeup_devices(u8 sleep_state); +extern bool acpi_check_wakeup_handlers(void); extern struct list_head acpi_wakeup_device_list; extern struct mutex acpi_device_lock; diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 180ac4329763..0e905c3d1645 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -501,7 +501,7 @@ static const char * const table_sigs[] = { ACPI_SIG_WDDT, ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT, - NULL }; + ACPI_SIG_NHLT, NULL }; #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 419f814d596a..b4994e50608d 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -352,6 +352,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"), }, }, + { + .callback = video_detect_force_native, + .ident = "Acer Aspire 5738z", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5738"), + DMI_MATCH(DMI_BOARD_NAME, "JV50"), + }, + }, /* * Desktops which falsely report a backlight and which our heuristics diff --git a/drivers/acpi/wakeup.c b/drivers/acpi/wakeup.c index c28244df56a5..0b2e42530adf 100644 --- a/drivers/acpi/wakeup.c +++ b/drivers/acpi/wakeup.c @@ -12,6 +12,15 @@ #include "internal.h" #include "sleep.h" +struct acpi_wakeup_handler { + struct list_head list_node; + bool (*wakeup)(void *context); + void *context; +}; + +static LIST_HEAD(acpi_wakeup_handler_head); +static DEFINE_MUTEX(acpi_wakeup_handler_mutex); + /* * We didn't lock acpi_device_lock in the file, because it invokes oops in * suspend/resume and isn't really required as this is called in S-state. 
At @@ -90,3 +99,75 @@ int __init acpi_wakeup_device_init(void) mutex_unlock(&acpi_device_lock); return 0; } + +/** + * acpi_register_wakeup_handler - Register wakeup handler + * @wake_irq: The IRQ through which the device may receive wakeups + * @wakeup: Wakeup-handler to call when the SCI has triggered a wakeup + * @context: Context to pass to the handler when calling it + * + * Drivers which may share an IRQ with the SCI can use this to register + * a handler which returns true when the device they are managing wants + * to trigger a wakeup. + */ +int acpi_register_wakeup_handler(int wake_irq, bool (*wakeup)(void *context), + void *context) +{ + struct acpi_wakeup_handler *handler; + + /* + * If the device is not sharing its IRQ with the SCI, there is no + * need to register the handler. + */ + if (!acpi_sci_irq_valid() || wake_irq != acpi_sci_irq) + return 0; + + handler = kmalloc(sizeof(*handler), GFP_KERNEL); + if (!handler) + return -ENOMEM; + + handler->wakeup = wakeup; + handler->context = context; + + mutex_lock(&acpi_wakeup_handler_mutex); + list_add(&handler->list_node, &acpi_wakeup_handler_head); + mutex_unlock(&acpi_wakeup_handler_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(acpi_register_wakeup_handler); + +/** + * acpi_unregister_wakeup_handler - Unregister wakeup handler + * @wakeup: Wakeup-handler passed to acpi_register_wakeup_handler() + * @context: Context passed to acpi_register_wakeup_handler() + */ +void acpi_unregister_wakeup_handler(bool (*wakeup)(void *context), + void *context) +{ + struct acpi_wakeup_handler *handler; + + mutex_lock(&acpi_wakeup_handler_mutex); + list_for_each_entry(handler, &acpi_wakeup_handler_head, list_node) { + if (handler->wakeup == wakeup && handler->context == context) { + list_del(&handler->list_node); + kfree(handler); + break; + } + } + mutex_unlock(&acpi_wakeup_handler_mutex); +} +EXPORT_SYMBOL_GPL(acpi_unregister_wakeup_handler); + +bool acpi_check_wakeup_handlers(void) +{ + struct acpi_wakeup_handler *handler; + + /* No need to lock, nothing else is running when we're called. */ + list_for_each_entry(handler, &acpi_wakeup_handler_head, list_node) { + if (handler->wakeup(handler->context)) + return true; + } + + return false; +} diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 6d1dee7051eb..fdd508a78ffd 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1922,10 +1922,6 @@ static int device_prepare(struct device *dev, pm_message_t state) if (dev->power.syscore) return 0; - WARN_ON(!pm_runtime_enabled(dev) && - dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND | - DPM_FLAG_LEAVE_SUSPENDED)); - /* * If a device's parent goes into runtime suspend at the wrong time, * it won't be possible to resume the device. 
To prevent this we @@ -1973,8 +1969,7 @@ unlock: */ spin_lock_irq(&dev->power.lock); dev->power.direct_complete = state.event == PM_EVENT_SUSPEND && - ((pm_runtime_suspended(dev) && ret > 0) || - dev->power.no_pm_callbacks) && + (ret > 0 || dev->power.no_pm_callbacks) && !dev_pm_test_driver_flags(dev, DPM_FLAG_NEVER_SKIP); spin_unlock_irq(&dev->power.lock); return 0; diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index bff5295016ae..c3e6bd59e920 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -37,10 +37,12 @@ config CPU_FREQ_STAT choice prompt "Default CPUFreq governor" default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ + default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if BIG_LITTLE + default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if X86_INTEL_PSTATE && SMP default CPU_FREQ_DEFAULT_GOV_PERFORMANCE help This option sets which CPUFreq governor shall be loaded at - startup. If in doubt, select 'performance'. + startup. If in doubt, use the default setting. config CPU_FREQ_DEFAULT_GOV_PERFORMANCE bool "performance" diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 62502d0e4c33..bc58a0809d11 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -8,6 +8,8 @@ config X86_INTEL_PSTATE depends on X86 select ACPI_PROCESSOR if ACPI select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO + select CPU_FREQ_GOV_PERFORMANCE + select CPU_FREQ_GOV_SCHEDUTIL if SMP help This driver provides a P state for Intel core processors. The driver implements an internal governor and will become diff --git a/drivers/crypto/chelsio/chcr_ktls.c b/drivers/crypto/chelsio/chcr_ktls.c index 73658b71d4a3..cd1769ecdc1c 100644 --- a/drivers/crypto/chelsio/chcr_ktls.c +++ b/drivers/crypto/chelsio/chcr_ktls.c @@ -2,6 +2,7 @@ /* Copyright (C) 2020 Chelsio Communications. All rights reserved. */ #ifdef CONFIG_CHELSIO_TLS_DEVICE +#include <linux/highmem.h> #include "chcr_ktls.h" #include "clip_tbl.h" diff --git a/drivers/ide/ide-scan-pci.c b/drivers/ide/ide-scan-pci.c index acf874800ca4..b0411a1827a3 100644 --- a/drivers/ide/ide-scan-pci.c +++ b/drivers/ide/ide-scan-pci.c @@ -89,8 +89,7 @@ static int __init ide_scan_pcidev(struct pci_dev *dev) static int __init ide_scan_pcibus(void) { struct pci_dev *dev = NULL; - struct pci_driver *d; - struct list_head *l, *n; + struct pci_driver *d, *tmp; pre_init = 0; for_each_pci_dev(dev) @@ -101,9 +100,8 @@ static int __init ide_scan_pcibus(void) * are post init. 
*/ - list_for_each_safe(l, n, &ide_pci_drivers) { - list_del(l); - d = list_entry(l, struct pci_driver, node); + list_for_each_entry_safe(d, tmp, &ide_pci_drivers, node) { + list_del(&d->node); if (__pci_register_driver(d, d->driver.owner, d->driver.mod_name)) printk(KERN_ERR "%s: failed to register %s driver\n", diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 086dfb1b9d0b..91cdc0a2b1a7 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -148,7 +148,7 @@ static void slc_bump(struct slcan *sl) u32 tmpid; char *cmd = sl->rbuff; - cf.can_id = 0; + memset(&cf, 0, sizeof(cf)); switch (*cmd) { case 'r': @@ -187,8 +187,6 @@ static void slc_bump(struct slcan *sl) else return; - *(u64 *) (&cf.data) = 0; /* clear payload */ - /* RTR frames may have a dlc > 0 but they never have any data bytes */ if (!(cf.can_id & CAN_RTR_FLAG)) { for (i = 0; i < cf.can_dlc; i++) { diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index affa5c6e135c..c7ac63f41918 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -480,7 +480,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) priv->slave_mii_bus->parent = ds->dev->parent; priv->slave_mii_bus->phy_mask = ~priv->indir_phy_mask; - err = of_mdiobus_register(priv->slave_mii_bus, dn); + err = mdiobus_register(priv->slave_mii_bus); if (err && dn) of_node_put(dn); @@ -1079,6 +1079,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) const struct bcm_sf2_of_data *data; struct b53_platform_data *pdata; struct dsa_switch_ops *ops; + struct device_node *ports; struct bcm_sf2_priv *priv; struct b53_device *dev; struct dsa_switch *ds; @@ -1146,7 +1147,11 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) set_bit(0, priv->cfp.used); set_bit(0, priv->cfp.unique); - bcm_sf2_identify_ports(priv, dn->child); + ports = of_find_node_by_name(dn, "ports"); + if (ports) { + bcm_sf2_identify_ports(priv, ports); + of_node_put(ports); + } priv->irq0 = irq_of_parse_and_map(dn, 0); priv->irq1 = irq_of_parse_and_map(dn, 1); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index ef57552db260..2d0d91db0ddb 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1403,6 +1403,9 @@ mt7530_setup(struct dsa_switch *ds) continue; phy_node = of_parse_phandle(mac_np, "phy-handle", 0); + if (!phy_node) + continue; + if (phy_node->parent == priv->dev->of_node->parent) { ret = of_get_phy_mode(mac_np, &interface); if (ret && ret != -ENODEV) diff --git a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c index 97901c114bfa..fbe9d88b13c7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c +++ b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c @@ -491,7 +491,7 @@ get_ingress_preclass_record(struct aq_hw_s *hw, rec->snap[1] = packed_record[8] & 0xFF; rec->llc = (packed_record[8] >> 8) & 0xFF; - rec->llc = packed_record[9] << 8; + rec->llc |= packed_record[9] << 8; rec->mac_sa[0] = packed_record[10]; rec->mac_sa[0] |= packed_record[11] << 16; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 9638d65d8261..517caedc0a87 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -6874,7 +6874,8 @@ int bnx2x_link_update(struct link_params *params, struct link_vars *vars) case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY: /* In this option, the first PHY 
makes sure to pass the * traffic through itself only. - * Its not clear how to reset the link on the second phy + * It's not clear how to reset the link on the second + * phy. */ active_external_phy = EXT_PHY1; break; diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.h b/drivers/net/ethernet/cavium/common/cavium_ptp.h index a04eccbc78e8..1e0ffe8f4152 100644 --- a/drivers/net/ethernet/cavium/common/cavium_ptp.h +++ b/drivers/net/ethernet/cavium/common/cavium_ptp.h @@ -24,7 +24,7 @@ struct cavium_ptp { struct ptp_clock *ptp_clock; }; -#if IS_ENABLED(CONFIG_CAVIUM_PTP) +#if IS_REACHABLE(CONFIG_CAVIUM_PTP) struct cavium_ptp *cavium_ptp_get(void); void cavium_ptp_put(struct cavium_ptp *ptp); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 75fde0d4d493..a70018f067aa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3132,7 +3132,6 @@ static int cxgb_set_mac_addr(struct net_device *dev, void *p) return ret; memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - pi->xact_addr_filt = ret; return 0; } @@ -6672,6 +6671,10 @@ static void shutdown_one(struct pci_dev *pdev) if (adapter->port[i]->reg_state == NETREG_REGISTERED) cxgb_close(adapter->port[i]); + rtnl_lock(); + cxgb4_mqprio_stop_offload(adapter); + rtnl_unlock(); + if (is_uld(adapter)) { detach_ulds(adapter); t4_uld_clean_up(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c index ec3eb45ee3b4..e6af4906d674 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c @@ -301,6 +301,7 @@ static void cxgb4_mqprio_free_hw_resources(struct net_device *dev) cxgb4_clear_msix_aff(eorxq->msix->vec, eorxq->msix->aff_mask); free_irq(eorxq->msix->vec, &eorxq->rspq); + cxgb4_free_msix_idx_in_bmap(adap, eorxq->msix->idx); } free_rspq_fl(adap, &eorxq->rspq, &eorxq->fl); @@ -611,6 +612,28 @@ out: return ret; } +void cxgb4_mqprio_stop_offload(struct adapter *adap) +{ + struct cxgb4_tc_port_mqprio *tc_port_mqprio; + struct net_device *dev; + u8 i; + + if (!adap->tc_mqprio || !adap->tc_mqprio->port_mqprio) + return; + + for_each_port(adap, i) { + dev = adap->port[i]; + if (!dev) + continue; + + tc_port_mqprio = &adap->tc_mqprio->port_mqprio[i]; + if (!tc_port_mqprio->mqprio.qopt.num_tc) + continue; + + cxgb4_mqprio_disable_offload(dev); + } +} + int cxgb4_init_tc_mqprio(struct adapter *adap) { struct cxgb4_tc_port_mqprio *tc_port_mqprio, *port_mqprio; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h index c532f1ef8451..ff8794132b22 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h @@ -38,6 +38,7 @@ struct cxgb4_tc_mqprio { int cxgb4_setup_tc_mqprio(struct net_device *dev, struct tc_mqprio_qopt_offload *mqprio); +void cxgb4_mqprio_stop_offload(struct adapter *adap); int cxgb4_init_tc_mqprio(struct adapter *adap); void cxgb4_cleanup_tc_mqprio(struct adapter *adap); #endif /* __CXGB4_TC_MQPRIO_H__ */ diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 835b7816e372..87236206366f 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1731,7 +1731,7 @@ static int ftgmac100_setup_clk(struct ftgmac100 *priv) if (rc) goto cleanup_clk; - /* 
RCLK is for RMII, typically used for NCSI. Optional because its not + /* RCLK is for RMII, typically used for NCSI. Optional because it's not * necessary if it's the AST2400 MAC, or the MAC is configured for * RGMII, or the controller is not an ASPEED-based controller. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 2f76908cae73..51117a5a6bbf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -150,14 +150,20 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, u8 prio = act->vlan.prio; u16 vid = act->vlan.vid; - return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, - act->id, vid, - proto, prio, extack); + err = mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, + act->id, vid, + proto, prio, extack); + if (err) + return err; + break; } case FLOW_ACTION_PRIORITY: - return mlxsw_sp_acl_rulei_act_priority(mlxsw_sp, rulei, - act->priority, - extack); + err = mlxsw_sp_acl_rulei_act_priority(mlxsw_sp, rulei, + act->priority, + extack); + if (err) + return err; + break; case FLOW_ACTION_MANGLE: { enum flow_action_mangle_base htype = act->mangle.htype; __be32 be_mask = (__force __be32) act->mangle.mask; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 9096ffd89e50..fbf714d027d8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -643,7 +643,7 @@ static int mlxsw_sp_trap_policer_bs(u64 burst, u8 *p_burst_size, { int bs = fls64(burst) - 1; - if (burst != (1 << bs)) { + if (burst != (BIT_ULL(bs))) { NL_SET_ERR_MSG_MOD(extack, "Policer burst size is not power of two"); return -EINVAL; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 1a5fc2ae351c..29810a1aa210 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -369,8 +369,8 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; u8 abs_vport_id = 0; - int rc = -EINVAL; u16 rx_mode = 0; + int rc; rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); if (rc) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 1305522f72d6..40efe60eff8d 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -282,7 +282,6 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[], { struct rmnet_priv *priv = netdev_priv(dev); struct net_device *real_dev; - struct rmnet_endpoint *ep; struct rmnet_port *port; u16 mux_id; @@ -297,19 +296,27 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[], if (data[IFLA_RMNET_MUX_ID]) { mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]); - if (rmnet_get_endpoint(port, mux_id)) { - NL_SET_ERR_MSG_MOD(extack, "MUX ID already exists"); - return -EINVAL; - } - ep = rmnet_get_endpoint(port, priv->mux_id); - if (!ep) - return -ENODEV; - hlist_del_init_rcu(&ep->hlnode); - hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]); + if (mux_id != priv->mux_id) { + struct rmnet_endpoint *ep; + + ep = rmnet_get_endpoint(port, priv->mux_id); + if (!ep) + return -ENODEV; - ep->mux_id = mux_id; - priv->mux_id = mux_id; + if (rmnet_get_endpoint(port, mux_id)) { + 
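/* The requested mux_id is already in use; refuse to move the endpoint. */ +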
NL_SET_ERR_MSG_MOD(extack, + "MUX ID already exists"); + return -EINVAL; + } + + hlist_del_init_rcu(&ep->hlnode); + hlist_add_head_rcu(&ep->hlnode, + &port->muxed_ep[mux_id]); + + ep->mux_id = mux_id; + priv->mux_id = mux_id; + } } if (data[IFLA_RMNET_FLAGS]) { diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 55cb5730beb6..bf5bf05970a2 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5441,9 +5441,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netif_napi_add(dev, &tp->napi, rtl8169_poll, NAPI_POLL_WEIGHT); - dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | - NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX; + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_HIGHDMA; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; @@ -5460,26 +5459,26 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Disallow toggling */ dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX; + if (rtl_chip_supports_csum_v2(tp)) + dev->hw_features |= NETIF_F_IPV6_CSUM; + + dev->features |= dev->hw_features; + + /* There has been a number of reports that using SG/TSO results in + * tx timeouts. However for a lot of people SG/TSO works fine. + * Therefore disable both features by default, but allow users to + * enable them. Use at own risk! + */ if (rtl_chip_supports_csum_v2(tp)) { - dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6; + dev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6; dev->gso_max_size = RTL_GSO_MAX_SIZE_V2; dev->gso_max_segs = RTL_GSO_MAX_SEGS_V2; } else { + dev->hw_features |= NETIF_F_SG | NETIF_F_TSO; dev->gso_max_size = RTL_GSO_MAX_SIZE_V1; dev->gso_max_segs = RTL_GSO_MAX_SEGS_V1; } - /* RTL8168e-vl and one RTL8168c variant are known to have a - * HW issue with TSO. 
- */ - if (tp->mac_version == RTL_GIGA_MAC_VER_34 || - tp->mac_version == RTL_GIGA_MAC_VER_22) { - dev->vlan_features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG); - dev->hw_features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG); - } - - dev->features |= dev->hw_features; - dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 542784300620..efc6ec1b8027 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -207,7 +207,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, reg++; } - while (reg <= perfect_addr_number) { + while (reg < perfect_addr_number) { writel(0, ioaddr + GMAC_ADDR_HIGH(reg)); writel(0, ioaddr + GMAC_ADDR_LOW(reg)); reg++; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 0e4575f7bedb..ad4df9bddcf3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -577,8 +577,13 @@ static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash, value |= XGMAC_VLAN_EDVLP; value |= XGMAC_VLAN_ESVL; value |= XGMAC_VLAN_DOVLTC; + } else { + value &= ~XGMAC_VLAN_EDVLP; + value &= ~XGMAC_VLAN_ESVL; + value &= ~XGMAC_VLAN_DOVLTC; } + value &= ~XGMAC_VLAN_VID; writel(value, ioaddr + XGMAC_VLAN_TAG); } else if (perfect_match) { u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); @@ -589,13 +594,19 @@ static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash, value = readl(ioaddr + XGMAC_VLAN_TAG); + value &= ~XGMAC_VLAN_VTHM; value |= XGMAC_VLAN_ETV; if (is_double) { value |= XGMAC_VLAN_EDVLP; value |= XGMAC_VLAN_ESVL; value |= XGMAC_VLAN_DOVLTC; + } else { + value &= ~XGMAC_VLAN_EDVLP; + value &= ~XGMAC_VLAN_ESVL; + value &= ~XGMAC_VLAN_DOVLTC; } + value &= ~XGMAC_VLAN_VID; writel(value | perfect_match, ioaddr + XGMAC_VLAN_TAG); } else { u32 value = readl(ioaddr + XGMAC_PACKET_FILTER); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2fb671e61ee8..e6898fd5223f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4566,9 +4566,13 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid return ret; } - ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); + if (priv->hw->num_vlan) { + ret = stmmac_add_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); + if (ret) + return ret; + } - return ret; + return 0; } static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) @@ -4581,9 +4585,12 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi is_double = true; clear_bit(vid, priv->active_vlans); - ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); - if (ret) - return ret; + + if (priv->hw->num_vlan) { + ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); + if (ret) + return ret; + } return stmmac_vlan_update(priv, is_double); } diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index da82d7f16a09..0d580d81d910 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2594,6 +2594,9 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(dev); macsec = macsec_priv(dev); + if 
(!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]) + return -EINVAL; + offload = nla_get_u8(tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]); if (macsec->offload == offload) return 0; diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 481cf48c9b9e..31f731e6df72 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -425,8 +425,8 @@ static int at803x_parse_dt(struct phy_device *phydev) */ if (at803x_match_phy_id(phydev, ATH8030_PHY_ID) || at803x_match_phy_id(phydev, ATH8035_PHY_ID)) { - priv->clk_25m_reg &= ~AT8035_CLK_OUT_MASK; - priv->clk_25m_mask &= ~AT8035_CLK_OUT_MASK; + priv->clk_25m_reg &= AT8035_CLK_OUT_MASK; + priv->clk_25m_mask &= AT8035_CLK_OUT_MASK; } } diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 2ec19e5540bf..05d20343b816 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -25,6 +25,7 @@ #include <linux/micrel_phy.h> #include <linux/of.h> #include <linux/clk.h> +#include <linux/delay.h> /* Operation Mode Strap Override */ #define MII_KSZPHY_OMSO 0x16 @@ -952,6 +953,12 @@ static int kszphy_resume(struct phy_device *phydev) genphy_resume(phydev); + /* After switching from power-down to normal mode, an internal global + * reset is automatically generated. Wait a minimum of 1 ms before + * read/write access to the PHY registers. + */ + usleep_range(1000, 2000); + ret = kszphy_config_reset(phydev); if (ret) return ret; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 228fe449dc6d..07476c6510f2 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1678,8 +1678,12 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, alloc_frag->offset += buflen; } err = tun_xdp_act(tun, xdp_prog, &xdp, act); - if (err < 0) - goto err_xdp; + if (err < 0) { + if (act == XDP_REDIRECT || act == XDP_TX) + put_page(alloc_frag->page); + goto out; + } + if (err == XDP_REDIRECT) xdp_do_flush(); if (err != XDP_PASS) @@ -1693,8 +1697,6 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad); -err_xdp: - put_page(alloc_frag->page); out: rcu_read_unlock(); local_bh_enable(); diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 8783e2ab3ec0..0ef7e1f443e3 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -54,6 +54,7 @@ static const char driver_name[] = "pegasus"; #undef PEGASUS_WRITE_EEPROM #define BMSR_MEDIA (BMSR_10HALF | BMSR_10FULL | BMSR_100HALF | \ BMSR_100FULL | BMSR_ANEGCAPABLE) +#define CARRIER_CHECK_DELAY (2 * HZ) static bool loopback; static bool mii_mode; @@ -1089,17 +1090,12 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg81, 2); } - -static int pegasus_count; -static struct workqueue_struct *pegasus_workqueue; -#define CARRIER_CHECK_DELAY (2 * HZ) - static void check_carrier(struct work_struct *work) { pegasus_t *pegasus = container_of(work, pegasus_t, carrier_check.work); set_carrier(pegasus->net); if (!(pegasus->flags & PEGASUS_UNPLUG)) { - queue_delayed_work(pegasus_workqueue, &pegasus->carrier_check, + queue_delayed_work(system_long_wq, &pegasus->carrier_check, CARRIER_CHECK_DELAY); } } @@ -1120,18 +1116,6 @@ static int pegasus_blacklisted(struct usb_device *udev) return 0; } -/* we rely on probe() and remove() being serialized so we - * don't need extra locking on pegasus_count. 
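The pegasus hunks around this point drop the driver-private, refcounted workqueue that the removed comment describes: a periodic carrier check needs neither WQ_MEM_RECLAIM nor strict ordering, so it can share system_long_wq. A sketch of the resulting pattern, with hypothetical names:

	struct my_dev {
		struct delayed_work carrier_check;
		/* ... */
	};

	/* self-rearming delayed work on a shared system workqueue */
	static void my_check_carrier(struct work_struct *work)
	{
		struct my_dev *d = container_of(work, struct my_dev,
						carrier_check.work);

		my_poll_carrier(d);	/* hypothetical helper */
		queue_delayed_work(system_long_wq, &d->carrier_check, 2 * HZ);
	}

Teardown then has to use cancel_delayed_work_sync(), as the patch also switches to, so that a callback running concurrently cannot re-arm itself after the cancel.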
- */ -static void pegasus_dec_workqueue(void) -{ - pegasus_count--; - if (pegasus_count == 0) { - destroy_workqueue(pegasus_workqueue); - pegasus_workqueue = NULL; - } -} - static int pegasus_probe(struct usb_interface *intf, const struct usb_device_id *id) { @@ -1144,14 +1128,6 @@ static int pegasus_probe(struct usb_interface *intf, if (pegasus_blacklisted(dev)) return -ENODEV; - if (pegasus_count == 0) { - pegasus_workqueue = alloc_workqueue("pegasus", WQ_MEM_RECLAIM, - 0); - if (!pegasus_workqueue) - return -ENOMEM; - } - pegasus_count++; - net = alloc_etherdev(sizeof(struct pegasus)); if (!net) goto out; @@ -1209,7 +1185,7 @@ static int pegasus_probe(struct usb_interface *intf, res = register_netdev(net); if (res) goto out3; - queue_delayed_work(pegasus_workqueue, &pegasus->carrier_check, + queue_delayed_work(system_long_wq, &pegasus->carrier_check, CARRIER_CHECK_DELAY); dev_info(&intf->dev, "%s, %s, %pM\n", net->name, usb_dev_id[dev_index].name, net->dev_addr); @@ -1222,7 +1198,6 @@ out2: out1: free_netdev(net); out: - pegasus_dec_workqueue(); return res; } @@ -1237,7 +1212,7 @@ static void pegasus_disconnect(struct usb_interface *intf) } pegasus->flags |= PEGASUS_UNPLUG; - cancel_delayed_work(&pegasus->carrier_check); + cancel_delayed_work_sync(&pegasus->carrier_check); unregister_netdev(pegasus->net); unlink_all_urbs(pegasus); free_all_urbs(pegasus); @@ -1246,7 +1221,6 @@ static void pegasus_disconnect(struct usb_interface *intf) pegasus->rx_skb = NULL; } free_netdev(pegasus->net); - pegasus_dec_workqueue(); } static int pegasus_suspend(struct usb_interface *intf, pm_message_t message) @@ -1254,7 +1228,7 @@ static int pegasus_suspend(struct usb_interface *intf, pm_message_t message) struct pegasus *pegasus = usb_get_intfdata(intf); netif_device_detach(pegasus->net); - cancel_delayed_work(&pegasus->carrier_check); + cancel_delayed_work_sync(&pegasus->carrier_check); if (netif_running(pegasus->net)) { usb_kill_urb(pegasus->rx_urb); usb_kill_urb(pegasus->intr_urb); @@ -1276,7 +1250,7 @@ static int pegasus_resume(struct usb_interface *intf) pegasus->intr_urb->actual_length = 0; intr_callback(pegasus->intr_urb); } - queue_delayed_work(pegasus_workqueue, &pegasus->carrier_check, + queue_delayed_work(system_long_wq, &pegasus->carrier_check, CARRIER_CHECK_DELAY); return 0; } diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c index f66c0f8f6f4a..ecb3fccca603 100644 --- a/drivers/net/wimax/i2400m/driver.c +++ b/drivers/net/wimax/i2400m/driver.c @@ -740,9 +740,6 @@ EXPORT_SYMBOL_GPL(i2400m_error_recovery); static int i2400m_bm_buf_alloc(struct i2400m *i2400m) { - int result; - - result = -ENOMEM; i2400m->bm_cmd_buf = kzalloc(I2400M_BM_CMD_BUF_SIZE, GFP_KERNEL); if (i2400m->bm_cmd_buf == NULL) goto error_bm_cmd_kzalloc; @@ -754,7 +751,7 @@ int i2400m_bm_buf_alloc(struct i2400m *i2400m) error_bm_ack_buf_kzalloc: kfree(i2400m->bm_cmd_buf); error_bm_cmd_kzalloc: - return result; + return -ENOMEM; } @@ -843,7 +840,7 @@ EXPORT_SYMBOL_GPL(i2400m_reset); */ int i2400m_setup(struct i2400m *i2400m, enum i2400m_bri bm_flags) { - int result = -ENODEV; + int result; struct device *dev = i2400m_dev(i2400m); struct wimax_dev *wimax_dev = &i2400m->wimax_dev; struct net_device *net_dev = i2400m->wimax_dev.net_dev; diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 33c9b6ea7364..fb9b17fa0fb5 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -40,7 +40,7 @@ struct cis_cache_entry { unsigned int addr; unsigned int len; 
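The struct that continues just below trades the GNU zero-length array idiom (cache[0]) for a C99 flexible array member (cache[]), matching the tree-wide cleanup; the flexible form tells the compiler and fortified string helpers that the bound is dynamic rather than zero. A sketch of declaration plus allocation, with hypothetical names:

	struct cache_entry {
		unsigned int len;
		unsigned char data[];	/* must be the last member */
	};

	struct cache_entry *e;

	e = kmalloc(struct_size(e, data, n), GFP_KERNEL);	/* n data bytes */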
unsigned int attr; - unsigned char cache[0]; + unsigned char cache[]; }; struct pccard_resource_ops { diff --git a/drivers/pcmcia/omap_cf.c b/drivers/pcmcia/omap_cf.c index 0a04eb04f3a2..d3ef5534991e 100644 --- a/drivers/pcmcia/omap_cf.c +++ b/drivers/pcmcia/omap_cf.c @@ -329,7 +329,7 @@ static int __exit omap_cf_remove(struct platform_device *pdev) static struct platform_driver omap_cf_driver = { .driver = { - .name = (char *) driver_name, + .name = driver_name, }, .remove = __exit_p(omap_cf_remove), }; diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c index 9e6922c08ef6..3b05760e69d6 100644 --- a/drivers/pcmcia/rsrc_nonstatic.c +++ b/drivers/pcmcia/rsrc_nonstatic.c @@ -1076,7 +1076,7 @@ static ssize_t show_io_db(struct device *dev, for (p = data->io_db.next; p != &data->io_db; p = p->next) { if (ret > (PAGE_SIZE - 10)) continue; - ret += snprintf(&buf[ret], (PAGE_SIZE - ret - 1), + ret += scnprintf(&buf[ret], (PAGE_SIZE - ret - 1), "0x%08lx - 0x%08lx\n", ((unsigned long) p->base), ((unsigned long) p->base + p->num - 1)); @@ -1133,7 +1133,7 @@ static ssize_t show_mem_db(struct device *dev, p = p->next) { if (ret > (PAGE_SIZE - 10)) continue; - ret += snprintf(&buf[ret], (PAGE_SIZE - ret - 1), + ret += scnprintf(&buf[ret], (PAGE_SIZE - ret - 1), "0x%08lx - 0x%08lx\n", ((unsigned long) p->base), ((unsigned long) p->base + p->num - 1)); @@ -1142,7 +1142,7 @@ static ssize_t show_mem_db(struct device *dev, for (p = data->mem_db.next; p != &data->mem_db; p = p->next) { if (ret > (PAGE_SIZE - 10)) continue; - ret += snprintf(&buf[ret], (PAGE_SIZE - ret - 1), + ret += scnprintf(&buf[ret], (PAGE_SIZE - ret - 1), "0x%08lx - 0x%08lx\n", ((unsigned long) p->base), ((unsigned long) p->base + p->num - 1)); diff --git a/drivers/pcmcia/sa1100_simpad.c b/drivers/pcmcia/sa1100_simpad.c index e2e8729afd9d..784ada5b8c4f 100644 --- a/drivers/pcmcia/sa1100_simpad.c +++ b/drivers/pcmcia/sa1100_simpad.c @@ -14,7 +14,7 @@ #include <asm/mach-types.h> #include <mach/simpad.h> #include "sa1100_generic.h" - + static int simpad_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { @@ -66,7 +66,7 @@ simpad_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, simpad_clear_cs3_bit(VCC_3V_EN|VCC_5V_EN|EN0|EN1); break; - case 33: + case 33: simpad_clear_cs3_bit(VCC_3V_EN|EN1); simpad_set_cs3_bit(VCC_5V_EN|EN0); break; @@ -95,7 +95,7 @@ static void simpad_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) simpad_set_cs3_bit(PCMCIA_RESET); } -static struct pcmcia_low_level simpad_pcmcia_ops = { +static struct pcmcia_low_level simpad_pcmcia_ops = { .owner = THIS_MODULE, .hw_init = simpad_pcmcia_hw_init, .hw_shutdown = simpad_pcmcia_hw_shutdown, diff --git a/drivers/pcmcia/soc_common.h b/drivers/pcmcia/soc_common.h index b7f993f1bbd0..222e81c79365 100644 --- a/drivers/pcmcia/soc_common.h +++ b/drivers/pcmcia/soc_common.h @@ -88,7 +88,7 @@ struct soc_pcmcia_socket { struct skt_dev_info { int nskt; - struct soc_pcmcia_socket skt[0]; + struct soc_pcmcia_socket skt[]; }; struct pcmcia_state { diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 49b1c6a1bdbe..bf6529b0b5b0 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -180,12 +180,12 @@ static ssize_t show_yenta_registers(struct device *yentadev, struct device_attri for (i = 0; i < 0x24; i += 4) { unsigned val; if (!(i & 15)) - offset += snprintf(buf + offset, PAGE_SIZE - offset, "\n%02x:", i); + offset += scnprintf(buf + offset, PAGE_SIZE - offset, "\n%02x:", i); val = cb_readl(socket, i); - 
offset += snprintf(buf + offset, PAGE_SIZE - offset, " %08x", val); + offset += scnprintf(buf + offset, PAGE_SIZE - offset, " %08x", val); } - offset += snprintf(buf + offset, PAGE_SIZE - offset, "\n\nExCA registers:"); + offset += scnprintf(buf + offset, PAGE_SIZE - offset, "\n\nExCA registers:"); for (i = 0; i < 0x45; i++) { unsigned char val; if (!(i & 7)) { @@ -193,10 +193,10 @@ static ssize_t show_yenta_registers(struct device *yentadev, struct device_attri memcpy(buf + offset, " -", 2); offset += 2; } else - offset += snprintf(buf + offset, PAGE_SIZE - offset, "\n%02x:", i); + offset += scnprintf(buf + offset, PAGE_SIZE - offset, "\n%02x:", i); } val = exca_readb(socket, i); - offset += snprintf(buf + offset, PAGE_SIZE - offset, " %02x", val); + offset += scnprintf(buf + offset, PAGE_SIZE - offset, " %02x", val); } buf[offset++] = '\n'; return offset; diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index 5f4f5716c893..cc7dd4d87cce 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -19,8 +19,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); static const struct acpi_device_id intel_hid_ids[] = { - {"INT1051", 0}, {"INT33D5", 0}, + {"INTC1051", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c index 7b23efc46a43..289c6655d425 100644 --- a/drivers/platform/x86/intel_int0002_vgpio.c +++ b/drivers/platform/x86/intel_int0002_vgpio.c @@ -127,6 +127,14 @@ static irqreturn_t int0002_irq(int irq, void *data) return IRQ_HANDLED; } +static bool int0002_check_wake(void *data) +{ + u32 gpe_sts_reg; + + gpe_sts_reg = inl(GPE0A_STS_PORT); + return (gpe_sts_reg & GPE0A_PME_B0_STS_BIT); +} + static struct irq_chip int0002_byt_irqchip = { .name = DRV_NAME, .irq_ack = int0002_irq_ack, @@ -220,6 +228,7 @@ static int int0002_probe(struct platform_device *pdev) return ret; } + acpi_register_wakeup_handler(irq, int0002_check_wake, NULL); device_init_wakeup(dev, true); return 0; } @@ -227,6 +236,7 @@ static int int0002_probe(struct platform_device *pdev) static int int0002_remove(struct platform_device *pdev) { device_init_wakeup(&pdev->dev, false); + acpi_unregister_wakeup_handler(int0002_check_wake, NULL); return 0; } diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index 513efe8e7628..890380302080 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -248,7 +248,7 @@ config SYSCON_REBOOT_MODE action according to the mode. 
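The snprintf()-to-scnprintf() conversions in the two PCMCIA files above fix a classic sysfs pitfall: snprintf() returns the length that would have been written had the buffer been big enough, so a long list can push the running offset past PAGE_SIZE and turn the next size argument negative. scnprintf() returns what was actually copied, which keeps the arithmetic safe. A minimal sketch of the pattern, assuming a hypothetical show() callback and item table:

	static ssize_t items_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
	{
		ssize_t len = 0;
		int i;

		for (i = 0; i < n_items; i++)	/* n_items, items: hypothetical */
			len += scnprintf(buf + len, PAGE_SIZE - len,
					 "%s\n", items[i]);
		return len;
	}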
config POWER_RESET_SC27XX - bool "Spreadtrum SC27xx PMIC power-off driver" + tristate "Spreadtrum SC27xx PMIC power-off driver" depends on MFD_SC27XX_PMIC || COMPILE_TEST help This driver supports powering off a system through diff --git a/drivers/power/reset/at91-reset.c b/drivers/power/reset/at91-reset.c index d94e3267c3b6..3ff9d93a5226 100644 --- a/drivers/power/reset/at91-reset.c +++ b/drivers/power/reset/at91-reset.c @@ -35,6 +35,7 @@ #define AT91_RSTC_MR 0x08 /* Reset Controller Mode Register */ #define AT91_RSTC_URSTEN BIT(0) /* User Reset Enable */ +#define AT91_RSTC_URSTASYNC BIT(2) /* User Reset Asynchronous Control */ #define AT91_RSTC_URSTIEN BIT(4) /* User Reset Interrupt Enable */ #define AT91_RSTC_ERSTL GENMASK(11, 8) /* External Reset Length */ @@ -49,105 +50,63 @@ enum reset_type { RESET_TYPE_ULP2 = 8, }; -static void __iomem *at91_ramc_base[2], *at91_rstc_base; -static struct clk *sclk; +struct at91_reset { + void __iomem *rstc_base; + void __iomem *ramc_base[2]; + struct clk *sclk; + struct notifier_block nb; + u32 args; + u32 ramc_lpr; +}; /* * unless the SDRAM is cleanly shutdown before we hit the * reset register it can be left driving the data bus and * killing the chance of a subsequent boot from NAND */ -static int at91sam9260_restart(struct notifier_block *this, unsigned long mode, - void *cmd) +static int at91_reset(struct notifier_block *this, unsigned long mode, + void *cmd) { - asm volatile( - /* Align to cache lines */ - ".balign 32\n\t" - - /* Disable SDRAM accesses */ - "str %2, [%0, #" __stringify(AT91_SDRAMC_TR) "]\n\t" - - /* Power down SDRAM */ - "str %3, [%0, #" __stringify(AT91_SDRAMC_LPR) "]\n\t" - - /* Reset CPU */ - "str %4, [%1, #" __stringify(AT91_RSTC_CR) "]\n\t" - - "b .\n\t" - : - : "r" (at91_ramc_base[0]), - "r" (at91_rstc_base), - "r" (1), - "r" cpu_to_le32(AT91_SDRAMC_LPCB_POWER_DOWN), - "r" cpu_to_le32(AT91_RSTC_KEY | AT91_RSTC_PERRST | AT91_RSTC_PROCRST)); + struct at91_reset *reset = container_of(this, struct at91_reset, nb); - return NOTIFY_DONE; -} - -static int at91sam9g45_restart(struct notifier_block *this, unsigned long mode, - void *cmd) -{ asm volatile( - /* - * Test wether we have a second RAM controller to care - * about. - * - * First, test that we can dereference the virtual address. 
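The at91-reset rework above collapses several per-SoC restart handlers and file-scope globals into one handler that recovers its state from the registered notifier block. Reduced to the underlying idiom, with hypothetical names:

	struct my_reset {
		void __iomem *rstc_base;
		struct notifier_block nb;
	};

	static int my_restart(struct notifier_block *this, unsigned long mode,
			      void *cmd)
	{
		struct my_reset *reset = container_of(this, struct my_reset, nb);

		writel(MY_RESET_CMD, reset->rstc_base);	/* hypothetical command */
		return NOTIFY_DONE;
	}

Per-SoC differences then become data carried in the same struct (the reset command word, the RAM-controller low-power register offset) instead of separate functions.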
- */ - "cmp %1, #0\n\t" - "beq 1f\n\t" - - /* Then, test that the RAM controller is enabled */ - "ldr r0, [%1]\n\t" - "cmp r0, #0\n\t" - /* Align to cache lines */ ".balign 32\n\t" /* Disable SDRAM0 accesses */ - "1: str %3, [%0, #" __stringify(AT91_DDRSDRC_RTR) "]\n\t" + " tst %0, #0\n\t" + " beq 1f\n\t" + " str %3, [%0, #" __stringify(AT91_DDRSDRC_RTR) "]\n\t" /* Power down SDRAM0 */ - " str %4, [%0, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t" + " str %4, [%0, %6]\n\t" /* Disable SDRAM1 accesses */ + "1: tst %1, #0\n\t" + " beq 2f\n\t" " strne %3, [%1, #" __stringify(AT91_DDRSDRC_RTR) "]\n\t" /* Power down SDRAM1 */ - " strne %4, [%1, #" __stringify(AT91_DDRSDRC_LPR) "]\n\t" + " strne %4, [%1, %6]\n\t" /* Reset CPU */ - " str %5, [%2, #" __stringify(AT91_RSTC_CR) "]\n\t" + "2: str %5, [%2, #" __stringify(AT91_RSTC_CR) "]\n\t" " b .\n\t" : - : "r" (at91_ramc_base[0]), - "r" (at91_ramc_base[1]), - "r" (at91_rstc_base), + : "r" (reset->ramc_base[0]), + "r" (reset->ramc_base[1]), + "r" (reset->rstc_base), "r" (1), "r" cpu_to_le32(AT91_DDRSDRC_LPCB_POWER_DOWN), - "r" cpu_to_le32(AT91_RSTC_KEY | AT91_RSTC_PERRST | AT91_RSTC_PROCRST) - : "r0"); - - return NOTIFY_DONE; -} - -static int sama5d3_restart(struct notifier_block *this, unsigned long mode, - void *cmd) -{ - writel(AT91_RSTC_KEY | AT91_RSTC_PERRST | AT91_RSTC_PROCRST, - at91_rstc_base); + "r" (reset->args), + "r" (reset->ramc_lpr) + : "r4"); return NOTIFY_DONE; } -static int samx7_restart(struct notifier_block *this, unsigned long mode, - void *cmd) -{ - writel(AT91_RSTC_KEY | AT91_RSTC_PROCRST, at91_rstc_base); - return NOTIFY_DONE; -} - -static void __init at91_reset_status(struct platform_device *pdev) +static void __init at91_reset_status(struct platform_device *pdev, + void __iomem *base) { const char *reason; - u32 reg = readl(at91_rstc_base + AT91_RSTC_SR); + u32 reg = readl(base + AT91_RSTC_SR); switch ((reg & AT91_RSTC_RSTTYP) >> 8) { case RESET_TYPE_GENERAL: @@ -183,42 +142,68 @@ static void __init at91_reset_status(struct platform_device *pdev) } static const struct of_device_id at91_ramc_of_match[] = { - { .compatible = "atmel,at91sam9260-sdramc", }, - { .compatible = "atmel,at91sam9g45-ddramc", }, + { + .compatible = "atmel,at91sam9260-sdramc", + .data = (void *)AT91_SDRAMC_LPR, + }, + { + .compatible = "atmel,at91sam9g45-ddramc", + .data = (void *)AT91_DDRSDRC_LPR, + }, { /* sentinel */ } }; static const struct of_device_id at91_reset_of_match[] = { - { .compatible = "atmel,at91sam9260-rstc", .data = at91sam9260_restart }, - { .compatible = "atmel,at91sam9g45-rstc", .data = at91sam9g45_restart }, - { .compatible = "atmel,sama5d3-rstc", .data = sama5d3_restart }, - { .compatible = "atmel,samx7-rstc", .data = samx7_restart }, - { .compatible = "microchip,sam9x60-rstc", .data = samx7_restart }, + { + .compatible = "atmel,at91sam9260-rstc", + .data = (void *)(AT91_RSTC_KEY | AT91_RSTC_PERRST | + AT91_RSTC_PROCRST), + }, + { + .compatible = "atmel,at91sam9g45-rstc", + .data = (void *)(AT91_RSTC_KEY | AT91_RSTC_PERRST | + AT91_RSTC_PROCRST) + }, + { + .compatible = "atmel,sama5d3-rstc", + .data = (void *)(AT91_RSTC_KEY | AT91_RSTC_PERRST | + AT91_RSTC_PROCRST) + }, + { + .compatible = "atmel,samx7-rstc", + .data = (void *)(AT91_RSTC_KEY | AT91_RSTC_PROCRST) + }, + { + .compatible = "microchip,sam9x60-rstc", + .data = (void *)(AT91_RSTC_KEY | AT91_RSTC_PROCRST) + }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, at91_reset_of_match); -static struct notifier_block at91_restart_nb = { - .priority = 192, -}; - static int __init 
at91_reset_probe(struct platform_device *pdev) { const struct of_device_id *match; + struct at91_reset *reset; struct device_node *np; int ret, idx = 0; - at91_rstc_base = of_iomap(pdev->dev.of_node, 0); - if (!at91_rstc_base) { + reset = devm_kzalloc(&pdev->dev, sizeof(*reset), GFP_KERNEL); + if (!reset) + return -ENOMEM; + + reset->rstc_base = of_iomap(pdev->dev.of_node, 0); + if (!reset->rstc_base) { dev_err(&pdev->dev, "Could not map reset controller address\n"); return -ENODEV; } if (!of_device_is_compatible(pdev->dev.of_node, "atmel,sama5d3-rstc")) { /* we need to shutdown the ddr controller, so get ramc base */ - for_each_matching_node(np, at91_ramc_of_match) { - at91_ramc_base[idx] = of_iomap(np, 0); - if (!at91_ramc_base[idx]) { + for_each_matching_node_and_match(np, at91_ramc_of_match, &match) { + reset->ramc_lpr = (u32)match->data; + reset->ramc_base[idx] = of_iomap(np, 0); + if (!reset->ramc_base[idx]) { dev_err(&pdev->dev, "Could not map ram controller address\n"); of_node_put(np); return -ENODEV; @@ -228,33 +213,46 @@ static int __init at91_reset_probe(struct platform_device *pdev) } match = of_match_node(at91_reset_of_match, pdev->dev.of_node); - at91_restart_nb.notifier_call = match->data; + reset->nb.notifier_call = at91_reset; + reset->nb.priority = 192; + reset->args = (u32)match->data; - sclk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(sclk)) - return PTR_ERR(sclk); + reset->sclk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(reset->sclk)) + return PTR_ERR(reset->sclk); - ret = clk_prepare_enable(sclk); + ret = clk_prepare_enable(reset->sclk); if (ret) { dev_err(&pdev->dev, "Could not enable slow clock\n"); return ret; } - ret = register_restart_handler(&at91_restart_nb); + platform_set_drvdata(pdev, reset); + + if (of_device_is_compatible(pdev->dev.of_node, "microchip,sam9x60-rstc")) { + u32 val = readl(reset->rstc_base + AT91_RSTC_MR); + + writel(AT91_RSTC_KEY | AT91_RSTC_URSTASYNC | val, + reset->rstc_base + AT91_RSTC_MR); + } + + ret = register_restart_handler(&reset->nb); if (ret) { - clk_disable_unprepare(sclk); + clk_disable_unprepare(reset->sclk); return ret; } - at91_reset_status(pdev); + at91_reset_status(pdev, reset->rstc_base); return 0; } static int __exit at91_reset_remove(struct platform_device *pdev) { - unregister_restart_handler(&at91_restart_nb); - clk_disable_unprepare(sclk); + struct at91_reset *reset = platform_get_drvdata(pdev); + + unregister_restart_handler(&reset->nb); + clk_disable_unprepare(reset->sclk); return 0; } diff --git a/drivers/power/reset/sc27xx-poweroff.c b/drivers/power/reset/sc27xx-poweroff.c index 29fb08b8faa0..90287c31992c 100644 --- a/drivers/power/reset/sc27xx-poweroff.c +++ b/drivers/power/reset/sc27xx-poweroff.c @@ -6,6 +6,7 @@ #include <linux/cpu.h> #include <linux/kernel.h> +#include <linux/module.h> #include <linux/platform_device.h> #include <linux/pm.h> #include <linux/regmap.h> @@ -13,6 +14,8 @@ #define SC27XX_PWR_PD_HW 0xc2c #define SC27XX_PWR_OFF_EN BIT(0) +#define SC27XX_SLP_CTRL 0xdf0 +#define SC27XX_LDO_XTL_EN BIT(3) static struct regmap *regmap; @@ -27,10 +30,13 @@ static struct regmap *regmap; */ static void sc27xx_poweroff_shutdown(void) { -#ifdef CONFIG_PM_SLEEP_SMP - int cpu = smp_processor_id(); +#ifdef CONFIG_HOTPLUG_CPU + int cpu; - freeze_secondary_cpus(cpu); + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + remove_cpu(cpu); + } #endif } @@ -40,6 +46,9 @@ static struct syscore_ops poweroff_syscore_ops = { static void sc27xx_poweroff_do_poweroff(void) { + /* Disable the external subsys 
connection's power firstly */ + regmap_write(regmap, SC27XX_SLP_CTRL, SC27XX_LDO_XTL_EN); + regmap_write(regmap, SC27XX_PWR_PD_HW, SC27XX_PWR_OFF_EN); } @@ -63,4 +72,8 @@ static struct platform_driver sc27xx_poweroff_driver = { .name = "sc27xx-poweroff", }, }; -builtin_platform_driver(sc27xx_poweroff_driver); +module_platform_driver(sc27xx_poweroff_driver); + +MODULE_DESCRIPTION("Power off driver for SC27XX PMIC Device"); +MODULE_AUTHOR("Baolin Wang <baolin.wang@unisoc.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index 9a5591ab90d0..195bc0462d3e 100644 --- a/drivers/power/supply/Kconfig +++ b/drivers/power/supply/Kconfig @@ -480,7 +480,7 @@ config CHARGER_GPIO called gpio-charger. config CHARGER_MANAGER - bool "Battery charger manager for multiple chargers" + tristate "Battery charger manager for multiple chargers" depends on REGULATOR select EXTCON help diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c index f69550d64f09..9469fe182d02 100644 --- a/drivers/power/supply/ab8500_charger.c +++ b/drivers/power/supply/ab8500_charger.c @@ -404,7 +404,7 @@ disable_otp: } /** - * ab8500_power_supply_changed - a wrapper with local extentions for + * ab8500_power_supply_changed - a wrapper with local extensions for * power_supply_changed * @di: pointer to the ab8500_charger structure * @psy: pointer to power_supply_that have changed. @@ -683,7 +683,7 @@ static int ab8500_charger_max_usb_curr(struct ab8500_charger *di, /* * Platform only supports USB 2.0. * This means that charging current from USB source - * is maximum 500 mA. Every occurence of USB_STAT_*_HOST_* + * is maximum 500 mA. Every occurrence of USB_STAT_*_HOST_* * should set USB_CH_IP_CUR_LVL_0P5. */ @@ -1379,13 +1379,13 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger, /* * Due to a bug in AB8500, BTEMP_HIGH/LOW interrupts - * will be triggered everytime we enable the VDD ADC supply. + * will be triggered every time we enable the VDD ADC supply. * This will turn off charging for a short while. * It can be avoided by having the supply on when * there is a charger enabled. Normally the VDD ADC supply - * is enabled everytime a GPADC conversion is triggered. We will - * force it to be enabled from this driver to have - * the GPADC module independant of the AB8500 chargers + * is enabled every time a GPADC conversion is triggered. + * We will force it to be enabled from this driver to have + * the GPADC module independent of the AB8500 chargers */ if (!di->vddadc_en_ac) { ret = regulator_enable(di->regu); @@ -1455,7 +1455,7 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger, if (is_ab8500_1p1_or_earlier(di->parent)) { /* * For ABB revision 1.0 and 1.1 there is a bug in the - * watchdog logic. That means we have to continously + * watchdog logic. That means we have to continuously * kick the charger watchdog even when no charger is * connected. This is only valid once the AC charger * has been enabled. This is a bug that is not handled @@ -1552,13 +1552,13 @@ static int ab8500_charger_usb_en(struct ux500_charger *charger, /* * Due to a bug in AB8500, BTEMP_HIGH/LOW interrupts - * will be triggered everytime we enable the VDD ADC supply. + * will be triggered every time we enable the VDD ADC supply. * This will turn off charging for a short while. * It can be avoided by having the supply on when * there is a charger enabled. Normally the VDD ADC supply - * is enabled everytime a GPADC conversion is triggered. 
We will - * force it to be enabled from this driver to have - * the GPADC module independant of the AB8500 chargers + * is enabled every time a GPADC conversion is triggered. + * We will force it to be enabled from this driver to have + * the GPADC module independent of the AB8500 chargers */ if (!di->vddadc_en_usb) { ret = regulator_enable(di->regu); @@ -1582,7 +1582,10 @@ static int ab8500_charger_usb_en(struct ux500_charger *charger, return -ENXIO; } - /* ChVoltLevel: max voltage upto which battery can be charged */ + /* + * ChVoltLevel: max voltage up to which battery can be + * charged + */ ret = abx500_set_register_interruptible(di->dev, AB8500_CHARGER, AB8500_CH_VOLT_LVL_REG, (u8) volt_index); if (ret) { @@ -2014,7 +2017,7 @@ static void ab8500_charger_check_hw_failure_work(struct work_struct *work) * Work queue function for kicking the charger watchdog. * * For ABB revision 1.0 and 1.1 there is a bug in the watchdog - * logic. That means we have to continously kick the charger + * logic. That means we have to continuously kick the charger * watchdog even when no charger is connected. This is only * valid once the AC charger has been enabled. This is * a bug that is not handled by the algorithm and the @@ -2262,7 +2265,7 @@ static void ab8500_charger_usb_link_status_work(struct work_struct *work) * Some chargers that breaks the USB spec is * identified as invalid by AB8500 and it refuse * to start the charging process. but by jumping - * thru a few hoops it can be forced to start. + * through a few hoops it can be forced to start. */ if (is_ab8500(di->parent)) ret = abx500_get_register_interruptible(di->dev, AB8500_USB, @@ -3214,7 +3217,7 @@ static int ab8500_charger_resume(struct platform_device *pdev) /* * For ABB revision 1.0 and 1.1 there is a bug in the watchdog - * logic. That means we have to continously kick the charger + * logic. That means we have to continuously kick the charger * watchdog even when no charger is connected. This is only * valid once the AC charger has been enabled. This is * a bug that is not handled by the algorithm and the @@ -3483,7 +3486,7 @@ static int ab8500_charger_probe(struct platform_device *pdev) /* * For ABB revision 1.0 and 1.1 there is a bug in the watchdog - * logic. That means we have to continously kick the charger + * logic. That means we have to continuously kick the charger * watchdog even when no charger is connected. This is only * valid once the AC charger has been enabled. This is * a bug that is not handled by the algorithm and the diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index 1bbba6bba673..cf4c67b2d235 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -21,6 +21,7 @@ #include <linux/property.h> #include <linux/mfd/axp20x.h> #include <linux/extcon.h> +#include <linux/dmi.h> #define PS_STAT_VBUS_TRIGGER BIT(0) #define PS_STAT_BAT_CHRG_DIR BIT(2) @@ -545,6 +546,49 @@ out: return IRQ_HANDLED; } +/* + * The HP Pavilion x2 10 series comes in a number of variants: + * Bay Trail SoC + AXP288 PMIC, DMI_BOARD_NAME: "815D" + * Cherry Trail SoC + AXP288 PMIC, DMI_BOARD_NAME: "813E" + * Cherry Trail SoC + TI PMIC, DMI_BOARD_NAME: "827C" or "82F4" + * + * The variants with the AXP288 PMIC are all kinds of special: + * + * 1. All variants use a Type-C connector which the AXP288 does not support, so + * when using a Type-C charger it is not recognized. 
Unlike most AXP288 devices, + * this model actually has mostly working ACPI AC / Battery code, the ACPI code + * "solves" this by simply setting the input_current_limit to 3A. + * There are still some issues with the ACPI code, so we use this native driver, + * and to solve the charging not working (500mA is not enough) issue we hardcode + * the 3A input_current_limit like the ACPI code does. + * + * 2. If no charger is connected the machine boots with the vbus-path disabled. + * Normally this is done when a 5V boost converter is active to avoid the PMIC + * trying to charge from the 5V boost converter's output. This is done when + * an OTG host cable is inserted and the ID pin on the micro-B receptacle is + * pulled low and the ID pin has an ACPI event handler associated with it + * which re-enables the vbus-path when the ID pin is pulled high when the + * OTG host cable is removed. The Type-C connector has no ID pin, there is + * no ID pin handler and there appears to be no 5V boost converter, so we + * end up not charging because the vbus-path is disabled, until we unplug + * the charger which automatically clears the vbus-path disable bit and then + * on the second plug-in of the adapter we start charging. To solve the not + * charging on first charger plugin we unconditionally enable the vbus-path at + * probe on this model, which is safe since there is no 5V boost converter. + */ +static const struct dmi_system_id axp288_hp_x2_dmi_ids[] = { + { + /* + * Bay Trail model has "Hewlett-Packard" as sys_vendor, Cherry + * Trail model has "HP", so we only match on product_name. + */ + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + }, + }, + {} /* Terminating entry */ +}; + static void axp288_charger_extcon_evt_worker(struct work_struct *work) { struct axp288_chrg_info *info = @@ -568,7 +612,11 @@ static void axp288_charger_extcon_evt_worker(struct work_struct *work) } /* Determine cable/charger type */ - if (extcon_get_state(edev, EXTCON_CHG_USB_SDP) > 0) { + if (dmi_check_system(axp288_hp_x2_dmi_ids)) { + /* See comment above axp288_hp_x2_dmi_ids declaration */ + dev_dbg(&info->pdev->dev, "HP X2 with Type-C, setting inlmt to 3A\n"); + current_limit = 3000000; + } else if (extcon_get_state(edev, EXTCON_CHG_USB_SDP) > 0) { dev_dbg(&info->pdev->dev, "USB SDP charger is connected\n"); current_limit = 500000; } else if (extcon_get_state(edev, EXTCON_CHG_USB_CDP) > 0) { @@ -685,6 +733,13 @@ static int charger_init_hw_regs(struct axp288_chrg_info *info) return ret; } + if (dmi_check_system(axp288_hp_x2_dmi_ids)) { + /* See comment above axp288_hp_x2_dmi_ids declaration */ + ret = axp288_charger_vbus_path_select(info, true); + if (ret < 0) + return ret; + } + /* Read current charge voltage and current limit */ ret = regmap_read(info->regmap, AXP20X_CHRG_CTRL1, &val); if (ret < 0) { diff --git a/drivers/power/supply/axp288_fuel_gauge.c b/drivers/power/supply/axp288_fuel_gauge.c index e1bc4e6e6f30..f40fa0e63b6e 100644 --- a/drivers/power/supply/axp288_fuel_gauge.c +++ b/drivers/power/supply/axp288_fuel_gauge.c @@ -706,14 +706,14 @@ static const struct dmi_system_id axp288_fuel_gauge_blacklist[] = { { /* Intel Cherry Trail Compute Stick, Windows version */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_SYS_VENDOR, "Intel"), DMI_MATCH(DMI_PRODUCT_NAME, "STK1AW32SC"), }, }, { /* Intel Cherry Trail Compute Stick, version without an OS */ .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_SYS_VENDOR, "Intel"), 
DMI_MATCH(DMI_PRODUCT_NAME, "STK1A32SC"), }, }, diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 195c18c2f426..664e50103eaa 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -1885,7 +1885,10 @@ int bq27xxx_battery_setup(struct bq27xxx_device_info *di) di->bat = power_supply_register_no_ws(di->dev, psy_desc, &psy_cfg); if (IS_ERR(di->bat)) { - dev_err(di->dev, "failed to register battery\n"); + if (PTR_ERR(di->bat) == -EPROBE_DEFER) + dev_dbg(di->dev, "failed to register battery, deferring probe\n"); + else + dev_err(di->dev, "failed to register battery\n"); return PTR_ERR(di->bat); } diff --git a/drivers/power/supply/ingenic-battery.c b/drivers/power/supply/ingenic-battery.c index 2748715c4c75..dd3d93dfe3eb 100644 --- a/drivers/power/supply/ingenic-battery.c +++ b/drivers/power/supply/ingenic-battery.c @@ -148,7 +148,8 @@ static int ingenic_battery_probe(struct platform_device *pdev) bat->battery = devm_power_supply_register(dev, desc, &psy_cfg); if (IS_ERR(bat->battery)) { - dev_err(dev, "Unable to register battery\n"); + if (PTR_ERR(bat->battery) != -EPROBE_DEFER) + dev_err(dev, "Unable to register battery\n"); return PTR_ERR(bat->battery); } diff --git a/drivers/power/supply/sc27xx_fuel_gauge.c b/drivers/power/supply/sc27xx_fuel_gauge.c index 469c83fdaa8e..a7c8a8453db1 100644 --- a/drivers/power/supply/sc27xx_fuel_gauge.c +++ b/drivers/power/supply/sc27xx_fuel_gauge.c @@ -614,6 +614,17 @@ static int sc27xx_fgu_get_property(struct power_supply *psy, val->intval = data->total_cap * 1000; break; + case POWER_SUPPLY_PROP_CHARGE_NOW: + ret = sc27xx_fgu_get_clbcnt(data, &value); + if (ret) + goto error; + + value = DIV_ROUND_CLOSEST(value * 10, + 36 * SC27XX_FGU_SAMPLE_HZ); + val->intval = sc27xx_fgu_adc_to_current(data, value); + + break; + default: ret = -EINVAL; break; @@ -682,6 +693,7 @@ static enum power_supply_property sc27xx_fgu_props[] = { POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE, POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, POWER_SUPPLY_PROP_CALIBRATE, + POWER_SUPPLY_PROP_CHARGE_NOW }; static const struct power_supply_desc sc27xx_fgu_desc = { diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c index 648ab80288c9..1bc49b2e12e8 100644 --- a/drivers/power/supply/twl4030_charger.c +++ b/drivers/power/supply/twl4030_charger.c @@ -726,10 +726,10 @@ twl4030_bci_mode_show(struct device *dev, for (i = 0; i < ARRAY_SIZE(modes); i++) if (mode == i) - len += snprintf(buf+len, PAGE_SIZE-len, + len += scnprintf(buf+len, PAGE_SIZE-len, "[%s] ", modes[i]); else - len += snprintf(buf+len, PAGE_SIZE-len, + len += scnprintf(buf+len, PAGE_SIZE-len, "%s ", modes[i]); buf[len-1] = '\n'; return len; diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index efae0c02d898..6cad15eb9cf4 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -369,8 +369,8 @@ static int int3400_thermal_remove(struct platform_device *pdev) } static const struct acpi_device_id int3400_thermal_match[] = { - {"INT1040", 0}, {"INT3400", 0}, + {"INTC1040", 0}, {} }; diff --git a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c index aeece1e136a5..f86cbb125e2f 100644 --- a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c 
@@ -282,8 +282,8 @@ static int int3403_remove(struct platform_device *pdev) } static const struct acpi_device_id int3403_device_ids[] = { - {"INT1043", 0}, {"INT3403", 0}, + {"INTC1043", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, int3403_device_ids); diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index ac2ec4543fe1..09fd4a185fd2 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig @@ -32,13 +32,13 @@ endif config 9P_FS_SECURITY - bool "9P Security Labels" - depends on 9P_FS - help - Security labels support alternative access control models - implemented by security modules like SELinux. This option - enables an extended attribute handler for file security - labels in the 9P filesystem. - - If you are not using a security module that requires using - extended attributes for file security labels, say N. + bool "9P Security Labels" + depends on 9P_FS + help + Security labels support alternative access control models + implemented by security modules like SELinux. This option + enables an extended attribute handler for file security + labels in the 9P filesystem. + + If you are not using a security module that requires using + extended attributes for file security labels, say N. diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index fe7f0bd2048e..92cd1d80218d 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -388,7 +388,10 @@ v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", iov_iter_count(to), iocb->ki_pos); - ret = p9_client_read(fid, iocb->ki_pos, to, &err); + if (iocb->ki_filp->f_flags & O_NONBLOCK) + ret = p9_client_read_once(fid, iocb->ki_pos, to, &err); + else + ret = p9_client_read(fid, iocb->ki_pos, to, &err); if (!ret) return err; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index b82423a72f68..c9255d399917 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -143,7 +143,7 @@ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses, default: p9_debug(P9_DEBUG_ERROR, "Unknown special type %c %s\n", type, stat->extension); - }; + } *rdev = MKDEV(major, minor); } else res |= S_IFREG; diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 0456bc990b5e..943cc469f42f 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -56,6 +56,7 @@ #include <linux/buffer_head.h> #include <linux/init.h> +#include <linux/printk.h> #include <linux/slab.h> #include <linux/mbcache.h> #include <linux/quotaops.h> @@ -84,8 +85,8 @@ printk("\n"); \ } while (0) #else -# define ea_idebug(f...) -# define ea_bdebug(f...) +# define ea_idebug(inode, f...) no_printk(f) +# define ea_bdebug(bh, f...) no_printk(f) #endif static int ext2_xattr_set2(struct inode *, struct buffer_head *, @@ -790,7 +791,15 @@ ext2_xattr_delete_inode(struct inode *inode) struct buffer_head *bh = NULL; struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb); - down_write(&EXT2_I(inode)->xattr_sem); + /* + * We are the only ones holding inode reference. The xattr_sem should + * better be unlocked! We could as well just not acquire xattr_sem at + * all but this makes the code more futureproof. OTOH we need trylock + * here to avoid false-positive warning from lockdep about reclaim + * circular dependency. 
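Earlier in this ext2 hunk, the disabled-debug stubs gain no_printk() bodies. Expanding a compiled-out debug macro to no_printk() instead of to nothing keeps printf-style format checking (and silences "set but not used" warnings) in non-debug builds while still generating no code. The pattern, with a hypothetical macro name:

	#ifdef MY_DEBUG
	# define my_debug(f...)	printk(KERN_DEBUG f)
	#else
	# define my_debug(f...)	no_printk(f)
	#endif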
+ */ + if (WARN_ON_ONCE(!down_write_trylock(&EXT2_I(inode)->xattr_sem))) + return; if (!EXT2_I(inode)->i_file_acl) goto cleanup; @@ -864,8 +873,7 @@ ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh) true); if (error) { if (error == -EBUSY) { - ea_bdebug(bh, "already in cache (%d cache entries)", - atomic_read(&ext2_xattr_cache->c_entry_count)); + ea_bdebug(bh, "already in cache"); error = 0; } } else diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h index cee888cdc235..16272e6ddcf4 100644 --- a/fs/ext2/xattr.h +++ b/fs/ext2/xattr.h @@ -39,7 +39,7 @@ struct ext2_xattr_entry { __le32 e_value_block; /* disk block attribute is stored on (n/i) */ __le32 e_value_size; /* size of attribute value */ __le32 e_hash; /* hash value of name and value */ - char e_name[0]; /* attribute name */ + char e_name[]; /* attribute name */ }; #define EXT2_XATTR_PAD_BITS 2 diff --git a/fs/namei.c b/fs/namei.c index 61fdb77a7d58..a320371899cf 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -610,7 +610,7 @@ static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq) static inline bool legitimize_path(struct nameidata *nd, struct path *path, unsigned seq) { - return __legitimize_path(path, nd->m_seq, seq); + return __legitimize_path(path, seq, nd->m_seq); } static bool legitimize_links(struct nameidata *nd) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 5778d1347b35..5435a40f82be 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -17,6 +17,59 @@ #include "fanotify.h" +static bool fanotify_path_equal(struct path *p1, struct path *p2) +{ + return p1->mnt == p2->mnt && p1->dentry == p2->dentry; +} + +static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1, + __kernel_fsid_t *fsid2) +{ + return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1]; +} + +static bool fanotify_fh_equal(struct fanotify_fh *fh1, + struct fanotify_fh *fh2) +{ + if (fh1->type != fh2->type || fh1->len != fh2->len) + return false; + + /* Do not merge events if we failed to encode fh */ + if (fh1->type == FILEID_INVALID) + return false; + + return !fh1->len || + !memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len); +} + +static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1, + struct fanotify_fid_event *ffe2) +{ + /* Do not merge fid events without object fh */ + if (!ffe1->object_fh.len) + return false; + + return fanotify_fsid_equal(&ffe1->fsid, &ffe2->fsid) && + fanotify_fh_equal(&ffe1->object_fh, &ffe2->object_fh); +} + +static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, + struct fanotify_name_event *fne2) +{ + /* + * Do not merge name events without dir fh. + * FAN_DIR_MODIFY does not encode object fh, so it may be empty. 
+ */ + if (!fne1->dir_fh.len) + return false; + + if (fne1->name_len != fne2->name_len || + !fanotify_fh_equal(&fne1->dir_fh, &fne2->dir_fh)) + return false; + + return !memcmp(fne1->name, fne2->name, fne1->name_len); +} + static bool should_merge(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { @@ -26,14 +79,15 @@ static bool should_merge(struct fsnotify_event *old_fsn, old = FANOTIFY_E(old_fsn); new = FANOTIFY_E(new_fsn); - if (old_fsn->inode != new_fsn->inode || old->pid != new->pid || - old->fh_type != new->fh_type || old->fh_len != new->fh_len) + if (old_fsn->objectid != new_fsn->objectid || + old->type != new->type || old->pid != new->pid) return false; - if (fanotify_event_has_path(old)) { - return old->path.mnt == new->path.mnt && - old->path.dentry == new->path.dentry; - } else if (fanotify_event_has_fid(old)) { + switch (old->type) { + case FANOTIFY_EVENT_TYPE_PATH: + return fanotify_path_equal(fanotify_event_path(old), + fanotify_event_path(new)); + case FANOTIFY_EVENT_TYPE_FID: /* * We want to merge many dirent events in the same dir (i.e. * creates/unlinks/renames), but we do not want to merge dirent @@ -42,11 +96,18 @@ static bool should_merge(struct fsnotify_event *old_fsn, * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+ * unlink pair or rmdir+create pair of events. */ - return (old->mask & FS_ISDIR) == (new->mask & FS_ISDIR) && - fanotify_fid_equal(&old->fid, &new->fid, old->fh_len); + if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR)) + return false; + + return fanotify_fid_event_equal(FANOTIFY_FE(old), + FANOTIFY_FE(new)); + case FANOTIFY_EVENT_TYPE_FID_NAME: + return fanotify_name_event_equal(FANOTIFY_NE(old), + FANOTIFY_NE(new)); + default: + WARN_ON_ONCE(1); } - /* Do not merge events if we failed to encode fid */ return false; } @@ -151,7 +212,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, { __u32 marks_mask = 0, marks_ignored_mask = 0; __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS; - const struct path *path = data; + const struct path *path = fsnotify_data_path(data, data_type); struct fsnotify_mark *mark; int type; @@ -160,7 +221,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, if (!FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { /* Do we have path to open a file descriptor? */ - if (data_type != FSNOTIFY_EVENT_PATH) + if (!path) return 0; /* Path type events are only relevant for files and dirs */ if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry)) @@ -172,6 +233,13 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, continue; mark = iter_info->marks[type]; /* + * If the event is on dir and this mark doesn't care about + * events on dir, don't send it! + */ + if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR)) + continue; + + /* * If the event is for a child and this mark doesn't care about * events on a child, don't send it! */ @@ -187,9 +255,9 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, test_mask = event_mask & marks_mask & ~marks_ignored_mask; /* - * dirent modification events (create/delete/move) do not carry the - * child entry name/inode information. Instead, we report FAN_ONDIR - * for mkdir/rmdir so user can differentiate them from creat/unlink. + * For dirent modification events (create/delete/move) that do not carry + * the child entry name information, we report FAN_ONDIR for mkdir/rmdir + * so user can differentiate them from creat/unlink. 
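The fanotify rework in these hunks replaces one catch-all event struct with a small common base plus per-type containers, discriminated by an enum and downcast with container_of(). Stripped to its skeleton, with hypothetical names:

	enum ev_type { EV_PATH, EV_FID };

	struct event {
		enum ev_type type;	/* selects the containing struct */
	};

	struct fid_event {
		struct event base;
		__kernel_fsid_t fsid;
	};

	static inline struct fid_event *to_fid_event(struct event *e)
	{
		return container_of(e, struct fid_event, base);
	}

Allocation, merging and freeing then all switch on type, so each event variant only carries the fields it actually uses.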
* * For backward compatibility and consistency, do not report FAN_ONDIR * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR @@ -203,22 +271,20 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, user_mask &= ~FAN_ONDIR; } - if (event_mask & FS_ISDIR && - !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) - return 0; - return test_mask & user_mask; } -static int fanotify_encode_fid(struct fanotify_event *event, - struct inode *inode, gfp_t gfp, - __kernel_fsid_t *fsid) +static void fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, + gfp_t gfp) { - struct fanotify_fid *fid = &event->fid; - int dwords, bytes = 0; - int err, type; + int dwords, type, bytes = 0; + char *ext_buf = NULL; + void *buf = fh->buf; + int err; + + if (!inode) + goto out; - fid->ext_fh = NULL; dwords = 0; err = -ENOENT; type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL); @@ -229,31 +295,33 @@ static int fanotify_encode_fid(struct fanotify_event *event, if (bytes > FANOTIFY_INLINE_FH_LEN) { /* Treat failure to allocate fh as failure to allocate event */ err = -ENOMEM; - fid->ext_fh = kmalloc(bytes, gfp); - if (!fid->ext_fh) + ext_buf = kmalloc(bytes, gfp); + if (!ext_buf) goto out_err; + + *fanotify_fh_ext_buf_ptr(fh) = ext_buf; + buf = ext_buf; } - type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes), - &dwords, NULL); + type = exportfs_encode_inode_fh(inode, buf, &dwords, NULL); err = -EINVAL; if (!type || type == FILEID_INVALID || bytes != dwords << 2) goto out_err; - fid->fsid = *fsid; - event->fh_len = bytes; + fh->type = type; + fh->len = bytes; - return type; + return; out_err: - pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, " - "type=%d, bytes=%d, err=%i)\n", - fsid->val[0], fsid->val[1], type, bytes, err); - kfree(fid->ext_fh); - fid->ext_fh = NULL; - event->fh_len = 0; - - return FILEID_INVALID; + pr_warn_ratelimited("fanotify: failed to encode fid (type=%d, len=%d, err=%i)\n", + type, bytes, err); + kfree(ext_buf); + *fanotify_fh_ext_buf_ptr(fh) = NULL; +out: + /* Report the event without a file identifier on encode error */ + fh->type = FILEID_INVALID; + fh->len = 0; } /* @@ -269,21 +337,22 @@ static struct inode *fanotify_fid_inode(struct inode *to_tell, u32 event_mask, { if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) return to_tell; - else if (data_type == FSNOTIFY_EVENT_INODE) - return (struct inode *)data; - else if (data_type == FSNOTIFY_EVENT_PATH) - return d_inode(((struct path *)data)->dentry); - return NULL; + + return (struct inode *)fsnotify_data_inode(data, data_type); } struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, + const struct qstr *file_name, __kernel_fsid_t *fsid) { struct fanotify_event *event = NULL; + struct fanotify_fid_event *ffe = NULL; + struct fanotify_name_event *fne = NULL; gfp_t gfp = GFP_KERNEL_ACCOUNT; struct inode *id = fanotify_fid_inode(inode, mask, data, data_type); + const struct path *path = fsnotify_data_path(data, data_type); /* * For queues with unlimited length lost events are not expected and @@ -305,33 +374,81 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); if (!pevent) goto out; + event = &pevent->fae; + event->type = FANOTIFY_EVENT_TYPE_PATH_PERM; pevent->response = 0; pevent->state = FAN_EVENT_INIT; goto init; } - event = kmem_cache_alloc(fanotify_event_cachep, gfp); - if (!event) - goto out; -init: 
__maybe_unused - fsnotify_init_event(&event->fse, inode); + + /* + * For FAN_DIR_MODIFY event, we report the fid of the directory and + * the name of the modified entry. + * Allocate an fanotify_name_event struct and copy the name. + */ + if (mask & FAN_DIR_MODIFY && !(WARN_ON_ONCE(!file_name))) { + fne = kmalloc(sizeof(*fne) + file_name->len + 1, gfp); + if (!fne) + goto out; + + event = &fne->fae; + event->type = FANOTIFY_EVENT_TYPE_FID_NAME; + fne->name_len = file_name->len; + strcpy(fne->name, file_name->name); + goto init; + } + + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { + ffe = kmem_cache_alloc(fanotify_fid_event_cachep, gfp); + if (!ffe) + goto out; + + event = &ffe->fae; + event->type = FANOTIFY_EVENT_TYPE_FID; + } else { + struct fanotify_path_event *pevent; + + pevent = kmem_cache_alloc(fanotify_path_event_cachep, gfp); + if (!pevent) + goto out; + + event = &pevent->fae; + event->type = FANOTIFY_EVENT_TYPE_PATH; + } + +init: + /* + * Use the victim inode instead of the watching inode as the id for + * event queue, so event reported on parent is merged with event + * reported on child when both directory and child watches exist. + */ + fsnotify_init_event(&event->fse, (unsigned long)id); event->mask = mask; if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) event->pid = get_pid(task_pid(current)); else event->pid = get_pid(task_tgid(current)); - event->fh_len = 0; - if (id && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { - /* Report the event without a file identifier on encode error */ - event->fh_type = fanotify_encode_fid(event, id, gfp, fsid); - } else if (data_type == FSNOTIFY_EVENT_PATH) { - event->fh_type = FILEID_ROOT; - event->path = *((struct path *)data); - path_get(&event->path); - } else { - event->fh_type = FILEID_INVALID; - event->path.mnt = NULL; - event->path.dentry = NULL; + + if (fsid && fanotify_event_fsid(event)) + *fanotify_event_fsid(event) = *fsid; + + if (fanotify_event_object_fh(event)) + fanotify_encode_fh(fanotify_event_object_fh(event), id, gfp); + + if (fanotify_event_dir_fh(event)) + fanotify_encode_fh(fanotify_event_dir_fh(event), id, gfp); + + if (fanotify_event_has_path(event)) { + struct path *p = fanotify_event_path(event); + + if (path) { + *p = *path; + path_get(path); + } else { + p->mnt = NULL; + p->dentry = NULL; + } } out: memalloc_unuse_memcg(); @@ -392,6 +509,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM); BUILD_BUG_ON(FAN_CREATE != FS_CREATE); BUILD_BUG_ON(FAN_DELETE != FS_DELETE); + BUILD_BUG_ON(FAN_DIR_MODIFY != FS_DIR_MODIFY); BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF); BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF); BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); @@ -402,7 +520,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20); mask = fanotify_group_event_mask(group, iter_info, mask, data, data_type); @@ -429,7 +547,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, } event = fanotify_alloc_event(group, inode, mask, data, data_type, - &fsid); + file_name, &fsid); ret = -ENOMEM; if (unlikely(!event)) { /* @@ -451,7 +569,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, ret = 0; } else if (fanotify_is_perm_event(mask)) { - ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), + ret = 
fanotify_get_response(group, FANOTIFY_PERM(event), iter_info); } finish: @@ -470,22 +588,58 @@ static void fanotify_free_group_priv(struct fsnotify_group *group) free_uid(user); } +static void fanotify_free_path_event(struct fanotify_event *event) +{ + path_put(fanotify_event_path(event)); + kmem_cache_free(fanotify_path_event_cachep, FANOTIFY_PE(event)); +} + +static void fanotify_free_perm_event(struct fanotify_event *event) +{ + path_put(fanotify_event_path(event)); + kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PERM(event)); +} + +static void fanotify_free_fid_event(struct fanotify_event *event) +{ + struct fanotify_fid_event *ffe = FANOTIFY_FE(event); + + if (fanotify_fh_has_ext_buf(&ffe->object_fh)) + kfree(fanotify_fh_ext_buf(&ffe->object_fh)); + kmem_cache_free(fanotify_fid_event_cachep, ffe); +} + +static void fanotify_free_name_event(struct fanotify_event *event) +{ + struct fanotify_name_event *fne = FANOTIFY_NE(event); + + if (fanotify_fh_has_ext_buf(&fne->dir_fh)) + kfree(fanotify_fh_ext_buf(&fne->dir_fh)); + kfree(fne); +} + static void fanotify_free_event(struct fsnotify_event *fsn_event) { struct fanotify_event *event; event = FANOTIFY_E(fsn_event); - if (fanotify_event_has_path(event)) - path_put(&event->path); - else if (fanotify_event_has_ext_fh(event)) - kfree(event->fid.ext_fh); put_pid(event->pid); - if (fanotify_is_perm_event(event->mask)) { - kmem_cache_free(fanotify_perm_event_cachep, - FANOTIFY_PE(fsn_event)); - return; + switch (event->type) { + case FANOTIFY_EVENT_TYPE_PATH: + fanotify_free_path_event(event); + break; + case FANOTIFY_EVENT_TYPE_PATH_PERM: + fanotify_free_perm_event(event); + break; + case FANOTIFY_EVENT_TYPE_FID: + fanotify_free_fid_event(event); + break; + case FANOTIFY_EVENT_TYPE_FID_NAME: + fanotify_free_name_event(event); + break; + default: + WARN_ON_ONCE(1); } - kmem_cache_free(fanotify_event_cachep, event); } static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 68b30504284c..35bfbf4a7aac 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -5,7 +5,8 @@ #include <linux/exportfs.h> extern struct kmem_cache *fanotify_mark_cache; -extern struct kmem_cache *fanotify_event_cachep; +extern struct kmem_cache *fanotify_fid_event_cachep; +extern struct kmem_cache *fanotify_path_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; /* Possible states of the permission event */ @@ -18,94 +19,140 @@ enum { /* * 3 dwords are sufficient for most local fs (64bit ino, 32bit generation). - * For 32bit arch, fid increases the size of fanotify_event by 12 bytes and - * fh_* fields increase the size of fanotify_event by another 4 bytes. - * For 64bit arch, fid increases the size of fanotify_fid by 8 bytes and - * fh_* fields are packed in a hole after mask. + * fh buf should be dword aligned. On 64bit arch, the ext_buf pointer is + * stored in either the first or last 2 dwords. 
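The fanotify_fh introduced just below keeps small encoded file handles inline and, when the handle exceeds FANOTIFY_INLINE_FH_LEN, stores a heap pointer inside the same buffer (the ALIGN() arithmetic in fanotify_fh_ext_buf_ptr() exists to place that pointer safely). The same idea expressed with an explicit union; a simplified sketch, not the actual layout:

	struct fh_buf {
		union {
			unsigned char inline_buf[12];
			unsigned char *ext_buf;	/* heap copy when len > 12 */
		};
		unsigned char len;
	};

	static unsigned char *fh_data(struct fh_buf *fh)
	{
		return fh->len > sizeof(fh->inline_buf) ? fh->ext_buf
							: fh->inline_buf;
	}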
*/ -#if BITS_PER_LONG == 32 #define FANOTIFY_INLINE_FH_LEN (3 << 2) -#else -#define FANOTIFY_INLINE_FH_LEN (4 << 2) -#endif -struct fanotify_fid { - __kernel_fsid_t fsid; - union { - unsigned char fh[FANOTIFY_INLINE_FH_LEN]; - unsigned char *ext_fh; - }; -}; +struct fanotify_fh { + unsigned char buf[FANOTIFY_INLINE_FH_LEN]; + u8 type; + u8 len; +} __aligned(4); + +static inline bool fanotify_fh_has_ext_buf(struct fanotify_fh *fh) +{ + return fh->len > FANOTIFY_INLINE_FH_LEN; +} + +static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh) +{ + BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) > + FANOTIFY_INLINE_FH_LEN); + return (char **)ALIGN((unsigned long)(fh->buf), __alignof__(char *)); +} -static inline void *fanotify_fid_fh(struct fanotify_fid *fid, - unsigned int fh_len) +static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh) { - return fh_len <= FANOTIFY_INLINE_FH_LEN ? fid->fh : fid->ext_fh; + return *fanotify_fh_ext_buf_ptr(fh); } -static inline bool fanotify_fid_equal(struct fanotify_fid *fid1, - struct fanotify_fid *fid2, - unsigned int fh_len) +static inline void *fanotify_fh_buf(struct fanotify_fh *fh) { - return fid1->fsid.val[0] == fid2->fsid.val[0] && - fid1->fsid.val[1] == fid2->fsid.val[1] && - !memcmp(fanotify_fid_fh(fid1, fh_len), - fanotify_fid_fh(fid2, fh_len), fh_len); + return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh->buf; } /* - * Structure for normal fanotify events. It gets allocated in + * Common structure for fanotify events. Concrete structs are allocated in * fanotify_handle_event() and freed when the information is retrieved by - * userspace + * userspace. The type of event determines how it was allocated, how it will + * be freed and which concrete struct it may be cast to. */ +enum fanotify_event_type { + FANOTIFY_EVENT_TYPE_FID, /* fixed length */ + FANOTIFY_EVENT_TYPE_FID_NAME, /* variable length */ + FANOTIFY_EVENT_TYPE_PATH, + FANOTIFY_EVENT_TYPE_PATH_PERM, +}; + struct fanotify_event { struct fsnotify_event fse; u32 mask; - /* - * Those fields are outside fanotify_fid to pack fanotify_event nicely - * on 64bit arch and to use fh_type as an indication of whether path - * or fid are used in the union: - * FILEID_ROOT (0) for path, > 0 for fid, FILEID_INVALID for neither. - */ - u8 fh_type; - u8 fh_len; - u16 pad; - union { - /* - * We hold ref to this path so it may be dereferenced at any - * point during this object's lifetime - */ - struct path path; - /* - * With FAN_REPORT_FID, we do not hold any reference on the - * victim object. Instead we store its NFS file handle and its - * filesystem's fsid as a unique identifier. 
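The new fanotify_fh keeps small handles inline and, when a handle exceeds FANOTIFY_INLINE_FH_LEN, reuses the inline buffer to store a pointer to an external allocation at a pointer-aligned offset (the BUILD_BUG_ON in fanotify_fh_ext_buf_ptr() proves the aligned slot fits). A stand-alone sketch of that trick, with illustrative names:

    #include <stdint.h>

    #define INLINE_LEN 12   /* 3 dwords, as FANOTIFY_INLINE_FH_LEN */
    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

    struct fh {
            unsigned char buf[INLINE_LEN];
            uint8_t type;
            uint8_t len;
    };

    /* Pointer-aligned slot inside buf for the external buffer; on a
     * 64-bit arch this lands at buf+0 or buf+4, and 8 bytes remain. */
    static unsigned char **ext_buf_ptr(struct fh *fh)
    {
            return (unsigned char **)ALIGN_UP((uintptr_t)fh->buf,
                                              _Alignof(unsigned char *));
    }

    /* Counterpart of fanotify_fh_buf(): inline bytes or external. */
    static void *fh_bytes(struct fh *fh)
    {
            return fh->len > INLINE_LEN ? (void *)*ext_buf_ptr(fh)
                                        : (void *)fh->buf;
    }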
- */ - struct fanotify_fid fid; - }; + enum fanotify_event_type type; struct pid *pid; }; -static inline bool fanotify_event_has_path(struct fanotify_event *event) +struct fanotify_fid_event { + struct fanotify_event fae; + __kernel_fsid_t fsid; + struct fanotify_fh object_fh; +}; + +static inline struct fanotify_fid_event * +FANOTIFY_FE(struct fanotify_event *event) { - return event->fh_type == FILEID_ROOT; + return container_of(event, struct fanotify_fid_event, fae); } -static inline bool fanotify_event_has_fid(struct fanotify_event *event) +struct fanotify_name_event { + struct fanotify_event fae; + __kernel_fsid_t fsid; + struct fanotify_fh dir_fh; + u8 name_len; + char name[0]; +}; + +static inline struct fanotify_name_event * +FANOTIFY_NE(struct fanotify_event *event) { - return event->fh_type != FILEID_ROOT && - event->fh_type != FILEID_INVALID; + return container_of(event, struct fanotify_name_event, fae); } -static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event) +static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event) { - return fanotify_event_has_fid(event) && - event->fh_len > FANOTIFY_INLINE_FH_LEN; + if (event->type == FANOTIFY_EVENT_TYPE_FID) + return &FANOTIFY_FE(event)->fsid; + else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME) + return &FANOTIFY_NE(event)->fsid; + else + return NULL; } -static inline void *fanotify_event_fh(struct fanotify_event *event) +static inline struct fanotify_fh *fanotify_event_object_fh( + struct fanotify_event *event) { - return fanotify_fid_fh(&event->fid, event->fh_len); + if (event->type == FANOTIFY_EVENT_TYPE_FID) + return &FANOTIFY_FE(event)->object_fh; + else + return NULL; +} + +static inline struct fanotify_fh *fanotify_event_dir_fh( + struct fanotify_event *event) +{ + if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME) + return &FANOTIFY_NE(event)->dir_fh; + else + return NULL; +} + +static inline int fanotify_event_object_fh_len(struct fanotify_event *event) +{ + struct fanotify_fh *fh = fanotify_event_object_fh(event); + + return fh ? fh->len : 0; +} + +static inline bool fanotify_event_has_name(struct fanotify_event *event) +{ + return event->type == FANOTIFY_EVENT_TYPE_FID_NAME; +} + +static inline int fanotify_event_name_len(struct fanotify_event *event) +{ + return fanotify_event_has_name(event) ? 
+ FANOTIFY_NE(event)->name_len : 0; +} + +struct fanotify_path_event { + struct fanotify_event fae; + struct path path; +}; + +static inline struct fanotify_path_event * +FANOTIFY_PE(struct fanotify_event *event) +{ + return container_of(event, struct fanotify_path_event, fae); } /* @@ -117,15 +164,16 @@ static inline void *fanotify_event_fh(struct fanotify_event *event) */ struct fanotify_perm_event { struct fanotify_event fae; + struct path path; unsigned short response; /* userspace answer to the event */ unsigned short state; /* state of the event */ int fd; /* fd we passed to userspace for this event */ }; static inline struct fanotify_perm_event * -FANOTIFY_PE(struct fsnotify_event *fse) +FANOTIFY_PERM(struct fanotify_event *event) { - return container_of(fse, struct fanotify_perm_event, fae.fse); + return container_of(event, struct fanotify_perm_event, fae); } static inline bool fanotify_is_perm_event(u32 mask) @@ -139,7 +187,24 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) return container_of(fse, struct fanotify_event, fse); } +static inline bool fanotify_event_has_path(struct fanotify_event *event) +{ + return event->type == FANOTIFY_EVENT_TYPE_PATH || + event->type == FANOTIFY_EVENT_TYPE_PATH_PERM; +} + +static inline struct path *fanotify_event_path(struct fanotify_event *event) +{ + if (event->type == FANOTIFY_EVENT_TYPE_PATH) + return &FANOTIFY_PE(event)->path; + else if (event->type == FANOTIFY_EVENT_TYPE_PATH_PERM) + return &FANOTIFY_PERM(event)->path; + else + return NULL; +} + struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, struct inode *inode, u32 mask, const void *data, int data_type, + const struct qstr *file_name, __kernel_fsid_t *fsid); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 0aa362b88550..42cb794c62ac 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -46,32 +46,53 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; struct kmem_cache *fanotify_mark_cache __read_mostly; -struct kmem_cache *fanotify_event_cachep __read_mostly; +struct kmem_cache *fanotify_fid_event_cachep __read_mostly; +struct kmem_cache *fanotify_path_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep __read_mostly; #define FANOTIFY_EVENT_ALIGN 4 +#define FANOTIFY_INFO_HDR_LEN \ + (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle)) + +static int fanotify_fid_info_len(int fh_len, int name_len) +{ + int info_len = fh_len; + + if (name_len) + info_len += name_len + 1; + + return roundup(FANOTIFY_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN); +} static int fanotify_event_info_len(struct fanotify_event *event) { - if (!fanotify_event_has_fid(event)) - return 0; + int info_len = 0; + int fh_len = fanotify_event_object_fh_len(event); + + if (fh_len) + info_len += fanotify_fid_info_len(fh_len, 0); - return roundup(sizeof(struct fanotify_event_info_fid) + - sizeof(struct file_handle) + event->fh_len, - FANOTIFY_EVENT_ALIGN); + if (fanotify_event_name_len(event)) { + struct fanotify_name_event *fne = FANOTIFY_NE(event); + + info_len += fanotify_fid_info_len(fne->dir_fh.len, + fne->name_len); + } + + return info_len; } /* - * Get an fsnotify notification event if one exists and is small + * Get an fanotify notification event if one exists and is small * enough to fit in "count". Return an error pointer if the count * is not large enough. When permission event is dequeued, its state is * updated accordingly. 
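fanotify_fid_info_len() sizes one variable-length info record: the fanotify_event_info_fid header, the embedded struct file_handle, the handle bytes and, for name events, the file name plus its terminating NUL, rounded up to 4-byte alignment. The same arithmetic from user space, assuming a libc recent enough to ship both UAPI structures:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <fcntl.h>          /* struct file_handle */
    #include <sys/fanotify.h>   /* struct fanotify_event_info_fid */

    #define ROUNDUP(x, a) (((x) + (a) - 1) / (a) * (a))

    static size_t fid_info_len(size_t fh_len, size_t name_len)
    {
            size_t len = sizeof(struct fanotify_event_info_fid) +
                         sizeof(struct file_handle) + fh_len;

            if (name_len)
                    len += name_len + 1;    /* name + terminating NUL */
            return ROUNDUP(len, 4);         /* FANOTIFY_EVENT_ALIGN */
    }

    int main(void)
    {
            /* e.g. an 8-byte handle plus the name "a.txt" */
            printf("%zu\n", fid_info_len(8, 5));
            return 0;
    }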
*/ -static struct fsnotify_event *get_one_event(struct fsnotify_group *group, +static struct fanotify_event *get_one_event(struct fsnotify_group *group, size_t count) { size_t event_size = FAN_EVENT_METADATA_LEN; - struct fsnotify_event *fsn_event = NULL; + struct fanotify_event *event = NULL; pr_debug("%s: group=%p count=%zd\n", __func__, group, count); @@ -85,26 +106,23 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, } if (event_size > count) { - fsn_event = ERR_PTR(-EINVAL); + event = ERR_PTR(-EINVAL); goto out; } - fsn_event = fsnotify_remove_first_event(group); - if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask)) - FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED; + event = FANOTIFY_E(fsnotify_remove_first_event(group)); + if (fanotify_is_perm_event(event->mask)) + FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED; out: spin_unlock(&group->notification_lock); - return fsn_event; + return event; } -static int create_fd(struct fsnotify_group *group, - struct fanotify_event *event, +static int create_fd(struct fsnotify_group *group, struct path *path, struct file **file) { int client_fd; struct file *new_file; - pr_debug("%s: group=%p event=%p\n", __func__, group, event); - client_fd = get_unused_fd_flags(group->fanotify_data.f_flags); if (client_fd < 0) return client_fd; @@ -113,14 +131,9 @@ static int create_fd(struct fsnotify_group *group, * we need a new file handle for the userspace program so it can read even if it was * originally opened O_WRONLY. */ - /* it's possible this event was an overflow event. in that case dentry and mnt - * are NULL; That's fine, just don't call dentry open */ - if (event->path.dentry && event->path.mnt) - new_file = dentry_open(&event->path, - group->fanotify_data.f_flags | FMODE_NONOTIFY, - current_cred()); - else - new_file = ERR_PTR(-EOVERFLOW); + new_file = dentry_open(path, + group->fanotify_data.f_flags | FMODE_NONOTIFY, + current_cred()); if (IS_ERR(new_file)) { /* * we still send an event even if we can't open the file. this @@ -204,83 +217,111 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } -static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) +static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, + const char *name, size_t name_len, + char __user *buf, size_t count) { struct fanotify_event_info_fid info = { }; struct file_handle handle = { }; - unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh; - size_t fh_len = event->fh_len; - size_t len = fanotify_event_info_len(event); + unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf; + size_t fh_len = fh ? fh->len : 0; + size_t info_len = fanotify_fid_info_len(fh_len, name_len); + size_t len = info_len; + + pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n", + __func__, fh_len, name_len, info_len, count); - if (!len) + if (!fh_len || (name && !name_len)) return 0; - if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len)) + if (WARN_ON_ONCE(len < sizeof(info) || len > count)) return -EFAULT; - /* Copy event info fid header followed by vaiable sized file handle */ - info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID; + /* + * Copy event info fid header followed by variable sized file handle + * and optionally followed by variable sized filename. + */ + info.hdr.info_type = name_len ? 
FAN_EVENT_INFO_TYPE_DFID_NAME : + FAN_EVENT_INFO_TYPE_FID; info.hdr.len = len; - info.fsid = event->fid.fsid; + info.fsid = *fsid; if (copy_to_user(buf, &info, sizeof(info))) return -EFAULT; buf += sizeof(info); len -= sizeof(info); - handle.handle_type = event->fh_type; + if (WARN_ON_ONCE(len < sizeof(handle))) + return -EFAULT; + + handle.handle_type = fh->type; handle.handle_bytes = fh_len; if (copy_to_user(buf, &handle, sizeof(handle))) return -EFAULT; buf += sizeof(handle); len -= sizeof(handle); + if (WARN_ON_ONCE(len < fh_len)) + return -EFAULT; + /* - * For an inline fh, copy through stack to exclude the copy from - * usercopy hardening protections. + * For an inline fh and inline file name, copy through stack to exclude + * the copy from usercopy hardening protections. */ - fh = fanotify_event_fh(event); + fh_buf = fanotify_fh_buf(fh); if (fh_len <= FANOTIFY_INLINE_FH_LEN) { - memcpy(bounce, fh, fh_len); - fh = bounce; + memcpy(bounce, fh_buf, fh_len); + fh_buf = bounce; } - if (copy_to_user(buf, fh, fh_len)) + if (copy_to_user(buf, fh_buf, fh_len)) return -EFAULT; - /* Pad with 0's */ buf += fh_len; len -= fh_len; + + if (name_len) { + /* Copy the filename with terminating null */ + name_len++; + if (WARN_ON_ONCE(len < name_len)) + return -EFAULT; + + if (copy_to_user(buf, name, name_len)) + return -EFAULT; + + buf += name_len; + len -= name_len; + } + + /* Pad with 0's */ WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); if (len > 0 && clear_user(buf, len)) return -EFAULT; - return 0; + return info_len; } static ssize_t copy_event_to_user(struct fsnotify_group *group, - struct fsnotify_event *fsn_event, + struct fanotify_event *event, char __user *buf, size_t count) { struct fanotify_event_metadata metadata; - struct fanotify_event *event; + struct path *path = fanotify_event_path(event); struct file *f = NULL; int ret, fd = FAN_NOFD; - pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event); + pr_debug("%s: group=%p event=%p\n", __func__, group, event); - event = container_of(fsn_event, struct fanotify_event, fse); - metadata.event_len = FAN_EVENT_METADATA_LEN; + metadata.event_len = FAN_EVENT_METADATA_LEN + + fanotify_event_info_len(event); metadata.metadata_len = FAN_EVENT_METADATA_LEN; metadata.vers = FANOTIFY_METADATA_VERSION; metadata.reserved = 0; metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; metadata.pid = pid_vnr(event->pid); - if (fanotify_event_has_path(event)) { - fd = create_fd(group, event, &f); + if (path && path->mnt && path->dentry) { + fd = create_fd(group, path, &f); if (fd < 0) return fd; - } else if (fanotify_event_has_fid(event)) { - metadata.event_len += fanotify_event_info_len(event); } metadata.fd = fd; @@ -295,15 +336,39 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN)) goto out_close_fd; + buf += FAN_EVENT_METADATA_LEN; + count -= FAN_EVENT_METADATA_LEN; + if (fanotify_is_perm_event(event->mask)) - FANOTIFY_PE(fsn_event)->fd = fd; + FANOTIFY_PERM(event)->fd = fd; - if (fanotify_event_has_path(event)) { + if (f) fd_install(fd, f); - } else if (fanotify_event_has_fid(event)) { - ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN); + + /* Event info records order is: dir fid + name, child fid */ + if (fanotify_event_name_len(event)) { + struct fanotify_name_event *fne = FANOTIFY_NE(event); + + ret = copy_info_to_user(fanotify_event_fsid(event), + fanotify_event_dir_fh(event), + fne->name, fne->name_len, + buf, count); + if (ret < 0) + return ret; + + 
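copy_info_to_user() lays each record out as header, file_handle, handle bytes, optional NUL-terminated name, zero padding; copy_event_to_user() emits the directory fid + name record first, then the child fid record. A hedged user-space reader for that layout, assuming the handle bytes follow fsid as the interface defines (the DFID_NAME constant may need a fallback define on older headers):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>
    #include <fcntl.h>
    #include <sys/fanotify.h>

    #ifndef FAN_EVENT_INFO_TYPE_DFID_NAME
    #define FAN_EVENT_INFO_TYPE_DFID_NAME 2
    #endif

    static void read_events(int fanotify_fd)
    {
            char buf[4096];
            ssize_t len = read(fanotify_fd, buf, sizeof(buf));

            if (len <= 0)
                    return;

            for (struct fanotify_event_metadata *md = (void *)buf;
                 FAN_EVENT_OK(md, len); md = FAN_EVENT_NEXT(md, len)) {
                    /* Info records sit between metadata_len and event_len */
                    char *info = (char *)md + md->metadata_len;
                    char *end = (char *)md + md->event_len;

                    while (info < end) {
                            struct fanotify_event_info_fid *fid = (void *)info;
                            struct file_handle *fh = (void *)fid->handle;

                            if (fid->hdr.info_type == FAN_EVENT_INFO_TYPE_DFID_NAME)
                                    /* the name follows the handle bytes */
                                    printf("name: %s\n", (char *)fh->f_handle +
                                                         fh->handle_bytes);
                            info += fid->hdr.len;   /* len is 4-aligned */
                    }
            }
    }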
buf += ret; + count -= ret; + } + + if (fanotify_event_object_fh_len(event)) { + ret = copy_info_to_user(fanotify_event_fsid(event), + fanotify_event_object_fh(event), + NULL, 0, buf, count); if (ret < 0) return ret; + + buf += ret; + count -= ret; } return metadata.event_len; @@ -335,7 +400,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct fsnotify_group *group; - struct fsnotify_event *kevent; + struct fanotify_event *event; char __user *start; int ret; DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -347,13 +412,13 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, add_wait_queue(&group->notification_waitq, &wait); while (1) { - kevent = get_one_event(group, count); - if (IS_ERR(kevent)) { - ret = PTR_ERR(kevent); + event = get_one_event(group, count); + if (IS_ERR(event)) { + ret = PTR_ERR(event); break; } - if (!kevent) { + if (!event) { ret = -EAGAIN; if (file->f_flags & O_NONBLOCK) break; @@ -369,7 +434,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, continue; } - ret = copy_event_to_user(group, kevent, buf, count); + ret = copy_event_to_user(group, event, buf, count); if (unlikely(ret == -EOPENSTALE)) { /* * We cannot report events with stale fd so drop it. @@ -384,17 +449,17 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, * Permission events get queued to wait for response. Other * events can be destroyed now. */ - if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) { - fsnotify_destroy_event(group, kevent); + if (!fanotify_is_perm_event(event->mask)) { + fsnotify_destroy_event(group, &event->fse); } else { if (ret <= 0) { spin_lock(&group->notification_lock); finish_permission_event(group, - FANOTIFY_PE(kevent), FAN_DENY); + FANOTIFY_PERM(event), FAN_DENY); wake_up(&group->fanotify_data.access_waitq); } else { spin_lock(&group->notification_lock); - list_add_tail(&kevent->list, + list_add_tail(&event->fse.list, &group->fanotify_data.access_list); spin_unlock(&group->notification_lock); } @@ -440,8 +505,6 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; - struct fanotify_perm_event *event; - struct fsnotify_event *fsn_event; /* * Stop new events from arriving in the notification queue. since @@ -456,6 +519,8 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ spin_lock(&group->notification_lock); while (!list_empty(&group->fanotify_data.access_list)) { + struct fanotify_perm_event *event; + event = list_first_entry(&group->fanotify_data.access_list, struct fanotify_perm_event, fae.fse.list); list_del_init(&event->fae.fse.list); @@ -469,12 +534,14 @@ static int fanotify_release(struct inode *ignored, struct file *file) * response is consumed and fanotify_get_response() returns. 
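Permission events (FAN_OPEN_PERM and friends) are parked on access_list until user space answers; the reader writes a struct fanotify_response back to the group fd, and fanotify_get_response() then completes in the blocked opener. A minimal responder, assuming a FAN_CLASS_CONTENT group:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/fanotify.h>

    /* Allow the permission event described by the metadata record 'md'
     * that was just read from 'fanotify_fd'. */
    static void allow_event(int fanotify_fd,
                            const struct fanotify_event_metadata *md)
    {
            struct fanotify_response resp = {
                    .fd = md->fd,
                    .response = FAN_ALLOW,  /* or FAN_DENY */
            };

            if (write(fanotify_fd, &resp, sizeof(resp)) != sizeof(resp))
                    perror("fanotify response");
    }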
*/ while (!fsnotify_notify_queue_is_empty(group)) { - fsn_event = fsnotify_remove_first_event(group); - if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) { + struct fanotify_event *event; + + event = FANOTIFY_E(fsnotify_remove_first_event(group)); + if (!(event->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); - fsnotify_destroy_event(group, fsn_event); + fsnotify_destroy_event(group, &event->fse); } else { - finish_permission_event(group, FANOTIFY_PE(fsn_event), + finish_permission_event(group, FANOTIFY_PERM(event), FAN_ALLOW); } spin_lock(&group->notification_lock); @@ -824,7 +891,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->memcg = get_mem_cgroup_from_mm(current->mm); oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, - FSNOTIFY_EVENT_NONE, NULL); + FSNOTIFY_EVENT_NONE, NULL, NULL); if (unlikely(!oevent)) { fd = -ENOMEM; goto out_destroy_group; @@ -1139,7 +1206,10 @@ static int __init fanotify_user_setup(void) fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); - fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC); + fanotify_fid_event_cachep = KMEM_CACHE(fanotify_fid_event, + SLAB_PANIC); + fanotify_path_event_cachep = KMEM_CACHE(fanotify_path_event, + SLAB_PANIC); if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 46f225580009..72d332ce8e12 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -143,15 +143,13 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) } /* Notify this dentry's parent about a child's events. */ -int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask) +int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, + int data_type) { struct dentry *parent; struct inode *p_inode; int ret = 0; - if (!dentry) - dentry = path->dentry; - if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) return 0; @@ -168,12 +166,7 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask mask |= FS_EVENT_ON_CHILD; take_dentry_name_snapshot(&name, dentry); - if (path) - ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, - &name.name, 0); - else - ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, - &name.name, 0); + ret = fsnotify(p_inode, mask, data, data_type, &name.name, 0); release_dentry_name_snapshot(&name); } @@ -181,7 +174,7 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask return ret; } -EXPORT_SYMBOL_GPL(__fsnotify_parent); +EXPORT_SYMBOL_GPL(fsnotify_parent); static int send_to_group(struct inode *to_tell, __u32 mask, const void *data, @@ -318,6 +311,7 @@ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info) int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, const struct qstr *file_name, u32 cookie) { + const struct path *path = fsnotify_data_path(data, data_is); struct fsnotify_iter_info iter_info = {}; struct super_block *sb = to_tell->i_sb; struct mount *mnt = NULL; @@ -325,8 +319,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, int ret = 0; __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); - if (data_is == FSNOTIFY_EVENT_PATH) { - mnt = real_mount(((const struct path *)data)->mnt); + if (path) { + mnt = real_mount(path->mnt); mnt_or_sb_mask |= mnt->mnt_fsnotify_mask; } /* An event "on 
child" is not intended for a mount/sb mark */ @@ -389,7 +383,7 @@ static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index d510223d302c..2ebc89047153 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -39,7 +39,7 @@ static bool event_compare(struct fsnotify_event *old_fsn, if (old->mask & FS_IN_IGNORED) return false; if ((old->mask == new->mask) && - (old_fsn->inode == new_fsn->inode) && + (old_fsn->objectid == new_fsn->objectid) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) return true; @@ -61,6 +61,7 @@ int inotify_handle_event(struct fsnotify_group *group, const struct qstr *file_name, u32 cookie, struct fsnotify_iter_info *iter_info) { + const struct path *path = fsnotify_data_path(data, data_type); struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct inotify_inode_mark *i_mark; struct inotify_event_info *event; @@ -73,12 +74,9 @@ int inotify_handle_event(struct fsnotify_group *group, return 0; if ((inode_mark->mask & FS_EXCL_UNLINK) && - (data_type == FSNOTIFY_EVENT_PATH)) { - const struct path *path = data; + path && d_unlinked(path->dentry)) + return 0; - if (d_unlinked(path->dentry)) - return 0; - } if (file_name) { len = file_name->len; alloc_len += len + 1; @@ -118,7 +116,7 @@ int inotify_handle_event(struct fsnotify_group *group, mask &= ~IN_ISDIR; fsn_event = &event->fse; - fsnotify_init_event(fsn_event, inode); + fsnotify_init_event(fsn_event, (unsigned long)inode); event->mask = mask; event->wd = i_mark->wd; event->sync_cookie = cookie; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 107537a543fd..81ffc8629fc4 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -635,7 +635,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; - fsnotify_init_event(group->overflow_event, NULL); + fsnotify_init_event(group->overflow_event, 0); oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 3d83be54c474..758efe557a19 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -83,7 +83,7 @@ struct udf_virtual_data { struct udf_bitmap { __u32 s_extPosition; int s_nr_groups; - struct buffer_head *s_block_bitmap[0]; + struct buffer_head *s_block_bitmap[]; }; struct udf_part_map { diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 87fc14e97d2b..49b519f36b69 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -12,7 +12,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20200214 +#define ACPI_CA_VERSION 0x20200326 #include <acpi/acconfig.h> #include <acpi/actypes.h> diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index b818ba60e19d..ec66779cb193 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -43,6 +43,7 @@ #define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */ #define ACPI_SIG_SDEI "SDEI" /* Software Delegated Exception Interface Table */ #define ACPI_SIG_SDEV "SDEV" /* Secure Devices table */ +#define ACPI_SIG_NHLT "NHLT" /* Non-HDAudio Link Table */ /* * All tables must be byte-packed to match the ACPI 
specification, since @@ -274,7 +275,8 @@ struct acpi_ivrs_header { /* Values for subtable Type above */ enum acpi_ivrs_type { - ACPI_IVRS_TYPE_HARDWARE = 0x10, + ACPI_IVRS_TYPE_HARDWARE1 = 0x10, + ACPI_IVRS_TYPE_HARDWARE2 = 0x11, ACPI_IVRS_TYPE_MEMORY1 = 0x20, ACPI_IVRS_TYPE_MEMORY2 = 0x21, ACPI_IVRS_TYPE_MEMORY3 = 0x22 @@ -301,13 +303,26 @@ enum acpi_ivrs_type { /* 0x10: I/O Virtualization Hardware Definition Block (IVHD) */ -struct acpi_ivrs_hardware { +struct acpi_ivrs_hardware_10 { struct acpi_ivrs_header header; u16 capability_offset; /* Offset for IOMMU control fields */ u64 base_address; /* IOMMU control registers */ u16 pci_segment_group; u16 info; /* MSI number and unit ID */ - u32 reserved; + u32 feature_reporting; +}; + +/* 0x11: I/O Virtualization Hardware Definition Block (IVHD) */ + +struct acpi_ivrs_hardware_11 { + struct acpi_ivrs_header header; + u16 capability_offset; /* Offset for IOMMU control fields */ + u64 base_address; /* IOMMU control registers */ + u16 pci_segment_group; + u16 info; /* MSI number and unit ID */ + u32 attributes; + u64 efr_register_image; + u64 reserved; }; /* Masks for Info field above */ diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h index 2bf3baf819bb..b0b163b9efc6 100644 --- a/include/acpi/actbl3.h +++ b/include/acpi/actbl3.h @@ -39,7 +39,7 @@ #define ACPI_SIG_WDDT "WDDT" /* Watchdog Timer Description Table */ #define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ #define ACPI_SIG_WPBT "WPBT" /* Windows Platform Binary Table */ -#define ACPI_SIG_WSMT "WSMT" /* Windows SMM Security Migrations Table */ +#define ACPI_SIG_WSMT "WSMT" /* Windows SMM Security Mitigations Table */ #define ACPI_SIG_XENV "XENV" /* Xen Environment table */ #define ACPI_SIG_XXXX "XXXX" /* Intermediate AML header for ASL/ASL+ converter */ @@ -673,10 +673,10 @@ struct acpi_table_wpbt { /******************************************************************************* * - * WSMT - Windows SMM Security Migrations Table + * WSMT - Windows SMM Security Mitigations Table * Version 1 * - * Conforms to "Windows SMM Security Migrations Table", + * Conforms to "Windows SMM Security Mitigations Table", * Version 1.0, April 18, 2016 * ******************************************************************************/ diff --git a/include/acpi/acuuid.h b/include/acpi/acuuid.h index 9dd4689a39cf..9e1367b19069 100644 --- a/include/acpi/acuuid.h +++ b/include/acpi/acuuid.h @@ -57,4 +57,4 @@ #define UUID_THERMAL_EXTENSIONS "14d399cd-7a27-4b18-8fb4-7cb7b9f4e500" #define UUID_DEVICE_PROPERTIES "daffd814-6eba-4d8c-8a91-bc9bbf4aa301" -#endif /* __AUUID_H__ */ +#endif /* __ACUUID_H__ */ diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 47805172e73d..683e124ad517 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -297,6 +297,14 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx } #endif +static inline int call_on_cpu(int cpu, long (*fn)(void *), void *arg, + bool direct) +{ + if (direct || (is_percpu_thread() && cpu == smp_processor_id())) + return fn(arg); + return work_on_cpu(cpu, fn, arg); +} + /* in processor_perflib.c */ #ifdef CONFIG_CPU_FREQ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b7d3caf6f205..9f70b7807d53 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -488,6 +488,11 @@ void __init acpi_nvs_nosave_s3(void); void __init acpi_sleep_no_blacklist(void); #endif /* CONFIG_PM_SLEEP */ +int acpi_register_wakeup_handler( + int wake_irq, bool (*wakeup)(void *context), void 
*context); +void acpi_unregister_wakeup_handler( + bool (*wakeup)(void *context), void *context); + struct acpi_osc_context { char *uuid_str; /* UUID string */ int rev; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index b79fa9bb7359..3049a6c06d9e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -47,7 +47,8 @@ * Directory entry modification events - reported only to directory * where entry is modified and not to a watching parent. */ -#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE) +#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ + FAN_DIR_MODIFY) /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */ #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a2d5d175d3c1..5ab28f6c7d26 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -18,39 +18,63 @@ #include <linux/bug.h> /* - * Notify this @dir inode about a change in the directory entry @dentry. + * Notify this @dir inode about a change in a child directory entry. + * The directory entry may have turned positive or negative or its inode may + * have changed (i.e. renamed over). * * Unlike fsnotify_parent(), the event will be reported regardless of the * FS_EVENT_ON_CHILD mask on the parent inode. */ -static inline int fsnotify_dirent(struct inode *dir, struct dentry *dentry, - __u32 mask) +static inline void fsnotify_name(struct inode *dir, __u32 mask, + struct inode *child, + const struct qstr *name, u32 cookie) { - return fsnotify(dir, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, - &dentry->d_name, 0); + fsnotify(dir, mask, child, FSNOTIFY_EVENT_INODE, name, cookie); + /* + * Send another flavor of the event without child inode data and + * without the specific event type (e.g. FS_CREATE|FS_IS_DIR). + * The name is relative to the dir inode the event is reported to. + */ + fsnotify(dir, FS_DIR_MODIFY, dir, FSNOTIFY_EVENT_INODE, name, 0); } -/* Notify this dentry's parent about a child's events. */ -static inline int fsnotify_parent(const struct path *path, - struct dentry *dentry, __u32 mask) +static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, + __u32 mask) { - if (!dentry) - dentry = path->dentry; - - return __fsnotify_parent(path, dentry, mask); + fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0); } /* - * Simple wrapper to consolidate calls fsnotify_parent()/fsnotify() when - * an event is on a path. + * Simple wrappers to consolidate calls fsnotify_parent()/fsnotify() when + * an event is on a file/dentry. 
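The acpi_register_wakeup_handler() pair declared above lets a driver hook a shared wake IRQ and report whether its device caused the wakeup. A hedged driver-side sketch; my_dev and its register layout are invented for illustration:

    #include <linux/acpi.h>
    #include <linux/bits.h>
    #include <linux/io.h>

    struct my_dev {                 /* hypothetical device state */
            int wake_irq;
            void __iomem *regs;
    };

    /* Called from the ACPI wakeup path; return true if this device is
     * responsible for the wake event. */
    static bool my_dev_wakeup(void *context)
    {
            struct my_dev *dev = context;

            return readl(dev->regs) & BIT(0);   /* hypothetical status bit */
    }

    static int my_dev_probe(struct my_dev *dev)
    {
            return acpi_register_wakeup_handler(dev->wake_irq,
                                                my_dev_wakeup, dev);
    }

    static void my_dev_remove(struct my_dev *dev)
    {
            acpi_unregister_wakeup_handler(my_dev_wakeup, dev);
    }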
*/ -static inline int fsnotify_path(struct inode *inode, const struct path *path, - __u32 mask) +static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) +{ + struct inode *inode = d_inode(dentry); + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify_parent(dentry, mask, inode, FSNOTIFY_EVENT_INODE); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); +} + +static inline int fsnotify_file(struct file *file, __u32 mask) { - int ret = fsnotify_parent(path, NULL, mask); + const struct path *path = &file->f_path; + struct inode *inode = file_inode(file); + int ret; + + if (file->f_mode & FMODE_NONOTIFY) + return 0; + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + ret = fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); if (ret) return ret; + return fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } @@ -58,19 +82,16 @@ static inline int fsnotify_path(struct inode *inode, const struct path *path, static inline int fsnotify_perm(struct file *file, int mask) { int ret; - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); __u32 fsnotify_mask = 0; - if (file->f_mode & FMODE_NONOTIFY) - return 0; if (!(mask & (MAY_READ | MAY_OPEN))) return 0; + if (mask & MAY_OPEN) { fsnotify_mask = FS_OPEN_PERM; if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_path(inode, path, FS_OPEN_EXEC_PERM); + ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); if (ret) return ret; @@ -79,10 +100,7 @@ static inline int fsnotify_perm(struct file *file, int mask) fsnotify_mask = FS_ACCESS_PERM; } - if (S_ISDIR(inode->i_mode)) - fsnotify_mask |= FS_ISDIR; - - return fsnotify_path(inode, path, fsnotify_mask); + return fsnotify_file(file, fsnotify_mask); } /* @@ -122,10 +140,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, mask |= FS_ISDIR; } - fsnotify(old_dir, old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_name, - fs_cookie); - fsnotify(new_dir, new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_name, - fs_cookie); + fsnotify_name(old_dir, old_dir_mask, source, old_name, fs_cookie); + fsnotify_name(new_dir, new_dir_mask, source, new_name, fs_cookie); if (target) fsnotify_link_count(target); @@ -180,12 +196,13 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) * Note: We have to pass also the linked inode ptr as some filesystems leave * new_dentry->d_inode NULL and instantiate inode pointer later */ -static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) +static inline void fsnotify_link(struct inode *dir, struct inode *inode, + struct dentry *new_dentry) { fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); - fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, &new_dentry->d_name, 0); + fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0); } /* @@ -229,15 +246,7 @@ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) */ static inline void fsnotify_access(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); - __u32 mask = FS_ACCESS; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - if (!(file->f_mode & FMODE_NONOTIFY)) - fsnotify_path(inode, path, mask); + fsnotify_file(file, FS_ACCESS); } /* @@ -245,15 +254,7 @@ static inline void fsnotify_access(struct file *file) */ static inline void fsnotify_modify(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = 
file_inode(file); - __u32 mask = FS_MODIFY; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - if (!(file->f_mode & FMODE_NONOTIFY)) - fsnotify_path(inode, path, mask); + fsnotify_file(file, FS_MODIFY); } /* @@ -261,16 +262,12 @@ static inline void fsnotify_modify(struct file *file) */ static inline void fsnotify_open(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); __u32 mask = FS_OPEN; - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; if (file->f_flags & __FMODE_EXEC) mask |= FS_OPEN_EXEC; - fsnotify_path(inode, path, mask); + fsnotify_file(file, mask); } /* @@ -278,16 +275,10 @@ static inline void fsnotify_open(struct file *file) */ static inline void fsnotify_close(struct file *file) { - const struct path *path = &file->f_path; - struct inode *inode = file_inode(file); - fmode_t mode = file->f_mode; - __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; + __u32 mask = (file->f_mode & FMODE_WRITE) ? FS_CLOSE_WRITE : + FS_CLOSE_NOWRITE; - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - if (!(file->f_mode & FMODE_NONOTIFY)) - fsnotify_path(inode, path, mask); + fsnotify_file(file, mask); } /* @@ -295,14 +286,7 @@ static inline void fsnotify_close(struct file *file) */ static inline void fsnotify_xattr(struct dentry *dentry) { - struct inode *inode = dentry->d_inode; - __u32 mask = FS_ATTRIB; - - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - fsnotify_parent(NULL, dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + fsnotify_dentry(dentry, FS_ATTRIB); } /* @@ -311,7 +295,6 @@ static inline void fsnotify_xattr(struct dentry *dentry) */ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { - struct inode *inode = dentry->d_inode; __u32 mask = 0; if (ia_valid & ATTR_UID) @@ -332,13 +315,8 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) if (ia_valid & ATTR_MODE) mask |= FS_ATTRIB; - if (mask) { - if (S_ISDIR(inode->i_mode)) - mask |= FS_ISDIR; - - fsnotify_parent(NULL, dentry, mask); - fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); - } + if (mask) + fsnotify_dentry(dentry, mask); } #endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 1915bdba2fad..f0c506405b54 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -47,18 +47,18 @@ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ +#define FS_DIR_MODIFY 0x00080000 /* Directory entry was modified */ #define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */ -#define FS_ISDIR 0x40000000 /* event occurred against dir */ -#define FS_IN_ONESHOT 0x80000000 /* only send event once */ - -#define FS_DN_RENAME 0x10000000 /* file renamed */ -#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ - /* This inode cares about things that happen to its children. Always set for * dnotify and inotify. 
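With fsnotify_file() as the single choke point shown above, every file-based hook reduces to mask selection plus one call; a hypothetical new hook would follow the same shape (this helper is not part of the patch):

    /* Hypothetical: report an access event from a new call site.  The
     * helper already handles FMODE_NONOTIFY, FS_ISDIR and the parent
     * watch, so callers no longer open-code any of it. */
    static inline void fsnotify_example(struct file *file)
    {
            fsnotify_file(file, FS_ACCESS);
    }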
*/ #define FS_EVENT_ON_CHILD 0x08000000 +#define FS_DN_RENAME 0x10000000 /* file renamed */ +#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ +#define FS_ISDIR 0x40000000 /* event occurred against dir */ +#define FS_IN_ONESHOT 0x80000000 /* only send event once */ + #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) /* @@ -67,7 +67,8 @@ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event * when a directory entry inside a child subdir changes. */ -#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE) +#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | \ + FS_DIR_MODIFY) #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) @@ -133,8 +134,7 @@ struct fsnotify_ops { */ struct fsnotify_event { struct list_head list; - /* inode may ONLY be dereferenced during handle_event(). */ - struct inode *inode; /* either the inode the event happened to or its parent */ + unsigned long objectid; /* identifier for queue merges */ }; /* @@ -213,10 +213,36 @@ struct fsnotify_group { }; }; -/* when calling fsnotify tell it if the data is a path or inode */ -#define FSNOTIFY_EVENT_NONE 0 -#define FSNOTIFY_EVENT_PATH 1 -#define FSNOTIFY_EVENT_INODE 2 +/* When calling fsnotify tell it if the data is a path or inode */ +enum fsnotify_data_type { + FSNOTIFY_EVENT_NONE, + FSNOTIFY_EVENT_PATH, + FSNOTIFY_EVENT_INODE, +}; + +static inline const struct inode *fsnotify_data_inode(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_INODE: + return data; + case FSNOTIFY_EVENT_PATH: + return d_inode(((const struct path *)data)->dentry); + default: + return NULL; + } +} + +static inline const struct path *fsnotify_data_path(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_PATH: + return data; + default: + return NULL; + } +} enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, @@ -351,9 +377,10 @@ struct fsnotify_mark { /* called from the vfs helpers */ /* main fsnotify call to send events */ -extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, - const struct qstr *name, u32 cookie); -extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask); +extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, + int data_type, const struct qstr *name, u32 cookie); +extern int fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, + int data_type); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); @@ -500,21 +527,22 @@ extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); static inline void fsnotify_init_event(struct fsnotify_event *event, - struct inode *inode) + unsigned long objectid) { INIT_LIST_HEAD(&event->list); - event->inode = inode; + event->objectid = objectid; } #else -static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, - const struct qstr *name, u32 cookie) +static inline int fsnotify(struct inode *to_tell, __u32 mask, const void *data, + int data_type, const struct qstr *name, u32 cookie) { return 0; } -static inline int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask) +static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, + const void *data, int data_type) { return 0; } diff 
--git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index ad19e68e1fc3..ae94dcebd936 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -248,7 +248,7 @@ struct charger_manager { u64 charging_end_time; }; -#ifdef CONFIG_CHARGER_MANAGER +#if IS_ENABLED(CONFIG_CHARGER_MANAGER) extern void cm_notify_event(struct power_supply *psy, enum cm_event_types type, char *msg); #else diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 28b1a2b4459e..3a2ac7072dbb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -47,8 +47,8 @@ * A. IP checksum related features * * Drivers advertise checksum offload capabilities in the features of a device. - * From the stack's point of view these are capabilities offered by the driver, - * a driver typically only advertises features that it is capable of offloading + * From the stack's point of view these are capabilities offered by the driver. + * A driver typically only advertises features that it is capable of offloading * to its device. * * The checksum related features are: @@ -63,7 +63,7 @@ * TCP or UDP packets over IPv4. These are specifically * unencapsulated packets of the form IPv4|TCP or * IPv4|UDP where the Protocol field in the IPv4 header - * is TCP or UDP. The IPv4 header may contain IP options + * is TCP or UDP. The IPv4 header may contain IP options. * This feature cannot be set in features for a device * with NETIF_F_HW_CSUM also set. This feature is being * DEPRECATED (see below). @@ -79,13 +79,13 @@ * DEPRECATED (see below). * * NETIF_F_RXCSUM - Driver (device) performs receive checksum offload. - * This flag is used only used to disable the RX checksum + * This flag is only used to disable the RX checksum * feature for a device. The stack will accept receive * checksum indication in packets received on a device * regardless of whether NETIF_F_RXCSUM is set. * * B. Checksumming of received packets by device. Indication of checksum - * verification is in set skb->ip_summed. Possible values are: + * verification is set in skb->ip_summed. Possible values are: * * CHECKSUM_NONE: * @@ -115,16 +115,16 @@ * the packet minus one that have been verified as CHECKSUM_UNNECESSARY. * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet * and a device is able to verify the checksums for UDP (possibly zero), - * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to + * GRE (checksum flag is set) and TCP, skb->csum_level would be set to * two. If the device were only able to verify the UDP checksum and not - * GRE, either because it doesn't support GRE checksum of because GRE + * GRE, either because it doesn't support GRE checksum or because GRE * checksum is bad, skb->csum_level would be set to zero (TCP checksum is * not considered in this case). * * CHECKSUM_COMPLETE: * * This is the most generic way. The device supplied checksum of the _whole_ - * packet as seen by netif_rx() and fills out in skb->csum. Meaning, the + * packet as seen by netif_rx() and fills in skb->csum. This means the * hardware doesn't need to parse L3/L4 headers to implement this. * * Notes: @@ -153,8 +153,8 @@ * from skb->csum_start up to the end, and to record/write the checksum at * offset skb->csum_start + skb->csum_offset. 
A driver may verify that the * csum_start and csum_offset values are valid values given the length and - * offset of the packet, however they should not attempt to validate that the - * checksum refers to a legitimate transport layer checksum-- it is the + * offset of the packet, but it should not attempt to validate that the + * checksum refers to a legitimate transport layer checksum -- it is the * purview of the stack to validate that csum_start and csum_offset are set * correctly. * @@ -178,18 +178,18 @@ * * CHECKSUM_UNNECESSARY: * - * This has the same meaning on as CHECKSUM_NONE for checksum offload on + * This has the same meaning as CHECKSUM_NONE for checksum offload on * output. * * CHECKSUM_COMPLETE: * Not used in checksum output. If a driver observes a packet with this value - * set in skbuff, if should treat as CHECKSUM_NONE being set. + * set in skbuff, it should treat the packet as if CHECKSUM_NONE were set. * * D. Non-IP checksum (CRC) offloads * * NETIF_F_SCTP_CRC - This feature indicates that a device is capable of * offloading the SCTP CRC in a packet. To perform this offload the stack - * will set set csum_start and csum_offset accordingly, set ip_summed to + * will set csum_start and csum_offset accordingly, set ip_summed to * CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication in * the skbuff that the CHECKSUM_PARTIAL refers to CRC32c. * A driver that supports both IP checksum offload and SCTP CRC32c offload @@ -200,10 +200,10 @@ * NETIF_F_FCOE_CRC - This feature indicates that a device is capable of * offloading the FCOE CRC in a packet. To perform this offload the stack * will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset - * accordingly. Note the there is no indication in the skbuff that the - * CHECKSUM_PARTIAL refers to an FCOE checksum, a driver that supports + * accordingly. Note that there is no indication in the skbuff that the + * CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports * both IP checksum offload and FCOE CRC offload must verify which offload - * is configured for a packet presumably by inspecting packet headers. + * is configured for a packet, presumably by inspecting packet headers. * * E. Checksumming on output with GSO. * @@ -211,9 +211,9 @@ * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the * gso_type is SKB_GSO_TCPV4 or SKB_GSO_TCPV6, TCP checksum offload as * part of the GSO operation is implied. If a checksum is being offloaded - * with GSO then ip_summed is CHECKSUM_PARTIAL, csum_start and csum_offset - * are set to refer to the outermost checksum being offload (two offloaded - * checksums are possible with UDP encapsulation). + * with GSO then ip_summed is CHECKSUM_PARTIAL, and both csum_start and + * csum_offset are set to refer to the outermost checksum being offloaded + * (two offloaded checksums are possible with UDP encapsulation). */ /* Don't change this without changing skb_csum_unnecessary! 
*/ diff --git a/include/net/9p/client.h b/include/net/9p/client.h index acc60d8a3b3b..dd5b5bd781a4 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -73,7 +73,6 @@ enum p9_req_status_t { * @wq: wait_queue for the client to block on for this request * @tc: the request fcall structure * @rc: the response fcall structure - * @aux: transport specific data (provided for trans_fd migration) * @req_list: link for higher level objects to chain requests */ struct p9_req_t { @@ -83,7 +82,6 @@ struct p9_req_t { wait_queue_head_t wq; struct p9_fcall tc; struct p9_fcall rc; - void *aux; struct list_head req_list; }; @@ -200,6 +198,8 @@ int p9_client_fsync(struct p9_fid *fid, int datasync); int p9_client_remove(struct p9_fid *fid); int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags); int p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err); +int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, + int *err); int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset); int p9dirent_read(struct p9_client *clnt, char *buf, int len, diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index b9effa6f8503..a88c7c6d0692 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -24,10 +24,11 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +#define FAN_DIR_MODIFY 0x00080000 /* Directory entry was modified */ -#define FAN_ONDIR 0x40000000 /* event occurred against dir */ +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ -#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ /* helper events */ #define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ @@ -116,6 +117,7 @@ struct fanotify_event_metadata { }; #define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 /* Variable length info record following event metadata */ struct fanotify_event_info_header { @@ -124,7 +126,12 @@ struct fanotify_event_info_header { __u16 len; }; -/* Unique file identifier info record */ +/* + * Unique file identifier info record. This is used both for + * FAN_EVENT_INFO_TYPE_FID records and for FAN_EVENT_INFO_TYPE_DFID_NAME + * records. For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null + * terminated name immediately after the file handle. 
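User space opts into the new directory-entry reporting by requesting FAN_DIR_MODIFY on a mark in a FAN_REPORT_FID group; matching events then carry the FAN_EVENT_INFO_TYPE_DFID_NAME record parsed earlier. A hedged setup sketch (the define may be missing from installed headers):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/fanotify.h>

    #ifndef FAN_DIR_MODIFY
    #define FAN_DIR_MODIFY 0x00080000
    #endif

    int main(int argc, char *argv[])
    {
            const char *dir = argc > 1 ? argv[1] : ".";
            int fd = fanotify_init(FAN_CLASS_NOTIF | FAN_REPORT_FID, 0);

            if (fd < 0) {
                    perror("fanotify_init");
                    return 1;
            }
            if (fanotify_mark(fd, FAN_MARK_ADD, FAN_DIR_MODIFY,
                              AT_FDCWD, dir) < 0) {
                    perror("fanotify_mark");
                    return 1;
            }
            /* ... read and parse events as sketched above ... */
            return 0;
    }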
+ */ struct fanotify_event_info_fid { struct fanotify_event_info_header hdr; __kernel_fsid_t fsid; diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index f0d243318452..3596448bfdab 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -160,23 +160,14 @@ static int audit_mark_handle_event(struct fsnotify_group *group, { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); struct audit_fsnotify_mark *audit_mark; - const struct inode *inode = NULL; + const struct inode *inode = fsnotify_data_inode(data, data_type); audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark); BUG_ON(group != audit_fsnotify_group); - switch (data_type) { - case (FSNOTIFY_EVENT_PATH): - inode = ((const struct path *)data)->dentry->d_inode; - break; - case (FSNOTIFY_EVENT_INODE): - inode = (const struct inode *)data; - break; - default: - BUG(); + if (WARN_ON(!inode)) return 0; - } if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) { if (audit_compare_dname_path(dname, audit_mark->path, AUDIT_NAME_FULL)) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 8a8fd732ff6d..e09c551ae52d 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -471,25 +471,13 @@ static int audit_watch_handle_event(struct fsnotify_group *group, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); - const struct inode *inode; + const struct inode *inode = fsnotify_data_inode(data, data_type); struct audit_parent *parent; parent = container_of(inode_mark, struct audit_parent, mark); BUG_ON(group != audit_watch_group); - - switch (data_type) { - case (FSNOTIFY_EVENT_PATH): - inode = d_backing_inode(((const struct path *)data)->dentry); - break; - case (FSNOTIFY_EVENT_INODE): - inode = (const struct inode *)data; - break; - default: - BUG(); - inode = NULL; - break; - } + WARN_ON(!inode); if (mask & (FS_CREATE|FS_MOVED_TO) && inode) audit_update_watch(parent, dname, inode->i_sb->s_dev, inode->i_ino, 0); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 6dbeedb7354c..86aba8706b16 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -678,7 +678,7 @@ static int load_image_and_restore(void) error = swsusp_read(&flags); swsusp_close(FMODE_READ); if (!error) - hibernation_restore(flags & SF_PLATFORM_MODE); + error = hibernation_restore(flags & SF_PLATFORM_MODE); pr_err("Failed to load image, recovering.\n"); swsusp_free(); diff --git a/kernel/power/main.c b/kernel/power/main.c index 69b7a8aeca3b..40f86ec4ab30 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -535,6 +535,13 @@ static ssize_t pm_debug_messages_store(struct kobject *kobj, power_attr(pm_debug_messages); +static int __init pm_debug_messages_setup(char *str) +{ + pm_debug_messages_on = true; + return 1; +} +__setup("pm_debug_messages", pm_debug_messages_setup); + /** * __pm_pr_dbg - Print a suspend debug message to the kernel log. * @defer: Whether or not to use printk_deferred() to print the message. 
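pm_debug_messages_setup() uses the standard __setup() hook, which runs during early command-line parsing, well before sysfs is available. The same pattern works for any boot-time flag (names here are hypothetical):

    static bool my_feature_enabled;        /* hypothetical flag */

    static int __init my_feature_setup(char *str)
    {
            my_feature_enabled = true;
            return 1;       /* non-zero: parameter consumed */
    }
    __setup("my_feature", my_feature_setup);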
diff --git a/net/9p/client.c b/net/9p/client.c index 1d48afc7033c..fc1f3635e5dd 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1549,82 +1549,94 @@ EXPORT_SYMBOL(p9_client_unlinkat); int p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) { - struct p9_client *clnt = fid->clnt; - struct p9_req_t *req; int total = 0; *err = 0; + while (iov_iter_count(to)) { + int count; + + count = p9_client_read_once(fid, offset, to, err); + if (!count || *err) + break; + offset += count; + total += count; + } + return total; +} +EXPORT_SYMBOL(p9_client_read); + +int +p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, + int *err) +{ + struct p9_client *clnt = fid->clnt; + struct p9_req_t *req; + int count = iov_iter_count(to); + int rsize, non_zc = 0; + char *dataptr; + + *err = 0; p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, (unsigned long long) offset, (int)iov_iter_count(to)); - while (iov_iter_count(to)) { - int count = iov_iter_count(to); - int rsize, non_zc = 0; - char *dataptr; + rsize = fid->iounit; + if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) + rsize = clnt->msize - P9_IOHDRSZ; - rsize = fid->iounit; - if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) - rsize = clnt->msize - P9_IOHDRSZ; + if (count < rsize) + rsize = count; - if (count < rsize) - rsize = count; + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + /* response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREAD, to, NULL, rsize, + 0, 11, "dqd", fid->fid, + offset, rsize); + } else { + non_zc = 1; + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, + rsize); + } + if (IS_ERR(req)) { + *err = PTR_ERR(req); + return 0; + } - /* Don't bother zerocopy for small IO (< 1024) */ - if (clnt->trans_mod->zc_request && rsize > 1024) { - /* - * response header len is 11 - * PDU Header(7) + IO Size (4) - */ - req = p9_client_zc_rpc(clnt, P9_TREAD, to, NULL, rsize, - 0, 11, "dqd", fid->fid, - offset, rsize); - } else { - non_zc = 1; - req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, - rsize); - } - if (IS_ERR(req)) { - *err = PTR_ERR(req); - break; - } + *err = p9pdu_readf(&req->rc, clnt->proto_version, + "D", &count, &dataptr); + if (*err) { + trace_9p_protocol_dump(clnt, &req->rc); + p9_tag_remove(clnt, req); + return 0; + } + if (rsize < count) { + pr_err("bogus RREAD count (%d > %d)\n", count, rsize); + count = rsize; + } - *err = p9pdu_readf(&req->rc, clnt->proto_version, - "D", &count, &dataptr); - if (*err) { - trace_9p_protocol_dump(clnt, &req->rc); - p9_tag_remove(clnt, req); - break; - } - if (rsize < count) { - pr_err("bogus RREAD count (%d > %d)\n", count, rsize); - count = rsize; - } + p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); + if (!count) { + p9_tag_remove(clnt, req); + return 0; + } - p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - if (!count) { - p9_tag_remove(clnt, req); - break; - } + if (non_zc) { + int n = copy_to_iter(dataptr, count, to); - if (non_zc) { - int n = copy_to_iter(dataptr, count, to); - total += n; - offset += n; - if (n != count) { - *err = -EFAULT; - p9_tag_remove(clnt, req); - break; - } - } else { - iov_iter_advance(to, count); - total += count; - offset += count; + if (n != count) { + *err = -EFAULT; + p9_tag_remove(clnt, req); + return n; } - p9_tag_remove(clnt, req); + } else { + iov_iter_advance(to, count); } - return total; + p9_tag_remove(clnt, req); + return count; } 
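p9_client_read() above is now a thin loop that accumulates p9_client_read_once() results until the iov_iter drains, EOF (a zero return) or an error stops it. The same loop shape in user-space terms:

    #include <unistd.h>

    /* Retry short reads until 'len' bytes arrive; EOF (0) or an error
     * (-1) ends the loop, mirroring the p9_client_read() structure. */
    static ssize_t read_full(int fd, void *buf, size_t len)
    {
            size_t total = 0;

            while (total < len) {
                    ssize_t n = read(fd, (char *)buf + total, len - total);

                    if (n <= 0)
                            return total ? (ssize_t)total : n;
                    total += n;
            }
            return (ssize_t)total;
    }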
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5bf8d22a47ec..39d37d0ef575 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1065,11 +1065,12 @@ static void neigh_timer_handler(struct timer_list *t)
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
-			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
+			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
+					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
-		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
+		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
@@ -1125,7 +1126,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
-					 HZ/2);
+					 HZ/100);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
@@ -1427,7 +1428,8 @@ void __neigh_set_probe_once(struct neighbour *neigh)
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
-			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
+			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
+				      HZ/100));
 }
 EXPORT_SYMBOL(__neigh_set_probe_once);
diff --git a/net/core/sock.c b/net/core/sock.c
index da32d9b6d09f..ce1d8dce9b7a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -574,7 +574,7 @@ static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)

	/* Sorry... */
	ret = -EPERM;
-	if (!ns_capable(net->user_ns, CAP_NET_RAW))
+	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 5390ff541658..e94eb1aac602 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1338,7 +1338,7 @@ static void dsa_hw_port_list_free(struct list_head *hw_port_list)
 }

 /* Make the hardware datapath to/from @dev limited to a common MTU */
-void dsa_bridge_mtu_normalization(struct dsa_port *dp)
+static void dsa_bridge_mtu_normalization(struct dsa_port *dp)
 {
	struct list_head hw_port_list;
	struct dsa_switch_tree *dst;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a11fd4d67832..24e319dfb510 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1357,7 +1357,7 @@ retry:

	regen_advance = idev->cnf.regen_max_retry *
			idev->cnf.dad_transmits *
-			NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ;
+			max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;

	/* recalculate max_desync_factor each time and update
	 * idev->desync_factor if it's larger
@@ -3298,6 +3298,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
	if (netif_is_l3_master(idev->dev))
		return;

+	/* no link local addresses on devices flagged as slaves */
+	if (idev->dev->flags & IFF_SLAVE)
+		return;
+
	ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);

	switch (idev->cnf.addr_gen_mode) {
@@ -4117,7 +4121,8 @@ static void addrconf_dad_work(struct work_struct *w)
		ifp->dad_probes--;
		addrconf_mod_dad_work(ifp,
-				      NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));
+				      max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME),
+					  HZ/100));
		spin_unlock(&ifp->lock);
		write_unlock_bh(&idev->lock);

@@ -4523,7 +4528,7 @@ restart:
			    !(ifp->flags&IFA_F_TENTATIVE)) {
				unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
					ifp->idev->cnf.dad_transmits *
-					NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME) / HZ;
+					max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;

				if (age >= ifp->prefered_lft - regen_advance) {
					struct inet6_ifaddr *ifpub = ifp->ifpub;
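All of the RETRANS_TIME call sites above gain the same lower bound, max(..., HZ/100), i.e. roughly 10 ms (exactly 10 ms whenever HZ is a multiple of 100), so a zero or pathologically small configured retransmit interval can no longer arm neighbour timers at near-zero delay; the __neigh_event_send() hunk also lowers the previous HZ/2 floor to the same value. A sketch of the clamp in isolation, with a hypothetical helper name:

#include <linux/kernel.h>	/* max() */
#include <linux/jiffies.h>	/* HZ */

/* Hypothetical helper mirroring the pattern above: HZ/100 jiffies is
 * ~10 ms regardless of the configured tick rate (1 jiffy at HZ=100,
 * 10 jiffies at HZ=1000).
 */
static int demo_clamp_retrans(int interval_jiffies)
{
	return max(interval_jiffies, HZ / 100);
}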
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 6ffa153e5166..1ecd4e9b0bdf 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1359,8 +1359,8 @@ skip_defrtr:

		if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
			rtime = (rtime*HZ)/1000;
-			if (rtime < HZ/10)
-				rtime = HZ/10;
+			if (rtime < HZ/100)
+				rtime = HZ/100;
			NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);
			in6_dev->tstamp = jiffies;
			send_ifinfo_notify = true;
diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c
index dc4f20e23bf7..d38b476fc7f2 100644
--- a/net/ipv6/rpl.c
+++ b/net/ipv6/rpl.c
@@ -48,7 +48,7 @@ void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr,
	outhdr->cmpri = 0;
	outhdr->cmpre = 0;

-	for (i = 0; i <= n; i++)
+	for (i = 0; i < n; i++)
		ipv6_rpl_addr_decompress(&outhdr->rpl_segaddr[i], daddr,
					 ipv6_rpl_segdata_pos(inhdr, i),
					 inhdr->cmpri);
@@ -66,7 +66,7 @@ static unsigned char ipv6_rpl_srh_calc_cmpri(const struct ipv6_rpl_sr_hdr *inhdr,
	int i;

	for (plen = 0; plen < sizeof(*daddr); plen++) {
-		for (i = 0; i <= n; i++) {
+		for (i = 0; i < n; i++) {
			if (daddr->s6_addr[plen] !=
			    inhdr->rpl_segaddr[i].s6_addr[plen])
				return plen;
@@ -114,7 +114,7 @@ void ipv6_rpl_srh_compress(struct ipv6_rpl_sr_hdr *outhdr,
	outhdr->cmpri = cmpri;
	outhdr->cmpre = cmpre;

-	for (i = 0; i <= n; i++)
+	for (i = 0; i < n; i++)
		ipv6_rpl_addr_compress(ipv6_rpl_segdata_pos(outhdr, i),
				       &inhdr->rpl_segaddr[i], cmpri);
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index a49ddc6cd020..c3ececd7cfc1 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -210,7 +210,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
	struct dst_entry *orig_dst = skb_dst(skb);
	struct dst_entry *dst = NULL;
	struct rpl_lwt *rlwt;
-	int err = -EINVAL;
+	int err;

	rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index bd220ee4aac9..faf57585b892 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -4,6 +4,8 @@
  * Copyright (c) 2017 - 2019, Intel Corporation.
  */

+#define pr_fmt(fmt) "MPTCP: " fmt
+
 #include <linux/kernel.h>
 #include <net/tcp.h>
 #include <net/mptcp.h>
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 064639f72487..977d9c8b1453 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -3,6 +3,8 @@
  *
  * Copyright (c) 2019, Intel Corporation.
  */

+#define pr_fmt(fmt) "MPTCP: " fmt
+
 #include <linux/kernel.h>
 #include <net/tcp.h>
 #include <net/mptcp.h>
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index a0ce7f324499..86d61ab34c7c 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -4,6 +4,8 @@
  * Copyright (c) 2020, Red Hat, Inc.
  */

+#define pr_fmt(fmt) "MPTCP: " fmt
+
 #include <linux/inet.h>
 #include <linux/kernel.h>
 #include <net/tcp.h>
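The three MPTCP hunks above add the same pr_fmt() definition. pr_fmt() must be defined before the first header that pulls in <linux/printk.h>, because the pr_*() macros expand it at their call sites; every pr_debug()/pr_info() in those files then picks up the "MPTCP: " prefix with no further changes. A minimal sketch with a hypothetical prefix:

/* Must come before any header that includes <linux/printk.h>. */
#define pr_fmt(fmt) "demo: " fmt

#include <linux/printk.h>

static void demo_announce(void)
{
	pr_info("ready\n");	/* logged as "demo: ready" */
}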
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1833bc1f4a43..939a5045181a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -57,10 +57,43 @@ static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk)
	return msk->first && !sk_is_mptcp(msk->first);
 }

+static struct socket *mptcp_is_tcpsk(struct sock *sk)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (sock->sk != sk)
+		return NULL;
+
+	if (unlikely(sk->sk_prot == &tcp_prot)) {
+		/* we are being invoked after mptcp_accept() has
+		 * accepted a non-mp-capable flow: sk is a tcp_sk,
+		 * not an mptcp one.
+		 *
+		 * Hand the socket over to tcp so all further socket ops
+		 * bypass mptcp.
+		 */
+		sock->ops = &inet_stream_ops;
+		return sock;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+	} else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
+		sock->ops = &inet6_stream_ops;
+		return sock;
+#endif
+	}
+
+	return NULL;
+}
+
 static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk)
 {
+	struct socket *sock;
+
	sock_owned_by_me((const struct sock *)msk);

+	sock = mptcp_is_tcpsk((struct sock *)msk);
+	if (unlikely(sock))
+		return sock;
+
	if (likely(!__mptcp_needs_tcp_fallback(msk)))
		return NULL;

@@ -84,6 +117,10 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
	struct socket *ssock;
	int err;

+	ssock = __mptcp_tcp_fallback(msk);
+	if (unlikely(ssock))
+		return ssock;
+
	ssock = __mptcp_nmpc_socket(msk);
	if (ssock)
		goto set_state;
@@ -121,6 +158,27 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
	MPTCP_SKB_CB(skb)->offset = offset;
 }

+/* both sockets must be locked */
+static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
+				    struct sock *ssk)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+	u64 dsn = mptcp_subflow_get_mapped_dsn(subflow);
+
+	/* revalidate data sequence number.
+	 *
+	 * mptcp_subflow_data_available() is usually called
+	 * without msk lock. Its unlikely (but possible)
+	 * that msk->ack_seq has been advanced since the last
+	 * call found in-sequence data.
+	 */
+	if (likely(dsn == msk->ack_seq))
+		return true;
+
+	subflow->data_avail = 0;
+	return mptcp_subflow_data_available(ssk);
+}
+
 static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
					   struct sock *ssk,
					   unsigned int *bytes)
@@ -132,6 +190,11 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
	struct tcp_sock *tp;
	bool done = false;

+	if (!mptcp_subflow_dsn_valid(msk, ssk)) {
+		*bytes = 0;
+		return false;
+	}
+
	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
		int rcvbuf = max(ssk->sk_rcvbuf, sk->sk_rcvbuf);

@@ -290,6 +353,15 @@ void mptcp_data_acked(struct sock *sk)
		sock_hold(sk);
 }

+void mptcp_subflow_eof(struct sock *sk)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	if (!test_and_set_bit(MPTCP_WORK_EOF, &msk->flags) &&
+	    schedule_work(&msk->work))
+		sock_hold(sk);
+}
+
 static void mptcp_stop_timer(struct sock *sk)
 {
	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -994,6 +1066,27 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
	return 0;
 }

+static void mptcp_check_for_eof(struct mptcp_sock *msk)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	int receivers = 0;
+
+	mptcp_for_each_subflow(msk, subflow)
+		receivers += !subflow->rx_eof;
+
+	if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+		/* hopefully temporary hack: propagate shutdown status
+		 * to msk, when all subflows agree on it
+		 */
+		sk->sk_shutdown |= RCV_SHUTDOWN;
+
+		smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
+		set_bit(MPTCP_DATA_READY, &msk->flags);
+		sk->sk_data_ready(sk);
+	}
+}
+
 static void mptcp_worker(struct work_struct *work)
 {
	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@@ -1010,6 +1103,9 @@ static void mptcp_worker(struct work_struct *work)
	__mptcp_flush_join_list(msk);
	__mptcp_move_skbs(msk);

+	if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
+		mptcp_check_for_eof(msk);
+
	if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
		goto unlock;
@@ -1752,7 +1848,9 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
	msk = mptcp_sk(sk);
	lock_sock(sk);
-	ssock = __mptcp_nmpc_socket(msk);
+	ssock = __mptcp_tcp_fallback(msk);
+	if (!ssock)
+		ssock = __mptcp_nmpc_socket(msk);
	if (ssock) {
		mask = ssock->ops->poll(file, ssock, wait);
		release_sock(sk);
@@ -1762,9 +1860,6 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
	release_sock(sk);
	sock_poll_wait(file, sock, wait);
	lock_sock(sk);
-	ssock = __mptcp_tcp_fallback(msk);
-	if (unlikely(ssock))
-		return ssock->ops->poll(file, ssock, NULL);

	if (test_bit(MPTCP_DATA_READY, &msk->flags))
		mask = EPOLLIN | EPOLLRDNORM;
@@ -1783,11 +1878,17 @@ static int mptcp_shutdown(struct socket *sock, int how)
 {
	struct mptcp_sock *msk = mptcp_sk(sock->sk);
	struct mptcp_subflow_context *subflow;
+	struct socket *ssock;
	int ret = 0;

	pr_debug("sk=%p, how=%d", msk, how);

	lock_sock(sock->sk);
+	ssock = __mptcp_tcp_fallback(msk);
+	if (ssock) {
+		release_sock(sock->sk);
+		return inet_shutdown(ssock, how);
+	}

	if (how == SHUT_WR || how == SHUT_RDWR)
		inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f733c5425552..67448002a2d7 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -89,6 +89,7 @@
 #define MPTCP_DATA_READY	0
 #define MPTCP_SEND_SPACE	1
 #define MPTCP_WORK_RTX		2
+#define MPTCP_WORK_EOF		3

 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
 {
@@ -339,6 +340,7 @@ void mptcp_finish_connect(struct sock *sk);
 void mptcp_data_ready(struct sock *sk, struct sock *ssk);
 bool mptcp_finish_join(struct sock *sk);
 void mptcp_data_acked(struct sock *sk);
+void mptcp_subflow_eof(struct sock *sk);

 int mptcp_token_new_request(struct request_sock *req);
 void mptcp_token_destroy_request(u32 token);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index b5180c81588e..50a8bea987c6 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -994,8 +994,7 @@ static void subflow_state_change(struct sock *sk)
	if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
-		parent->sk_shutdown |= RCV_SHUTDOWN;
-		__subflow_state_change(parent);
+		mptcp_subflow_eof(parent);
	}
 }
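The subflow.c change stops writing RCV_SHUTDOWN to the parent socket directly from the subflow state-change path; mptcp_subflow_eof() instead defers the decision to the worker, which propagates the shutdown only once every subflow has seen EOF (mptcp_check_for_eof() above). A sketch of the flag-plus-worker idiom it relies on, with hypothetical names:

#include <linux/workqueue.h>
#include <net/sock.h>

#define DEMO_WORK_EOF	3

struct demo_sock {
	unsigned long flags;
	struct work_struct work;
};

/* Set the pending bit atomically; hold a socket reference only when
 * the work was actually queued, so the worker drops it exactly once.
 */
static void demo_signal_eof(struct sock *sk, struct demo_sock *dsk)
{
	if (!test_and_set_bit(DEMO_WORK_EOF, &dsk->flags) &&
	    schedule_work(&dsk->work))
		sock_hold(sk);
}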
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 129a5ad1bc35..33352dd99d4d 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -40,7 +40,7 @@ static int token_used __read_mostly;

 /**
  * mptcp_token_new_request - create new key/idsn/token for subflow_request
- * @req - the request socket
+ * @req: the request socket
  *
  * This function is called when a new mptcp connection is coming in.
  *
@@ -80,7 +80,7 @@ int mptcp_token_new_request(struct request_sock *req)

 /**
  * mptcp_token_new_connect - create new key/idsn/token for subflow
- * @sk - the socket that will initiate a connection
+ * @sk: the socket that will initiate a connection
  *
  * This function is called when a new outgoing mptcp connection is
  * initiated.
@@ -125,6 +125,7 @@ int mptcp_token_new_connect(struct sock *sk)
 /**
  * mptcp_token_new_accept - insert token for later processing
  * @token: the token to insert to the tree
+ * @conn: the just cloned socket linked to the new connection
  *
  * Called when a SYN packet creates a new logical connection, i.e.
  * is not a join request.
@@ -169,7 +170,7 @@ struct mptcp_sock *mptcp_token_get_sock(u32 token)

 /**
  * mptcp_token_destroy_request - remove mptcp connection/token
- * @token - token of mptcp connection to remove
+ * @token: token of mptcp connection to remove
  *
  * Remove not-yet-fully-established incoming connection identified
  * by @token.
@@ -183,7 +184,7 @@ void mptcp_token_destroy_request(u32 token)

 /**
  * mptcp_token_destroy - remove mptcp connection/token
- * @token - token of mptcp connection to remove
+ * @token: token of mptcp connection to remove
  *
  * Remove the connection identified by @token.
  */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index fd8a01ca7a2d..2398d7238300 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -462,12 +462,14 @@ static void flow_table_copy_flows(struct table_instance *old,
		struct hlist_head *head = &old->buckets[i];

		if (ufid)
-			hlist_for_each_entry(flow, head,
-					     ufid_table.node[old_ver])
+			hlist_for_each_entry_rcu(flow, head,
+						 ufid_table.node[old_ver],
+						 lockdep_ovsl_is_held())
				ufid_table_instance_insert(new, flow);
		else
-			hlist_for_each_entry(flow, head,
-					     flow_table.node[old_ver])
+			hlist_for_each_entry_rcu(flow, head,
+						 flow_table.node[old_ver],
+						 lockdep_ovsl_is_held())
				table_instance_insert(new, flow);
	}
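The flow_table.c hunk switches the plain list walk to hlist_for_each_entry_rcu() and uses the iterator's optional fourth argument, a lockdep condition: passing lockdep_ovsl_is_held() tells the RCU list-debugging checks that the traversal is licensed by ovs_lock rather than by rcu_read_lock(). The same shape with a hypothetical lock and entry type:

#include <linux/rculist.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(demo_lock);	/* hypothetical writer-side lock */

struct demo_entry {
	struct hlist_node node;
	int value;
};

static void demo_process(struct demo_entry *e) { }

static void demo_walk(struct hlist_head *head)
{
	struct demo_entry *e;

	/* The extra condition silences "RCU-list traversed without
	 * rcu_read_lock" splats when the walk is protected by demo_lock.
	 */
	hlist_for_each_entry_rcu(e, head, node,
				 lockdep_is_held(&demo_lock))
		demo_process(e);
}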
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 9904299424a1..61e95029c18f 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -11,6 +11,7 @@
 #include <linux/skbuff.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/refcount.h>
 #include <net/act_api.h>
 #include <net/netlink.h>
 #include <net/pkt_cls.h>
@@ -26,9 +27,12 @@

 #define DEFAULT_HASH_SIZE	64	/* optimized for diffserv */

+struct tcindex_data;
+
 struct tcindex_filter_result {
	struct tcf_exts		exts;
	struct tcf_result	res;
+	struct tcindex_data	*p;
	struct rcu_work		rwork;
 };

@@ -49,6 +53,7 @@ struct tcindex_data {
	u32 hash;		/* hash table size; 0 if undefined */
	u32 alloc_hash;		/* allocated size */
	u32 fall_through;	/* 0: only classify if explicit match */
+	refcount_t refcnt;	/* a temporary refcnt for perfect hash */
	struct rcu_work rwork;
 };

@@ -57,6 +62,20 @@ static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
	return tcf_exts_has_actions(&r->exts) || r->res.classid;
 }

+static void tcindex_data_get(struct tcindex_data *p)
+{
+	refcount_inc(&p->refcnt);
+}
+
+static void tcindex_data_put(struct tcindex_data *p)
+{
+	if (refcount_dec_and_test(&p->refcnt)) {
+		kfree(p->perfect);
+		kfree(p->h);
+		kfree(p);
+	}
+}
+
 static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
						    u16 key)
 {
@@ -132,6 +151,7 @@ static int tcindex_init(struct tcf_proto *tp)
	p->mask = 0xffff;
	p->hash = DEFAULT_HASH_SIZE;
	p->fall_through = 1;
+	refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */

	rcu_assign_pointer(tp->root, p);
	return 0;
@@ -141,6 +161,7 @@ static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
 {
	tcf_exts_destroy(&r->exts);
	tcf_exts_put_net(&r->exts);
+	tcindex_data_put(r->p);
 }

 static void tcindex_destroy_rexts_work(struct work_struct *work)
@@ -212,6 +233,8 @@ found:
		else
			__tcindex_destroy_fexts(f);
	} else {
+		tcindex_data_get(p);
+
		if (tcf_exts_get_net(&r->exts))
			tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
		else
@@ -228,9 +251,7 @@ static void tcindex_destroy_work(struct work_struct *work)
					      struct tcindex_data,
					      rwork);

-	kfree(p->perfect);
-	kfree(p->h);
-	kfree(p);
+	tcindex_data_put(p);
 }

 static inline int
@@ -248,9 +269,11 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
 };

 static int tcindex_filter_result_init(struct tcindex_filter_result *r,
+				      struct tcindex_data *p,
				      struct net *net)
 {
	memset(r, 0, sizeof(*r));
+	r->p = p;
	return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
			     TCA_TCINDEX_POLICE);
 }
@@ -290,6 +313,7 @@ static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
				    TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
		if (err < 0)
			goto errout;
+		cp->perfect[i].p = cp;
	}

	return 0;
@@ -334,6 +358,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
	cp->alloc_hash = p->alloc_hash;
	cp->fall_through = p->fall_through;
	cp->tp = tp;
+	refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */

	if (tb[TCA_TCINDEX_HASH])
		cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
@@ -366,7 +391,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
	}
	cp->h = p->h;

-	err = tcindex_filter_result_init(&new_filter_result, net);
+	err = tcindex_filter_result_init(&new_filter_result, cp, net);
	if (err < 0)
		goto errout_alloc;
	if (old_r)
@@ -434,7 +459,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
			goto errout_alloc;
		f->key = handle;
		f->next = NULL;
-		err = tcindex_filter_result_init(&f->result, net);
+		err = tcindex_filter_result_init(&f->result, cp, net);
		if (err < 0) {
			kfree(f);
			goto errout_alloc;
@@ -447,7 +472,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
	}

	if (old_r && old_r != r) {
-		err = tcindex_filter_result_init(old_r, net);
+		err = tcindex_filter_result_init(old_r, cp, net);
		if (err < 0) {
			kfree(f);
			goto errout_alloc;
@@ -571,6 +596,14 @@ static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
		for (i = 0; i < p->hash; i++) {
			struct tcindex_filter_result *r = p->perfect + i;

+			/* tcf_queue_work() does not guarantee the ordering we
+			 * want, so we have to take this refcnt temporarily to
+			 * ensure 'p' is freed after all tcindex_filter_result
+			 * here. Imperfect hash does not need this, because it
+			 * uses linked lists rather than an array.
+			 */
+			tcindex_data_get(p);
+
			tcf_unbind_filter(tp, &r->res);
			if (tcf_exts_get_net(&r->exts))
				tcf_queue_work(&r->rwork,
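The comment in tcindex_destroy() above spells out why the refcount exists: works queued with tcf_queue_work() may run in any order, so each queued tcindex_filter_result destruction temporarily pins the shared tcindex_data, and whichever put drops the count to zero performs the frees. A minimal sketch of that lifetime rule with a hypothetical structure:

#include <linux/refcount.h>
#include <linux/slab.h>

struct demo_data {
	refcount_t refcnt;	/* one ref per outstanding deferred work */
};

static void demo_data_get(struct demo_data *d)
{
	refcount_inc(&d->refcnt);
}

/* Whoever drops the last reference frees the object, so the freeing
 * order no longer depends on the order the works happen to run in.
 */
static void demo_data_put(struct demo_data *d)
{
	if (refcount_dec_and_test(&d->refcnt))
		kfree(d);
}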