diff options
125 files changed, 2476 insertions, 2697 deletions
diff --git a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt index 0bda229a6171..3899d6a557c1 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt @@ -1,5 +1,20 @@ Freescale FlexTimer Module (FTM) PWM controller +The same FTM PWM device can have a different endianness on different SoCs. The +device tree provides a property to describing this so that an operating system +device driver can handle all variants of the device. Refer to the table below +for the endianness of the FTM PWM block as integrated into the existing SoCs: + + SoC | FTM-PWM endianness + --------+------------------- + Vybrid | LE + LS1 | BE + LS2 | LE + +Please see ../regmap/regmap.txt for more detail about how to specify endian +modes in device tree. + + Required properties: - compatible: Should be "fsl,vf610-ftm-pwm". - reg: Physical base address and length of the controller's registers @@ -16,7 +31,8 @@ Required properties: - pinctrl-names: Must contain a "default" entry. - pinctrl-NNN: One property must exist for each entry in pinctrl-names. See pinctrl/pinctrl-bindings.txt for details of the property values. - +- big-endian: Boolean property, required if the FTM PWM registers use a big- + endian rather than little-endian layout. Example: @@ -32,4 +48,5 @@ pwm0: pwm@40038000 { <&clks VF610_CLK_FTM0_EXT_FIX_EN>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pwm0_1>; + big-endian; }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt index d47d15a6a298..b8be3d09ee26 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt @@ -7,8 +7,8 @@ Required properties: "rockchip,vop-pwm": found integrated in VOP on RK3288 SoC - reg: physical base address and length of the controller's registers - clocks: phandle and clock specifier of the PWM reference clock - - #pwm-cells: should be 2. See pwm.txt in this directory for a - description of the cell format. + - #pwm-cells: must be 2 (rk2928) or 3 (rk3288). See pwm.txt in this directory + for a description of the cell format. Example: diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7dbe5ec9d9cd..988160a4ad31 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3465,6 +3465,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. e.g. base its process migration decisions on it. Default is on. + topology_updates= [KNL, PPC, NUMA] + Format: {off} + Specify if the kernel should ignore (off) + topology updates sent by the hypervisor to this + LPAR. + tp720= [HW,PS2] tpm_suspend_pcr=[HW,TPM] diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9596b0ab108d..fe44b2494609 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -9,6 +9,7 @@ config ARC def_bool y select BUILDTIME_EXTABLE_SORT + select COMMON_CLK select CLONE_BACKWARDS # ARC Busybox based initramfs absolutely relies on DEVTMPFS for /dev select DEVTMPFS if !INITRAMFS_SOURCE="" @@ -73,9 +74,6 @@ config STACKTRACE_SUPPORT config HAVE_LATENCYTOP_SUPPORT def_bool y -config NO_DMA - def_bool n - source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -354,7 +352,7 @@ config ARC_CURR_IN_REG kernel mode. This saves memory access for each such access -config ARC_MISALIGN_ACCESS +config ARC_EMUL_UNALIGNED bool "Emulate unaligned memory access (userspace only)" select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_ARCH_UNALIGN_ALLOW diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 8c0b1aa56f7e..10bc3d4e8a44 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -25,7 +25,6 @@ ifdef CONFIG_ARC_CURR_IN_REG LINUXINCLUDE += -include ${src}/arch/arc/include/asm/current.h endif -upto_gcc42 := $(call cc-ifversion, -le, 0402, y) upto_gcc44 := $(call cc-ifversion, -le, 0404, y) atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y) atleast_gcc48 := $(call cc-ifversion, -ge, 0408, y) @@ -60,25 +59,11 @@ ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB # --build-id w/o "-marclinux". Default arc-elf32-ld is OK ldflags-$(upto_gcc44) += -marclinux -ARC_LIBGCC := -mA7 -cflags-$(CONFIG_ARC_HAS_HW_MPY) += -multcost=16 - ifndef CONFIG_ARC_HAS_HW_MPY cflags-y += -mno-mpy - -# newlib for ARC700 assumes MPY to be always present, which is generally true -# However, if someone really doesn't want MPY, we need to use the 600 ver -# which coupled with -mno-mpy will use mpy emulation -# With gcc 4.4.7, -mno-mpy is enough to make any other related adjustments, -# e.g. increased cost of MPY. With gcc 4.2.1 this had to be explicitly hinted - - ifeq ($(upto_gcc42),y) - ARC_LIBGCC := -marc600 - cflags-y += -multcost=30 - endif endif -LIBGCC := $(shell $(CC) $(ARC_LIBGCC) $(cflags-y) --print-libgcc-file-name) +LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name) # Modules with short calls might break for calls into builtin-kernel KBUILD_CFLAGS_MODULE += -mlong-calls diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts index 6b57475967a6..757e0c62c4f9 100644 --- a/arch/arc/boot/dts/angel4.dts +++ b/arch/arc/boot/dts/angel4.dts @@ -24,11 +24,6 @@ serial0 = &arcuart0; }; - memory { - device_type = "memory"; - reg = <0x00000000 0x10000000>; /* 256M */ - }; - fpga { compatible = "simple-bus"; #address-cells = <1>; diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index 4f31b2eb5cdf..cfaedd9c61c9 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -20,18 +20,13 @@ /* this is for console on PGU */ /* bootargs = "console=tty0 consoleblank=0"; */ /* this is for console on serial */ - bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=ttyS0,115200n8 consoleblank=0 debug"; + bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug"; }; aliases { serial0 = &uart0; }; - memory { - device_type = "memory"; - reg = <0x80000000 0x10000000>; /* 256M */ - }; - fpga { compatible = "simple-bus"; #address-cells = <1>; diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/fpga_defconfig index e283aa586934..ef4d3bc7b6c0 100644 --- a/arch/arc/configs/fpga_defconfig +++ b/arch/arc/configs/fpga_defconfig @@ -23,7 +23,6 @@ CONFIG_MODULES=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_ARC_PLAT_FPGA_LEGACY=y -CONFIG_ARC_BOARD_ML509=y # CONFIG_ARC_HAS_RTSC is not set CONFIG_ARC_BUILTIN_DTB_NAME="angel4" CONFIG_PREEMPT=y diff --git a/arch/arc/configs/fpga_noramfs_defconfig b/arch/arc/configs/fpga_noramfs_defconfig index 5276a52f6a2f..49c93011ab96 100644 --- a/arch/arc/configs/fpga_noramfs_defconfig +++ b/arch/arc/configs/fpga_noramfs_defconfig @@ -20,7 +20,6 @@ CONFIG_MODULES=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_ARC_PLAT_FPGA_LEGACY=y -CONFIG_ARC_BOARD_ML509=y # CONFIG_ARC_HAS_RTSC is not set CONFIG_ARC_BUILTIN_DTB_NAME="angel4" CONFIG_PREEMPT=y diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index c01ba35a4eff..278dacf2a3f9 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -21,7 +21,6 @@ CONFIG_MODULES=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_ARC_PLAT_FPGA_LEGACY=y -CONFIG_ARC_BOARD_ML509=y # CONFIG_ARC_IDE is not set # CONFIG_ARCTANGENT_EMAC is not set # CONFIG_ARC_HAS_RTSC is not set diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 372466b371bf..be33db8a2ee3 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -9,19 +9,16 @@ #ifndef _ASM_ARC_ARCREGS_H #define _ASM_ARC_ARCREGS_H -#ifdef __KERNEL__ - /* Build Configuration Registers */ #define ARC_REG_DCCMBASE_BCR 0x61 /* DCCM Base Addr */ #define ARC_REG_CRC_BCR 0x62 -#define ARC_REG_DVFB_BCR 0x64 -#define ARC_REG_EXTARITH_BCR 0x65 #define ARC_REG_VECBASE_BCR 0x68 #define ARC_REG_PERIBASE_BCR 0x69 -#define ARC_REG_FP_BCR 0x6B /* Single-Precision FPU */ -#define ARC_REG_DPFP_BCR 0x6C /* Dbl Precision FPU */ +#define ARC_REG_FP_BCR 0x6B /* ARCompact: Single-Precision FPU */ +#define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */ #define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */ #define ARC_REG_TIMERS_BCR 0x75 +#define ARC_REG_AP_BCR 0x76 #define ARC_REG_ICCM_BCR 0x78 #define ARC_REG_XY_MEM_BCR 0x79 #define ARC_REG_MAC_BCR 0x7a @@ -31,6 +28,9 @@ #define ARC_REG_MIXMAX_BCR 0x7e #define ARC_REG_BARREL_BCR 0x7f #define ARC_REG_D_UNCACH_BCR 0x6A +#define ARC_REG_BPU_BCR 0xc0 +#define ARC_REG_ISA_CFG_BCR 0xc1 +#define ARC_REG_SMART_BCR 0xFF /* status32 Bits Positions */ #define STATUS_AE_BIT 5 /* Exception active */ @@ -191,14 +191,6 @@ #define PAGES_TO_KB(n_pages) ((n_pages) << (PAGE_SHIFT - 10)) #define PAGES_TO_MB(n_pages) (PAGES_TO_KB(n_pages) >> 10) -#ifdef CONFIG_ARC_FPU_SAVE_RESTORE -/* These DPFP regs need to be saved/restored across ctx-sw */ -struct arc_fpu { - struct { - unsigned int l, h; - } aux_dpfp[2]; -}; -#endif /* *************************************************************** @@ -212,27 +204,19 @@ struct bcr_identity { #endif }; -#define EXTN_SWAP_VALID 0x1 -#define EXTN_NORM_VALID 0x2 -#define EXTN_MINMAX_VALID 0x2 -#define EXTN_BARREL_VALID 0x2 - -struct bcr_extn { +struct bcr_isa { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:20, crc:1, ext_arith:2, mul:2, barrel:2, minmax:2, - norm:2, swap:1; + unsigned int pad1:23, atomic1:1, ver:8; #else - unsigned int swap:1, norm:2, minmax:2, barrel:2, mul:2, ext_arith:2, - crc:1, pad:20; + unsigned int ver:8, atomic1:1, pad1:23; #endif }; -/* DSP Options Ref Manual */ -struct bcr_extn_mac_mul { +struct bcr_mpy { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:16, type:8, ver:8; + unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8; #else - unsigned int ver:8, type:8, pad:16; + unsigned int ver:8, type:2, cycles:2, dsp:4, x1616:8, pad:8; #endif }; @@ -251,6 +235,7 @@ struct bcr_perip { unsigned int pad:8, sz:8, pad2:8, start:8; #endif }; + struct bcr_iccm { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int base:16, pad:5, sz:3, ver:8; @@ -277,8 +262,8 @@ struct bcr_dccm { #endif }; -/* Both SP and DP FPU BCRs have same format */ -struct bcr_fp { +/* ARCompact: Both SP and DP FPU BCRs have same format */ +struct bcr_fp_arcompact { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int fast:1, ver:8; #else @@ -286,6 +271,30 @@ struct bcr_fp { #endif }; +struct bcr_timer { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad2:15, rtsc:1, pad1:6, t1:1, t0:1, ver:8; +#else + unsigned int ver:8, t0:1, t1:1, pad1:6, rtsc:1, pad2:15; +#endif +}; + +struct bcr_bpu_arcompact { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad2:19, fam:1, pad:2, ent:2, ver:8; +#else + unsigned int ver:8, ent:2, pad:2, fam:1, pad2:19; +#endif +}; + +struct bcr_generic { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, ver:8; +#else + unsigned int ver:8, pad:24; +#endif +}; + /* ******************************************************************* * Generic structures to hold build configuration used at runtime @@ -299,6 +308,10 @@ struct cpuinfo_arc_cache { unsigned int sz_k:8, line_len:8, assoc:4, ver:4, alias:1, vipt:1, pad:6; }; +struct cpuinfo_arc_bpu { + unsigned int ver, full, num_cache, num_pred; +}; + struct cpuinfo_arc_ccm { unsigned int base_addr, sz; }; @@ -306,21 +319,25 @@ struct cpuinfo_arc_ccm { struct cpuinfo_arc { struct cpuinfo_arc_cache icache, dcache; struct cpuinfo_arc_mmu mmu; + struct cpuinfo_arc_bpu bpu; struct bcr_identity core; - unsigned int timers; + struct bcr_isa isa; + struct bcr_timer timers; unsigned int vec_base; unsigned int uncached_base; struct cpuinfo_arc_ccm iccm, dccm; - struct bcr_extn extn; + struct { + unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3, + fpu_sp:1, fpu_dp:1, pad2:6, + debug:1, ap:1, smart:1, rtt:1, pad3:4, + pad4:8; + } extn; + struct bcr_mpy extn_mpy; struct bcr_extn_xymem extn_xymem; - struct bcr_extn_mac_mul extn_mac_mul; - struct bcr_fp fp, dpfp; }; extern struct cpuinfo_arc cpuinfo_arc700[]; #endif /* __ASEMBLY__ */ -#endif /* __KERNEL__ */ - #endif /* _ASM_ARC_ARCREGS_H */ diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 173f303a868f..067551b6920a 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -9,8 +9,6 @@ #ifndef _ASM_ARC_ATOMIC_H #define _ASM_ARC_ATOMIC_H -#ifdef __KERNEL__ - #ifndef __ASSEMBLY__ #include <linux/types.h> @@ -170,5 +168,3 @@ ATOMIC_OP(and, &=, and) #endif #endif - -#endif diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index ebc0cf3164dc..1a5bf07eefe2 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -13,8 +13,6 @@ #error only <linux/bitops.h> can be included directly #endif -#ifdef __KERNEL__ - #ifndef __ASSEMBLY__ #include <linux/types.h> @@ -508,6 +506,4 @@ static inline __attribute__ ((const)) int __ffs(unsigned long word) #endif /* !__ASSEMBLY__ */ -#endif /* __KERNEL__ */ - #endif diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index 5b18e94c6678..ea022d47896c 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -21,10 +21,9 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs, unsigned long address); void die(const char *str, struct pt_regs *regs, unsigned long address); -#define BUG() do { \ - dump_stack(); \ - pr_warn("Kernel BUG in %s: %s: %d!\n", \ - __FILE__, __func__, __LINE__); \ +#define BUG() do { \ + pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ + dump_stack(); \ } while (0) #define HAVE_ARCH_BUG diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index b3c750979aa1..7861255da32d 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -20,7 +20,7 @@ #define CACHE_LINE_MASK (~(L1_CACHE_BYTES - 1)) /* - * ARC700 doesn't cache any access in top 256M. + * ARC700 doesn't cache any access in top 1G (0xc000_0000 to 0xFFFF_FFFF) * Ideal for wiring memory mapped peripherals as we don't need to do * explicit uncached accesses (LD.di/ST.di) hence more portable drivers */ diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h index 87b918585c4a..c2453ee62801 100644 --- a/arch/arc/include/asm/current.h +++ b/arch/arc/include/asm/current.h @@ -12,8 +12,6 @@ #ifndef _ASM_ARC_CURRENT_H #define _ASM_ARC_CURRENT_H -#ifdef __KERNEL__ - #ifndef __ASSEMBLY__ #ifdef CONFIG_ARC_CURR_IN_REG @@ -27,6 +25,4 @@ register struct task_struct *curr_arc asm("r25"); #endif /* ! __ASSEMBLY__ */ -#endif /* __KERNEL__ */ - #endif /* _ASM_ARC_CURRENT_H */ diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h index 587df8236e8b..742816f1b210 100644 --- a/arch/arc/include/asm/irqflags.h +++ b/arch/arc/include/asm/irqflags.h @@ -15,8 +15,6 @@ * -Conditionally disable interrupts (if they are not enabled, don't disable) */ -#ifdef __KERNEL__ - #include <asm/arcregs.h> /* status32 Reg bits related to Interrupt Handling */ @@ -169,6 +167,4 @@ static inline int arch_irqs_disabled(void) #endif /* __ASSEMBLY__ */ -#endif /* KERNEL */ - #endif diff --git a/arch/arc/include/asm/kgdb.h b/arch/arc/include/asm/kgdb.h index b65fca7ffeb5..fea931634136 100644 --- a/arch/arc/include/asm/kgdb.h +++ b/arch/arc/include/asm/kgdb.h @@ -19,7 +19,7 @@ * register API yet */ #undef DBG_MAX_REG_NUM -#define GDB_MAX_REGS 39 +#define GDB_MAX_REGS 87 #define BREAK_INSTR_SIZE 2 #define CACHE_FLUSH_IS_SAFE 1 @@ -33,23 +33,27 @@ static inline void arch_kgdb_breakpoint(void) extern void kgdb_trap(struct pt_regs *regs); -enum arc700_linux_regnums { +/* This is the numbering of registers according to the GDB. See GDB's + * arc-tdep.h for details. + * + * Registers are ordered for GDB 7.5. It is incompatible with GDB 6.8. */ +enum arc_linux_regnums { _R0 = 0, _R1, _R2, _R3, _R4, _R5, _R6, _R7, _R8, _R9, _R10, _R11, _R12, _R13, _R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22, _R23, _R24, _R25, _R26, - _BTA = 27, - _LP_START = 28, - _LP_END = 29, - _LP_COUNT = 30, - _STATUS32 = 31, - _BLINK = 32, - _FP = 33, - __SP = 34, - _EFA = 35, - _RET = 36, - _ORIG_R8 = 37, - _STOP_PC = 38 + _FP = 27, + __SP = 28, + _R30 = 30, + _BLINK = 31, + _LP_COUNT = 60, + _STOP_PC = 64, + _RET = 64, + _LP_START = 65, + _LP_END = 66, + _STATUS32 = 67, + _ECR = 76, + _BTA = 82, }; #else diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 82588f3ba77f..210fe97464c3 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -14,12 +14,19 @@ #ifndef __ASM_ARC_PROCESSOR_H #define __ASM_ARC_PROCESSOR_H -#ifdef __KERNEL__ - #ifndef __ASSEMBLY__ #include <asm/ptrace.h> +#ifdef CONFIG_ARC_FPU_SAVE_RESTORE +/* These DPFP regs need to be saved/restored across ctx-sw */ +struct arc_fpu { + struct { + unsigned int l, h; + } aux_dpfp[2]; +}; +#endif + /* Arch specific stuff which needs to be saved per task. * However these items are not so important so as to earn a place in * struct thread_info @@ -128,6 +135,4 @@ extern unsigned int get_wchan(struct task_struct *p); */ #define TASK_UNMAPPED_BASE (TASK_SIZE / 3) -#endif /* __KERNEL__ */ - #endif /* __ASM_ARC_PROCESSOR_H */ diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index e10f8cef56a8..6e3ef5ba4f74 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -29,7 +29,6 @@ struct cpuinfo_data { }; extern int root_mountflags, end_mem; -extern int running_on_hw; void setup_processor(void); void __init setup_arch_memory(void); diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 5d06eee43ea9..3845b9e94f69 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -59,7 +59,15 @@ struct plat_smp_ops { /* TBD: stop exporting it for direct population by platform */ extern struct plat_smp_ops plat_smp_ops; -#endif /* CONFIG_SMP */ +#else /* CONFIG_SMP */ + +static inline void smp_init_cpus(void) {} +static inline const char *arc_platform_smp_cpuinfo(void) +{ + return ""; +} + +#endif /* !CONFIG_SMP */ /* * ARC700 doesn't support atomic Read-Modify-Write ops. diff --git a/arch/arc/include/asm/string.h b/arch/arc/include/asm/string.h index 87676c8f1412..95822b550a18 100644 --- a/arch/arc/include/asm/string.h +++ b/arch/arc/include/asm/string.h @@ -17,8 +17,6 @@ #include <linux/types.h> -#ifdef __KERNEL__ - #define __HAVE_ARCH_MEMSET #define __HAVE_ARCH_MEMCPY #define __HAVE_ARCH_MEMCMP @@ -36,5 +34,4 @@ extern char *strcpy(char *dest, const char *src); extern int strcmp(const char *cs, const char *ct); extern __kernel_size_t strlen(const char *); -#endif /* __KERNEL__ */ #endif /* _ASM_ARC_STRING_H */ diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h index dd785befe7fd..e56f9fcc5581 100644 --- a/arch/arc/include/asm/syscalls.h +++ b/arch/arc/include/asm/syscalls.h @@ -9,8 +9,6 @@ #ifndef _ASM_ARC_SYSCALLS_H #define _ASM_ARC_SYSCALLS_H 1 -#ifdef __KERNEL__ - #include <linux/compiler.h> #include <linux/linkage.h> #include <linux/types.h> @@ -22,6 +20,4 @@ int sys_arc_gettls(void); #include <asm-generic/syscalls.h> -#endif /* __KERNEL__ */ - #endif diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 45be21672011..02bc5ec0fb2e 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -16,8 +16,6 @@ #ifndef _ASM_THREAD_INFO_H #define _ASM_THREAD_INFO_H -#ifdef __KERNEL__ - #include <asm/page.h> #ifdef CONFIG_16KSTACKS @@ -114,6 +112,4 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) * syscall, so all that reamins to be tested is _TIF_WORK_MASK */ -#endif /* __KERNEL__ */ - #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/arc/include/asm/unaligned.h b/arch/arc/include/asm/unaligned.h index 3e5f071bc00c..6da6b4edaeda 100644 --- a/arch/arc/include/asm/unaligned.h +++ b/arch/arc/include/asm/unaligned.h @@ -14,7 +14,7 @@ #include <asm-generic/unaligned.h> #include <asm/ptrace.h> -#ifdef CONFIG_ARC_MISALIGN_ACCESS +#ifdef CONFIG_ARC_EMUL_UNALIGNED int misaligned_fixup(unsigned long address, struct pt_regs *regs, struct callee_regs *cregs); #else diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index 8004b4fa6461..113f2033da9f 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -16,7 +16,7 @@ obj-$(CONFIG_MODULES) += arcksyms.o module.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_ARC_DW2_UNWIND) += unwind.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_ARC_MISALIGN_ACCESS) += unaligned.o +obj-$(CONFIG_ARC_EMUL_UNALIGNED) += unaligned.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_ARC_METAWARE_HLINK) += arc_hostlink.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o diff --git a/arch/arc/kernel/disasm.c b/arch/arc/kernel/disasm.c index b8a549c4f540..3b7cd4864ba2 100644 --- a/arch/arc/kernel/disasm.c +++ b/arch/arc/kernel/disasm.c @@ -15,7 +15,7 @@ #include <linux/uaccess.h> #include <asm/disasm.h> -#if defined(CONFIG_KGDB) || defined(CONFIG_ARC_MISALIGN_ACCESS) || \ +#if defined(CONFIG_KGDB) || defined(CONFIG_ARC_EMUL_UNALIGNED) || \ defined(CONFIG_KPROBES) /* disasm_instr: Analyses instruction at addr, stores @@ -535,4 +535,4 @@ int __kprobes disasm_next_pc(unsigned long pc, struct pt_regs *regs, return instr.is_branch; } -#endif /* CONFIG_KGDB || CONFIG_ARC_MISALIGN_ACCESS || CONFIG_KPROBES */ +#endif /* CONFIG_KGDB || CONFIG_ARC_EMUL_UNALIGNED || CONFIG_KPROBES */ diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 4d2481bd8b98..b0e8666fdccc 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -91,16 +91,6 @@ stext: st r0, [@uboot_tag] st r2, [@uboot_arg] - ; Identify if running on ISS vs Silicon - ; IDENTITY Reg [ 3 2 1 0 ] - ; (chip-id) ^^^^^ ==> 0xffff for ISS - lr r0, [identity] - lsr r3, r0, 16 - cmp r3, 0xffff - mov.z r4, 0 - mov.nz r4, 1 - st r4, [@running_on_hw] - ; setup "current" tsk and optionally cache it in dedicated r25 mov r9, @init_task SET_CURR_TASK_ON_CPU r9, r0 ; r9 = tsk, r0 = scratch diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index b9a5685a990e..ae1c485cbc68 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -244,25 +244,23 @@ static int arc_pmu_device_probe(struct platform_device *pdev) pr_err("This core does not have performance counters!\n"); return -ENODEV; } + BUG_ON(pct_bcr.c > ARC_PMU_MAX_HWEVENTS); - arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), - GFP_KERNEL); + READ_BCR(ARC_REG_CC_BUILD, cc_bcr); + if (!cc_bcr.v) { + pr_err("Performance counters exist, but no countable conditions?\n"); + return -ENODEV; + } + + arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL); if (!arc_pmu) return -ENOMEM; arc_pmu->n_counters = pct_bcr.c; - BUG_ON(arc_pmu->n_counters > ARC_PMU_MAX_HWEVENTS); - arc_pmu->counter_size = 32 + (pct_bcr.s << 4); - pr_info("ARC PMU found with %d counters of size %d bits\n", - arc_pmu->n_counters, arc_pmu->counter_size); - - READ_BCR(ARC_REG_CC_BUILD, cc_bcr); - - if (!cc_bcr.v) - pr_err("Strange! Performance counters exist, but no countable conditions?\n"); - pr_info("ARC PMU has %d countable conditions\n", cc_bcr.c); + pr_info("ARC perf\t: %d counters (%d bits), %d countable conditions\n", + arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c); cc_name.str[8] = 0; for (i = 0; i < PERF_COUNT_HW_MAX; i++) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 119dddb752b2..252bf603db9c 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -13,7 +13,9 @@ #include <linux/console.h> #include <linux/module.h> #include <linux/cpu.h> +#include <linux/clk-provider.h> #include <linux/of_fdt.h> +#include <linux/of_platform.h> #include <linux/cache.h> #include <asm/sections.h> #include <asm/arcregs.h> @@ -24,11 +26,10 @@ #include <asm/unwind.h> #include <asm/clk.h> #include <asm/mach_desc.h> +#include <asm/smp.h> #define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x)) -int running_on_hw = 1; /* vs. on ISS */ - /* Part of U-boot ABI: see head.S */ int __initdata uboot_tag; char __initdata *uboot_arg; @@ -42,26 +43,26 @@ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; static void read_arc_build_cfg_regs(void) { struct bcr_perip uncached_space; + struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; FIX_PTR(cpu); READ_BCR(AUX_IDENTITY, cpu->core); + READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa); - cpu->timers = read_aux_reg(ARC_REG_TIMERS_BCR); + READ_BCR(ARC_REG_TIMERS_BCR, cpu->timers); cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); cpu->uncached_base = uncached_space.start << 24; - cpu->extn.mul = read_aux_reg(ARC_REG_MUL_BCR); - cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR); - cpu->extn.norm = read_aux_reg(ARC_REG_NORM_BCR); - cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR); - cpu->extn.barrel = read_aux_reg(ARC_REG_BARREL_BCR); - READ_BCR(ARC_REG_MAC_BCR, cpu->extn_mac_mul); + READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy); - cpu->extn.ext_arith = read_aux_reg(ARC_REG_EXTARITH_BCR); - cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR); + cpu->extn.norm = read_aux_reg(ARC_REG_NORM_BCR) > 1 ? 1 : 0; /* 2,3 */ + cpu->extn.barrel = read_aux_reg(ARC_REG_BARREL_BCR) > 1 ? 1 : 0; /* 2,3 */ + cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0; /* 1,3 */ + cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0; + cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */ /* Note that we read the CCM BCRs independent of kernel config * This is to catch the cases where user doesn't know that @@ -95,43 +96,76 @@ static void read_arc_build_cfg_regs(void) read_decode_mmu_bcr(); read_decode_cache_bcr(); - READ_BCR(ARC_REG_FP_BCR, cpu->fp); - READ_BCR(ARC_REG_DPFP_BCR, cpu->dpfp); + { + struct bcr_fp_arcompact sp, dp; + struct bcr_bpu_arcompact bpu; + + READ_BCR(ARC_REG_FP_BCR, sp); + READ_BCR(ARC_REG_DPFP_BCR, dp); + cpu->extn.fpu_sp = sp.ver ? 1 : 0; + cpu->extn.fpu_dp = dp.ver ? 1 : 0; + + READ_BCR(ARC_REG_BPU_BCR, bpu); + cpu->bpu.ver = bpu.ver; + cpu->bpu.full = bpu.fam ? 1 : 0; + if (bpu.ent) { + cpu->bpu.num_cache = 256 << (bpu.ent - 1); + cpu->bpu.num_pred = 256 << (bpu.ent - 1); + } + } + + READ_BCR(ARC_REG_AP_BCR, bcr); + cpu->extn.ap = bcr.ver ? 1 : 0; + + READ_BCR(ARC_REG_SMART_BCR, bcr); + cpu->extn.smart = bcr.ver ? 1 : 0; + + cpu->extn.debug = cpu->extn.ap | cpu->extn.smart; } static const struct cpuinfo_data arc_cpu_tbl[] = { - { {0x10, "ARCTangent A5"}, 0x1F}, { {0x20, "ARC 600" }, 0x2F}, { {0x30, "ARC 700" }, 0x33}, { {0x34, "ARC 700 R4.10"}, 0x34}, + { {0x35, "ARC 700 R4.11"}, 0x35}, { {0x00, NULL } } }; +#define IS_AVAIL1(v, str) ((v) ? str : "") +#define IS_USED(cfg) (IS_ENABLED(cfg) ? "" : "(not used) ") +#define IS_AVAIL2(v, str, cfg) IS_AVAIL1(v, str), IS_AVAIL1(v, IS_USED(cfg)) + static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { - int n = 0; struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; struct bcr_identity *core = &cpu->core; const struct cpuinfo_data *tbl; - int be = 0; -#ifdef CONFIG_CPU_BIG_ENDIAN - be = 1; -#endif + char *isa_nm; + int i, be, atomic; + int n = 0; + FIX_PTR(cpu); + { + isa_nm = "ARCompact"; + be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + + atomic = cpu->isa.atomic1; + if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ + atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); + } + n += scnprintf(buf + n, len - n, - "\nARC IDENTITY\t: Family [%#02x]" - " Cpu-id [%#02x] Chip-id [%#4x]\n", - core->family, core->cpu_id, - core->chip_id); + "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", + core->family, core->cpu_id, core->chip_id); for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) { if ((core->family >= tbl->info.id) && (core->family <= tbl->up_range)) { n += scnprintf(buf + n, len - n, - "processor\t: %s %s\n", - tbl->info.str, - be ? "[Big Endian]" : ""); + "processor [%d]\t: %s (%s ISA) %s\n", + cpu_id, tbl->info.str, isa_nm, + IS_AVAIL1(be, "[Big-Endian]")); break; } } @@ -143,34 +177,35 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) (unsigned int)(arc_get_core_freq() / 1000000), (unsigned int)(arc_get_core_freq() / 10000) % 100); - n += scnprintf(buf + n, len - n, "Timers\t\t: %s %s\n", - (cpu->timers & 0x200) ? "TIMER1" : "", - (cpu->timers & 0x100) ? "TIMER0" : ""); + n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ", + IS_AVAIL1(cpu->timers.t0, "Timer0 "), + IS_AVAIL1(cpu->timers.t1, "Timer1 "), + IS_AVAIL2(cpu->timers.rtsc, "64-bit RTSC ", CONFIG_ARC_HAS_RTSC)); - n += scnprintf(buf + n, len - n, "Vect Tbl Base\t: %#x\n", - cpu->vec_base); + n += i = scnprintf(buf + n, len - n, "%s%s", + IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC)); - n += scnprintf(buf + n, len - n, "UNCACHED Base\t: %#x\n", - cpu->uncached_base); + if (i) + n += scnprintf(buf + n, len - n, "\n\t\t: "); - return buf; -} + n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", + IS_AVAIL1(cpu->extn_mpy.ver, "mpy "), + IS_AVAIL1(cpu->extn.norm, "norm "), + IS_AVAIL1(cpu->extn.barrel, "barrel-shift "), + IS_AVAIL1(cpu->extn.swap, "swap "), + IS_AVAIL1(cpu->extn.minmax, "minmax "), + IS_AVAIL1(cpu->extn.crc, "crc "), + IS_AVAIL2(1, "swape", CONFIG_ARC_HAS_SWAPE)); -static const struct id_to_str mul_type_nm[] = { - { 0x0, "N/A"}, - { 0x1, "32x32 (spl Result Reg)" }, - { 0x2, "32x32 (ANY Result Reg)" } -}; + if (cpu->bpu.ver) + n += scnprintf(buf + n, len - n, + "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n", + IS_AVAIL1(cpu->bpu.full, "full"), + IS_AVAIL1(!cpu->bpu.full, "partial"), + cpu->bpu.num_cache, cpu->bpu.num_pred); -static const struct id_to_str mac_mul_nm[] = { - {0x0, "N/A"}, - {0x1, "N/A"}, - {0x2, "Dual 16 x 16"}, - {0x3, "N/A"}, - {0x4, "32x16"}, - {0x5, "N/A"}, - {0x6, "Dual 16x16 and 32x16"} -}; + return buf; +} static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) { @@ -178,67 +213,46 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; FIX_PTR(cpu); -#define IS_AVAIL1(var, str) ((var) ? str : "") -#define IS_AVAIL2(var, str) ((var == 0x2) ? str : "") -#define IS_USED(cfg) (IS_ENABLED(cfg) ? "(in-use)" : "(not used)") n += scnprintf(buf + n, len - n, - "Extn [700-Base]\t: %s %s %s %s %s %s\n", - IS_AVAIL2(cpu->extn.norm, "norm,"), - IS_AVAIL2(cpu->extn.barrel, "barrel-shift,"), - IS_AVAIL1(cpu->extn.swap, "swap,"), - IS_AVAIL2(cpu->extn.minmax, "minmax,"), - IS_AVAIL1(cpu->extn.crc, "crc,"), - IS_AVAIL2(cpu->extn.ext_arith, "ext-arith")); - - n += scnprintf(buf + n, len - n, "Extn [700-MPY]\t: %s", - mul_type_nm[cpu->extn.mul].str); - - n += scnprintf(buf + n, len - n, " MAC MPY: %s\n", - mac_mul_nm[cpu->extn_mac_mul.type].str); - - if (cpu->core.family == 0x34) { - n += scnprintf(buf + n, len - n, - "Extn [700-4.10]\t: LLOCK/SCOND %s, SWAPE %s, RTSC %s\n", - IS_USED(CONFIG_ARC_HAS_LLSC), - IS_USED(CONFIG_ARC_HAS_SWAPE), - IS_USED(CONFIG_ARC_HAS_RTSC)); - } - - n += scnprintf(buf + n, len - n, "Extn [CCM]\t: %s", - !(cpu->dccm.sz || cpu->iccm.sz) ? "N/A" : ""); - - if (cpu->dccm.sz) - n += scnprintf(buf + n, len - n, "DCCM: @ %x, %d KB ", - cpu->dccm.base_addr, TO_KB(cpu->dccm.sz)); - - if (cpu->iccm.sz) - n += scnprintf(buf + n, len - n, "ICCM: @ %x, %d KB", + "Vector Table\t: %#x\nUncached Base\t: %#x\n", + cpu->vec_base, cpu->uncached_base); + + if (cpu->extn.fpu_sp || cpu->extn.fpu_dp) + n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n", + IS_AVAIL1(cpu->extn.fpu_sp, "SP "), + IS_AVAIL1(cpu->extn.fpu_dp, "DP ")); + + if (cpu->extn.debug) + n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s%s\n", + IS_AVAIL1(cpu->extn.ap, "ActionPoint "), + IS_AVAIL1(cpu->extn.smart, "smaRT "), + IS_AVAIL1(cpu->extn.rtt, "RTT ")); + + if (cpu->dccm.sz || cpu->iccm.sz) + n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n", + cpu->dccm.base_addr, TO_KB(cpu->dccm.sz), cpu->iccm.base_addr, TO_KB(cpu->iccm.sz)); - n += scnprintf(buf + n, len - n, "\nExtn [FPU]\t: %s", - !(cpu->fp.ver || cpu->dpfp.ver) ? "N/A" : ""); - - if (cpu->fp.ver) - n += scnprintf(buf + n, len - n, "SP [v%d] %s", - cpu->fp.ver, cpu->fp.fast ? "(fast)" : ""); - - if (cpu->dpfp.ver) - n += scnprintf(buf + n, len - n, "DP [v%d] %s", - cpu->dpfp.ver, cpu->dpfp.fast ? "(fast)" : ""); - - n += scnprintf(buf + n, len - n, "\n"); - n += scnprintf(buf + n, len - n, "OS ABI [v3]\t: no-legacy-syscalls\n"); return buf; } -static void arc_chk_ccms(void) +static void arc_chk_core_config(void) { -#if defined(CONFIG_ARC_HAS_DCCM) || defined(CONFIG_ARC_HAS_ICCM) struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + int fpu_enabled; + + if (!cpu->timers.t0) + panic("Timer0 is not present!\n"); + + if (!cpu->timers.t1) + panic("Timer1 is not present!\n"); + + if (IS_ENABLED(CONFIG_ARC_HAS_RTSC) && !cpu->timers.rtsc) + panic("RTSC is not present\n"); #ifdef CONFIG_ARC_HAS_DCCM /* @@ -256,33 +270,20 @@ static void arc_chk_ccms(void) if (CONFIG_ARC_ICCM_SZ != cpu->iccm.sz) panic("Linux built with incorrect ICCM Size\n"); #endif -#endif -} -/* - * Ensure that FP hardware and kernel config match - * -If hardware contains DPFP, kernel needs to save/restore FPU state - * across context switches - * -If hardware lacks DPFP, but kernel configured to save FPU state then - * kernel trying to access non-existant DPFP regs will crash - * - * We only check for Dbl precision Floating Point, because only DPFP - * hardware has dedicated regs which need to be saved/restored on ctx-sw - * (Single Precision uses core regs), thus kernel is kind of oblivious to it - */ -static void arc_chk_fpu(void) -{ - struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + /* + * FP hardware/software config sanity + * -If hardware contains DPFP, kernel needs to save/restore FPU state + * -If not, it will crash trying to save/restore the non-existant regs + * + * (only DPDP checked since SP has no arch visible regs) + */ + fpu_enabled = IS_ENABLED(CONFIG_ARC_FPU_SAVE_RESTORE); - if (cpu->dpfp.ver) { -#ifndef CONFIG_ARC_FPU_SAVE_RESTORE - pr_warn("DPFP support broken in this kernel...\n"); -#endif - } else { -#ifdef CONFIG_ARC_FPU_SAVE_RESTORE - panic("H/w lacks DPFP support, apps won't work\n"); -#endif - } + if (cpu->extn.fpu_dp && !fpu_enabled) + pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n"); + else if (!cpu->extn.fpu_dp && fpu_enabled) + panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n"); } /* @@ -303,15 +304,11 @@ void setup_processor(void) arc_mmu_init(); arc_cache_init(); - arc_chk_ccms(); printk(arc_extn_mumbojumbo(cpu_id, str, sizeof(str))); - -#ifdef CONFIG_SMP printk(arc_platform_smp_cpuinfo()); -#endif - arc_chk_fpu(); + arc_chk_core_config(); } static inline int is_kernel(unsigned long addr) @@ -360,11 +357,7 @@ void __init setup_arch(char **cmdline_p) machine_desc->init_early(); setup_processor(); - -#ifdef CONFIG_SMP smp_init_cpus(); -#endif - setup_arch_memory(); /* copy flat DT out of .init and then unflatten it */ @@ -385,7 +378,13 @@ void __init setup_arch(char **cmdline_p) static int __init customize_machine(void) { - /* Add platform devices */ + of_clk_init(NULL); + /* + * Traverses flattened DeviceTree - registering platform devices + * (if any) complete with their resources + */ + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); + if (machine_desc->init_machine) machine_desc->init_machine(); @@ -419,19 +418,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE)); - seq_printf(m, "Bogo MIPS : \t%lu.%02lu\n", + seq_printf(m, "Bogo MIPS\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100); seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE)); - seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE)); - seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE)); - -#ifdef CONFIG_SMP seq_printf(m, arc_platform_smp_cpuinfo()); -#endif free_page((unsigned long)str); done: diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index dcd317c47d09..d01df0c517a2 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -101,7 +101,7 @@ void __weak arc_platform_smp_wait_to_boot(int cpu) const char *arc_platform_smp_cpuinfo(void) { - return plat_smp_ops.info; + return plat_smp_ops.info ? : ""; } /* diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 9e1142729fd1..8c3a3e02ba92 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -530,16 +530,9 @@ EXPORT_SYMBOL(dma_cache_wback); */ void flush_icache_range(unsigned long kstart, unsigned long kend) { - unsigned int tot_sz, off, sz; - unsigned long phy, pfn; + unsigned int tot_sz; - /* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */ - - /* This is not the right API for user virtual address */ - if (kstart < TASK_SIZE) { - BUG_ON("Flush icache range for user virtual addr space"); - return; - } + WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__); /* Shortcut for bigger flush ranges. * Here we don't care if this was kernel virtual or phy addr @@ -572,6 +565,9 @@ void flush_icache_range(unsigned long kstart, unsigned long kend) * straddles across 2 virtual pages and hence need for loop */ while (tot_sz > 0) { + unsigned int off, sz; + unsigned long phy, pfn; + off = kstart % PAGE_SIZE; pfn = vmalloc_to_pfn((void *)kstart); phy = (pfn << PAGE_SHIFT) + off; diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index e1acf0ce5647..7f47d2a56f44 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -609,14 +609,12 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) int n = 0; struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; - n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ", - p_mmu->ver, TO_KB(p_mmu->pg_sz)); - n += scnprintf(buf + n, len - n, - "J-TLB %d (%dx%d), uDTLB %d, uITLB %d, %s\n", + "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", + p_mmu->ver, TO_KB(p_mmu->pg_sz), p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, - IS_ENABLED(CONFIG_ARC_MMU_SASID) ? "SASID" : ""); + IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : ""); return buf; } diff --git a/arch/arc/plat-arcfpga/Kconfig b/arch/arc/plat-arcfpga/Kconfig index b9f34cf55acf..217593a70751 100644 --- a/arch/arc/plat-arcfpga/Kconfig +++ b/arch/arc/plat-arcfpga/Kconfig @@ -8,7 +8,7 @@ menuconfig ARC_PLAT_FPGA_LEGACY bool "\"Legacy\" ARC FPGA dev Boards" - select ISS_SMP_EXTN if SMP + select ARC_HAS_COH_CACHES if SMP help Support for ARC development boards, provided by Synopsys. These are based on FPGA or ISS. e.g. @@ -18,17 +18,6 @@ menuconfig ARC_PLAT_FPGA_LEGACY if ARC_PLAT_FPGA_LEGACY -config ARC_BOARD_ANGEL4 - bool "ARC Angel4" - default y - help - ARC Angel4 FPGA Ref Platform (Xilinx Virtex Based) - -config ARC_BOARD_ML509 - bool "ML509" - help - ARC ML509 FPGA Ref Platform (Xilinx Virtex-5 Based) - config ISS_SMP_EXTN bool "ARC SMP Extensions (ISS Models only)" default n diff --git a/arch/arc/plat-arcfpga/include/plat/irq.h b/arch/arc/plat-arcfpga/include/plat/irq.h deleted file mode 100644 index 2c9dea690ac4..000000000000 --- a/arch/arc/plat-arcfpga/include/plat/irq.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * vineetg: Feb 2009 - * -For AA4 board, IRQ assignments to peripherals - */ - -#ifndef __PLAT_IRQ_H -#define __PLAT_IRQ_H - -#define UART0_IRQ 5 -#define UART1_IRQ 10 -#define UART2_IRQ 11 - -#define IDE_IRQ 13 -#define PCI_IRQ 14 -#define PS2_IRQ 15 - -#ifdef CONFIG_SMP -#define IDU_INTERRUPT_0 16 -#endif - -#endif diff --git a/arch/arc/plat-arcfpga/include/plat/memmap.h b/arch/arc/plat-arcfpga/include/plat/memmap.h deleted file mode 100644 index 5c78e6135a1f..000000000000 --- a/arch/arc/plat-arcfpga/include/plat/memmap.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * vineetg: Feb 2009 - * -For AA4 board, System Memory Map for Peripherals etc - */ - -#ifndef __PLAT_MEMMAP_H -#define __PLAT_MEMMAP_H - -#define UART0_BASE 0xC0FC1000 -#define UART1_BASE 0xC0FC1100 - -#define IDE_CONTROLLER_BASE 0xC0FC9000 - -#define AHB_PCI_HOST_BRG_BASE 0xC0FD0000 - -#define PGU_BASEADDR 0xC0FC8000 -#define VLCK_ADDR 0xC0FCF028 - -#define BVCI_LAT_UNIT_BASE 0xC0FED000 - -#define PS2_BASE_ADDR 0xC0FCC000 - -#endif diff --git a/arch/arc/plat-arcfpga/platform.c b/arch/arc/plat-arcfpga/platform.c index 1038949a99a1..afc88254acc1 100644 --- a/arch/arc/plat-arcfpga/platform.c +++ b/arch/arc/plat-arcfpga/platform.c @@ -8,37 +8,9 @@ * published by the Free Software Foundation. */ -#include <linux/types.h> #include <linux/init.h> -#include <linux/device.h> -#include <linux/platform_device.h> -#include <linux/io.h> -#include <linux/console.h> -#include <linux/of_platform.h> -#include <asm/setup.h> -#include <asm/clk.h> #include <asm/mach_desc.h> -#include <plat/memmap.h> #include <plat/smp.h> -#include <plat/irq.h> - -static void __init plat_fpga_early_init(void) -{ - pr_info("[plat-arcfpga]: registering early dev resources\n"); - -#ifdef CONFIG_ISS_SMP_EXTN - iss_model_init_early_smp(); -#endif -} - -static void __init plat_fpga_populate_dev(void) -{ - /* - * Traverses flattened DeviceTree - registering platform devices - * (if any) complete with their resources - */ - of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); -} /*----------------------- Machine Descriptions ------------------------------ * @@ -48,41 +20,26 @@ static void __init plat_fpga_populate_dev(void) * callback set, by matching the DT compatible name. */ -static const char *aa4_compat[] __initconst = { +static const char *legacy_fpga_compat[] __initconst = { "snps,arc-angel4", - NULL, -}; - -MACHINE_START(ANGEL4, "angel4") - .dt_compat = aa4_compat, - .init_early = plat_fpga_early_init, - .init_machine = plat_fpga_populate_dev, -#ifdef CONFIG_ISS_SMP_EXTN - .init_smp = iss_model_init_smp, -#endif -MACHINE_END - -static const char *ml509_compat[] __initconst = { "snps,arc-ml509", NULL, }; -MACHINE_START(ML509, "ml509") - .dt_compat = ml509_compat, - .init_early = plat_fpga_early_init, - .init_machine = plat_fpga_populate_dev, -#ifdef CONFIG_SMP +MACHINE_START(LEGACY_FPGA, "legacy_fpga") + .dt_compat = legacy_fpga_compat, +#ifdef CONFIG_ISS_SMP_EXTN + .init_early = iss_model_init_early_smp, .init_smp = iss_model_init_smp, #endif MACHINE_END -static const char *nsimosci_compat[] __initconst = { +static const char *simulation_compat[] __initconst = { + "snps,nsim", "snps,nsimosci", NULL, }; -MACHINE_START(NSIMOSCI, "nsimosci") - .dt_compat = nsimosci_compat, - .init_early = NULL, - .init_machine = plat_fpga_populate_dev, +MACHINE_START(SIMULATION, "simulation") + .dt_compat = simulation_compat, MACHINE_END diff --git a/arch/arc/plat-arcfpga/smp.c b/arch/arc/plat-arcfpga/smp.c index 92bad9122077..64797ba3bbe3 100644 --- a/arch/arc/plat-arcfpga/smp.c +++ b/arch/arc/plat-arcfpga/smp.c @@ -13,9 +13,10 @@ #include <linux/smp.h> #include <linux/irq.h> -#include <plat/irq.h> #include <plat/smp.h> +#define IDU_INTERRUPT_0 16 + static char smp_cpuinfo_buf[128]; /* diff --git a/arch/arc/plat-tb10x/Kconfig b/arch/arc/plat-tb10x/Kconfig index 6994c188dc88..d14b3d3c5dfd 100644 --- a/arch/arc/plat-tb10x/Kconfig +++ b/arch/arc/plat-tb10x/Kconfig @@ -18,7 +18,6 @@ menuconfig ARC_PLAT_TB10X bool "Abilis TB10x" - select COMMON_CLK select PINCTRL select PINCTRL_TB10X select PINMUX diff --git a/arch/arc/plat-tb10x/tb10x.c b/arch/arc/plat-tb10x/tb10x.c index 06cb30929460..da0ac0960a4b 100644 --- a/arch/arc/plat-tb10x/tb10x.c +++ b/arch/arc/plat-tb10x/tb10x.c @@ -19,21 +19,9 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - #include <linux/init.h> -#include <linux/of_platform.h> -#include <linux/clk-provider.h> -#include <linux/pinctrl/consumer.h> - #include <asm/mach_desc.h> - -static void __init tb10x_platform_init(void) -{ - of_clk_init(NULL); - of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); -} - static const char *tb10x_compat[] __initdata = { "abilis,arc-tb10x", NULL, @@ -41,5 +29,4 @@ static const char *tb10x_compat[] __initdata = { MACHINE_START(TB10x, "tb10x") .dt_compat = tb10x_compat, - .init_machine = tb10x_platform_init, MACHINE_END diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index 63392f4b29a4..d2008887eb8c 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -48,7 +48,6 @@ CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y -CONFIG_CMA=y CONFIG_PPC_64K_PAGES=y CONFIG_PPC_SUBPAGE_PROT=y CONFIG_SCHED_SMT=y @@ -138,6 +137,7 @@ CONFIG_NETCONSOLE=y CONFIG_NETPOLL_TRAP=y CONFIG_TUN=m CONFIG_VIRTIO_NET=m +CONFIG_VHOST_NET=m CONFIG_VORTEX=y CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y @@ -303,4 +303,9 @@ CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_NX=y CONFIG_CRYPTO_DEV_NX_ENCRYPT=m +CONFIG_VIRTUALIZATION=y +CONFIG_KVM_BOOK3S_64=m +CONFIG_KVM_BOOK3S_64_HV=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 3b260efbfbf9..ca07f9c27335 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -71,9 +71,10 @@ struct device_node; #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ -#define EEH_PE_RESET (1 << 2) /* PE reset in progress */ +#define EEH_PE_CFG_BLOCKED (1 << 2) /* Block config access */ #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ +#define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */ struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 0bb23725b1e7..8bf1b6351716 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -34,7 +34,7 @@ do { \ (regs)->result = 0; \ (regs)->nip = __ip; \ - (regs)->gpr[1] = *(unsigned long *)__get_SP(); \ + (regs)->gpr[1] = current_stack_pointer(); \ asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \ } while (0) #endif diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index fe3f9488f321..c998279bd85b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1265,8 +1265,7 @@ static inline unsigned long mfvtb (void) #define proc_trap() asm volatile("trap") -#define __get_SP() ({unsigned long sp; \ - asm volatile("mr %0,1": "=r" (sp)); sp;}) +extern unsigned long current_stack_pointer(void); extern unsigned long scom970_read(unsigned int address); extern void scom970_write(unsigned int address, unsigned long value); diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 6fa2708da153..6240698fee9a 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -19,7 +19,7 @@ /* ftrace syscalls requires exporting the sys_call_table */ #ifdef CONFIG_FTRACE_SYSCALLS -extern const unsigned long *sys_call_table; +extern const unsigned long sys_call_table[]; #endif /* CONFIG_FTRACE_SYSCALLS */ static inline long syscall_get_nr(struct task_struct *task, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index adac9dc54aee..484b2d4462c1 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -53,9 +53,16 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, #else struct page *page; int node = dev_to_node(dev); +#ifdef CONFIG_FSL_SOC u64 pfn = get_pfn_limit(dev); int zone; + /* + * This code should be OK on other platforms, but we have drivers that + * don't set coherent_dma_mask. As a workaround we just ifdef it. This + * whole routine needs some serious cleanup. + */ + zone = dma_pfn_limit_to_zone(pfn); if (zone < 0) { dev_err(dev, "%s: No suitable zone for pfn %#llx\n", @@ -73,6 +80,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, break; #endif }; +#endif /* CONFIG_FSL_SOC */ /* ignore region specifiers */ flag &= ~(__GFP_HIGHMEM); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index d543e4179c18..2248a1999c64 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -257,6 +257,13 @@ static void *eeh_dump_pe_log(void *data, void *flag) struct eeh_dev *edev, *tmp; size_t *plen = flag; + /* If the PE's config space is blocked, 0xFF's will be + * returned. It's pointless to collect the log in this + * case. + */ + if (pe->state & EEH_PE_CFG_BLOCKED) + return NULL; + eeh_pe_for_each_dev(pe, edev, tmp) *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, EEH_PCI_REGS_LOG_LEN - *plen); @@ -673,18 +680,18 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat switch (state) { case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - eeh_pe_state_clear(pe, EEH_PE_RESET); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); break; case pcie_hot_reset: - eeh_pe_state_mark(pe, EEH_PE_RESET); + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: - eeh_pe_state_mark(pe, EEH_PE_RESET); + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: - eeh_pe_state_clear(pe, EEH_PE_RESET); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); return -EINVAL; }; @@ -1523,7 +1530,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option) switch (option) { case EEH_RESET_DEACTIVATE: ret = eeh_ops->reset(pe, option); - eeh_pe_state_clear(pe, EEH_PE_RESET); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); if (ret) break; @@ -1538,7 +1545,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option) */ eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); - eeh_pe_state_mark(pe, EEH_PE_RESET); + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); ret = eeh_ops->reset(pe, option); break; default: diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 3fd514f8e4b2..6535936bdf27 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -528,13 +528,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_report_error, &result); /* Issue reset */ - eeh_pe_state_mark(pe, EEH_PE_RESET); + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); ret = eeh_reset_pe(pe); if (ret) { - eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_CFG_BLOCKED); return ret; } - eeh_pe_state_clear(pe, EEH_PE_RESET); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); /* Unfreeze the PE */ ret = eeh_clear_pe_frozen_state(pe, true); @@ -601,10 +601,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. */ - eeh_pe_state_mark(pe, EEH_PE_RESET); + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); rc = eeh_reset_pe(pe); if (rc) { - eeh_pe_state_clear(pe, EEH_PE_RESET); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); return rc; } @@ -613,7 +613,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); - eeh_pe_state_clear(pe, EEH_PE_RESET); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 53dd0915e690..5a63e2b0f65b 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -525,7 +525,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag) pe->state |= state; /* Offline PCI devices if applicable */ - if (state != EEH_PE_ISOLATED) + if (!(state & EEH_PE_ISOLATED)) return NULL; eeh_pe_for_each_dev(pe, edev, tmp) { @@ -534,6 +534,10 @@ static void *__eeh_pe_state_mark(void *data, void *flag) pdev->error_state = pci_channel_io_frozen; } + /* Block PCI config access if required */ + if (pe->state & EEH_PE_CFG_RESTRICTED) + pe->state |= EEH_PE_CFG_BLOCKED; + return NULL; } @@ -611,6 +615,10 @@ static void *__eeh_pe_state_clear(void *data, void *flag) pdev->error_state = pci_channel_io_normal; } + /* Unblock PCI config access if required */ + if (pe->state & EEH_PE_CFG_RESTRICTED) + pe->state &= ~EEH_PE_CFG_BLOCKED; + return NULL; } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 050f79a4a168..72e783ea0681 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1270,11 +1270,6 @@ hmi_exception_early: addi r3,r1,STACK_FRAME_OVERHEAD bl hmi_exception_realmode /* Windup the stack. */ - /* Clear MSR_RI before setting SRR0 and SRR1. */ - li r0,MSR_RI - mfmsr r9 /* get MSR value */ - andc r9,r9,r0 - mtmsrd r9,1 /* Clear MSR_RI */ /* Move original HSRR0 and HSRR1 into the respective regs */ ld r9,_MSR(r1) mtspr SPRN_HSRR1,r9 diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8eb857f216c1..c14383575fe8 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -466,7 +466,7 @@ static inline void check_stack_overflow(void) #ifdef CONFIG_DEBUG_STACKOVERFLOW long sp; - sp = __get_SP() & (THREAD_SIZE-1); + sp = current_stack_pointer() & (THREAD_SIZE-1); /* check for stack overflow: is there less than 2KB free? */ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 7ce26d45777e..0d432194c018 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -114,3 +114,7 @@ _GLOBAL(longjmp) mtlr r0 mr r3,r4 blr + +_GLOBAL(current_stack_pointer) + PPC_LL r3,0(r1) + blr diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index c4dfff6c2719..202963ee013a 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -41,3 +41,5 @@ EXPORT_SYMBOL(giveup_spe); #ifdef CONFIG_EPAPR_PARAVIRT EXPORT_SYMBOL(epapr_hypercall_start); #endif + +EXPORT_SYMBOL(current_stack_pointer); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index aa1df89c8b2a..923cd2daba89 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1545,7 +1545,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) tsk = current; if (sp == 0) { if (tsk == current) - asm("mr %0,1" : "=r" (sp)); + sp = current_stack_pointer(); else sp = tsk->thread.ksp; } diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index c168337aef9d..7c55b86206b3 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -66,6 +66,11 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) return PCIBIOS_DEVICE_NOT_FOUND; if (!config_access_valid(pdn, where)) return PCIBIOS_BAD_REGISTER_NUMBER; +#ifdef CONFIG_EEH + if (pdn->edev && pdn->edev->pe && + (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED)) + return PCIBIOS_SET_FAILED; +#endif addr = rtas_config_addr(pdn->busno, pdn->devfn, where); buid = pdn->phb->buid; @@ -90,9 +95,6 @@ static int rtas_pci_read_config(struct pci_bus *bus, struct device_node *busdn, *dn; struct pci_dn *pdn; bool found = false; -#ifdef CONFIG_EEH - struct eeh_dev *edev; -#endif int ret; /* Search only direct children of the bus */ @@ -109,11 +111,6 @@ static int rtas_pci_read_config(struct pci_bus *bus, if (!found) return PCIBIOS_DEVICE_NOT_FOUND; -#ifdef CONFIG_EEH - edev = of_node_to_eeh_dev(dn); - if (edev && edev->pe && edev->pe->state & EEH_PE_RESET) - return PCIBIOS_DEVICE_NOT_FOUND; -#endif ret = rtas_read_config(pdn, where, size, val); if (*val == EEH_IO_ERROR_VALUE(size) && @@ -132,6 +129,11 @@ int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) return PCIBIOS_DEVICE_NOT_FOUND; if (!config_access_valid(pdn, where)) return PCIBIOS_BAD_REGISTER_NUMBER; +#ifdef CONFIG_EEH + if (pdn->edev && pdn->edev->pe && + (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED)) + return PCIBIOS_SET_FAILED; +#endif addr = rtas_config_addr(pdn->busno, pdn->devfn, where); buid = pdn->phb->buid; @@ -155,10 +157,6 @@ static int rtas_pci_write_config(struct pci_bus *bus, struct device_node *busdn, *dn; struct pci_dn *pdn; bool found = false; -#ifdef CONFIG_EEH - struct eeh_dev *edev; -#endif - int ret; /* Search only direct children of the bus */ busdn = pci_bus_to_OF_node(bus); @@ -173,14 +171,8 @@ static int rtas_pci_write_config(struct pci_bus *bus, if (!found) return PCIBIOS_DEVICE_NOT_FOUND; -#ifdef CONFIG_EEH - edev = of_node_to_eeh_dev(dn); - if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET)) - return PCIBIOS_DEVICE_NOT_FOUND; -#endif - ret = rtas_write_config(pdn, where, size, val); - return ret; + return rtas_write_config(pdn, where, size, val); } static struct pci_ops rtas_pci_ops = { diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index cd07d79ad21c..4f3cfe1b6a33 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -522,36 +522,36 @@ void __init setup_system(void) smp_release_cpus(); #endif - printk("Starting Linux PPC64 %s\n", init_utsname()->version); + pr_info("Starting Linux PPC64 %s\n", init_utsname()->version); - printk("-----------------------------------------------------\n"); - printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - printk("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); + pr_info("-----------------------------------------------------\n"); + pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); + pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); if (ppc64_caches.dline_size != 0x80) - printk("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); + pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); if (ppc64_caches.iline_size != 0x80) - printk("icache_line_size = 0x%x\n", ppc64_caches.iline_size); + pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size); - printk("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); - printk(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); - printk(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); - printk("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, + pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); + pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); + pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); + pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, cur_cpu_spec->cpu_user_features2); - printk("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); - printk("firmware_features = 0x%016lx\n", powerpc_firmware_features); + pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); + pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); #ifdef CONFIG_PPC_STD_MMU_64 if (htab_address) - printk("htab_address = 0x%p\n", htab_address); + pr_info("htab_address = 0x%p\n", htab_address); - printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); + pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); #endif if (PHYSICAL_START > 0) - printk("physical_start = 0x%llx\n", + pr_info("physical_start = 0x%llx\n", (unsigned long long)PHYSICAL_START); - printk("-----------------------------------------------------\n"); + pr_info("-----------------------------------------------------\n"); DBG(" <- setup_system()\n"); } diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 3d30ef1038e5..ea43a347a104 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -50,7 +50,7 @@ void save_stack_trace(struct stack_trace *trace) { unsigned long sp; - asm("mr %0,1" : "=r" (sp)); + sp = current_stack_pointer(); save_context_stack(trace, sp, current, 1); } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 649666d5d1c2..e5236c24dc07 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -8,6 +8,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "numa: " fmt + #include <linux/threads.h> #include <linux/bootmem.h> #include <linux/init.h> @@ -1153,6 +1155,22 @@ static int __init early_numa(char *p) } early_param("numa", early_numa); +static bool topology_updates_enabled = true; + +static int __init early_topology_updates(char *p) +{ + if (!p) + return 0; + + if (!strcmp(p, "off")) { + pr_info("Disabling topology updates\n"); + topology_updates_enabled = false; + } + + return 0; +} +early_param("topology_updates", early_topology_updates); + #ifdef CONFIG_MEMORY_HOTPLUG /* * Find the node associated with a hot added memory section for @@ -1442,8 +1460,11 @@ static long hcall_vphn(unsigned long cpu, __be32 *associativity) long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; u64 flags = 1; int hwcpu = get_hard_smp_processor_id(cpu); + int i; rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); + for (i = 0; i < 6; i++) + retbuf[i] = cpu_to_be64(retbuf[i]); vphn_unpack_associativity(retbuf, associativity); return rc; @@ -1539,6 +1560,9 @@ int arch_update_cpu_topology(void) struct device *dev; int weight, new_nid, i = 0; + if (!prrn_enabled && !vphn_enabled) + return 0; + weight = cpumask_weight(&cpu_associativity_changes_mask); if (!weight) return 0; @@ -1592,6 +1616,15 @@ int arch_update_cpu_topology(void) cpu = cpu_last_thread_sibling(cpu); } + pr_debug("Topology update for the following CPUs:\n"); + if (cpumask_weight(&updated_cpus)) { + for (ud = &updates[0]; ud; ud = ud->next) { + pr_debug("cpu %d moving from node %d " + "to %d\n", ud->cpu, + ud->old_nid, ud->new_nid); + } + } + /* * In cases where we have nothing to update (because the updates list * is too short or because the new topology is same as the old one), @@ -1800,8 +1833,12 @@ static const struct file_operations topology_ops = { static int topology_update_init(void) { - start_topology_update(); - proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops); + /* Do not poll for changes if disabled at boot */ + if (topology_updates_enabled) + start_topology_update(); + + if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops)) + return -ENOMEM; return 0; } diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 426814a2ede3..eba9cb10619c 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -373,7 +373,7 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe) * moving forward, we have to return operational * state during PE reset. */ - if (pe->state & EEH_PE_RESET) { + if (pe->state & EEH_PE_CFG_BLOCKED) { result = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE | EEH_STATE_MMIO_ENABLED | diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 3e89cbf55885..1d19e7917d7f 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -169,6 +169,26 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) } /* + * If the PE contains any one of following adapters, the + * PCI config space can't be accessed when dumping EEH log. + * Otherwise, we will run into fenced PHB caused by shortage + * of outbound credits in the adapter. The PCI config access + * should be blocked until PE reset. MMIO access is dropped + * by hardware certainly. In order to drop PCI config requests, + * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which + * will be checked in the backend for PE state retrival. If + * the PE becomes frozen for the first time and the flag has + * been set for the PE, we will set EEH_PE_CFG_BLOCKED for + * that PE to block its config space. + * + * Broadcom Austin 4-ports NICs (14e4:1657) + * Broadcom Shiner 2-ports 10G NICs (14e4:168e) + */ + if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) || + (dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e)) + edev->pe->state |= EEH_PE_CFG_RESTRICTED; + + /* * Cache the PE primary bus, which can't be fetched when * full hotplug is in progress. In that case, all child * PCI devices of the PE are expected to be removed prior @@ -383,6 +403,39 @@ static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func, return ret; } +static inline bool powernv_eeh_cfg_blocked(struct device_node *dn) +{ + struct eeh_dev *edev = of_node_to_eeh_dev(dn); + + if (!edev || !edev->pe) + return false; + + if (edev->pe->state & EEH_PE_CFG_BLOCKED) + return true; + + return false; +} + +static int powernv_eeh_read_config(struct device_node *dn, + int where, int size, u32 *val) +{ + if (powernv_eeh_cfg_blocked(dn)) { + *val = 0xFFFFFFFF; + return PCIBIOS_SET_FAILED; + } + + return pnv_pci_cfg_read(dn, where, size, val); +} + +static int powernv_eeh_write_config(struct device_node *dn, + int where, int size, u32 val) +{ + if (powernv_eeh_cfg_blocked(dn)) + return PCIBIOS_SET_FAILED; + + return pnv_pci_cfg_write(dn, where, size, val); +} + /** * powernv_eeh_next_error - Retrieve next EEH error to handle * @pe: Affected PE @@ -440,8 +493,8 @@ static struct eeh_ops powernv_eeh_ops = { .get_log = powernv_eeh_get_log, .configure_bridge = powernv_eeh_configure_bridge, .err_inject = powernv_eeh_err_inject, - .read_config = pnv_pci_cfg_read, - .write_config = pnv_pci_cfg_write, + .read_config = powernv_eeh_read_config, + .write_config = powernv_eeh_write_config, .next_error = powernv_eeh_next_error, .restore_config = powernv_eeh_restore_config }; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index b642b0562f5a..d019b081df9d 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -194,6 +194,27 @@ static int __init opal_register_exception_handlers(void) * fwnmi area at 0x7000 to provide the glue space to OPAL */ glue = 0x7000; + + /* + * Check if we are running on newer firmware that exports + * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch + * the HMI interrupt and we catch it directly in Linux. + * + * For older firmware (i.e currently released POWER8 System Firmware + * as of today <= SV810_087), we fallback to old behavior and let OPAL + * patch the HMI vector and handle it inside OPAL firmware. + * + * For newer firmware (in development/yet to be released) we will + * start catching/handling HMI directly in Linux. + */ + if (!opal_check_token(OPAL_HANDLE_HMI)) { + pr_info("opal: Old firmware detected, OPAL handles HMIs.\n"); + opal_register_exception_handler( + OPAL_HYPERVISOR_MAINTENANCE_HANDLER, + 0, glue); + glue += 128; + } + opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); #endif diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b3ca77ddf36d..b2187d0068b8 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -505,7 +505,7 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose, edev = of_node_to_eeh_dev(dn); if (edev) { if (edev->pe && - (edev->pe->state & EEH_PE_RESET)) + (edev->pe->state & EEH_PE_CFG_BLOCKED)) return false; if (edev->mode & EEH_DEV_REMOVED) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index fdf01b660d59..6ad83bd11fe2 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -25,11 +25,11 @@ #include <asm/rtas.h> struct cc_workarea { - u32 drc_index; - u32 zero; - u32 name_offset; - u32 prop_length; - u32 prop_offset; + __be32 drc_index; + __be32 zero; + __be32 name_offset; + __be32 prop_length; + __be32 prop_offset; }; void dlpar_free_cc_property(struct property *prop) @@ -49,11 +49,11 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa) if (!prop) return NULL; - name = (char *)ccwa + ccwa->name_offset; + name = (char *)ccwa + be32_to_cpu(ccwa->name_offset); prop->name = kstrdup(name, GFP_KERNEL); - prop->length = ccwa->prop_length; - value = (char *)ccwa + ccwa->prop_offset; + prop->length = be32_to_cpu(ccwa->prop_length); + value = (char *)ccwa + be32_to_cpu(ccwa->prop_offset); prop->value = kmemdup(value, prop->length, GFP_KERNEL); if (!prop->value) { dlpar_free_cc_property(prop); @@ -79,7 +79,7 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa, if (!dn) return NULL; - name = (char *)ccwa + ccwa->name_offset; + name = (char *)ccwa + be32_to_cpu(ccwa->name_offset); dn->full_name = kasprintf(GFP_KERNEL, "%s/%s", path, name); if (!dn->full_name) { kfree(dn); @@ -126,7 +126,7 @@ void dlpar_free_cc_nodes(struct device_node *dn) #define CALL_AGAIN -2 #define ERR_CFG_USE -9003 -struct device_node *dlpar_configure_connector(u32 drc_index, +struct device_node *dlpar_configure_connector(__be32 drc_index, struct device_node *parent) { struct device_node *dn; @@ -414,7 +414,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) if (!parent) return -ENODEV; - dn = dlpar_configure_connector(drc_index, parent); + dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); if (!dn) return -EINVAL; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index b174fa751d26..5c375f93c669 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -247,7 +247,7 @@ static int pseries_add_processor(struct device_node *np) unsigned int cpu; cpumask_var_t candidate_mask, tmp; int err = -ENOSPC, len, nthreads, i; - const u32 *intserv; + const __be32 *intserv; intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -293,7 +293,7 @@ static int pseries_add_processor(struct device_node *np) for_each_cpu(cpu, tmp) { BUG_ON(cpu_present(cpu)); set_cpu_present(cpu, true); - set_hard_smp_processor_id(cpu, *intserv++); + set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++)); } err = 0; out_unlock: diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index de1ec54a2a57..e32e00976a94 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -30,7 +30,6 @@ #include <linux/mm.h> #include <linux/memblock.h> #include <linux/spinlock.h> -#include <linux/sched.h> /* for show_stack */ #include <linux/string.h> #include <linux/pci.h> #include <linux/dma-mapping.h> @@ -168,7 +167,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, printk("\tindex = 0x%llx\n", (u64)tbl->it_index); printk("\ttcenum = 0x%llx\n", (u64)tcenum); printk("\ttce val = 0x%llx\n", tce ); - show_stack(current, (unsigned long *)__get_SP()); + dump_stack(); } tcenum++; @@ -257,7 +256,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, printk("\tindex = 0x%llx\n", (u64)tbl->it_index); printk("\tnpages = 0x%llx\n", (u64)npages); printk("\ttce[0] val = 0x%llx\n", tcep[0]); - show_stack(current, (unsigned long *)__get_SP()); + dump_stack(); } return ret; } @@ -273,7 +272,7 @@ static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); printk("\tindex = 0x%llx\n", (u64)tbl->it_index); printk("\ttcenum = 0x%llx\n", (u64)tcenum); - show_stack(current, (unsigned long *)__get_SP()); + dump_stack(); } tcenum++; @@ -292,7 +291,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n printk("\trc = %lld\n", rc); printk("\tindex = 0x%llx\n", (u64)tbl->it_index); printk("\tnpages = 0x%llx\n", (u64)npages); - show_stack(current, (unsigned long *)__get_SP()); + dump_stack(); } } @@ -307,7 +306,7 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); printk("\tindex = 0x%llx\n", (u64)tbl->it_index); printk("\ttcenum = 0x%llx\n", (u64)tcenum); - show_stack(current, (unsigned long *)__get_SP()); + dump_stack(); } return tce_ret; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 361add62abf1..1796c5438cc6 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -56,7 +56,8 @@ extern void hvc_vio_init_early(void); /* Dynamic logical Partitioning/Mobility */ extern void dlpar_free_cc_nodes(struct device_node *); extern void dlpar_free_cc_property(struct property *); -extern struct device_node *dlpar_configure_connector(u32, struct device_node *); +extern struct device_node *dlpar_configure_connector(__be32, + struct device_node *); extern int dlpar_attach_node(struct device_node *); extern int dlpar_detach_node(struct device_node *); diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 0c75214b6f92..73b64c73505b 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -145,59 +145,64 @@ void msi_bitmap_free(struct msi_bitmap *bmp) #ifdef CONFIG_MSI_BITMAP_SELFTEST -#define check(x) \ - if (!(x)) printk("msi_bitmap: test failed at line %d\n", __LINE__); - static void __init test_basics(void) { struct msi_bitmap bmp; - int i, size = 512; + int rc, i, size = 512; /* Can't allocate a bitmap of 0 irqs */ - check(msi_bitmap_alloc(&bmp, 0, NULL) != 0); + WARN_ON(msi_bitmap_alloc(&bmp, 0, NULL) == 0); /* of_node may be NULL */ - check(0 == msi_bitmap_alloc(&bmp, size, NULL)); + WARN_ON(msi_bitmap_alloc(&bmp, size, NULL)); /* Should all be free by default */ - check(0 == bitmap_find_free_region(bmp.bitmap, size, - get_count_order(size))); + WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size))); bitmap_release_region(bmp.bitmap, 0, get_count_order(size)); /* With no node, there's no msi-available-ranges, so expect > 0 */ - check(msi_bitmap_reserve_dt_hwirqs(&bmp) > 0); + WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0); /* Should all still be free */ - check(0 == bitmap_find_free_region(bmp.bitmap, size, - get_count_order(size))); + WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size))); bitmap_release_region(bmp.bitmap, 0, get_count_order(size)); /* Check we can fill it up and then no more */ for (i = 0; i < size; i++) - check(msi_bitmap_alloc_hwirqs(&bmp, 1) >= 0); + WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) < 0); - check(msi_bitmap_alloc_hwirqs(&bmp, 1) < 0); + WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) >= 0); /* Should all be allocated */ - check(bitmap_find_free_region(bmp.bitmap, size, 0) < 0); + WARN_ON(bitmap_find_free_region(bmp.bitmap, size, 0) >= 0); /* And if we free one we can then allocate another */ msi_bitmap_free_hwirqs(&bmp, size / 2, 1); - check(msi_bitmap_alloc_hwirqs(&bmp, 1) == size / 2); + WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) != size / 2); + + /* Free most of them for the alignment tests */ + msi_bitmap_free_hwirqs(&bmp, 3, size - 3); /* Check we get a naturally aligned offset */ - check(msi_bitmap_alloc_hwirqs(&bmp, 2) % 2 == 0); - check(msi_bitmap_alloc_hwirqs(&bmp, 4) % 4 == 0); - check(msi_bitmap_alloc_hwirqs(&bmp, 8) % 8 == 0); - check(msi_bitmap_alloc_hwirqs(&bmp, 9) % 16 == 0); - check(msi_bitmap_alloc_hwirqs(&bmp, 3) % 4 == 0); - check(msi_bitmap_alloc_hwirqs(&bmp, 7) % 8 == 0); - check(msi_bitmap_alloc_hwirqs(&bmp, 121) % 128 == 0); + rc = msi_bitmap_alloc_hwirqs(&bmp, 2); + WARN_ON(rc < 0 && rc % 2 != 0); + rc = msi_bitmap_alloc_hwirqs(&bmp, 4); + WARN_ON(rc < 0 && rc % 4 != 0); + rc = msi_bitmap_alloc_hwirqs(&bmp, 8); + WARN_ON(rc < 0 && rc % 8 != 0); + rc = msi_bitmap_alloc_hwirqs(&bmp, 9); + WARN_ON(rc < 0 && rc % 16 != 0); + rc = msi_bitmap_alloc_hwirqs(&bmp, 3); + WARN_ON(rc < 0 && rc % 4 != 0); + rc = msi_bitmap_alloc_hwirqs(&bmp, 7); + WARN_ON(rc < 0 && rc % 8 != 0); + rc = msi_bitmap_alloc_hwirqs(&bmp, 121); + WARN_ON(rc < 0 && rc % 128 != 0); msi_bitmap_free(&bmp); - /* Clients may check bitmap == NULL for "not-allocated" */ - check(bmp.bitmap == NULL); + /* Clients may WARN_ON bitmap == NULL for "not-allocated" */ + WARN_ON(bmp.bitmap != NULL); kfree(bmp.bitmap); } @@ -219,14 +224,13 @@ static void __init test_of_node(void) of_node_init(&of_node); of_node.full_name = node_name; - check(0 == msi_bitmap_alloc(&bmp, size, &of_node)); + WARN_ON(msi_bitmap_alloc(&bmp, size, &of_node)); /* No msi-available-ranges, so expect > 0 */ - check(msi_bitmap_reserve_dt_hwirqs(&bmp) > 0); + WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0); /* Should all still be free */ - check(0 == bitmap_find_free_region(bmp.bitmap, size, - get_count_order(size))); + WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size))); bitmap_release_region(bmp.bitmap, 0, get_count_order(size)); /* Now create a fake msi-available-ranges property */ @@ -240,11 +244,11 @@ static void __init test_of_node(void) of_node.properties = ∝ /* msi-available-ranges, so expect == 0 */ - check(msi_bitmap_reserve_dt_hwirqs(&bmp) == 0); + WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp)); /* Check we got the expected result */ - check(0 == bitmap_parselist(expected_str, expected, size)); - check(bitmap_equal(expected, bmp.bitmap, size)); + WARN_ON(bitmap_parselist(expected_str, expected, size)); + WARN_ON(!bitmap_equal(expected, bmp.bitmap, size)); msi_bitmap_free(&bmp); kfree(bmp.bitmap); diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 940ac49198db..4197c89c52d4 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -286,7 +286,8 @@ #define __NR_seccomp 348 #define __NR_getrandom 349 #define __NR_memfd_create 350 -#define NR_syscalls 351 +#define __NR_bpf 351 +#define NR_syscalls 352 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index faf6caa510dc..c4f7a3d655b8 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -217,3 +217,4 @@ COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char __user *, uargs) COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags) COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags) +COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 6fe886ac2db5..9f7087fd58de 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -359,3 +359,4 @@ SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2) SYSCALL(sys_seccomp,sys_seccomp,compat_sys_seccomp) SYSCALL(sys_getrandom,sys_getrandom,compat_sys_getrandom) SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */ +SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf) diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 956f4f7a591c..f6b3cd056ec2 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -5,13 +5,13 @@ * Author(s): Jan Willeke, */ -#include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/uprobes.h> #include <linux/compat.h> #include <linux/kdebug.h> #include <asm/switch_to.h> #include <asm/facility.h> +#include <asm/kprobes.h> #include <asm/dis.h> #include "entry.h" diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c index c5d64a099719..ae90e1ae3607 100644 --- a/arch/s390/lib/probes.c +++ b/arch/s390/lib/probes.c @@ -4,7 +4,7 @@ * Copyright IBM Corp. 2014 */ -#include <linux/kprobes.h> +#include <asm/kprobes.h> #include <asm/dis.h> int probe_is_prohibited_opcode(u16 *insn) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 296b61a4af59..1b79ca67392f 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -656,7 +656,7 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) } pgste_set_unlock(ptep, pgste); out_pte: - pte_unmap_unlock(*ptep, ptl); + pte_unmap_unlock(ptep, ptl); } EXPORT_SYMBOL_GPL(__gmap_zap); @@ -943,7 +943,7 @@ retry: } if (!(pte_val(*ptep) & _PAGE_INVALID) && (pte_val(*ptep) & _PAGE_PROTECT)) { - pte_unmap_unlock(*ptep, ptl); + pte_unmap_unlock(ptep, ptl); if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { up_read(&mm->mmap_sem); return -EFAULT; @@ -974,7 +974,7 @@ retry: pgste_val(new) |= PGSTE_UC_BIT; pgste_set_unlock(ptep, new); - pte_unmap_unlock(*ptep, ptl); + pte_unmap_unlock(ptep, ptl); up_read(&mm->mmap_sem); return 0; } diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index aa29198fca3e..7440c58b8e6f 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -9,26 +9,21 @@ * published by the Free Software Foundation. */ -#include <linux/module.h> -#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/device.h> +#include <linux/err.h> #include <linux/init.h> +#include <linux/kernel.h> +#include <linux/leds.h> #include <linux/list.h> +#include <linux/module.h> +#include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/device.h> #include <linux/timer.h> -#include <linux/err.h> -#include <linux/ctype.h> -#include <linux/leds.h> #include "leds.h" static struct class *leds_class; -static void led_update_brightness(struct led_classdev *led_cdev) -{ - if (led_cdev->brightness_get) - led_cdev->brightness = led_cdev->brightness_get(led_cdev); -} - static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -59,14 +54,14 @@ static ssize_t brightness_store(struct device *dev, } static DEVICE_ATTR_RW(brightness); -static ssize_t led_max_brightness_show(struct device *dev, +static ssize_t max_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct led_classdev *led_cdev = dev_get_drvdata(dev); return sprintf(buf, "%u\n", led_cdev->max_brightness); } -static DEVICE_ATTR(max_brightness, 0444, led_max_brightness_show, NULL); +static DEVICE_ATTR_RO(max_brightness); #ifdef CONFIG_LEDS_TRIGGERS static DEVICE_ATTR(trigger, 0644, led_trigger_show, led_trigger_store); diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 71b40d3bf776..aaa8eba9099f 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -12,10 +12,11 @@ */ #include <linux/kernel.h> +#include <linux/leds.h> #include <linux/list.h> #include <linux/module.h> +#include <linux/mutex.h> #include <linux/rwsem.h> -#include <linux/leds.h> #include "leds.h" DECLARE_RWSEM(leds_list_lock); @@ -126,3 +127,19 @@ void led_set_brightness(struct led_classdev *led_cdev, __led_set_brightness(led_cdev, brightness); } EXPORT_SYMBOL(led_set_brightness); + +int led_update_brightness(struct led_classdev *led_cdev) +{ + int ret = 0; + + if (led_cdev->brightness_get) { + ret = led_cdev->brightness_get(led_cdev); + if (ret >= 0) { + led_cdev->brightness = ret; + return 0; + } + } + + return ret; +} +EXPORT_SYMBOL(led_update_brightness); diff --git a/drivers/leds/leds-gpio-register.c b/drivers/leds/leds-gpio-register.c index 1c4ed5510f35..75717ba68ae0 100644 --- a/drivers/leds/leds-gpio-register.c +++ b/drivers/leds/leds-gpio-register.c @@ -7,9 +7,9 @@ * Free Software Foundation. */ #include <linux/err.h> +#include <linux/leds.h> #include <linux/platform_device.h> #include <linux/slab.h> -#include <linux/leds.h> /** * gpio_led_register_device - register a gpio-led device @@ -28,6 +28,9 @@ struct platform_device *__init gpio_led_register_device( struct platform_device *ret; struct gpio_led_platform_data _pdata = *pdata; + if (!pdata->num_leds) + return ERR_PTR(-EINVAL); + _pdata.leds = kmemdup(pdata->leds, pdata->num_leds * sizeof(*pdata->leds), GFP_KERNEL); if (!_pdata.leds) diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c index 57ff20fecf57..b4518c8751c8 100644 --- a/drivers/leds/leds-gpio.c +++ b/drivers/leds/leds-gpio.c @@ -10,17 +10,17 @@ * published by the Free Software Foundation. * */ -#include <linux/kernel.h> -#include <linux/platform_device.h> +#include <linux/err.h> #include <linux/gpio.h> +#include <linux/kernel.h> #include <linux/leds.h> +#include <linux/module.h> #include <linux/of.h> -#include <linux/of_platform.h> #include <linux/of_gpio.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> #include <linux/slab.h> #include <linux/workqueue.h> -#include <linux/module.h> -#include <linux/err.h> struct gpio_led_data { struct led_classdev cdev; @@ -36,7 +36,7 @@ struct gpio_led_data { static void gpio_led_work(struct work_struct *work) { - struct gpio_led_data *led_dat = + struct gpio_led_data *led_dat = container_of(work, struct gpio_led_data, work); if (led_dat->blinking) { @@ -235,14 +235,12 @@ static struct gpio_leds_priv *gpio_leds_create_of(struct platform_device *pdev) } #endif /* CONFIG_OF_GPIO */ - static int gpio_led_probe(struct platform_device *pdev) { struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev); struct gpio_leds_priv *priv; int i, ret = 0; - if (pdata && pdata->num_leds) { priv = devm_kzalloc(&pdev->dev, sizeof_gpio_leds_priv(pdata->num_leds), diff --git a/drivers/leds/leds-lp3944.c b/drivers/leds/leds-lp3944.c index 8e1abdcd4c9d..53144fb96167 100644 --- a/drivers/leds/leds-lp3944.c +++ b/drivers/leds/leds-lp3944.c @@ -335,7 +335,8 @@ static int lp3944_configure(struct i2c_client *client, } /* to expose the default value to userspace */ - led->ldev.brightness = led->status; + led->ldev.brightness = + (enum led_brightness) led->status; /* Set the default led status */ err = lp3944_led_set(led, led->status); diff --git a/drivers/leds/trigger/ledtrig-gpio.c b/drivers/leds/trigger/ledtrig-gpio.c index 35812e3a37f2..c86c41826476 100644 --- a/drivers/leds/trigger/ledtrig-gpio.c +++ b/drivers/leds/trigger/ledtrig-gpio.c @@ -48,7 +48,7 @@ static void gpio_trig_work(struct work_struct *work) if (!gpio_data->gpio) return; - tmp = gpio_get_value(gpio_data->gpio); + tmp = gpio_get_value_cansleep(gpio_data->gpio); if (gpio_data->inverted) tmp = !tmp; diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index b800783800a3..ef2dd2e4754b 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -83,6 +83,7 @@ config PWM_BFIN config PWM_CLPS711X tristate "CLPS711X PWM support" depends on ARCH_CLPS711X || COMPILE_TEST + depends on HAS_IOMEM help Generic PWM framework driver for Cirrus Logic CLPS711X. @@ -101,6 +102,7 @@ config PWM_EP93XX config PWM_FSL_FTM tristate "Freescale FlexTimer Module (FTM) PWM support" depends on OF + select REGMAP_MMIO help Generic FTM PWM framework driver for Freescale VF610 and Layerscape LS-1 SoCs. @@ -149,7 +151,7 @@ config PWM_LPC32XX config PWM_LPSS tristate "Intel LPSS PWM support" - depends on ACPI + depends on X86 help Generic PWM framework driver for Intel Low Power Subsystem PWM controller. @@ -157,6 +159,24 @@ config PWM_LPSS To compile this driver as a module, choose M here: the module will be called pwm-lpss. +config PWM_LPSS_PCI + tristate "Intel LPSS PWM PCI driver" + depends on PWM_LPSS && PCI + help + The PCI driver for Intel Low Power Subsystem PWM controller. + + To compile this driver as a module, choose M here: the module + will be called pwm-lpss-pci. + +config PWM_LPSS_PLATFORM + tristate "Intel LPSS PWM platform driver" + depends on PWM_LPSS && ACPI + help + The platform driver for Intel Low Power Subsystem PWM controller. + + To compile this driver as a module, choose M here: the module + will be called pwm-lpss-platform. + config PWM_MXS tristate "Freescale MXS PWM support" depends on ARCH_MXS && OF diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index f8c577d41091..c458606c3755 100644 --- a/drivers/pwm/Makefile +++ b/drivers/pwm/Makefile @@ -13,6 +13,8 @@ obj-$(CONFIG_PWM_JZ4740) += pwm-jz4740.o obj-$(CONFIG_PWM_LP3943) += pwm-lp3943.o obj-$(CONFIG_PWM_LPC32XX) += pwm-lpc32xx.o obj-$(CONFIG_PWM_LPSS) += pwm-lpss.o +obj-$(CONFIG_PWM_LPSS_PCI) += pwm-lpss-pci.o +obj-$(CONFIG_PWM_LPSS_PLATFORM) += pwm-lpss-platform.o obj-$(CONFIG_PWM_MXS) += pwm-mxs.o obj-$(CONFIG_PWM_PCA9685) += pwm-pca9685.o obj-$(CONFIG_PWM_PUV3) += pwm-puv3.o diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index d2c35920ff08..966497d10c6e 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -236,7 +236,7 @@ int pwmchip_add(struct pwm_chip *chip) int ret; if (!chip || !chip->dev || !chip->ops || !chip->ops->config || - !chip->ops->enable || !chip->ops->disable) + !chip->ops->enable || !chip->ops->disable || !chip->npwm) return -EINVAL; mutex_lock(&pwm_lock); @@ -602,12 +602,9 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) struct pwm_device *pwm = ERR_PTR(-EPROBE_DEFER); const char *dev_id = dev ? dev_name(dev) : NULL; struct pwm_chip *chip = NULL; - unsigned int index = 0; unsigned int best = 0; - struct pwm_lookup *p; + struct pwm_lookup *p, *chosen = NULL; unsigned int match; - unsigned int period; - enum pwm_polarity polarity; /* look up via DT first */ if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node) @@ -653,10 +650,7 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) } if (match > best) { - chip = pwmchip_find_by_name(p->provider); - index = p->index; - period = p->period; - polarity = p->polarity; + chosen = p; if (match != 3) best = match; @@ -665,17 +659,22 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) } } - mutex_unlock(&pwm_lookup_lock); + if (!chosen) + goto out; - if (chip) - pwm = pwm_request_from_chip(chip, index, con_id ?: dev_id); - if (IS_ERR(pwm)) - return pwm; + chip = pwmchip_find_by_name(chosen->provider); + if (!chip) + goto out; - pwm_set_period(pwm, period); - pwm_set_polarity(pwm, polarity); + pwm = pwm_request_from_chip(chip, chosen->index, con_id ?: dev_id); + if (IS_ERR(pwm)) + goto out; + pwm_set_period(pwm, chosen->period); + pwm_set_polarity(pwm, chosen->polarity); +out: + mutex_unlock(&pwm_lookup_lock); return pwm; } EXPORT_SYMBOL_GPL(pwm_get); diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index 6e700a541ca3..d3c22de9ee47 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -102,7 +102,7 @@ static int atmel_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip); - unsigned long clk_rate, prd, dty; + unsigned long prd, dty; unsigned long long div; unsigned int pres = 0; u32 val; @@ -113,20 +113,18 @@ static int atmel_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, return -EBUSY; } - clk_rate = clk_get_rate(atmel_pwm->clk); - div = clk_rate; + /* Calculate the period cycles and prescale value */ + div = (unsigned long long)clk_get_rate(atmel_pwm->clk) * period_ns; + do_div(div, NSEC_PER_SEC); - /* Calculate the period cycles */ while (div > PWM_MAX_PRD) { - div = clk_rate / (1 << pres); - div = div * period_ns; - /* 1/Hz = 100000000 ns */ - do_div(div, 1000000000); - - if (pres++ > PRD_MAX_PRES) { - dev_err(chip->dev, "pres exceeds the maximum value\n"); - return -EINVAL; - } + div >>= 1; + pres++; + } + + if (pres > PRD_MAX_PRES) { + dev_err(chip->dev, "pres exceeds the maximum value\n"); + return -EINVAL; } /* Calculate the duty cycles */ diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index a18bc8fea385..0f2cc7ef7784 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -18,14 +18,14 @@ #include <linux/of_address.h> #include <linux/platform_device.h> #include <linux/pwm.h> +#include <linux/regmap.h> #include <linux/slab.h> #define FTM_SC 0x00 -#define FTM_SC_CLK_MASK 0x3 -#define FTM_SC_CLK_SHIFT 3 -#define FTM_SC_CLK(c) (((c) + 1) << FTM_SC_CLK_SHIFT) +#define FTM_SC_CLK_MASK_SHIFT 3 +#define FTM_SC_CLK_MASK (3 << FTM_SC_CLK_MASK_SHIFT) +#define FTM_SC_CLK(c) (((c) + 1) << FTM_SC_CLK_MASK_SHIFT) #define FTM_SC_PS_MASK 0x7 -#define FTM_SC_PS_SHIFT 0 #define FTM_CNT 0x04 #define FTM_MOD 0x08 @@ -83,7 +83,7 @@ struct fsl_pwm_chip { unsigned int cnt_select; unsigned int clk_ps; - void __iomem *base; + struct regmap *regmap; int period_ns; @@ -219,10 +219,11 @@ static unsigned long fsl_pwm_calculate_duty(struct fsl_pwm_chip *fpc, unsigned long period_ns, unsigned long duty_ns) { - unsigned long long val, duty; + unsigned long long duty; + u32 val; - val = readl(fpc->base + FTM_MOD); - duty = duty_ns * (val + 1); + regmap_read(fpc->regmap, FTM_MOD, &val); + duty = (unsigned long long)duty_ns * (val + 1); do_div(duty, period_ns); return (unsigned long)duty; @@ -232,7 +233,7 @@ static int fsl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct fsl_pwm_chip *fpc = to_fsl_chip(chip); - u32 val, period, duty; + u32 period, duty; mutex_lock(&fpc->lock); @@ -257,11 +258,9 @@ static int fsl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, return -EINVAL; } - val = readl(fpc->base + FTM_SC); - val &= ~(FTM_SC_PS_MASK << FTM_SC_PS_SHIFT); - val |= fpc->clk_ps; - writel(val, fpc->base + FTM_SC); - writel(period - 1, fpc->base + FTM_MOD); + regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_PS_MASK, + fpc->clk_ps); + regmap_write(fpc->regmap, FTM_MOD, period - 1); fpc->period_ns = period_ns; } @@ -270,8 +269,9 @@ static int fsl_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, duty = fsl_pwm_calculate_duty(fpc, period_ns, duty_ns); - writel(FTM_CSC_MSB | FTM_CSC_ELSB, fpc->base + FTM_CSC(pwm->hwpwm)); - writel(duty, fpc->base + FTM_CV(pwm->hwpwm)); + regmap_write(fpc->regmap, FTM_CSC(pwm->hwpwm), + FTM_CSC_MSB | FTM_CSC_ELSB); + regmap_write(fpc->regmap, FTM_CV(pwm->hwpwm), duty); return 0; } @@ -283,31 +283,28 @@ static int fsl_pwm_set_polarity(struct pwm_chip *chip, struct fsl_pwm_chip *fpc = to_fsl_chip(chip); u32 val; - val = readl(fpc->base + FTM_POL); + regmap_read(fpc->regmap, FTM_POL, &val); if (polarity == PWM_POLARITY_INVERSED) val |= BIT(pwm->hwpwm); else val &= ~BIT(pwm->hwpwm); - writel(val, fpc->base + FTM_POL); + regmap_write(fpc->regmap, FTM_POL, val); return 0; } static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc) { - u32 val; int ret; if (fpc->use_count != 0) return 0; /* select counter clock source */ - val = readl(fpc->base + FTM_SC); - val &= ~(FTM_SC_CLK_MASK << FTM_SC_CLK_SHIFT); - val |= FTM_SC_CLK(fpc->cnt_select); - writel(val, fpc->base + FTM_SC); + regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_CLK_MASK, + FTM_SC_CLK(fpc->cnt_select)); ret = clk_prepare_enable(fpc->clk[fpc->cnt_select]); if (ret) @@ -327,13 +324,10 @@ static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc) static int fsl_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { struct fsl_pwm_chip *fpc = to_fsl_chip(chip); - u32 val; int ret; mutex_lock(&fpc->lock); - val = readl(fpc->base + FTM_OUTMASK); - val &= ~BIT(pwm->hwpwm); - writel(val, fpc->base + FTM_OUTMASK); + regmap_update_bits(fpc->regmap, FTM_OUTMASK, BIT(pwm->hwpwm), 0); ret = fsl_counter_clock_enable(fpc); mutex_unlock(&fpc->lock); @@ -343,8 +337,6 @@ static int fsl_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) static void fsl_counter_clock_disable(struct fsl_pwm_chip *fpc) { - u32 val; - /* * already disabled, do nothing */ @@ -356,9 +348,7 @@ static void fsl_counter_clock_disable(struct fsl_pwm_chip *fpc) return; /* no users left, disable PWM counter clock */ - val = readl(fpc->base + FTM_SC); - val &= ~(FTM_SC_CLK_MASK << FTM_SC_CLK_SHIFT); - writel(val, fpc->base + FTM_SC); + regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_CLK_MASK, 0); clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]); clk_disable_unprepare(fpc->clk[fpc->cnt_select]); @@ -370,14 +360,12 @@ static void fsl_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) u32 val; mutex_lock(&fpc->lock); - val = readl(fpc->base + FTM_OUTMASK); - val |= BIT(pwm->hwpwm); - writel(val, fpc->base + FTM_OUTMASK); + regmap_update_bits(fpc->regmap, FTM_OUTMASK, BIT(pwm->hwpwm), + BIT(pwm->hwpwm)); fsl_counter_clock_disable(fpc); - val = readl(fpc->base + FTM_OUTMASK); - + regmap_read(fpc->regmap, FTM_OUTMASK, &val); if ((val & 0xFF) == 0xFF) fpc->period_ns = 0; @@ -402,19 +390,28 @@ static int fsl_pwm_init(struct fsl_pwm_chip *fpc) if (ret) return ret; - writel(0x00, fpc->base + FTM_CNTIN); - writel(0x00, fpc->base + FTM_OUTINIT); - writel(0xFF, fpc->base + FTM_OUTMASK); + regmap_write(fpc->regmap, FTM_CNTIN, 0x00); + regmap_write(fpc->regmap, FTM_OUTINIT, 0x00); + regmap_write(fpc->regmap, FTM_OUTMASK, 0xFF); clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_SYS]); return 0; } +static const struct regmap_config fsl_pwm_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + + .max_register = FTM_PWMLOAD, +}; + static int fsl_pwm_probe(struct platform_device *pdev) { struct fsl_pwm_chip *fpc; struct resource *res; + void __iomem *base; int ret; fpc = devm_kzalloc(&pdev->dev, sizeof(*fpc), GFP_KERNEL); @@ -426,9 +423,16 @@ static int fsl_pwm_probe(struct platform_device *pdev) fpc->chip.dev = &pdev->dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - fpc->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(fpc->base)) - return PTR_ERR(fpc->base); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + fpc->regmap = devm_regmap_init_mmio_clk(&pdev->dev, NULL, base, + &fsl_pwm_regmap_config); + if (IS_ERR(fpc->regmap)) { + dev_err(&pdev->dev, "regmap init failed\n"); + return PTR_ERR(fpc->regmap); + } fpc->clk[FSL_PWM_CLK_SYS] = devm_clk_get(&pdev->dev, "ftm_sys"); if (IS_ERR(fpc->clk[FSL_PWM_CLK_SYS])) { diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index 5449d9150d40..f8b5f109c1ab 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/err.h> #include <linux/clk.h> +#include <linux/delay.h> #include <linux/io.h> #include <linux/pwm.h> #include <linux/of.h> @@ -21,24 +22,30 @@ /* i.MX1 and i.MX21 share the same PWM function block: */ -#define MX1_PWMC 0x00 /* PWM Control Register */ -#define MX1_PWMS 0x04 /* PWM Sample Register */ -#define MX1_PWMP 0x08 /* PWM Period Register */ +#define MX1_PWMC 0x00 /* PWM Control Register */ +#define MX1_PWMS 0x04 /* PWM Sample Register */ +#define MX1_PWMP 0x08 /* PWM Period Register */ -#define MX1_PWMC_EN (1 << 4) +#define MX1_PWMC_EN (1 << 4) /* i.MX27, i.MX31, i.MX35 share the same PWM function block: */ -#define MX3_PWMCR 0x00 /* PWM Control Register */ -#define MX3_PWMSAR 0x0C /* PWM Sample Register */ -#define MX3_PWMPR 0x10 /* PWM Period Register */ -#define MX3_PWMCR_PRESCALER(x) (((x - 1) & 0xFFF) << 4) -#define MX3_PWMCR_DOZEEN (1 << 24) -#define MX3_PWMCR_WAITEN (1 << 23) +#define MX3_PWMCR 0x00 /* PWM Control Register */ +#define MX3_PWMSR 0x04 /* PWM Status Register */ +#define MX3_PWMSAR 0x0C /* PWM Sample Register */ +#define MX3_PWMPR 0x10 /* PWM Period Register */ +#define MX3_PWMCR_PRESCALER(x) ((((x) - 1) & 0xFFF) << 4) +#define MX3_PWMCR_DOZEEN (1 << 24) +#define MX3_PWMCR_WAITEN (1 << 23) #define MX3_PWMCR_DBGEN (1 << 22) -#define MX3_PWMCR_CLKSRC_IPG_HIGH (2 << 16) -#define MX3_PWMCR_CLKSRC_IPG (1 << 16) -#define MX3_PWMCR_EN (1 << 0) +#define MX3_PWMCR_CLKSRC_IPG_HIGH (2 << 16) +#define MX3_PWMCR_CLKSRC_IPG (1 << 16) +#define MX3_PWMCR_SWR (1 << 3) +#define MX3_PWMCR_EN (1 << 0) +#define MX3_PWMSR_FIFOAV_4WORDS 0x4 +#define MX3_PWMSR_FIFOAV_MASK 0x7 + +#define MX3_PWM_SWR_LOOP 5 struct imx_chip { struct clk *clk_per; @@ -103,9 +110,43 @@ static int imx_pwm_config_v2(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct imx_chip *imx = to_imx_chip(chip); + struct device *dev = chip->dev; unsigned long long c; unsigned long period_cycles, duty_cycles, prescale; - u32 cr; + unsigned int period_ms; + bool enable = test_bit(PWMF_ENABLED, &pwm->flags); + int wait_count = 0, fifoav; + u32 cr, sr; + + /* + * i.MX PWMv2 has a 4-word sample FIFO. + * In order to avoid FIFO overflow issue, we do software reset + * to clear all sample FIFO if the controller is disabled or + * wait for a full PWM cycle to get a relinquished FIFO slot + * when the controller is enabled and the FIFO is fully loaded. + */ + if (enable) { + sr = readl(imx->mmio_base + MX3_PWMSR); + fifoav = sr & MX3_PWMSR_FIFOAV_MASK; + if (fifoav == MX3_PWMSR_FIFOAV_4WORDS) { + period_ms = DIV_ROUND_UP(pwm->period, NSEC_PER_MSEC); + msleep(period_ms); + + sr = readl(imx->mmio_base + MX3_PWMSR); + if (fifoav == (sr & MX3_PWMSR_FIFOAV_MASK)) + dev_warn(dev, "there is no free FIFO slot\n"); + } + } else { + writel(MX3_PWMCR_SWR, imx->mmio_base + MX3_PWMCR); + do { + usleep_range(200, 1000); + cr = readl(imx->mmio_base + MX3_PWMCR); + } while ((cr & MX3_PWMCR_SWR) && + (wait_count++ < MX3_PWM_SWR_LOOP)); + + if (cr & MX3_PWMCR_SWR) + dev_warn(dev, "software reset timeout\n"); + } c = clk_get_rate(imx->clk_per); c = c * period_ns; @@ -135,7 +176,7 @@ static int imx_pwm_config_v2(struct pwm_chip *chip, MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN | MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH; - if (test_bit(PWMF_ENABLED, &pwm->flags)) + if (enable) cr |= MX3_PWMCR_EN; writel(cr, imx->mmio_base + MX3_PWMCR); diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c new file mode 100644 index 000000000000..cf20d2beacdd --- /dev/null +++ b/drivers/pwm/pwm-lpss-pci.c @@ -0,0 +1,64 @@ +/* + * Intel Low Power Subsystem PWM controller PCI driver + * + * Copyright (C) 2014, Intel Corporation + * + * Derived from the original pwm-lpss.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "pwm-lpss.h" + +static int pwm_lpss_probe_pci(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + const struct pwm_lpss_boardinfo *info; + struct pwm_lpss_chip *lpwm; + int err; + + err = pcim_enable_device(pdev); + if (err < 0) + return err; + + info = (struct pwm_lpss_boardinfo *)id->driver_data; + lpwm = pwm_lpss_probe(&pdev->dev, &pdev->resource[0], info); + if (IS_ERR(lpwm)) + return PTR_ERR(lpwm); + + pci_set_drvdata(pdev, lpwm); + return 0; +} + +static void pwm_lpss_remove_pci(struct pci_dev *pdev) +{ + struct pwm_lpss_chip *lpwm = pci_get_drvdata(pdev); + + pwm_lpss_remove(lpwm); +} + +static const struct pci_device_id pwm_lpss_pci_ids[] = { + { PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&pwm_lpss_byt_info}, + { PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&pwm_lpss_byt_info}, + { PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info}, + { PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info}, + { }, +}; +MODULE_DEVICE_TABLE(pci, pwm_lpss_pci_ids); + +static struct pci_driver pwm_lpss_driver_pci = { + .name = "pwm-lpss", + .id_table = pwm_lpss_pci_ids, + .probe = pwm_lpss_probe_pci, + .remove = pwm_lpss_remove_pci, +}; +module_pci_driver(pwm_lpss_driver_pci); + +MODULE_DESCRIPTION("PWM PCI driver for Intel LPSS"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c new file mode 100644 index 000000000000..18a9c880a76d --- /dev/null +++ b/drivers/pwm/pwm-lpss-platform.c @@ -0,0 +1,68 @@ +/* + * Intel Low Power Subsystem PWM controller driver + * + * Copyright (C) 2014, Intel Corporation + * + * Derived from the original pwm-lpss.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/acpi.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include "pwm-lpss.h" + +static int pwm_lpss_probe_platform(struct platform_device *pdev) +{ + const struct pwm_lpss_boardinfo *info; + const struct acpi_device_id *id; + struct pwm_lpss_chip *lpwm; + struct resource *r; + + id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev); + if (!id) + return -ENODEV; + + info = (const struct pwm_lpss_boardinfo *)id->driver_data; + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + lpwm = pwm_lpss_probe(&pdev->dev, r, info); + if (IS_ERR(lpwm)) + return PTR_ERR(lpwm); + + platform_set_drvdata(pdev, lpwm); + return 0; +} + +static int pwm_lpss_remove_platform(struct platform_device *pdev) +{ + struct pwm_lpss_chip *lpwm = platform_get_drvdata(pdev); + + return pwm_lpss_remove(lpwm); +} + +static const struct acpi_device_id pwm_lpss_acpi_match[] = { + { "80860F09", (unsigned long)&pwm_lpss_byt_info }, + { "80862288", (unsigned long)&pwm_lpss_bsw_info }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, pwm_lpss_acpi_match); + +static struct platform_driver pwm_lpss_driver_platform = { + .driver = { + .name = "pwm-lpss", + .acpi_match_table = pwm_lpss_acpi_match, + }, + .probe = pwm_lpss_probe_platform, + .remove = pwm_lpss_remove_platform, +}; +module_platform_driver(pwm_lpss_driver_platform); + +MODULE_DESCRIPTION("PWM platform driver for Intel LPSS"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:pwm-lpss"); diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 4df994f72d96..e9798253a16f 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -13,15 +13,11 @@ * published by the Free Software Foundation. */ -#include <linux/acpi.h> -#include <linux/device.h> +#include <linux/io.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/pwm.h> -#include <linux/platform_device.h> -#include <linux/pci.h> -static int pci_drv, plat_drv; /* So we know which drivers registered */ +#include "pwm-lpss.h" #define PWM 0x00000000 #define PWM_ENABLE BIT(31) @@ -39,14 +35,17 @@ struct pwm_lpss_chip { unsigned long clk_rate; }; -struct pwm_lpss_boardinfo { - unsigned long clk_rate; +/* BayTrail */ +const struct pwm_lpss_boardinfo pwm_lpss_byt_info = { + .clk_rate = 25000000 }; +EXPORT_SYMBOL_GPL(pwm_lpss_byt_info); -/* BayTrail */ -static const struct pwm_lpss_boardinfo byt_info = { - 25000000 +/* Braswell */ +const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = { + .clk_rate = 19200000 }; +EXPORT_SYMBOL_GPL(pwm_lpss_bsw_info); static inline struct pwm_lpss_chip *to_lpwm(struct pwm_chip *chip) { @@ -118,9 +117,8 @@ static const struct pwm_ops pwm_lpss_ops = { .owner = THIS_MODULE, }; -static struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, - struct resource *r, - const struct pwm_lpss_boardinfo *info) +struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, + const struct pwm_lpss_boardinfo *info) { struct pwm_lpss_chip *lpwm; int ret; @@ -147,8 +145,9 @@ static struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, return lpwm; } +EXPORT_SYMBOL_GPL(pwm_lpss_probe); -static int pwm_lpss_remove(struct pwm_lpss_chip *lpwm) +int pwm_lpss_remove(struct pwm_lpss_chip *lpwm) { u32 ctrl; @@ -157,114 +156,8 @@ static int pwm_lpss_remove(struct pwm_lpss_chip *lpwm) return pwmchip_remove(&lpwm->chip); } - -static int pwm_lpss_probe_pci(struct pci_dev *pdev, - const struct pci_device_id *id) -{ - const struct pwm_lpss_boardinfo *info; - struct pwm_lpss_chip *lpwm; - int err; - - err = pci_enable_device(pdev); - if (err < 0) - return err; - - info = (struct pwm_lpss_boardinfo *)id->driver_data; - lpwm = pwm_lpss_probe(&pdev->dev, &pdev->resource[0], info); - if (IS_ERR(lpwm)) - return PTR_ERR(lpwm); - - pci_set_drvdata(pdev, lpwm); - return 0; -} - -static void pwm_lpss_remove_pci(struct pci_dev *pdev) -{ - struct pwm_lpss_chip *lpwm = pci_get_drvdata(pdev); - - pwm_lpss_remove(lpwm); - pci_disable_device(pdev); -} - -static struct pci_device_id pwm_lpss_pci_ids[] = { - { PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&byt_info}, - { PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&byt_info}, - { }, -}; -MODULE_DEVICE_TABLE(pci, pwm_lpss_pci_ids); - -static struct pci_driver pwm_lpss_driver_pci = { - .name = "pwm-lpss", - .id_table = pwm_lpss_pci_ids, - .probe = pwm_lpss_probe_pci, - .remove = pwm_lpss_remove_pci, -}; - -static int pwm_lpss_probe_platform(struct platform_device *pdev) -{ - const struct pwm_lpss_boardinfo *info; - const struct acpi_device_id *id; - struct pwm_lpss_chip *lpwm; - struct resource *r; - - id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev); - if (!id) - return -ENODEV; - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - info = (struct pwm_lpss_boardinfo *)id->driver_data; - lpwm = pwm_lpss_probe(&pdev->dev, r, info); - if (IS_ERR(lpwm)) - return PTR_ERR(lpwm); - - platform_set_drvdata(pdev, lpwm); - return 0; -} - -static int pwm_lpss_remove_platform(struct platform_device *pdev) -{ - struct pwm_lpss_chip *lpwm = platform_get_drvdata(pdev); - - return pwm_lpss_remove(lpwm); -} - -static const struct acpi_device_id pwm_lpss_acpi_match[] = { - { "80860F09", (unsigned long)&byt_info }, - { }, -}; -MODULE_DEVICE_TABLE(acpi, pwm_lpss_acpi_match); - -static struct platform_driver pwm_lpss_driver_platform = { - .driver = { - .name = "pwm-lpss", - .acpi_match_table = pwm_lpss_acpi_match, - }, - .probe = pwm_lpss_probe_platform, - .remove = pwm_lpss_remove_platform, -}; - -static int __init pwm_init(void) -{ - pci_drv = pci_register_driver(&pwm_lpss_driver_pci); - plat_drv = platform_driver_register(&pwm_lpss_driver_platform); - if (pci_drv && plat_drv) - return pci_drv; - - return 0; -} -module_init(pwm_init); - -static void __exit pwm_exit(void) -{ - if (!pci_drv) - pci_unregister_driver(&pwm_lpss_driver_pci); - if (!plat_drv) - platform_driver_unregister(&pwm_lpss_driver_platform); -} -module_exit(pwm_exit); +EXPORT_SYMBOL_GPL(pwm_lpss_remove); MODULE_DESCRIPTION("PWM driver for Intel LPSS"); MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>"); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:pwm-lpss"); diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h new file mode 100644 index 000000000000..aa041bb1b67d --- /dev/null +++ b/drivers/pwm/pwm-lpss.h @@ -0,0 +1,32 @@ +/* + * Intel Low Power Subsystem PWM controller driver + * + * Copyright (C) 2014, Intel Corporation + * + * Derived from the original pwm-lpss.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __PWM_LPSS_H +#define __PWM_LPSS_H + +#include <linux/device.h> +#include <linux/pwm.h> + +struct pwm_lpss_chip; + +struct pwm_lpss_boardinfo { + unsigned long clk_rate; +}; + +extern const struct pwm_lpss_boardinfo pwm_lpss_byt_info; +extern const struct pwm_lpss_boardinfo pwm_lpss_bsw_info; + +struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, + const struct pwm_lpss_boardinfo *info); +int pwm_lpss_remove(struct pwm_lpss_chip *lpwm); + +#endif /* __PWM_LPSS_H */ diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index bdd8644c01cf..9442df244101 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -24,7 +24,9 @@ #define PWM_ENABLE (1 << 0) #define PWM_CONTINUOUS (1 << 1) #define PWM_DUTY_POSITIVE (1 << 3) +#define PWM_DUTY_NEGATIVE (0 << 3) #define PWM_INACTIVE_NEGATIVE (0 << 4) +#define PWM_INACTIVE_POSITIVE (1 << 4) #define PWM_OUTPUT_LEFT (0 << 5) #define PWM_LP_DISABLE (0 << 8) @@ -45,8 +47,10 @@ struct rockchip_pwm_regs { struct rockchip_pwm_data { struct rockchip_pwm_regs regs; unsigned int prescaler; + const struct pwm_ops *ops; - void (*set_enable)(struct pwm_chip *chip, bool enable); + void (*set_enable)(struct pwm_chip *chip, + struct pwm_device *pwm, bool enable); }; static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c) @@ -54,7 +58,8 @@ static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c) return container_of(c, struct rockchip_pwm_chip, chip); } -static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip, bool enable) +static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip, + struct pwm_device *pwm, bool enable) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); u32 enable_conf = PWM_CTRL_OUTPUT_EN | PWM_CTRL_TIMER_EN; @@ -70,14 +75,19 @@ static void rockchip_pwm_set_enable_v1(struct pwm_chip *chip, bool enable) writel_relaxed(val, pc->base + pc->data->regs.ctrl); } -static void rockchip_pwm_set_enable_v2(struct pwm_chip *chip, bool enable) +static void rockchip_pwm_set_enable_v2(struct pwm_chip *chip, + struct pwm_device *pwm, bool enable) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); u32 enable_conf = PWM_OUTPUT_LEFT | PWM_LP_DISABLE | PWM_ENABLE | - PWM_CONTINUOUS | PWM_DUTY_POSITIVE | - PWM_INACTIVE_NEGATIVE; + PWM_CONTINUOUS; u32 val; + if (pwm->polarity == PWM_POLARITY_INVERSED) + enable_conf |= PWM_DUTY_NEGATIVE | PWM_INACTIVE_POSITIVE; + else + enable_conf |= PWM_DUTY_POSITIVE | PWM_INACTIVE_NEGATIVE; + val = readl_relaxed(pc->base + pc->data->regs.ctrl); if (enable) @@ -124,6 +134,19 @@ static int rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } +static int rockchip_pwm_set_polarity(struct pwm_chip *chip, + struct pwm_device *pwm, + enum pwm_polarity polarity) +{ + /* + * No action needed here because pwm->polarity will be set by the core + * and the core will only change polarity when the PWM is not enabled. + * We'll handle things in set_enable(). + */ + + return 0; +} + static int rockchip_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); @@ -133,7 +156,7 @@ static int rockchip_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) if (ret) return ret; - pc->data->set_enable(chip, true); + pc->data->set_enable(chip, pwm, true); return 0; } @@ -142,18 +165,26 @@ static void rockchip_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); - pc->data->set_enable(chip, false); + pc->data->set_enable(chip, pwm, false); clk_disable(pc->clk); } -static const struct pwm_ops rockchip_pwm_ops = { +static const struct pwm_ops rockchip_pwm_ops_v1 = { .config = rockchip_pwm_config, .enable = rockchip_pwm_enable, .disable = rockchip_pwm_disable, .owner = THIS_MODULE, }; +static const struct pwm_ops rockchip_pwm_ops_v2 = { + .config = rockchip_pwm_config, + .set_polarity = rockchip_pwm_set_polarity, + .enable = rockchip_pwm_enable, + .disable = rockchip_pwm_disable, + .owner = THIS_MODULE, +}; + static const struct rockchip_pwm_data pwm_data_v1 = { .regs = { .duty = 0x04, @@ -162,6 +193,7 @@ static const struct rockchip_pwm_data pwm_data_v1 = { .ctrl = 0x0c, }, .prescaler = 2, + .ops = &rockchip_pwm_ops_v1, .set_enable = rockchip_pwm_set_enable_v1, }; @@ -173,6 +205,7 @@ static const struct rockchip_pwm_data pwm_data_v2 = { .ctrl = 0x0c, }, .prescaler = 1, + .ops = &rockchip_pwm_ops_v2, .set_enable = rockchip_pwm_set_enable_v2, }; @@ -184,6 +217,7 @@ static const struct rockchip_pwm_data pwm_data_vop = { .ctrl = 0x00, }, .prescaler = 1, + .ops = &rockchip_pwm_ops_v2, .set_enable = rockchip_pwm_set_enable_v2, }; @@ -227,10 +261,15 @@ static int rockchip_pwm_probe(struct platform_device *pdev) pc->data = id->data; pc->chip.dev = &pdev->dev; - pc->chip.ops = &rockchip_pwm_ops; + pc->chip.ops = pc->data->ops; pc->chip.base = -1; pc->chip.npwm = 1; + if (pc->data->ops->set_polarity) { + pc->chip.of_xlate = of_pwm_xlate_with_flags; + pc->chip.of_pwm_n_cells = 3; + } + ret = pwmchip_add(&pc->chip); if (ret < 0) { clk_unprepare(pc->clk); diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index dc24ecfac2d1..db2cb1f8a1b5 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -105,7 +105,7 @@ config SCLP_ASYNC config HMC_DRV def_tristate m prompt "Support for file transfers from HMC drive CD/DVD-ROM" - depends on 64BIT + depends on S390 && 64BIT select CRC16 help This option enables support for file transfers from a Hardware diff --git a/fs/buffer.c b/fs/buffer.c index 9614adc7e754..6c48f20eddd4 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, */ static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size, int sizebits) + pgoff_t index, int size, int sizebits, gfp_t gfp) { struct inode *inode = bdev->bd_inode; struct page *page; @@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, int ret = 0; /* Will call free_more_memory() */ gfp_t gfp_mask; - gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS; - gfp_mask |= __GFP_MOVABLE; + gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; + /* * XXX: __getblk_slow() can not really deal with failure and * will endlessly loop on improvised global reclaim. Prefer @@ -1060,7 +1060,7 @@ failed: * that page was dirty, the buffers are set dirty also. */ static int -grow_buffers(struct block_device *bdev, sector_t block, int size) +grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp) { pgoff_t index; int sizebits; @@ -1087,11 +1087,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) } /* Create a page with the proper size buffers.. */ - return grow_dev_page(bdev, block, index, size, sizebits); + return grow_dev_page(bdev, block, index, size, sizebits, gfp); } -static struct buffer_head * -__getblk_slow(struct block_device *bdev, sector_t block, int size) +struct buffer_head * +__getblk_slow(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || @@ -1113,13 +1114,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) if (bh) return bh; - ret = grow_buffers(bdev, block, size); + ret = grow_buffers(bdev, block, size, gfp); if (ret < 0) return NULL; if (ret == 0) free_more_memory(); } } +EXPORT_SYMBOL(__getblk_slow); /* * The relationship between dirty buffers and dirty pages: @@ -1373,24 +1375,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) EXPORT_SYMBOL(__find_get_block); /* - * __getblk will locate (and, if necessary, create) the buffer_head + * __getblk_gfp() will locate (and, if necessary, create) the buffer_head * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() - * attempt is failing. FIXME, perhaps? + * __getblk_gfp() will lock up the machine if grow_dev_page's + * try_to_free_buffers() attempt is failing. FIXME, perhaps? */ struct buffer_head * -__getblk(struct block_device *bdev, sector_t block, unsigned size) +__getblk_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { struct buffer_head *bh = __find_get_block(bdev, block, size); might_sleep(); if (bh == NULL) - bh = __getblk_slow(bdev, block, size); + bh = __getblk_slow(bdev, block, size, gfp); return bh; } -EXPORT_SYMBOL(__getblk); +EXPORT_SYMBOL(__getblk_gfp); /* * Do async read-ahead on a buffer.. @@ -1406,24 +1409,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) EXPORT_SYMBOL(__breadahead); /** - * __bread() - reads a specified block and returns the bh + * __bread_gfp() - reads a specified block and returns the bh * @bdev: the block_device to read from * @block: number of block * @size: size (in bytes) to read - * + * @gfp: page allocation flag + * * Reads a specified block, and returns buffer head that contains it. + * The page cache can be allocated from non-movable area + * not to prevent page migration if you set gfp to zero. * It returns NULL if the block was unreadable. */ struct buffer_head * -__bread(struct block_device *bdev, sector_t block, unsigned size) +__bread_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { - struct buffer_head *bh = __getblk(bdev, block, size); + struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); if (likely(bh) && !buffer_uptodate(bh)) bh = __bread_slow(bh); return bh; } -EXPORT_SYMBOL(__bread); +EXPORT_SYMBOL(__bread_gfp); /* * invalidate_bh_lrus() is called rarely - but not only at unmount. @@ -2082,6 +2089,7 @@ int generic_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct inode *inode = mapping->host; + loff_t old_size = inode->i_size; int i_size_changed = 0; copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); @@ -2101,6 +2109,8 @@ int generic_write_end(struct file *file, struct address_space *mapping, unlock_page(page); page_cache_release(page); + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); /* * Don't mark the inode dirty under page lock. First, it unnecessarily * makes the holding time of page lock longer. Second, it forces lock diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 581ef40fbe90..83a6f497c4e0 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -176,7 +176,7 @@ static unsigned int num_clusters_in_group(struct super_block *sb, } /* Initializes an uninitialized block bitmap */ -static void ext4_init_block_bitmap(struct super_block *sb, +static int ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ext4_group_t block_group, struct ext4_group_desc *gdp) @@ -192,7 +192,6 @@ static void ext4_init_block_bitmap(struct super_block *sb, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - ext4_error(sb, "Checksum bad for group %u", block_group); grp = ext4_get_group_info(sb, block_group); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, @@ -205,7 +204,7 @@ static void ext4_init_block_bitmap(struct super_block *sb, count); } set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return; + return -EIO; } memset(bh->b_data, 0, sb->s_blocksize); @@ -243,6 +242,7 @@ static void ext4_init_block_bitmap(struct super_block *sb, sb->s_blocksize * 8, bh->b_data); ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); ext4_group_desc_csum_set(sb, block_group, gdp); + return 0; } /* Return the number of free blocks in a block group. It is used when @@ -438,11 +438,15 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) } ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - ext4_init_block_bitmap(sb, bh, block_group, desc); + int err; + + err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); + if (err) + ext4_error(sb, "Checksum bad for grp %u", block_group); return bh; } ext4_unlock_group(sb, block_group); @@ -636,8 +640,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, * Account for the allocated meta blocks. We will never * fail EDQUOT for metdata, but we do account for it. */ - if (!(*errp) && - ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) { + if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) { spin_lock(&EXT4_I(inode)->i_block_reservation_lock); spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); dquot_alloc_block_nofail(inode, diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index 3285aa5a706a..b610779a958c 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, __u32 provided, calculated; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 1; provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); @@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, __u32 csum; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return; csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); @@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, struct ext4_sb_info *sbi = EXT4_SB(sb); int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 1; provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); @@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, __u32 csum; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return; csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 0bb3f9ea0832..c24143ea9c08 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -151,13 +151,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) &file->f_ra, file, index, 1); file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; - bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); + bh = ext4_bread(NULL, inode, map.m_lblk, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); } - /* - * We ignore I/O errors on directories so users have a chance - * of recovering data when there's a bad sector - */ if (!bh) { if (!dir_has_error) { EXT4_ERROR_FILE(file, 0, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b0c225cdb52c..c55a1faaed58 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -572,15 +572,15 @@ enum { /* * The bit position of these flags must not overlap with any of the - * EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(), + * EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(), * read_extent_tree_block(), ext4_split_extent_at(), * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be * caching the extents when reading from the extent tree while a * truncate or punch hole operation is in progress. */ -#define EXT4_EX_NOCACHE 0x0400 -#define EXT4_EX_FORCE_CACHE 0x0800 +#define EXT4_EX_NOCACHE 0x40000000 +#define EXT4_EX_FORCE_CACHE 0x20000000 /* * Flags used by ext4_free_blocks @@ -890,6 +890,7 @@ struct ext4_inode_info { struct ext4_es_tree i_es_tree; rwlock_t i_es_lock; struct list_head i_es_lru; + unsigned int i_es_all_nr; /* protected by i_es_lock */ unsigned int i_es_lru_nr; /* protected by i_es_lock */ unsigned long i_touch_when; /* jiffies of last accessing */ @@ -1174,6 +1175,9 @@ struct ext4_super_block { #define EXT4_MF_MNTDIR_SAMPLED 0x0001 #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ +/* Number of quota types we support */ +#define EXT4_MAXQUOTAS 2 + /* * fourth extended-fs super-block data in memory */ @@ -1237,7 +1241,7 @@ struct ext4_sb_info { u32 s_min_batch_time; struct block_device *journal_bdev; #ifdef CONFIG_QUOTA - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */ int s_jquota_fmt; /* Format of quota to use */ #endif unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ @@ -1330,8 +1334,7 @@ struct ext4_sb_info { /* Reclaim extents from extent status tree */ struct shrinker s_es_shrinker; struct list_head s_es_lru; - unsigned long s_es_last_sorted; - struct percpu_counter s_extent_cache_cnt; + struct ext4_es_stats s_es_stats; struct mb_cache *s_mb_cache; spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; @@ -1399,7 +1402,6 @@ enum { EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ EXT4_STATE_NEWENTRY, /* File just added to dir */ - EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read nolocking */ EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ @@ -2086,10 +2088,8 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); /* inode.c */ -struct buffer_head *ext4_getblk(handle_t *, struct inode *, - ext4_lblk_t, int, int *); -struct buffer_head *ext4_bread(handle_t *, struct inode *, - ext4_lblk_t, int, int *); +struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); +struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); int ext4_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); int ext4_get_block(struct inode *inode, sector_t iblock, @@ -2109,6 +2109,7 @@ int do_journal_get_write_access(handle_t *handle, #define CONVERT_INLINE_DATA 2 extern struct inode *ext4_iget(struct super_block *, unsigned long); +extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, @@ -2332,10 +2333,18 @@ extern int ext4_register_li_request(struct super_block *sb, static inline int ext4_has_group_desc_csum(struct super_block *sb) { return EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_GDT_CSUM | - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM); + EXT4_FEATURE_RO_COMPAT_GDT_CSUM) || + (EXT4_SB(sb)->s_chksum_driver != NULL); } +static inline int ext4_has_metadata_csum(struct super_block *sb) +{ + WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + !EXT4_SB(sb)->s_chksum_driver); + + return (EXT4_SB(sb)->s_chksum_driver != NULL); +} static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | @@ -2731,21 +2740,26 @@ extern int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2); extern int ext4_ext_insert_extent(handle_t *, struct inode *, - struct ext4_ext_path *, + struct ext4_ext_path **, struct ext4_extent *, int); -extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, - struct ext4_ext_path *, - int flags); +extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t, + struct ext4_ext_path **, + int flags); extern void ext4_ext_drop_refs(struct ext4_ext_path *); extern int ext4_ext_check_inode(struct inode *inode); extern int ext4_find_delalloc_range(struct inode *inode, ext4_lblk_t lblk_start, ext4_lblk_t lblk_end); extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); +extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); extern int ext4_ext_precache(struct inode *inode); extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); +extern int ext4_swap_extents(handle_t *handle, struct inode *inode1, + struct inode *inode2, ext4_lblk_t lblk1, + ext4_lblk_t lblk2, ext4_lblk_t count, + int mark_unwritten,int *err); /* move_extent.c */ extern void ext4_double_down_write_data_sem(struct inode *first, @@ -2755,8 +2769,6 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode, extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 start_orig, __u64 start_donor, __u64 len, __u64 *moved_len); -extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent **extent); /* page-io.c */ extern int __init ext4_init_pageio(void); diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index a867f5ca9991..3c9381547094 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -123,6 +123,7 @@ find_ext4_extent_tail(struct ext4_extent_header *eh) struct ext4_ext_path { ext4_fsblk_t p_block; __u16 p_depth; + __u16 p_maxdepth; struct ext4_extent *p_ext; struct ext4_extent_idx *p_idx; struct ext4_extent_header *p_hdr; diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 0074e0d23d6e..3445035c7e01 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -256,8 +256,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, set_buffer_prio(bh); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); - /* Errors can only happen if there is a bug */ - if (WARN_ON_ONCE(err)) { + /* Errors can only happen due to aborted journal or a nasty bug */ + if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) { ext4_journal_abort_handle(where, line, __func__, bh, handle, err); if (inode == NULL) { diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 17c00ff202f2..9c5b49fb281e 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -102,9 +102,9 @@ #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 #endif -#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) -#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) -#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) +#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) +#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) +#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) static inline int ext4_jbd2_credits_xattr(struct inode *inode) { diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74292a71b384..37043d0b2be8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -73,8 +73,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode, { struct ext4_extent_tail *et; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return 1; et = find_ext4_extent_tail(eh); @@ -88,8 +87,7 @@ static void ext4_extent_block_csum_set(struct inode *inode, { struct ext4_extent_tail *et; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return; et = find_ext4_extent_tail(eh); @@ -98,14 +96,14 @@ static void ext4_extent_block_csum_set(struct inode *inode, static int ext4_split_extent(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, + struct ext4_ext_path **ppath, struct ext4_map_blocks *map, int split_flag, int flags); static int ext4_split_extent_at(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, + struct ext4_ext_path **ppath, ext4_lblk_t split, int split_flag, int flags); @@ -291,6 +289,20 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) return size; } +static inline int +ext4_force_split_extent_at(handle_t *handle, struct inode *inode, + struct ext4_ext_path **ppath, ext4_lblk_t lblk, + int nofail) +{ + struct ext4_ext_path *path = *ppath; + int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); + + return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ? + EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, + EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | + (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); +} + /* * Calculate the number of metadata blocks needed * to allocate @blocks @@ -695,9 +707,11 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, void ext4_ext_drop_refs(struct ext4_ext_path *path) { - int depth = path->p_depth; - int i; + int depth, i; + if (!path) + return; + depth = path->p_depth; for (i = 0; i <= depth; i++, path++) if (path->p_bh) { brelse(path->p_bh); @@ -841,24 +855,32 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) } struct ext4_ext_path * -ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, - struct ext4_ext_path *path, int flags) +ext4_find_extent(struct inode *inode, ext4_lblk_t block, + struct ext4_ext_path **orig_path, int flags) { struct ext4_extent_header *eh; struct buffer_head *bh; - short int depth, i, ppos = 0, alloc = 0; + struct ext4_ext_path *path = orig_path ? *orig_path : NULL; + short int depth, i, ppos = 0; int ret; eh = ext_inode_hdr(inode); depth = ext_depth(inode); - /* account possible depth increase */ + if (path) { + ext4_ext_drop_refs(path); + if (depth > path[0].p_maxdepth) { + kfree(path); + *orig_path = path = NULL; + } + } if (!path) { + /* account possible depth increase */ path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), GFP_NOFS); - if (!path) + if (unlikely(!path)) return ERR_PTR(-ENOMEM); - alloc = 1; + path[0].p_maxdepth = depth + 1; } path[0].p_hdr = eh; path[0].p_bh = NULL; @@ -876,7 +898,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, bh = read_extent_tree_block(inode, path[ppos].p_block, --i, flags); - if (IS_ERR(bh)) { + if (unlikely(IS_ERR(bh))) { ret = PTR_ERR(bh); goto err; } @@ -910,8 +932,9 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, err: ext4_ext_drop_refs(path); - if (alloc) - kfree(path); + kfree(path); + if (orig_path) + *orig_path = NULL; return ERR_PTR(ret); } @@ -1238,16 +1261,24 @@ cleanup: * just created block */ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, - unsigned int flags, - struct ext4_extent *newext) + unsigned int flags) { struct ext4_extent_header *neh; struct buffer_head *bh; - ext4_fsblk_t newblock; + ext4_fsblk_t newblock, goal = 0; + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; int err = 0; - newblock = ext4_ext_new_meta_block(handle, inode, NULL, - newext, &err, flags); + /* Try to prepend new index to old one */ + if (ext_depth(inode)) + goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode))); + if (goal > le32_to_cpu(es->s_first_data_block)) { + flags |= EXT4_MB_HINT_TRY_GOAL; + goal--; + } else + goal = ext4_inode_to_goal_block(inode); + newblock = ext4_new_meta_blocks(handle, inode, goal, flags, + NULL, &err); if (newblock == 0) return err; @@ -1314,9 +1345,10 @@ out: static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, unsigned int mb_flags, unsigned int gb_flags, - struct ext4_ext_path *path, + struct ext4_ext_path **ppath, struct ext4_extent *newext) { + struct ext4_ext_path *path = *ppath; struct ext4_ext_path *curp; int depth, i, err = 0; @@ -1340,23 +1372,21 @@ repeat: goto out; /* refill path */ - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, + path = ext4_find_extent(inode, (ext4_lblk_t)le32_to_cpu(newext->ee_block), - path, gb_flags); + ppath, gb_flags); if (IS_ERR(path)) err = PTR_ERR(path); } else { /* tree is full, time to grow in depth */ - err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext); + err = ext4_ext_grow_indepth(handle, inode, mb_flags); if (err) goto out; /* refill path */ - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, + path = ext4_find_extent(inode, (ext4_lblk_t)le32_to_cpu(newext->ee_block), - path, gb_flags); + ppath, gb_flags); if (IS_ERR(path)) { err = PTR_ERR(path); goto out; @@ -1559,7 +1589,7 @@ found_extent: * allocated block. Thus, index entries have to be consistent * with leaves. */ -static ext4_lblk_t +ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path) { int depth; @@ -1802,6 +1832,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle, sizeof(struct ext4_extent_idx); s += sizeof(struct ext4_extent_header); + path[1].p_maxdepth = path[0].p_maxdepth; memcpy(path[0].p_hdr, path[1].p_hdr, s); path[0].p_depth = 0; path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + @@ -1896,9 +1927,10 @@ out: * creating new leaf in the no-space case. */ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, + struct ext4_ext_path **ppath, struct ext4_extent *newext, int gb_flags) { + struct ext4_ext_path *path = *ppath; struct ext4_extent_header *eh; struct ext4_extent *ex, *fex; struct ext4_extent *nearex; /* nearest extent */ @@ -1907,6 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, ext4_lblk_t next; int mb_flags = 0, unwritten; + if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + mb_flags |= EXT4_MB_DELALLOC_RESERVED; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); return -EIO; @@ -1925,7 +1959,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* * Try to see whether we should rather test the extent on * right from ex, or from the left of ex. This is because - * ext4_ext_find_extent() can return either extent on the + * ext4_find_extent() can return either extent on the * left, or on the right from the searched position. This * will make merging more effective. */ @@ -2008,7 +2042,7 @@ prepend: if (next != EXT_MAX_BLOCKS) { ext_debug("next leaf block - %u\n", next); BUG_ON(npath != NULL); - npath = ext4_ext_find_extent(inode, next, NULL, 0); + npath = ext4_find_extent(inode, next, NULL, 0); if (IS_ERR(npath)) return PTR_ERR(npath); BUG_ON(npath->p_depth != path->p_depth); @@ -2028,9 +2062,9 @@ prepend: * We're gonna add a new leaf in the tree. */ if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) - mb_flags = EXT4_MB_USE_RESERVED; + mb_flags |= EXT4_MB_USE_RESERVED; err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, - path, newext); + ppath, newext); if (err) goto cleanup; depth = ext_depth(inode); @@ -2108,10 +2142,8 @@ merge: err = ext4_ext_dirty(handle, inode, path + path->p_depth); cleanup: - if (npath) { - ext4_ext_drop_refs(npath); - kfree(npath); - } + ext4_ext_drop_refs(npath); + kfree(npath); return err; } @@ -2133,13 +2165,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, /* find extent for this block */ down_read(&EXT4_I(inode)->i_data_sem); - if (path && ext_depth(inode) != depth) { - /* depth was changed. we have to realloc path */ - kfree(path); - path = NULL; - } - - path = ext4_ext_find_extent(inode, block, path, 0); + path = ext4_find_extent(inode, block, &path, 0); if (IS_ERR(path)) { up_read(&EXT4_I(inode)->i_data_sem); err = PTR_ERR(path); @@ -2156,7 +2182,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode, } ex = path[depth].p_ext; next = ext4_ext_next_allocated_block(path); - ext4_ext_drop_refs(path); flags = 0; exists = 0; @@ -2266,11 +2291,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode, block = es.es_lblk + es.es_len; } - if (path) { - ext4_ext_drop_refs(path); - kfree(path); - } - + ext4_ext_drop_refs(path); + kfree(path); return err; } @@ -2826,7 +2848,7 @@ again: ext4_lblk_t ee_block; /* find extent for this block */ - path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); + path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); if (IS_ERR(path)) { ext4_journal_stop(handle); return PTR_ERR(path); @@ -2854,24 +2876,14 @@ again: */ if (end >= ee_block && end < ee_block + ext4_ext_get_actual_len(ex) - 1) { - int split_flag = 0; - - if (ext4_ext_is_unwritten(ex)) - split_flag = EXT4_EXT_MARK_UNWRIT1 | - EXT4_EXT_MARK_UNWRIT2; - /* * Split the extent in two so that 'end' is the last * block in the first new extent. Also we should not * fail removing space due to ENOSPC so try to use * reserved block if that happens. */ - err = ext4_split_extent_at(handle, inode, path, - end + 1, split_flag, - EXT4_EX_NOCACHE | - EXT4_GET_BLOCKS_PRE_IO | - EXT4_GET_BLOCKS_METADATA_NOFAIL); - + err = ext4_force_split_extent_at(handle, inode, &path, + end + 1, 1); if (err < 0) goto out; } @@ -2893,7 +2905,7 @@ again: ext4_journal_stop(handle); return -ENOMEM; } - path[0].p_depth = depth; + path[0].p_maxdepth = path[0].p_depth = depth; path[0].p_hdr = ext_inode_hdr(inode); i = 0; @@ -3013,10 +3025,9 @@ again: out: ext4_ext_drop_refs(path); kfree(path); - if (err == -EAGAIN) { - path = NULL; + path = NULL; + if (err == -EAGAIN) goto again; - } ext4_journal_stop(handle); return err; @@ -3130,11 +3141,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) */ static int ext4_split_extent_at(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, + struct ext4_ext_path **ppath, ext4_lblk_t split, int split_flag, int flags) { + struct ext4_ext_path *path = *ppath; ext4_fsblk_t newblock; ext4_lblk_t ee_block; struct ext4_extent *ex, newex, orig_ex, zero_ex; @@ -3205,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle, if (split_flag & EXT4_EXT_MARK_UNWRIT2) ext4_ext_mark_unwritten(ex2); - err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags); if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { if (split_flag & EXT4_EXT_DATA_VALID1) { @@ -3271,11 +3283,12 @@ fix_extent_len: */ static int ext4_split_extent(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path, + struct ext4_ext_path **ppath, struct ext4_map_blocks *map, int split_flag, int flags) { + struct ext4_ext_path *path = *ppath; ext4_lblk_t ee_block; struct ext4_extent *ex; unsigned int ee_len, depth; @@ -3298,7 +3311,7 @@ static int ext4_split_extent(handle_t *handle, EXT4_EXT_MARK_UNWRIT2; if (split_flag & EXT4_EXT_DATA_VALID2) split_flag1 |= EXT4_EXT_DATA_VALID1; - err = ext4_split_extent_at(handle, inode, path, + err = ext4_split_extent_at(handle, inode, ppath, map->m_lblk + map->m_len, split_flag1, flags1); if (err) goto out; @@ -3309,8 +3322,7 @@ static int ext4_split_extent(handle_t *handle, * Update path is required because previous ext4_split_extent_at() may * result in split of original leaf or extent zeroout. */ - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); + path = ext4_find_extent(inode, map->m_lblk, ppath, 0); if (IS_ERR(path)) return PTR_ERR(path); depth = ext_depth(inode); @@ -3330,7 +3342,7 @@ static int ext4_split_extent(handle_t *handle, split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | EXT4_EXT_MARK_UNWRIT2); } - err = ext4_split_extent_at(handle, inode, path, + err = ext4_split_extent_at(handle, inode, ppath, map->m_lblk, split_flag1, flags); if (err) goto out; @@ -3364,9 +3376,10 @@ out: static int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, - struct ext4_ext_path *path, + struct ext4_ext_path **ppath, int flags) { + struct ext4_ext_path *path = *ppath; struct ext4_sb_info *sbi; struct ext4_extent_header *eh; struct ext4_map_blocks split_map; @@ -3590,7 +3603,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, } } - allocated = ext4_split_extent(handle, inode, path, + allocated = ext4_split_extent(handle, inode, ppath, &split_map, split_flag, flags); if (allocated < 0) err = allocated; @@ -3629,9 +3642,10 @@ out: static int ext4_split_convert_extents(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, - struct ext4_ext_path *path, + struct ext4_ext_path **ppath, int flags) { + struct ext4_ext_path *path = *ppath; ext4_lblk_t eof_block; ext4_lblk_t ee_block; struct ext4_extent *ex; @@ -3665,74 +3679,15 @@ static int ext4_split_convert_extents(handle_t *handle, split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); } flags |= EXT4_GET_BLOCKS_PRE_IO; - return ext4_split_extent(handle, inode, path, map, split_flag, flags); -} - -static int ext4_convert_initialized_extents(handle_t *handle, - struct inode *inode, - struct ext4_map_blocks *map, - struct ext4_ext_path *path) -{ - struct ext4_extent *ex; - ext4_lblk_t ee_block; - unsigned int ee_len; - int depth; - int err = 0; - - depth = ext_depth(inode); - ex = path[depth].p_ext; - ee_block = le32_to_cpu(ex->ee_block); - ee_len = ext4_ext_get_actual_len(ex); - - ext_debug("%s: inode %lu, logical" - "block %llu, max_blocks %u\n", __func__, inode->i_ino, - (unsigned long long)ee_block, ee_len); - - if (ee_block != map->m_lblk || ee_len > map->m_len) { - err = ext4_split_convert_extents(handle, inode, map, path, - EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); - if (err < 0) - goto out; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } - depth = ext_depth(inode); - ex = path[depth].p_ext; - if (!ex) { - EXT4_ERROR_INODE(inode, "unexpected hole at %lu", - (unsigned long) map->m_lblk); - err = -EIO; - goto out; - } - } - - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; - /* first mark the extent as unwritten */ - ext4_ext_mark_unwritten(ex); - - /* note: ext4_ext_correct_indexes() isn't needed here because - * borders are not changed - */ - ext4_ext_try_to_merge(handle, inode, path, ex); - - /* Mark modified extent as dirty */ - err = ext4_ext_dirty(handle, inode, path + path->p_depth); -out: - ext4_ext_show_leaf(inode, path); - return err; + return ext4_split_extent(handle, inode, ppath, map, split_flag, flags); } - static int ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, - struct ext4_ext_path *path) + struct ext4_ext_path **ppath) { + struct ext4_ext_path *path = *ppath; struct ext4_extent *ex; ext4_lblk_t ee_block; unsigned int ee_len; @@ -3761,16 +3716,13 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, inode->i_ino, (unsigned long long)ee_block, ee_len, (unsigned long long)map->m_lblk, map->m_len); #endif - err = ext4_split_convert_extents(handle, inode, map, path, + err = ext4_split_convert_extents(handle, inode, map, ppath, EXT4_GET_BLOCKS_CONVERT); if (err < 0) - goto out; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } + return err; + path = ext4_find_extent(inode, map->m_lblk, ppath, 0); + if (IS_ERR(path)) + return PTR_ERR(path); depth = ext_depth(inode); ex = path[depth].p_ext; } @@ -3963,12 +3915,16 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, } static int -ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, - struct ext4_map_blocks *map, - struct ext4_ext_path *path, int flags, - unsigned int allocated, ext4_fsblk_t newblock) +convert_initialized_extent(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + struct ext4_ext_path **ppath, int flags, + unsigned int allocated, ext4_fsblk_t newblock) { - int ret = 0; + struct ext4_ext_path *path = *ppath; + struct ext4_extent *ex; + ext4_lblk_t ee_block; + unsigned int ee_len; + int depth; int err = 0; /* @@ -3978,28 +3934,67 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, if (map->m_len > EXT_UNWRITTEN_MAX_LEN) map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; - ret = ext4_convert_initialized_extents(handle, inode, map, - path); - if (ret >= 0) { - ext4_update_inode_fsync_trans(handle, inode, 1); - err = check_eofblocks_fl(handle, inode, map->m_lblk, - path, map->m_len); - } else - err = ret; + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + + ext_debug("%s: inode %lu, logical" + "block %llu, max_blocks %u\n", __func__, inode->i_ino, + (unsigned long long)ee_block, ee_len); + + if (ee_block != map->m_lblk || ee_len > map->m_len) { + err = ext4_split_convert_extents(handle, inode, map, ppath, + EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); + if (err < 0) + return err; + path = ext4_find_extent(inode, map->m_lblk, ppath, 0); + if (IS_ERR(path)) + return PTR_ERR(path); + depth = ext_depth(inode); + ex = path[depth].p_ext; + if (!ex) { + EXT4_ERROR_INODE(inode, "unexpected hole at %lu", + (unsigned long) map->m_lblk); + return -EIO; + } + } + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + return err; + /* first mark the extent as unwritten */ + ext4_ext_mark_unwritten(ex); + + /* note: ext4_ext_correct_indexes() isn't needed here because + * borders are not changed + */ + ext4_ext_try_to_merge(handle, inode, path, ex); + + /* Mark modified extent as dirty */ + err = ext4_ext_dirty(handle, inode, path + path->p_depth); + if (err) + return err; + ext4_ext_show_leaf(inode, path); + + ext4_update_inode_fsync_trans(handle, inode, 1); + err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len); + if (err) + return err; map->m_flags |= EXT4_MAP_UNWRITTEN; if (allocated > map->m_len) allocated = map->m_len; map->m_len = allocated; - - return err ? err : allocated; + return allocated; } static int ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, - struct ext4_ext_path *path, int flags, + struct ext4_ext_path **ppath, int flags, unsigned int allocated, ext4_fsblk_t newblock) { + struct ext4_ext_path *path = *ppath; int ret = 0; int err = 0; ext4_io_end_t *io = ext4_inode_aio(inode); @@ -4021,8 +4016,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, /* get_block() before submit the IO, split the extent */ if (flags & EXT4_GET_BLOCKS_PRE_IO) { - ret = ext4_split_convert_extents(handle, inode, map, - path, flags | EXT4_GET_BLOCKS_CONVERT); + ret = ext4_split_convert_extents(handle, inode, map, ppath, + flags | EXT4_GET_BLOCKS_CONVERT); if (ret <= 0) goto out; /* @@ -4040,7 +4035,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, /* IO end_io complete, convert the filled extent to written */ if (flags & EXT4_GET_BLOCKS_CONVERT) { ret = ext4_convert_unwritten_extents_endio(handle, inode, map, - path); + ppath); if (ret >= 0) { ext4_update_inode_fsync_trans(handle, inode, 1); err = check_eofblocks_fl(handle, inode, map->m_lblk, @@ -4078,7 +4073,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, } /* buffered write, writepage time, convert*/ - ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags); + ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags); if (ret >= 0) ext4_update_inode_fsync_trans(handle, inode, 1); out: @@ -4279,7 +4274,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* find extent for this block */ - path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0); + path = ext4_find_extent(inode, map->m_lblk, NULL, 0); if (IS_ERR(path)) { err = PTR_ERR(path); path = NULL; @@ -4291,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, /* * consistent leaf must not be empty; * this situation is possible, though, _during_ tree modification; - * this is why assert can't be put in ext4_ext_find_extent() + * this is why assert can't be put in ext4_find_extent() */ if (unlikely(path[depth].p_ext == NULL && depth != 0)) { EXT4_ERROR_INODE(inode, "bad extent address " @@ -4331,15 +4326,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, */ if ((!ext4_ext_is_unwritten(ex)) && (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { - allocated = ext4_ext_convert_initialized_extent( - handle, inode, map, path, flags, - allocated, newblock); + allocated = convert_initialized_extent( + handle, inode, map, &path, + flags, allocated, newblock); goto out2; } else if (!ext4_ext_is_unwritten(ex)) goto out; ret = ext4_ext_handle_unwritten_extents( - handle, inode, map, path, flags, + handle, inode, map, &path, flags, allocated, newblock); if (ret < 0) err = ret; @@ -4376,7 +4371,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, /* * If we are doing bigalloc, check to see if the extent returned - * by ext4_ext_find_extent() implies a cluster we can use. + * by ext4_find_extent() implies a cluster we can use. */ if (cluster_offset && ex && get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { @@ -4451,6 +4446,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ar.flags = 0; if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) ar.flags |= EXT4_MB_HINT_NOPREALLOC; + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ar.flags |= EXT4_MB_DELALLOC_RESERVED; newblock = ext4_mb_new_blocks(handle, &ar, &err); if (!newblock) goto out2; @@ -4486,7 +4483,7 @@ got_allocated_blocks: err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); if (!err) - err = ext4_ext_insert_extent(handle, inode, path, + err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags); if (!err && set_unwritten) { @@ -4619,10 +4616,8 @@ out: map->m_pblk = newblock; map->m_len = allocated; out2: - if (path) { - ext4_ext_drop_refs(path); - kfree(path); - } + ext4_ext_drop_refs(path); + kfree(path); trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); @@ -4799,7 +4794,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, max_blocks -= lblk; flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | - EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; + EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | + EXT4_EX_NOCACHE; if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; @@ -4837,15 +4833,21 @@ static long ext4_zero_range(struct file *file, loff_t offset, ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, + flags, mode); + if (ret) + goto out_dio; /* * Remove entire range from the extent status tree. + * + * ext4_es_remove_extent(inode, lblk, max_blocks) is + * NOT sufficient. I'm not sure why this is the case, + * but let's be conservative and remove the extent + * status tree for the entire inode. There should be + * no outstanding delalloc extents thanks to the + * filemap_write_and_wait_range() call above. */ - ret = ext4_es_remove_extent(inode, lblk, max_blocks); - if (ret) - goto out_dio; - - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, - flags, mode); + ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); if (ret) goto out_dio; } @@ -5304,36 +5306,31 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, struct ext4_ext_path *path; int ret = 0, depth; struct ext4_extent *extent; - ext4_lblk_t stop_block, current_block; + ext4_lblk_t stop_block; ext4_lblk_t ex_start, ex_end; /* Let path point to the last extent */ - path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); + path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; extent = path[depth].p_ext; - if (!extent) { - ext4_ext_drop_refs(path); - kfree(path); - return ret; - } + if (!extent) + goto out; stop_block = le32_to_cpu(extent->ee_block) + ext4_ext_get_actual_len(extent); - ext4_ext_drop_refs(path); - kfree(path); /* Nothing to shift, if hole is at the end of file */ if (start >= stop_block) - return ret; + goto out; /* * Don't start shifting extents until we make sure the hole is big * enough to accomodate the shift. */ - path = ext4_ext_find_extent(inode, start - 1, NULL, 0); + path = ext4_find_extent(inode, start - 1, &path, 0); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; @@ -5346,8 +5343,6 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ex_start = 0; ex_end = 0; } - ext4_ext_drop_refs(path); - kfree(path); if ((start == ex_start && shift > ex_start) || (shift > start - ex_end)) @@ -5355,7 +5350,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, /* Its safe to start updating extents */ while (start < stop_block) { - path = ext4_ext_find_extent(inode, start, NULL, 0); + path = ext4_find_extent(inode, start, &path, 0); if (IS_ERR(path)) return PTR_ERR(path); depth = path->p_depth; @@ -5365,27 +5360,23 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, (unsigned long) start); return -EIO; } - - current_block = le32_to_cpu(extent->ee_block); - if (start > current_block) { + if (start > le32_to_cpu(extent->ee_block)) { /* Hole, move to the next extent */ - ret = mext_next_extent(inode, path, &extent); - if (ret != 0) { - ext4_ext_drop_refs(path); - kfree(path); - if (ret == 1) - ret = 0; - break; + if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) { + path[depth].p_ext++; + } else { + start = ext4_ext_next_allocated_block(path); + continue; } } ret = ext4_ext_shift_path_extents(path, shift, inode, handle, &start); - ext4_ext_drop_refs(path); - kfree(path); if (ret) break; } - +out: + ext4_ext_drop_refs(path); + kfree(path); return ret; } @@ -5508,3 +5499,199 @@ out_mutex: mutex_unlock(&inode->i_mutex); return ret; } + +/** + * ext4_swap_extents - Swap extents between two inodes + * + * @inode1: First inode + * @inode2: Second inode + * @lblk1: Start block for first inode + * @lblk2: Start block for second inode + * @count: Number of blocks to swap + * @mark_unwritten: Mark second inode's extents as unwritten after swap + * @erp: Pointer to save error value + * + * This helper routine does exactly what is promise "swap extents". All other + * stuff such as page-cache locking consistency, bh mapping consistency or + * extent's data copying must be performed by caller. + * Locking: + * i_mutex is held for both inodes + * i_data_sem is locked for write for both inodes + * Assumptions: + * All pages from requested range are locked for both inodes + */ +int +ext4_swap_extents(handle_t *handle, struct inode *inode1, + struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2, + ext4_lblk_t count, int unwritten, int *erp) +{ + struct ext4_ext_path *path1 = NULL; + struct ext4_ext_path *path2 = NULL; + int replaced_count = 0; + + BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem)); + BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem)); + BUG_ON(!mutex_is_locked(&inode1->i_mutex)); + BUG_ON(!mutex_is_locked(&inode1->i_mutex)); + + *erp = ext4_es_remove_extent(inode1, lblk1, count); + if (unlikely(*erp)) + return 0; + *erp = ext4_es_remove_extent(inode2, lblk2, count); + if (unlikely(*erp)) + return 0; + + while (count) { + struct ext4_extent *ex1, *ex2, tmp_ex; + ext4_lblk_t e1_blk, e2_blk; + int e1_len, e2_len, len; + int split = 0; + + path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE); + if (unlikely(IS_ERR(path1))) { + *erp = PTR_ERR(path1); + path1 = NULL; + finish: + count = 0; + goto repeat; + } + path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE); + if (unlikely(IS_ERR(path2))) { + *erp = PTR_ERR(path2); + path2 = NULL; + goto finish; + } + ex1 = path1[path1->p_depth].p_ext; + ex2 = path2[path2->p_depth].p_ext; + /* Do we have somthing to swap ? */ + if (unlikely(!ex2 || !ex1)) + goto finish; + + e1_blk = le32_to_cpu(ex1->ee_block); + e2_blk = le32_to_cpu(ex2->ee_block); + e1_len = ext4_ext_get_actual_len(ex1); + e2_len = ext4_ext_get_actual_len(ex2); + + /* Hole handling */ + if (!in_range(lblk1, e1_blk, e1_len) || + !in_range(lblk2, e2_blk, e2_len)) { + ext4_lblk_t next1, next2; + + /* if hole after extent, then go to next extent */ + next1 = ext4_ext_next_allocated_block(path1); + next2 = ext4_ext_next_allocated_block(path2); + /* If hole before extent, then shift to that extent */ + if (e1_blk > lblk1) + next1 = e1_blk; + if (e2_blk > lblk2) + next2 = e1_blk; + /* Do we have something to swap */ + if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS) + goto finish; + /* Move to the rightest boundary */ + len = next1 - lblk1; + if (len < next2 - lblk2) + len = next2 - lblk2; + if (len > count) + len = count; + lblk1 += len; + lblk2 += len; + count -= len; + goto repeat; + } + + /* Prepare left boundary */ + if (e1_blk < lblk1) { + split = 1; + *erp = ext4_force_split_extent_at(handle, inode1, + &path1, lblk1, 0); + if (unlikely(*erp)) + goto finish; + } + if (e2_blk < lblk2) { + split = 1; + *erp = ext4_force_split_extent_at(handle, inode2, + &path2, lblk2, 0); + if (unlikely(*erp)) + goto finish; + } + /* ext4_split_extent_at() may result in leaf extent split, + * path must to be revalidated. */ + if (split) + goto repeat; + + /* Prepare right boundary */ + len = count; + if (len > e1_blk + e1_len - lblk1) + len = e1_blk + e1_len - lblk1; + if (len > e2_blk + e2_len - lblk2) + len = e2_blk + e2_len - lblk2; + + if (len != e1_len) { + split = 1; + *erp = ext4_force_split_extent_at(handle, inode1, + &path1, lblk1 + len, 0); + if (unlikely(*erp)) + goto finish; + } + if (len != e2_len) { + split = 1; + *erp = ext4_force_split_extent_at(handle, inode2, + &path2, lblk2 + len, 0); + if (*erp) + goto finish; + } + /* ext4_split_extent_at() may result in leaf extent split, + * path must to be revalidated. */ + if (split) + goto repeat; + + BUG_ON(e2_len != e1_len); + *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth); + if (unlikely(*erp)) + goto finish; + *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth); + if (unlikely(*erp)) + goto finish; + + /* Both extents are fully inside boundaries. Swap it now */ + tmp_ex = *ex1; + ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2)); + ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex)); + ex1->ee_len = cpu_to_le16(e2_len); + ex2->ee_len = cpu_to_le16(e1_len); + if (unwritten) + ext4_ext_mark_unwritten(ex2); + if (ext4_ext_is_unwritten(&tmp_ex)) + ext4_ext_mark_unwritten(ex1); + + ext4_ext_try_to_merge(handle, inode2, path2, ex2); + ext4_ext_try_to_merge(handle, inode1, path1, ex1); + *erp = ext4_ext_dirty(handle, inode2, path2 + + path2->p_depth); + if (unlikely(*erp)) + goto finish; + *erp = ext4_ext_dirty(handle, inode1, path1 + + path1->p_depth); + /* + * Looks scarry ah..? second inode already points to new blocks, + * and it was successfully dirtied. But luckily error may happen + * only due to journal error, so full transaction will be + * aborted anyway. + */ + if (unlikely(*erp)) + goto finish; + lblk1 += len; + lblk2 += len; + replaced_count += len; + count -= len; + + repeat: + ext4_ext_drop_refs(path1); + kfree(path1); + ext4_ext_drop_refs(path2); + kfree(path2); + path1 = path2 = NULL; + } + return replaced_count; +} diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 0b7e28e7eaa4..94e7855ae71b 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -11,6 +11,8 @@ */ #include <linux/rbtree.h> #include <linux/list_sort.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> #include "ext4.h" #include "extents_status.h" @@ -313,19 +315,27 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, */ if (!ext4_es_is_delayed(es)) { EXT4_I(inode)->i_es_lru_nr++; - percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); + percpu_counter_inc(&EXT4_SB(inode->i_sb)-> + s_es_stats.es_stats_lru_cnt); } + EXT4_I(inode)->i_es_all_nr++; + percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); + return es; } static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) { + EXT4_I(inode)->i_es_all_nr--; + percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); + /* Decrease the lru counter when this es is not delayed */ if (!ext4_es_is_delayed(es)) { BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); EXT4_I(inode)->i_es_lru_nr--; - percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); + percpu_counter_dec(&EXT4_SB(inode->i_sb)-> + s_es_stats.es_stats_lru_cnt); } kmem_cache_free(ext4_es_cachep, es); @@ -426,7 +436,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, unsigned short ee_len; int depth, ee_status, es_status; - path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); + path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); if (IS_ERR(path)) return; @@ -499,10 +509,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, } } out: - if (path) { - ext4_ext_drop_refs(path); - kfree(path); - } + ext4_ext_drop_refs(path); + kfree(path); } static void ext4_es_insert_extent_ind_check(struct inode *inode, @@ -731,6 +739,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status *es) { struct ext4_es_tree *tree; + struct ext4_es_stats *stats; struct extent_status *es1 = NULL; struct rb_node *node; int found = 0; @@ -767,11 +776,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, } out: + stats = &EXT4_SB(inode->i_sb)->s_es_stats; if (found) { BUG_ON(!es1); es->es_lblk = es1->es_lblk; es->es_len = es1->es_len; es->es_pblk = es1->es_pblk; + stats->es_stats_cache_hits++; + } else { + stats->es_stats_cache_misses++; } read_unlock(&EXT4_I(inode)->i_es_lock); @@ -933,11 +946,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, struct ext4_inode_info *locked_ei) { struct ext4_inode_info *ei; + struct ext4_es_stats *es_stats; struct list_head *cur, *tmp; LIST_HEAD(skipped); + ktime_t start_time; + u64 scan_time; int nr_shrunk = 0; int retried = 0, skip_precached = 1, nr_skipped = 0; + es_stats = &sbi->s_es_stats; + start_time = ktime_get(); spin_lock(&sbi->s_es_lru_lock); retry: @@ -948,7 +966,8 @@ retry: * If we have already reclaimed all extents from extent * status tree, just stop the loop immediately. */ - if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) + if (percpu_counter_read_positive( + &es_stats->es_stats_lru_cnt) == 0) break; ei = list_entry(cur, struct ext4_inode_info, i_es_lru); @@ -958,7 +977,7 @@ retry: * time. Normally we try hard to avoid shrinking * precached inodes, but we will as a last resort. */ - if ((sbi->s_es_last_sorted < ei->i_touch_when) || + if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || (skip_precached && ext4_test_inode_state(&ei->vfs_inode, EXT4_STATE_EXT_PRECACHED))) { nr_skipped++; @@ -992,7 +1011,7 @@ retry: if ((nr_shrunk == 0) && nr_skipped && !retried) { retried++; list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); - sbi->s_es_last_sorted = jiffies; + es_stats->es_stats_last_sorted = jiffies; ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru); /* @@ -1010,6 +1029,22 @@ retry: if (locked_ei && nr_shrunk == 0) nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); + scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); + if (likely(es_stats->es_stats_scan_time)) + es_stats->es_stats_scan_time = (scan_time + + es_stats->es_stats_scan_time*3) / 4; + else + es_stats->es_stats_scan_time = scan_time; + if (scan_time > es_stats->es_stats_max_scan_time) + es_stats->es_stats_max_scan_time = scan_time; + if (likely(es_stats->es_stats_shrunk)) + es_stats->es_stats_shrunk = (nr_shrunk + + es_stats->es_stats_shrunk*3) / 4; + else + es_stats->es_stats_shrunk = nr_shrunk; + + trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached, + nr_skipped, retried); return nr_shrunk; } @@ -1020,8 +1055,8 @@ static unsigned long ext4_es_count(struct shrinker *shrink, struct ext4_sb_info *sbi; sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); - nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); - trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); + nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); + trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr); return nr; } @@ -1033,31 +1068,160 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, int nr_to_scan = sc->nr_to_scan; int ret, nr_shrunk; - ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); - trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); + ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); + trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); if (!nr_to_scan) return ret; nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); - trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); + trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); return nr_shrunk; } -void ext4_es_register_shrinker(struct ext4_sb_info *sbi) +static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos) { + return *pos ? NULL : SEQ_START_TOKEN; +} + +static void * +ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos) +{ + return NULL; +} + +static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) +{ + struct ext4_sb_info *sbi = seq->private; + struct ext4_es_stats *es_stats = &sbi->s_es_stats; + struct ext4_inode_info *ei, *max = NULL; + unsigned int inode_cnt = 0; + + if (v != SEQ_START_TOKEN) + return 0; + + /* here we just find an inode that has the max nr. of objects */ + spin_lock(&sbi->s_es_lru_lock); + list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { + inode_cnt++; + if (max && max->i_es_all_nr < ei->i_es_all_nr) + max = ei; + else if (!max) + max = ei; + } + spin_unlock(&sbi->s_es_lru_lock); + + seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", + percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), + percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); + seq_printf(seq, " %lu/%lu cache hits/misses\n", + es_stats->es_stats_cache_hits, + es_stats->es_stats_cache_misses); + if (es_stats->es_stats_last_sorted != 0) + seq_printf(seq, " %u ms last sorted interval\n", + jiffies_to_msecs(jiffies - + es_stats->es_stats_last_sorted)); + if (inode_cnt) + seq_printf(seq, " %d inodes on lru list\n", inode_cnt); + + seq_printf(seq, "average:\n %llu us scan time\n", + div_u64(es_stats->es_stats_scan_time, 1000)); + seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk); + if (inode_cnt) + seq_printf(seq, + "maximum:\n %lu inode (%u objects, %u reclaimable)\n" + " %llu us max scan time\n", + max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, + div_u64(es_stats->es_stats_max_scan_time, 1000)); + + return 0; +} + +static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v) +{ +} + +static const struct seq_operations ext4_es_seq_shrinker_info_ops = { + .start = ext4_es_seq_shrinker_info_start, + .next = ext4_es_seq_shrinker_info_next, + .stop = ext4_es_seq_shrinker_info_stop, + .show = ext4_es_seq_shrinker_info_show, +}; + +static int +ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &ext4_es_seq_shrinker_info_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = PDE_DATA(inode); + } + + return ret; +} + +static int +ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file) +{ + return seq_release(inode, file); +} + +static const struct file_operations ext4_es_seq_shrinker_info_fops = { + .owner = THIS_MODULE, + .open = ext4_es_seq_shrinker_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = ext4_es_seq_shrinker_info_release, +}; + +int ext4_es_register_shrinker(struct ext4_sb_info *sbi) +{ + int err; + INIT_LIST_HEAD(&sbi->s_es_lru); spin_lock_init(&sbi->s_es_lru_lock); - sbi->s_es_last_sorted = 0; + sbi->s_es_stats.es_stats_last_sorted = 0; + sbi->s_es_stats.es_stats_shrunk = 0; + sbi->s_es_stats.es_stats_cache_hits = 0; + sbi->s_es_stats.es_stats_cache_misses = 0; + sbi->s_es_stats.es_stats_scan_time = 0; + sbi->s_es_stats.es_stats_max_scan_time = 0; + err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL); + if (err) + return err; + err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL); + if (err) + goto err1; + sbi->s_es_shrinker.scan_objects = ext4_es_scan; sbi->s_es_shrinker.count_objects = ext4_es_count; sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; - register_shrinker(&sbi->s_es_shrinker); + err = register_shrinker(&sbi->s_es_shrinker); + if (err) + goto err2; + + if (sbi->s_proc) + proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc, + &ext4_es_seq_shrinker_info_fops, sbi); + + return 0; + +err2: + percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); +err1: + percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); + return err; } void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) { + if (sbi->s_proc) + remove_proc_entry("es_shrinker_info", sbi->s_proc); + percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); + percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); unregister_shrinker(&sbi->s_es_shrinker); } diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f1b62a419920..efd5f970b501 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -64,6 +64,17 @@ struct ext4_es_tree { struct extent_status *cache_es; /* recently accessed extent */ }; +struct ext4_es_stats { + unsigned long es_stats_last_sorted; + unsigned long es_stats_shrunk; + unsigned long es_stats_cache_hits; + unsigned long es_stats_cache_misses; + u64 es_stats_scan_time; + u64 es_stats_max_scan_time; + struct percpu_counter es_stats_all_cnt; + struct percpu_counter es_stats_lru_cnt; +}; + extern int __init ext4_init_es(void); extern void ext4_exit_es(void); extern void ext4_es_init_tree(struct ext4_es_tree *tree); @@ -138,7 +149,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, (pb & ~ES_MASK)); } -extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); +extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); extern void ext4_es_lru_add(struct inode *inode); extern void ext4_es_lru_del(struct inode *inode); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 5b87fc36aab8..8012a5daf401 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1011,8 +1011,7 @@ got: spin_unlock(&sbi->s_next_gen_lock); /* Precompute checksum seed for inode metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_metadata_csum(sb)) { __u32 csum; __le32 inum = cpu_to_le32(inode->i_ino); __le32 gen = cpu_to_le32(inode->i_generation); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index e75f840000a0..36b369697a13 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -318,34 +318,24 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain * as described above and return 0. */ -static int ext4_alloc_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, int indirect_blks, - int *blks, ext4_fsblk_t goal, - ext4_lblk_t *offsets, Indirect *branch) +static int ext4_alloc_branch(handle_t *handle, + struct ext4_allocation_request *ar, + int indirect_blks, ext4_lblk_t *offsets, + Indirect *branch) { - struct ext4_allocation_request ar; struct buffer_head * bh; ext4_fsblk_t b, new_blocks[4]; __le32 *p; int i, j, err, len = 1; - /* - * Set up for the direct block allocation - */ - memset(&ar, 0, sizeof(ar)); - ar.inode = inode; - ar.len = *blks; - ar.logical = iblock; - if (S_ISREG(inode->i_mode)) - ar.flags = EXT4_MB_HINT_DATA; - for (i = 0; i <= indirect_blks; i++) { if (i == indirect_blks) { - ar.goal = goal; - new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err); + new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err); } else - goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode, - goal, 0, NULL, &err); + ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle, + ar->inode, ar->goal, + ar->flags & EXT4_MB_DELALLOC_RESERVED, + NULL, &err); if (err) { i--; goto failed; @@ -354,7 +344,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, if (i == 0) continue; - bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]); + bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]); if (unlikely(!bh)) { err = -ENOMEM; goto failed; @@ -372,7 +362,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, b = new_blocks[i]; if (i == indirect_blks) - len = ar.len; + len = ar->len; for (j = 0; j < len; j++) *p++ = cpu_to_le32(b++); @@ -381,11 +371,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, unlock_buffer(bh); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); + err = ext4_handle_dirty_metadata(handle, ar->inode, bh); if (err) goto failed; } - *blks = ar.len; return 0; failed: for (; i >= 0; i--) { @@ -396,10 +385,10 @@ failed: * existing before ext4_alloc_branch() was called. */ if (i > 0 && i != indirect_blks && branch[i].bh) - ext4_forget(handle, 1, inode, branch[i].bh, + ext4_forget(handle, 1, ar->inode, branch[i].bh, branch[i].bh->b_blocknr); - ext4_free_blocks(handle, inode, NULL, new_blocks[i], - (i == indirect_blks) ? ar.len : 1, 0); + ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i], + (i == indirect_blks) ? ar->len : 1, 0); } return err; } @@ -419,9 +408,9 @@ failed: * inode (->i_blocks, etc.). In case of success we end up with the full * chain to new block and return 0. */ -static int ext4_splice_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t block, Indirect *where, int num, - int blks) +static int ext4_splice_branch(handle_t *handle, + struct ext4_allocation_request *ar, + Indirect *where, int num) { int i; int err = 0; @@ -446,9 +435,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, * Update the host buffer_head or inode to point to more just allocated * direct blocks blocks */ - if (num == 0 && blks > 1) { + if (num == 0 && ar->len > 1) { current_block = le32_to_cpu(where->key) + 1; - for (i = 1; i < blks; i++) + for (i = 1; i < ar->len; i++) *(where->p + i) = cpu_to_le32(current_block++); } @@ -465,14 +454,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, */ jbd_debug(5, "splicing indirect only\n"); BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, where->bh); + err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh); if (err) goto err_out; } else { /* * OK, we spliced it into the inode itself on a direct block. */ - ext4_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, ar->inode); jbd_debug(5, "splicing direct\n"); } return err; @@ -484,11 +473,11 @@ err_out: * need to revoke the block, which is why we don't * need to set EXT4_FREE_BLOCKS_METADATA. */ - ext4_free_blocks(handle, inode, where[i].bh, 0, 1, + ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1, EXT4_FREE_BLOCKS_FORGET); } - ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), - blks, 0); + ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key), + ar->len, 0); return err; } @@ -525,11 +514,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags) { + struct ext4_allocation_request ar; int err = -EIO; ext4_lblk_t offsets[4]; Indirect chain[4]; Indirect *partial; - ext4_fsblk_t goal; int indirect_blks; int blocks_to_boundary = 0; int depth; @@ -579,7 +568,16 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, return -ENOSPC; } - goal = ext4_find_goal(inode, map->m_lblk, partial); + /* Set up for the direct block allocation */ + memset(&ar, 0, sizeof(ar)); + ar.inode = inode; + ar.logical = map->m_lblk; + if (S_ISREG(inode->i_mode)) + ar.flags = EXT4_MB_HINT_DATA; + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ar.flags |= EXT4_MB_DELALLOC_RESERVED; + + ar.goal = ext4_find_goal(inode, map->m_lblk, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -588,13 +586,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, * Next look up the indirect map to count the totoal number of * direct blocks to allocate for this branch. */ - count = ext4_blks_to_allocate(partial, indirect_blks, - map->m_len, blocks_to_boundary); + ar.len = ext4_blks_to_allocate(partial, indirect_blks, + map->m_len, blocks_to_boundary); + /* * Block out ext4_truncate while we alter the tree */ - err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, - &count, goal, + err = ext4_alloc_branch(handle, &ar, indirect_blks, offsets + (partial - chain), partial); /* @@ -605,14 +603,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, * may need to return -EAGAIN upwards in the worst case. --sct */ if (!err) - err = ext4_splice_branch(handle, inode, map->m_lblk, - partial, indirect_blks, count); + err = ext4_splice_branch(handle, &ar, partial, indirect_blks); if (err) goto cleanup; map->m_flags |= EXT4_MAP_NEW; ext4_update_inode_fsync_trans(handle, inode, 1); + count = ar.len; got_it: map->m_flags |= EXT4_MAP_MAPPED; map->m_pblk = le32_to_cpu(chain[depth-1].key); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index bea662bd0ca6..3ea62695abce 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -594,6 +594,7 @@ retry: if (ret) { unlock_page(page); page_cache_release(page); + page = NULL; ext4_orphan_add(handle, inode); up_write(&EXT4_I(inode)->xattr_sem); sem_held = 0; @@ -613,7 +614,8 @@ retry: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; - block_commit_write(page, from, to); + if (page) + block_commit_write(page, from, to); out: if (page) { unlock_page(page); @@ -1126,8 +1128,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE, inline_size - EXT4_INLINE_DOTDOT_SIZE); - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); inode->i_size = inode->i_sb->s_blocksize; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3aa26e9117c4..e9777f93cf05 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -83,8 +83,7 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != cpu_to_le32(EXT4_OS_LINUX) || - !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + !ext4_has_metadata_csum(inode->i_sb)) return 1; provided = le16_to_cpu(raw->i_checksum_lo); @@ -105,8 +104,7 @@ static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != cpu_to_le32(EXT4_OS_LINUX) || - !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + !ext4_has_metadata_csum(inode->i_sb)) return; csum = ext4_inode_csum(inode, raw, ei); @@ -224,16 +222,15 @@ void ext4_evict_inode(struct inode *inode) goto no_delete; } - if (!is_bad_inode(inode)) - dquot_initialize(inode); + if (is_bad_inode(inode)) + goto no_delete; + dquot_initialize(inode); if (ext4_should_order_data(inode)) ext4_begin_ordered_truncate(inode, 0); truncate_inode_pages_final(&inode->i_data); WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); - if (is_bad_inode(inode)) - goto no_delete; /* * Protect us against freezing - iput() caller didn't have to have any @@ -590,20 +587,12 @@ found: /* * New blocks allocate and/or writing to unwritten extent * will possibly result in updating i_data, so we take - * the write lock of i_data_sem, and call get_blocks() + * the write lock of i_data_sem, and call get_block() * with create == 1 flag. */ down_write(&EXT4_I(inode)->i_data_sem); /* - * if the caller is from delayed allocation writeout path - * we have already reserved fs blocks for allocation - * let the underlying get_block() function know to - * avoid double accounting - */ - if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) - ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); - /* * We need to check for EXT4 here because migrate * could have changed the inode type in between */ @@ -631,8 +620,6 @@ found: (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) ext4_da_update_reserve_space(inode, retval, 1); } - if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) - ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); if (retval > 0) { unsigned int status; @@ -734,11 +721,11 @@ int ext4_get_block(struct inode *inode, sector_t iblock, * `handle' can be NULL if create is zero */ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, - ext4_lblk_t block, int create, int *errp) + ext4_lblk_t block, int create) { struct ext4_map_blocks map; struct buffer_head *bh; - int fatal = 0, err; + int err; J_ASSERT(handle != NULL || create == 0); @@ -747,21 +734,14 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, err = ext4_map_blocks(handle, inode, &map, create ? EXT4_GET_BLOCKS_CREATE : 0); - /* ensure we send some value back into *errp */ - *errp = 0; - - if (create && err == 0) - err = -ENOSPC; /* should never happen */ + if (err == 0) + return create ? ERR_PTR(-ENOSPC) : NULL; if (err < 0) - *errp = err; - if (err <= 0) - return NULL; + return ERR_PTR(err); bh = sb_getblk(inode->i_sb, map.m_pblk); - if (unlikely(!bh)) { - *errp = -ENOMEM; - return NULL; - } + if (unlikely(!bh)) + return ERR_PTR(-ENOMEM); if (map.m_flags & EXT4_MAP_NEW) { J_ASSERT(create != 0); J_ASSERT(handle != NULL); @@ -775,44 +755,44 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, */ lock_buffer(bh); BUFFER_TRACE(bh, "call get_create_access"); - fatal = ext4_journal_get_create_access(handle, bh); - if (!fatal && !buffer_uptodate(bh)) { + err = ext4_journal_get_create_access(handle, bh); + if (unlikely(err)) { + unlock_buffer(bh); + goto errout; + } + if (!buffer_uptodate(bh)) { memset(bh->b_data, 0, inode->i_sb->s_blocksize); set_buffer_uptodate(bh); } unlock_buffer(bh); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, inode, bh); - if (!fatal) - fatal = err; - } else { + if (unlikely(err)) + goto errout; + } else BUFFER_TRACE(bh, "not a new buffer"); - } - if (fatal) { - *errp = fatal; - brelse(bh); - bh = NULL; - } return bh; +errout: + brelse(bh); + return ERR_PTR(err); } struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, - ext4_lblk_t block, int create, int *err) + ext4_lblk_t block, int create) { struct buffer_head *bh; - bh = ext4_getblk(handle, inode, block, create, err); - if (!bh) + bh = ext4_getblk(handle, inode, block, create); + if (IS_ERR(bh)) return bh; - if (buffer_uptodate(bh)) + if (!bh || buffer_uptodate(bh)) return bh; ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; put_bh(bh); - *err = -EIO; - return NULL; + return ERR_PTR(-EIO); } int ext4_walk_page_buffers(handle_t *handle, @@ -1536,7 +1516,7 @@ out_unlock: } /* - * This is a special get_blocks_t callback which is used by + * This is a special get_block_t callback which is used by * ext4_da_write_begin(). It will either return mapped block or * reserve space for a single block. * @@ -2011,12 +1991,10 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) * in data loss. So use reserved blocks to allocate metadata if * possible. * - * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks - * in question are delalloc blocks. This affects functions in many - * different parts of the allocation call path. This flag exists - * primarily because we don't want to change *many* call functions, so - * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag - * once the inode's allocation semaphore is taken. + * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if + * the blocks in question are delalloc blocks. This indicates + * that the blocks and quotas has already been checked when + * the data was copied into the page cache. */ get_blocks_flags = EXT4_GET_BLOCKS_CREATE | EXT4_GET_BLOCKS_METADATA_NOFAIL; @@ -2515,6 +2493,20 @@ static int ext4_nonda_switch(struct super_block *sb) return 0; } +/* We always reserve for an inode update; the superblock could be there too */ +static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len) +{ + if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_LARGE_FILE))) + return 1; + + if (pos + len <= 0x7fffffffULL) + return 1; + + /* We might need to update the superblock to set LARGE_FILE */ + return 2; +} + static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -2565,7 +2557,8 @@ retry_grab: * of file which has an already mapped buffer. */ retry_journal: - handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1); + handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, + ext4_da_write_credits(inode, pos, len)); if (IS_ERR(handle)) { page_cache_release(page); return PTR_ERR(handle); @@ -2658,10 +2651,7 @@ static int ext4_da_write_end(struct file *file, if (copied && new_i_size > EXT4_I(inode)->i_disksize) { if (ext4_has_inline_data(inode) || ext4_da_should_update_i_disksize(page, end)) { - down_write(&EXT4_I(inode)->i_data_sem); - if (new_i_size > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = new_i_size; - up_write(&EXT4_I(inode)->i_data_sem); + ext4_update_i_disksize(inode, new_i_size); /* We need to mark inode dirty even if * new_i_size is less that inode->i_size * bu greater than i_disksize.(hint delalloc) @@ -3936,8 +3926,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_extra_isize = 0; /* Precompute checksum seed for inode metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_metadata_csum(sb)) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __u32 csum; __le32 inum = cpu_to_le32(inode->i_ino); @@ -4127,6 +4116,13 @@ bad_inode: return ERR_PTR(ret); } +struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) +{ + if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) + return ERR_PTR(-EIO); + return ext4_iget(sb, ino); +} + static int ext4_inode_blocks_set(handle_t *handle, struct ext4_inode *raw_inode, struct ext4_inode_info *ei) @@ -4226,7 +4222,8 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); - if (ext4_inode_blocks_set(handle, raw_inode, ei)) { + err = ext4_inode_blocks_set(handle, raw_inode, ei); + if (err) { spin_unlock(&ei->i_raw_lock); goto out_brelse; } @@ -4536,8 +4533,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_orphan_del(NULL, inode); goto err_out; } - } else + } else { + loff_t oldsize = inode->i_size; + i_size_write(inode, attr->ia_size); + pagecache_isize_extended(inode, oldsize, inode->i_size); + } /* * Blocks are going to be removed from the inode. Wait diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 0f2252ec274d..bfda18a15592 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -331,8 +331,7 @@ flags_out: if (!inode_owner_or_capable(inode)) return -EPERM; - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_metadata_csum(inode->i_sb)) { ext4_warning(sb, "Setting inode version is not " "supported with metadata_csum enabled."); return -ENOTTY; @@ -532,9 +531,17 @@ group_add_out: } case EXT4_IOC_SWAP_BOOT: + { + int err; if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - return swap_inode_boot_loader(sb, inode); + err = mnt_want_write_file(filp); + if (err) + return err; + err = swap_inode_boot_loader(sb, inode); + mnt_drop_write_file(filp); + return err; + } case EXT4_IOC_RESIZE_FS: { ext4_fsblk_t n_blocks_count; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 748c9136a60a..dbfe15c2533c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3155,9 +3155,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, "start %lu, size %lu, fe_logical %lu", (unsigned long) start, (unsigned long) size, (unsigned long) ac->ac_o_ex.fe_logical); + BUG(); } - BUG_ON(start + size <= ac->ac_o_ex.fe_logical && - start > ac->ac_o_ex.fe_logical); BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); /* now prepare goal request */ @@ -4410,14 +4409,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, if (IS_NOQUOTA(ar->inode)) ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; - /* - * For delayed allocation, we could skip the ENOSPC and - * EDQUOT check, as blocks and quotas have been already - * reserved when data being copied into pagecache. - */ - if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) - ar->flags |= EXT4_MB_DELALLOC_RESERVED; - else { + if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) { /* Without delayed allocation we need to verify * there is enough free blocks to do block allocation * and verify allocation doesn't exceed the quota limits. @@ -4528,8 +4520,7 @@ out: if (inquota && ar->len < inquota) dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len)); if (!ar->len) { - if (!ext4_test_inode_state(ar->inode, - EXT4_STATE_DELALLOC_RESERVED)) + if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) /* release all the reserved blocks if non delalloc */ percpu_counter_sub(&sbi->s_dirtyclusters_counter, reserv_clstrs); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index d3567f27bae7..a432634f2e6a 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -41,8 +41,7 @@ static int finish_range(handle_t *handle, struct inode *inode, ext4_ext_store_pblock(&newext, lb->first_pblock); /* Locking only for convinience since we are operating on temp inode */ down_write(&EXT4_I(inode)->i_data_sem); - path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0); - + path = ext4_find_extent(inode, lb->first_block, NULL, 0); if (IS_ERR(path)) { retval = PTR_ERR(path); path = NULL; @@ -81,13 +80,11 @@ static int finish_range(handle_t *handle, struct inode *inode, goto err_out; } } - retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0); + retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0); err_out: up_write((&EXT4_I(inode)->i_data_sem)); - if (path) { - ext4_ext_drop_refs(path); - kfree(path); - } + ext4_ext_drop_refs(path); + kfree(path); lb->first_pblock = 0; return retval; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 32bce844c2e1..8313ca3324ec 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -20,8 +20,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 1; return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); @@ -29,8 +28,7 @@ static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return; mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 671a74b14fd7..9f2311bc9c4f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -27,120 +27,26 @@ * @lblock: logical block number to find an extent path * @path: pointer to an extent path pointer (for output) * - * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value + * ext4_find_extent wrapper. Return 0 on success, or a negative error value * on failure. */ static inline int get_ext_path(struct inode *inode, ext4_lblk_t lblock, - struct ext4_ext_path **orig_path) + struct ext4_ext_path **ppath) { - int ret = 0; struct ext4_ext_path *path; - path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE); + path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE); if (IS_ERR(path)) - ret = PTR_ERR(path); - else if (path[ext_depth(inode)].p_ext == NULL) - ret = -ENODATA; - else - *orig_path = path; - - return ret; -} - -/** - * copy_extent_status - Copy the extent's initialization status - * - * @src: an extent for getting initialize status - * @dest: an extent to be set the status - */ -static void -copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) -{ - if (ext4_ext_is_unwritten(src)) - ext4_ext_mark_unwritten(dest); - else - dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest)); -} - -/** - * mext_next_extent - Search for the next extent and set it to "extent" - * - * @inode: inode which is searched - * @path: this will obtain data for the next extent - * @extent: pointer to the next extent we have just gotten - * - * Search the next extent in the array of ext4_ext_path structure (@path) - * and set it to ext4_extent structure (@extent). In addition, the member of - * @path (->p_ext) also points the next extent. Return 0 on success, 1 if - * ext4_ext_path structure refers to the last extent, or a negative error - * value on failure. - */ -int -mext_next_extent(struct inode *inode, struct ext4_ext_path *path, - struct ext4_extent **extent) -{ - struct ext4_extent_header *eh; - int ppos, leaf_ppos = path->p_depth; - - ppos = leaf_ppos; - if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { - /* leaf block */ - *extent = ++path[ppos].p_ext; - path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); - return 0; - } - - while (--ppos >= 0) { - if (EXT_LAST_INDEX(path[ppos].p_hdr) > - path[ppos].p_idx) { - int cur_ppos = ppos; - - /* index block */ - path[ppos].p_idx++; - path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); - if (path[ppos+1].p_bh) - brelse(path[ppos+1].p_bh); - path[ppos+1].p_bh = - sb_bread(inode->i_sb, path[ppos].p_block); - if (!path[ppos+1].p_bh) - return -EIO; - path[ppos+1].p_hdr = - ext_block_hdr(path[ppos+1].p_bh); - - /* Halfway index block */ - while (++cur_ppos < leaf_ppos) { - path[cur_ppos].p_idx = - EXT_FIRST_INDEX(path[cur_ppos].p_hdr); - path[cur_ppos].p_block = - ext4_idx_pblock(path[cur_ppos].p_idx); - if (path[cur_ppos+1].p_bh) - brelse(path[cur_ppos+1].p_bh); - path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, - path[cur_ppos].p_block); - if (!path[cur_ppos+1].p_bh) - return -EIO; - path[cur_ppos+1].p_hdr = - ext_block_hdr(path[cur_ppos+1].p_bh); - } - - path[leaf_ppos].p_ext = *extent = NULL; - - eh = path[leaf_ppos].p_hdr; - if (le16_to_cpu(eh->eh_entries) == 0) - /* empty leaf is found */ - return -ENODATA; - - /* leaf block */ - path[leaf_ppos].p_ext = *extent = - EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); - path[leaf_ppos].p_block = - ext4_ext_pblock(path[leaf_ppos].p_ext); - return 0; - } + return PTR_ERR(path); + if (path[ext_depth(inode)].p_ext == NULL) { + ext4_ext_drop_refs(path); + kfree(path); + *ppath = NULL; + return -ENODATA; } - /* We found the last extent */ - return 1; + *ppath = path; + return 0; } /** @@ -178,417 +84,6 @@ ext4_double_up_write_data_sem(struct inode *orig_inode, } /** - * mext_insert_across_blocks - Insert extents across leaf block - * - * @handle: journal handle - * @orig_inode: original inode - * @o_start: first original extent to be changed - * @o_end: last original extent to be changed - * @start_ext: first new extent to be inserted - * @new_ext: middle of new extent to be inserted - * @end_ext: last new extent to be inserted - * - * Allocate a new leaf block and insert extents into it. Return 0 on success, - * or a negative error value on failure. - */ -static int -mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, - struct ext4_extent *o_start, struct ext4_extent *o_end, - struct ext4_extent *start_ext, struct ext4_extent *new_ext, - struct ext4_extent *end_ext) -{ - struct ext4_ext_path *orig_path = NULL; - ext4_lblk_t eblock = 0; - int new_flag = 0; - int end_flag = 0; - int err = 0; - - if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) { - if (o_start == o_end) { - - /* start_ext new_ext end_ext - * donor |---------|-----------|--------| - * orig |------------------------------| - */ - end_flag = 1; - } else { - - /* start_ext new_ext end_ext - * donor |---------|----------|---------| - * orig |---------------|--------------| - */ - o_end->ee_block = end_ext->ee_block; - o_end->ee_len = end_ext->ee_len; - ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); - } - - o_start->ee_len = start_ext->ee_len; - eblock = le32_to_cpu(start_ext->ee_block); - new_flag = 1; - - } else if (start_ext->ee_len && new_ext->ee_len && - !end_ext->ee_len && o_start == o_end) { - - /* start_ext new_ext - * donor |--------------|---------------| - * orig |------------------------------| - */ - o_start->ee_len = start_ext->ee_len; - eblock = le32_to_cpu(start_ext->ee_block); - new_flag = 1; - - } else if (!start_ext->ee_len && new_ext->ee_len && - end_ext->ee_len && o_start == o_end) { - - /* new_ext end_ext - * donor |--------------|---------------| - * orig |------------------------------| - */ - o_end->ee_block = end_ext->ee_block; - o_end->ee_len = end_ext->ee_len; - ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); - - /* - * Set 0 to the extent block if new_ext was - * the first block. - */ - if (new_ext->ee_block) - eblock = le32_to_cpu(new_ext->ee_block); - - new_flag = 1; - } else { - ext4_debug("ext4 move extent: Unexpected insert case\n"); - return -EIO; - } - - if (new_flag) { - err = get_ext_path(orig_inode, eblock, &orig_path); - if (err) - goto out; - - if (ext4_ext_insert_extent(handle, orig_inode, - orig_path, new_ext, 0)) - goto out; - } - - if (end_flag) { - err = get_ext_path(orig_inode, - le32_to_cpu(end_ext->ee_block) - 1, &orig_path); - if (err) - goto out; - - if (ext4_ext_insert_extent(handle, orig_inode, - orig_path, end_ext, 0)) - goto out; - } -out: - if (orig_path) { - ext4_ext_drop_refs(orig_path); - kfree(orig_path); - } - - return err; - -} - -/** - * mext_insert_inside_block - Insert new extent to the extent block - * - * @o_start: first original extent to be moved - * @o_end: last original extent to be moved - * @start_ext: first new extent to be inserted - * @new_ext: middle of new extent to be inserted - * @end_ext: last new extent to be inserted - * @eh: extent header of target leaf block - * @range_to_move: used to decide how to insert extent - * - * Insert extents into the leaf block. The extent (@o_start) is overwritten - * by inserted extents. - */ -static void -mext_insert_inside_block(struct ext4_extent *o_start, - struct ext4_extent *o_end, - struct ext4_extent *start_ext, - struct ext4_extent *new_ext, - struct ext4_extent *end_ext, - struct ext4_extent_header *eh, - int range_to_move) -{ - int i = 0; - unsigned long len; - - /* Move the existing extents */ - if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) { - len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) - - (unsigned long)(o_end + 1); - memmove(o_end + 1 + range_to_move, o_end + 1, len); - } - - /* Insert start entry */ - if (start_ext->ee_len) - o_start[i++].ee_len = start_ext->ee_len; - - /* Insert new entry */ - if (new_ext->ee_len) { - o_start[i] = *new_ext; - ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext)); - } - - /* Insert end entry */ - if (end_ext->ee_len) - o_start[i] = *end_ext; - - /* Increment the total entries counter on the extent block */ - le16_add_cpu(&eh->eh_entries, range_to_move); -} - -/** - * mext_insert_extents - Insert new extent - * - * @handle: journal handle - * @orig_inode: original inode - * @orig_path: path indicates first extent to be changed - * @o_start: first original extent to be changed - * @o_end: last original extent to be changed - * @start_ext: first new extent to be inserted - * @new_ext: middle of new extent to be inserted - * @end_ext: last new extent to be inserted - * - * Call the function to insert extents. If we cannot add more extents into - * the leaf block, we call mext_insert_across_blocks() to create a - * new leaf block. Otherwise call mext_insert_inside_block(). Return 0 - * on success, or a negative error value on failure. - */ -static int -mext_insert_extents(handle_t *handle, struct inode *orig_inode, - struct ext4_ext_path *orig_path, - struct ext4_extent *o_start, - struct ext4_extent *o_end, - struct ext4_extent *start_ext, - struct ext4_extent *new_ext, - struct ext4_extent *end_ext) -{ - struct ext4_extent_header *eh; - unsigned long need_slots, slots_range; - int range_to_move, depth, ret; - - /* - * The extents need to be inserted - * start_extent + new_extent + end_extent. - */ - need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) + - (new_ext->ee_len ? 1 : 0); - - /* The number of slots between start and end */ - slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1) - / sizeof(struct ext4_extent); - - /* Range to move the end of extent */ - range_to_move = need_slots - slots_range; - depth = orig_path->p_depth; - orig_path += depth; - eh = orig_path->p_hdr; - - if (depth) { - /* Register to journal */ - BUFFER_TRACE(orig_path->p_bh, "get_write_access"); - ret = ext4_journal_get_write_access(handle, orig_path->p_bh); - if (ret) - return ret; - } - - /* Expansion */ - if (range_to_move > 0 && - (range_to_move > le16_to_cpu(eh->eh_max) - - le16_to_cpu(eh->eh_entries))) { - - ret = mext_insert_across_blocks(handle, orig_inode, o_start, - o_end, start_ext, new_ext, end_ext); - if (ret < 0) - return ret; - } else - mext_insert_inside_block(o_start, o_end, start_ext, new_ext, - end_ext, eh, range_to_move); - - return ext4_ext_dirty(handle, orig_inode, orig_path); -} - -/** - * mext_leaf_block - Move one leaf extent block into the inode. - * - * @handle: journal handle - * @orig_inode: original inode - * @orig_path: path indicates first extent to be changed - * @dext: donor extent - * @from: start offset on the target file - * - * In order to insert extents into the leaf block, we must divide the extent - * in the leaf block into three extents. The one is located to be inserted - * extents, and the others are located around it. - * - * Therefore, this function creates structures to save extents of the leaf - * block, and inserts extents by calling mext_insert_extents() with - * created extents. Return 0 on success, or a negative error value on failure. - */ -static int -mext_leaf_block(handle_t *handle, struct inode *orig_inode, - struct ext4_ext_path *orig_path, struct ext4_extent *dext, - ext4_lblk_t *from) -{ - struct ext4_extent *oext, *o_start, *o_end, *prev_ext; - struct ext4_extent new_ext, start_ext, end_ext; - ext4_lblk_t new_ext_end; - int oext_alen, new_ext_alen, end_ext_alen; - int depth = ext_depth(orig_inode); - int ret; - - start_ext.ee_block = end_ext.ee_block = 0; - o_start = o_end = oext = orig_path[depth].p_ext; - oext_alen = ext4_ext_get_actual_len(oext); - start_ext.ee_len = end_ext.ee_len = 0; - - new_ext.ee_block = cpu_to_le32(*from); - ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext)); - new_ext.ee_len = dext->ee_len; - new_ext_alen = ext4_ext_get_actual_len(&new_ext); - new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; - - /* - * Case: original extent is first - * oext |--------| - * new_ext |--| - * start_ext |--| - */ - if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) && - le32_to_cpu(new_ext.ee_block) < - le32_to_cpu(oext->ee_block) + oext_alen) { - start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - - le32_to_cpu(oext->ee_block)); - start_ext.ee_block = oext->ee_block; - copy_extent_status(oext, &start_ext); - } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { - prev_ext = oext - 1; - /* - * We can merge new_ext into previous extent, - * if these are contiguous and same extent type. - */ - if (ext4_can_extents_be_merged(orig_inode, prev_ext, - &new_ext)) { - o_start = prev_ext; - start_ext.ee_len = cpu_to_le16( - ext4_ext_get_actual_len(prev_ext) + - new_ext_alen); - start_ext.ee_block = oext->ee_block; - copy_extent_status(prev_ext, &start_ext); - new_ext.ee_len = 0; - } - } - - /* - * Case: new_ext_end must be less than oext - * oext |-----------| - * new_ext |-------| - */ - if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { - EXT4_ERROR_INODE(orig_inode, - "new_ext_end(%u) should be less than or equal to " - "oext->ee_block(%u) + oext_alen(%d) - 1", - new_ext_end, le32_to_cpu(oext->ee_block), - oext_alen); - ret = -EIO; - goto out; - } - - /* - * Case: new_ext is smaller than original extent - * oext |---------------| - * new_ext |-----------| - * end_ext |---| - */ - if (le32_to_cpu(oext->ee_block) <= new_ext_end && - new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) { - end_ext.ee_len = - cpu_to_le16(le32_to_cpu(oext->ee_block) + - oext_alen - 1 - new_ext_end); - copy_extent_status(oext, &end_ext); - end_ext_alen = ext4_ext_get_actual_len(&end_ext); - ext4_ext_store_pblock(&end_ext, - (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen)); - end_ext.ee_block = - cpu_to_le32(le32_to_cpu(o_end->ee_block) + - oext_alen - end_ext_alen); - } - - ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, - o_end, &start_ext, &new_ext, &end_ext); -out: - return ret; -} - -/** - * mext_calc_swap_extents - Calculate extents for extent swapping. - * - * @tmp_dext: the extent that will belong to the original inode - * @tmp_oext: the extent that will belong to the donor inode - * @orig_off: block offset of original inode - * @donor_off: block offset of donor inode - * @max_count: the maximum length of extents - * - * Return 0 on success, or a negative error value on failure. - */ -static int -mext_calc_swap_extents(struct ext4_extent *tmp_dext, - struct ext4_extent *tmp_oext, - ext4_lblk_t orig_off, ext4_lblk_t donor_off, - ext4_lblk_t max_count) -{ - ext4_lblk_t diff, orig_diff; - struct ext4_extent dext_old, oext_old; - - BUG_ON(orig_off != donor_off); - - /* original and donor extents have to cover the same block offset */ - if (orig_off < le32_to_cpu(tmp_oext->ee_block) || - le32_to_cpu(tmp_oext->ee_block) + - ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off) - return -ENODATA; - - if (orig_off < le32_to_cpu(tmp_dext->ee_block) || - le32_to_cpu(tmp_dext->ee_block) + - ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off) - return -ENODATA; - - dext_old = *tmp_dext; - oext_old = *tmp_oext; - - /* When tmp_dext is too large, pick up the target range. */ - diff = donor_off - le32_to_cpu(tmp_dext->ee_block); - - ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); - le32_add_cpu(&tmp_dext->ee_block, diff); - le16_add_cpu(&tmp_dext->ee_len, -diff); - - if (max_count < ext4_ext_get_actual_len(tmp_dext)) - tmp_dext->ee_len = cpu_to_le16(max_count); - - orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); - ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff); - - /* Adjust extent length if donor extent is larger than orig */ - if (ext4_ext_get_actual_len(tmp_dext) > - ext4_ext_get_actual_len(tmp_oext) - orig_diff) - tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) - - orig_diff); - - tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext)); - - copy_extent_status(&oext_old, tmp_dext); - copy_extent_status(&dext_old, tmp_oext); - - return 0; -} - -/** * mext_check_coverage - Check that all extents in range has the same type * * @inode: inode in question @@ -619,171 +114,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, } ret = 1; out: - if (path) { - ext4_ext_drop_refs(path); - kfree(path); - } + ext4_ext_drop_refs(path); + kfree(path); return ret; } /** - * mext_replace_branches - Replace original extents with new extents - * - * @handle: journal handle - * @orig_inode: original inode - * @donor_inode: donor inode - * @from: block offset of orig_inode - * @count: block count to be replaced - * @err: pointer to save return value - * - * Replace original inode extents and donor inode extents page by page. - * We implement this replacement in the following three steps: - * 1. Save the block information of original and donor inodes into - * dummy extents. - * 2. Change the block information of original inode to point at the - * donor inode blocks. - * 3. Change the block information of donor inode to point at the saved - * original inode blocks in the dummy extents. - * - * Return replaced block count. - */ -static int -mext_replace_branches(handle_t *handle, struct inode *orig_inode, - struct inode *donor_inode, ext4_lblk_t from, - ext4_lblk_t count, int *err) -{ - struct ext4_ext_path *orig_path = NULL; - struct ext4_ext_path *donor_path = NULL; - struct ext4_extent *oext, *dext; - struct ext4_extent tmp_dext, tmp_oext; - ext4_lblk_t orig_off = from, donor_off = from; - int depth; - int replaced_count = 0; - int dext_alen; - - *err = ext4_es_remove_extent(orig_inode, from, count); - if (*err) - goto out; - - *err = ext4_es_remove_extent(donor_inode, from, count); - if (*err) - goto out; - - /* Get the original extent for the block "orig_off" */ - *err = get_ext_path(orig_inode, orig_off, &orig_path); - if (*err) - goto out; - - /* Get the donor extent for the head */ - *err = get_ext_path(donor_inode, donor_off, &donor_path); - if (*err) - goto out; - depth = ext_depth(orig_inode); - oext = orig_path[depth].p_ext; - tmp_oext = *oext; - - depth = ext_depth(donor_inode); - dext = donor_path[depth].p_ext; - if (unlikely(!dext)) - goto missing_donor_extent; - tmp_dext = *dext; - - *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, - donor_off, count); - if (*err) - goto out; - - /* Loop for the donor extents */ - while (1) { - /* The extent for donor must be found. */ - if (unlikely(!dext)) { - missing_donor_extent: - EXT4_ERROR_INODE(donor_inode, - "The extent for donor must be found"); - *err = -EIO; - goto out; - } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { - EXT4_ERROR_INODE(donor_inode, - "Donor offset(%u) and the first block of donor " - "extent(%u) should be equal", - donor_off, - le32_to_cpu(tmp_dext.ee_block)); - *err = -EIO; - goto out; - } - - /* Set donor extent to orig extent */ - *err = mext_leaf_block(handle, orig_inode, - orig_path, &tmp_dext, &orig_off); - if (*err) - goto out; - - /* Set orig extent to donor extent */ - *err = mext_leaf_block(handle, donor_inode, - donor_path, &tmp_oext, &donor_off); - if (*err) - goto out; - - dext_alen = ext4_ext_get_actual_len(&tmp_dext); - replaced_count += dext_alen; - donor_off += dext_alen; - orig_off += dext_alen; - - BUG_ON(replaced_count > count); - /* Already moved the expected blocks */ - if (replaced_count >= count) - break; - - if (orig_path) - ext4_ext_drop_refs(orig_path); - *err = get_ext_path(orig_inode, orig_off, &orig_path); - if (*err) - goto out; - depth = ext_depth(orig_inode); - oext = orig_path[depth].p_ext; - tmp_oext = *oext; - - if (donor_path) - ext4_ext_drop_refs(donor_path); - *err = get_ext_path(donor_inode, donor_off, &donor_path); - if (*err) - goto out; - depth = ext_depth(donor_inode); - dext = donor_path[depth].p_ext; - tmp_dext = *dext; - - *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, - donor_off, count - replaced_count); - if (*err) - goto out; - } - -out: - if (orig_path) { - ext4_ext_drop_refs(orig_path); - kfree(orig_path); - } - if (donor_path) { - ext4_ext_drop_refs(donor_path); - kfree(donor_path); - } - - return replaced_count; -} - -/** * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 * * @inode1: the inode structure * @inode2: the inode structure - * @index: page index + * @index1: page index + * @index2: page index * @page: result page vector * * Grab two locked pages for inode's by inode order */ static int mext_page_double_lock(struct inode *inode1, struct inode *inode2, - pgoff_t index, struct page *page[2]) + pgoff_t index1, pgoff_t index2, struct page *page[2]) { struct address_space *mapping[2]; unsigned fl = AOP_FLAG_NOFS; @@ -793,15 +142,18 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2, mapping[0] = inode1->i_mapping; mapping[1] = inode2->i_mapping; } else { + pgoff_t tmp = index1; + index1 = index2; + index2 = tmp; mapping[0] = inode2->i_mapping; mapping[1] = inode1->i_mapping; } - page[0] = grab_cache_page_write_begin(mapping[0], index, fl); + page[0] = grab_cache_page_write_begin(mapping[0], index1, fl); if (!page[0]) return -ENOMEM; - page[1] = grab_cache_page_write_begin(mapping[1], index, fl); + page[1] = grab_cache_page_write_begin(mapping[1], index2, fl); if (!page[1]) { unlock_page(page[0]); page_cache_release(page[0]); @@ -893,25 +245,27 @@ out: * @o_filp: file structure of original file * @donor_inode: donor inode * @orig_page_offset: page index on original file + * @donor_page_offset: page index on donor file * @data_offset_in_page: block index where data swapping starts * @block_len_in_page: the number of blocks to be swapped * @unwritten: orig extent is unwritten or not * @err: pointer to save return value * * Save the data in original inode blocks and replace original inode extents - * with donor inode extents by calling mext_replace_branches(). + * with donor inode extents by calling ext4_swap_extents(). * Finally, write out the saved data in new original inode blocks. Return * replaced block count. */ static int move_extent_per_page(struct file *o_filp, struct inode *donor_inode, - pgoff_t orig_page_offset, int data_offset_in_page, - int block_len_in_page, int unwritten, int *err) + pgoff_t orig_page_offset, pgoff_t donor_page_offset, + int data_offset_in_page, + int block_len_in_page, int unwritten, int *err) { struct inode *orig_inode = file_inode(o_filp); struct page *pagep[2] = {NULL, NULL}; handle_t *handle; - ext4_lblk_t orig_blk_offset; + ext4_lblk_t orig_blk_offset, donor_blk_offset; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int w_flags = 0; unsigned int tmp_data_size, data_size, replaced_size; @@ -939,6 +293,9 @@ again: orig_blk_offset = orig_page_offset * blocks_per_page + data_offset_in_page; + donor_blk_offset = donor_page_offset * blocks_per_page + + data_offset_in_page; + /* Calculate data_size */ if ((orig_blk_offset + block_len_in_page - 1) == ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { @@ -959,7 +316,7 @@ again: replaced_size = data_size; *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, - pagep); + donor_page_offset, pagep); if (unlikely(*err < 0)) goto stop_journal; /* @@ -978,7 +335,7 @@ again: if (*err) goto drop_data_sem; - unwritten &= mext_check_coverage(donor_inode, orig_blk_offset, + unwritten &= mext_check_coverage(donor_inode, donor_blk_offset, block_len_in_page, 1, err); if (*err) goto drop_data_sem; @@ -994,9 +351,10 @@ again: *err = -EBUSY; goto drop_data_sem; } - replaced_count = mext_replace_branches(handle, orig_inode, - donor_inode, orig_blk_offset, - block_len_in_page, err); + replaced_count = ext4_swap_extents(handle, orig_inode, + donor_inode, orig_blk_offset, + donor_blk_offset, + block_len_in_page, 1, err); drop_data_sem: ext4_double_up_write_data_sem(orig_inode, donor_inode); goto unlock_pages; @@ -1014,9 +372,9 @@ data_copy: goto unlock_pages; } ext4_double_down_write_data_sem(orig_inode, donor_inode); - replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, - orig_blk_offset, - block_len_in_page, err); + replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode, + orig_blk_offset, donor_blk_offset, + block_len_in_page, 1, err); ext4_double_up_write_data_sem(orig_inode, donor_inode); if (*err) { if (replaced_count) { @@ -1061,9 +419,9 @@ repair_branches: * Try to swap extents to it's original places */ ext4_double_down_write_data_sem(orig_inode, donor_inode); - replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, - orig_blk_offset, - block_len_in_page, &err2); + replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode, + orig_blk_offset, donor_blk_offset, + block_len_in_page, 0, &err2); ext4_double_up_write_data_sem(orig_inode, donor_inode); if (replaced_count != block_len_in_page) { EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), @@ -1093,10 +451,14 @@ mext_check_arguments(struct inode *orig_inode, struct inode *donor_inode, __u64 orig_start, __u64 donor_start, __u64 *len) { - ext4_lblk_t orig_blocks, donor_blocks; + __u64 orig_eof, donor_eof; unsigned int blkbits = orig_inode->i_blkbits; unsigned int blocksize = 1 << blkbits; + orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits; + donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits; + + if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { ext4_debug("ext4 move extent: suid or sgid is set" " to donor file [ino:orig %lu, donor %lu]\n", @@ -1112,7 +474,7 @@ mext_check_arguments(struct inode *orig_inode, ext4_debug("ext4 move extent: The argument files should " "not be swapfile [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); - return -EINVAL; + return -EBUSY; } /* Ext4 move extent supports only extent based file */ @@ -1132,67 +494,28 @@ mext_check_arguments(struct inode *orig_inode, } /* Start offset should be same */ - if (orig_start != donor_start) { + if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) != + (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) { ext4_debug("ext4 move extent: orig and donor's start " - "offset are not same [ino:orig %lu, donor %lu]\n", + "offset are not alligned [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } if ((orig_start >= EXT_MAX_BLOCKS) || + (donor_start >= EXT_MAX_BLOCKS) || (*len > EXT_MAX_BLOCKS) || + (donor_start + *len >= EXT_MAX_BLOCKS) || (orig_start + *len >= EXT_MAX_BLOCKS)) { ext4_debug("ext4 move extent: Can't handle over [%u] blocks " "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } - - if (orig_inode->i_size > donor_inode->i_size) { - donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits; - /* TODO: eliminate this artificial restriction */ - if (orig_start >= donor_blocks) { - ext4_debug("ext4 move extent: orig start offset " - "[%llu] should be less than donor file blocks " - "[%u] [ino:orig %lu, donor %lu]\n", - orig_start, donor_blocks, - orig_inode->i_ino, donor_inode->i_ino); - return -EINVAL; - } - - /* TODO: eliminate this artificial restriction */ - if (orig_start + *len > donor_blocks) { - ext4_debug("ext4 move extent: End offset [%llu] should " - "be less than donor file blocks [%u]." - "So adjust length from %llu to %llu " - "[ino:orig %lu, donor %lu]\n", - orig_start + *len, donor_blocks, - *len, donor_blocks - orig_start, - orig_inode->i_ino, donor_inode->i_ino); - *len = donor_blocks - orig_start; - } - } else { - orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits; - if (orig_start >= orig_blocks) { - ext4_debug("ext4 move extent: start offset [%llu] " - "should be less than original file blocks " - "[%u] [ino:orig %lu, donor %lu]\n", - orig_start, orig_blocks, - orig_inode->i_ino, donor_inode->i_ino); - return -EINVAL; - } - - if (orig_start + *len > orig_blocks) { - ext4_debug("ext4 move extent: Adjust length " - "from %llu to %llu. Because it should be " - "less than original file blocks " - "[ino:orig %lu, donor %lu]\n", - *len, orig_blocks - orig_start, - orig_inode->i_ino, donor_inode->i_ino); - *len = orig_blocks - orig_start; - } - } - + if (orig_eof < orig_start + *len - 1) + *len = orig_eof - orig_start; + if (donor_eof < donor_start + *len - 1) + *len = donor_eof - donor_start; if (!*len) { ext4_debug("ext4 move extent: len should not be 0 " "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, @@ -1208,60 +531,26 @@ mext_check_arguments(struct inode *orig_inode, * * @o_filp: file structure of the original file * @d_filp: file structure of the donor file - * @orig_start: start offset in block for orig - * @donor_start: start offset in block for donor + * @orig_blk: start offset in block for orig + * @donor_blk: start offset in block for donor * @len: the number of blocks to be moved * @moved_len: moved block length * * This function returns 0 and moved block length is set in moved_len * if succeed, otherwise returns error value. * - * Note: ext4_move_extents() proceeds the following order. - * 1:ext4_move_extents() calculates the last block number of moving extent - * function by the start block number (orig_start) and the number of blocks - * to be moved (len) specified as arguments. - * If the {orig, donor}_start points a hole, the extent's start offset - * pointed by ext_cur (current extent), holecheck_path, orig_path are set - * after hole behind. - * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent - * or the ext_cur exceeds the block_end which is last logical block number. - * 3:To get the length of continues area, call mext_next_extent() - * specified with the ext_cur (initial value is holecheck_path) re-cursive, - * until find un-continuous extent, the start logical block number exceeds - * the block_end or the extent points to the last extent. - * 4:Exchange the original inode data with donor inode data - * from orig_page_offset to seq_end_page. - * The start indexes of data are specified as arguments. - * That of the original inode is orig_page_offset, - * and the donor inode is also orig_page_offset - * (To easily handle blocksize != pagesize case, the offset for the - * donor inode is block unit). - * 5:Update holecheck_path and orig_path to points a next proceeding extent, - * then returns to step 2. - * 6:Release holecheck_path, orig_path and set the len to moved_len - * which shows the number of moved blocks. - * The moved_len is useful for the command to calculate the file offset - * for starting next move extent ioctl. - * 7:Return 0 on success, or a negative error value on failure. */ int -ext4_move_extents(struct file *o_filp, struct file *d_filp, - __u64 orig_start, __u64 donor_start, __u64 len, - __u64 *moved_len) +ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, + __u64 donor_blk, __u64 len, __u64 *moved_len) { struct inode *orig_inode = file_inode(o_filp); struct inode *donor_inode = file_inode(d_filp); - struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; - struct ext4_extent *ext_prev, *ext_cur, *ext_dummy; - ext4_lblk_t block_start = orig_start; - ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; - ext4_lblk_t rest_blocks; - pgoff_t orig_page_offset = 0, seq_end_page; - int ret, depth, last_extent = 0; + struct ext4_ext_path *path = NULL; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; - int data_offset_in_page; - int block_len_in_page; - int unwritten; + ext4_lblk_t o_end, o_start = orig_blk; + ext4_lblk_t d_start = donor_blk; + int ret; if (orig_inode->i_sb != donor_inode->i_sb) { ext4_debug("ext4 move extent: The argument files " @@ -1303,121 +592,58 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ - ret = mext_check_arguments(orig_inode, donor_inode, orig_start, - donor_start, &len); + ret = mext_check_arguments(orig_inode, donor_inode, orig_blk, + donor_blk, &len); if (ret) goto out; + o_end = o_start + len; - file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; - block_end = block_start + len - 1; - if (file_end < block_end) - len -= block_end - file_end; + while (o_start < o_end) { + struct ext4_extent *ex; + ext4_lblk_t cur_blk, next_blk; + pgoff_t orig_page_index, donor_page_index; + int offset_in_page; + int unwritten, cur_len; - ret = get_ext_path(orig_inode, block_start, &orig_path); - if (ret) - goto out; - - /* Get path structure to check the hole */ - ret = get_ext_path(orig_inode, block_start, &holecheck_path); - if (ret) - goto out; - - depth = ext_depth(orig_inode); - ext_cur = holecheck_path[depth].p_ext; - - /* - * Get proper starting location of block replacement if block_start was - * within the hole. - */ - if (le32_to_cpu(ext_cur->ee_block) + - ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { - /* - * The hole exists between extents or the tail of - * original file. - */ - last_extent = mext_next_extent(orig_inode, - holecheck_path, &ext_cur); - if (last_extent < 0) { - ret = last_extent; - goto out; - } - last_extent = mext_next_extent(orig_inode, orig_path, - &ext_dummy); - if (last_extent < 0) { - ret = last_extent; + ret = get_ext_path(orig_inode, o_start, &path); + if (ret) goto out; - } - seq_start = le32_to_cpu(ext_cur->ee_block); - } else if (le32_to_cpu(ext_cur->ee_block) > block_start) - /* The hole exists at the beginning of original file. */ - seq_start = le32_to_cpu(ext_cur->ee_block); - else - seq_start = block_start; - - /* No blocks within the specified range. */ - if (le32_to_cpu(ext_cur->ee_block) > block_end) { - ext4_debug("ext4 move extent: The specified range of file " - "may be the hole\n"); - ret = -EINVAL; - goto out; - } - - /* Adjust start blocks */ - add_blocks = min(le32_to_cpu(ext_cur->ee_block) + - ext4_ext_get_actual_len(ext_cur), block_end + 1) - - max(le32_to_cpu(ext_cur->ee_block), block_start); - - while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) { - seq_blocks += add_blocks; - - /* Adjust tail blocks */ - if (seq_start + seq_blocks - 1 > block_end) - seq_blocks = block_end - seq_start + 1; - - ext_prev = ext_cur; - last_extent = mext_next_extent(orig_inode, holecheck_path, - &ext_cur); - if (last_extent < 0) { - ret = last_extent; - break; - } - add_blocks = ext4_ext_get_actual_len(ext_cur); - - /* - * Extend the length of contiguous block (seq_blocks) - * if extents are contiguous. - */ - if (ext4_can_extents_be_merged(orig_inode, - ext_prev, ext_cur) && - block_end >= le32_to_cpu(ext_cur->ee_block) && - !last_extent) + ex = path[path->p_depth].p_ext; + next_blk = ext4_ext_next_allocated_block(path); + cur_blk = le32_to_cpu(ex->ee_block); + cur_len = ext4_ext_get_actual_len(ex); + /* Check hole before the start pos */ + if (cur_blk + cur_len - 1 < o_start) { + if (next_blk == EXT_MAX_BLOCKS) { + o_start = o_end; + ret = -ENODATA; + goto out; + } + d_start += next_blk - o_start; + o_start = next_blk; continue; - - /* Is original extent is unwritten */ - unwritten = ext4_ext_is_unwritten(ext_prev); - - data_offset_in_page = seq_start % blocks_per_page; - - /* - * Calculate data blocks count that should be swapped - * at the first page. - */ - if (data_offset_in_page + seq_blocks > blocks_per_page) { - /* Swapped blocks are across pages */ - block_len_in_page = - blocks_per_page - data_offset_in_page; - } else { - /* Swapped blocks are in a page */ - block_len_in_page = seq_blocks; + /* Check hole after the start pos */ + } else if (cur_blk > o_start) { + /* Skip hole */ + d_start += cur_blk - o_start; + o_start = cur_blk; + /* Extent inside requested range ?*/ + if (cur_blk >= o_end) + goto out; + } else { /* in_range(o_start, o_blk, o_len) */ + cur_len += cur_blk - o_start; } - - orig_page_offset = seq_start >> - (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); - seq_end_page = (seq_start + seq_blocks - 1) >> - (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); - seq_start = le32_to_cpu(ext_cur->ee_block); - rest_blocks = seq_blocks; - + unwritten = ext4_ext_is_unwritten(ex); + if (o_end - o_start < cur_len) + cur_len = o_end - o_start; + + orig_page_index = o_start >> (PAGE_CACHE_SHIFT - + orig_inode->i_blkbits); + donor_page_index = d_start >> (PAGE_CACHE_SHIFT - + donor_inode->i_blkbits); + offset_in_page = o_start % blocks_per_page; + if (cur_len > blocks_per_page- offset_in_page) + cur_len = blocks_per_page - offset_in_page; /* * Up semaphore to avoid following problems: * a. transaction deadlock among ext4_journal_start, @@ -1426,77 +652,29 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, * in move_extent_per_page */ ext4_double_up_write_data_sem(orig_inode, donor_inode); - - while (orig_page_offset <= seq_end_page) { - - /* Swap original branches with new branches */ - block_len_in_page = move_extent_per_page( - o_filp, donor_inode, - orig_page_offset, - data_offset_in_page, - block_len_in_page, - unwritten, &ret); - - /* Count how many blocks we have exchanged */ - *moved_len += block_len_in_page; - if (ret < 0) - break; - if (*moved_len > len) { - EXT4_ERROR_INODE(orig_inode, - "We replaced blocks too much! " - "sum of replaced: %llu requested: %llu", - *moved_len, len); - ret = -EIO; - break; - } - - orig_page_offset++; - data_offset_in_page = 0; - rest_blocks -= block_len_in_page; - if (rest_blocks > blocks_per_page) - block_len_in_page = blocks_per_page; - else - block_len_in_page = rest_blocks; - } - + /* Swap original branches with new branches */ + move_extent_per_page(o_filp, donor_inode, + orig_page_index, donor_page_index, + offset_in_page, cur_len, + unwritten, &ret); ext4_double_down_write_data_sem(orig_inode, donor_inode); if (ret < 0) break; - - /* Decrease buffer counter */ - if (holecheck_path) - ext4_ext_drop_refs(holecheck_path); - ret = get_ext_path(orig_inode, seq_start, &holecheck_path); - if (ret) - break; - depth = holecheck_path->p_depth; - - /* Decrease buffer counter */ - if (orig_path) - ext4_ext_drop_refs(orig_path); - ret = get_ext_path(orig_inode, seq_start, &orig_path); - if (ret) - break; - - ext_cur = holecheck_path[depth].p_ext; - add_blocks = ext4_ext_get_actual_len(ext_cur); - seq_blocks = 0; - + o_start += cur_len; + d_start += cur_len; } + *moved_len = o_start - orig_blk; + if (*moved_len > len) + *moved_len = len; + out: if (*moved_len) { ext4_discard_preallocations(orig_inode); ext4_discard_preallocations(donor_inode); } - if (orig_path) { - ext4_ext_drop_refs(orig_path); - kfree(orig_path); - } - if (holecheck_path) { - ext4_ext_drop_refs(holecheck_path); - kfree(holecheck_path); - } + ext4_ext_drop_refs(path); + kfree(path); ext4_double_up_write_data_sem(orig_inode, donor_inode); ext4_inode_resume_unlocked_dio(orig_inode); ext4_inode_resume_unlocked_dio(donor_inode); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 603e4ebbd0ac..adb559de23c1 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -53,7 +53,7 @@ static struct buffer_head *ext4_append(handle_t *handle, ext4_lblk_t *block) { struct buffer_head *bh; - int err = 0; + int err; if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && ((inode->i_size >> 10) >= @@ -62,9 +62,9 @@ static struct buffer_head *ext4_append(handle_t *handle, *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - bh = ext4_bread(handle, inode, *block, 1, &err); - if (!bh) - return ERR_PTR(err); + bh = ext4_bread(handle, inode, *block, 1); + if (IS_ERR(bh)) + return bh; inode->i_size += inode->i_sb->s_blocksize; EXT4_I(inode)->i_disksize = inode->i_size; BUFFER_TRACE(bh, "get_write_access"); @@ -94,20 +94,20 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, { struct buffer_head *bh; struct ext4_dir_entry *dirent; - int err = 0, is_dx_block = 0; + int is_dx_block = 0; - bh = ext4_bread(NULL, inode, block, 0, &err); - if (!bh) { - if (err == 0) { - ext4_error_inode(inode, __func__, line, block, - "Directory hole found"); - return ERR_PTR(-EIO); - } + bh = ext4_bread(NULL, inode, block, 0); + if (IS_ERR(bh)) { __ext4_warning(inode->i_sb, __func__, line, - "error reading directory block " - "(ino %lu, block %lu)", inode->i_ino, + "error %ld reading directory block " + "(ino %lu, block %lu)", PTR_ERR(bh), inode->i_ino, (unsigned long) block); - return ERR_PTR(err); + + return bh; + } + if (!bh) { + ext4_error_inode(inode, __func__, line, block, "Directory hole found"); + return ERR_PTR(-EIO); } dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ @@ -124,8 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, "directory leaf block found instead of index block"); return ERR_PTR(-EIO); } - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) || + if (!ext4_has_metadata_csum(inode->i_sb) || buffer_verified(bh)) return bh; @@ -253,8 +252,7 @@ static unsigned dx_node_limit(struct inode *dir); static struct dx_frame *dx_probe(const struct qstr *d_name, struct inode *dir, struct dx_hash_info *hinfo, - struct dx_frame *frame, - int *err); + struct dx_frame *frame); static void dx_release(struct dx_frame *frames); static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, struct dx_hash_info *hinfo, struct dx_map_entry map[]); @@ -270,8 +268,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, __u32 *start_hash); static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir, - int *err); + struct ext4_dir_entry_2 **res_dir); static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode); @@ -340,8 +337,7 @@ int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) { struct ext4_dir_entry_tail *t; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return 1; t = get_dirent_tail(inode, dirent); @@ -362,8 +358,7 @@ static void ext4_dirent_csum_set(struct inode *inode, { struct ext4_dir_entry_tail *t; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return; t = get_dirent_tail(inode, dirent); @@ -438,8 +433,7 @@ static int ext4_dx_csum_verify(struct inode *inode, struct dx_tail *t; int count_offset, limit, count; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return 1; c = get_dx_countlimit(inode, dirent, &count_offset); @@ -468,8 +462,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent) struct dx_tail *t; int count_offset, limit, count; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return; c = get_dx_countlimit(inode, dirent, &count_offset); @@ -557,8 +550,7 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - EXT4_DIR_REC_LEN(2) - infosize; - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(dir->i_sb)) entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } @@ -567,8 +559,7 @@ static inline unsigned dx_node_limit(struct inode *dir) { unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(dir->i_sb)) entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } @@ -641,7 +632,9 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; struct stats stats; printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); - if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue; + bh = ext4_bread(NULL,dir, block, 0); + if (!bh || IS_ERR(bh)) + continue; stats = levels? dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); @@ -669,29 +662,25 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, */ static struct dx_frame * dx_probe(const struct qstr *d_name, struct inode *dir, - struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) + struct dx_hash_info *hinfo, struct dx_frame *frame_in) { unsigned count, indirect; struct dx_entry *at, *entries, *p, *q, *m; struct dx_root *root; - struct buffer_head *bh; struct dx_frame *frame = frame_in; + struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); u32 hash; - frame->bh = NULL; - bh = ext4_read_dirblock(dir, 0, INDEX); - if (IS_ERR(bh)) { - *err = PTR_ERR(bh); - goto fail; - } - root = (struct dx_root *) bh->b_data; + frame->bh = ext4_read_dirblock(dir, 0, INDEX); + if (IS_ERR(frame->bh)) + return (struct dx_frame *) frame->bh; + + root = (struct dx_root *) frame->bh->b_data; if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && root->info.hash_version != DX_HASH_LEGACY) { ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", root->info.hash_version); - brelse(bh); - *err = ERR_BAD_DX_DIR; goto fail; } hinfo->hash_version = root->info.hash_version; @@ -705,16 +694,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir, if (root->info.unused_flags & 1) { ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", root->info.unused_flags); - brelse(bh); - *err = ERR_BAD_DX_DIR; goto fail; } if ((indirect = root->info.indirect_levels) > 1) { ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", root->info.indirect_levels); - brelse(bh); - *err = ERR_BAD_DX_DIR; goto fail; } @@ -724,27 +709,21 @@ dx_probe(const struct qstr *d_name, struct inode *dir, if (dx_get_limit(entries) != dx_root_limit(dir, root->info.info_length)) { ext4_warning(dir->i_sb, "dx entry: limit != root limit"); - brelse(bh); - *err = ERR_BAD_DX_DIR; goto fail; } dxtrace(printk("Look up %x", hash)); - while (1) - { + while (1) { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { ext4_warning(dir->i_sb, "dx entry: no count or count > limit"); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail2; + goto fail; } p = entries + 1; q = entries + count - 1; - while (p <= q) - { + while (p <= q) { m = p + (q - p)/2; dxtrace(printk(".")); if (dx_get_hash(m) > hash) @@ -753,8 +732,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, p = m + 1; } - if (0) // linear search cross check - { + if (0) { // linear search cross check unsigned n = count - 1; at = entries; while (n--) @@ -771,38 +749,35 @@ dx_probe(const struct qstr *d_name, struct inode *dir, at = p - 1; dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); - frame->bh = bh; frame->entries = entries; frame->at = at; - if (!indirect--) return frame; - bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); - if (IS_ERR(bh)) { - *err = PTR_ERR(bh); - goto fail2; + if (!indirect--) + return frame; + frame++; + frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); + if (IS_ERR(frame->bh)) { + ret_err = (struct dx_frame *) frame->bh; + frame->bh = NULL; + goto fail; } - entries = ((struct dx_node *) bh->b_data)->entries; + entries = ((struct dx_node *) frame->bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit (dir)) { ext4_warning(dir->i_sb, "dx entry: limit != node limit"); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail2; + goto fail; } - frame++; - frame->bh = NULL; } -fail2: +fail: while (frame >= frame_in) { brelse(frame->bh); frame--; } -fail: - if (*err == ERR_BAD_DX_DIR) + if (ret_err == ERR_PTR(ERR_BAD_DX_DIR)) ext4_warning(dir->i_sb, "Corrupt dir inode %lu, running e2fsck is " "recommended.", dir->i_ino); - return NULL; + return ret_err; } static void dx_release (struct dx_frame *frames) @@ -988,9 +963,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, } hinfo.hash = start_hash; hinfo.minor_hash = 0; - frame = dx_probe(NULL, dir, &hinfo, frames, &err); - if (!frame) - return err; + frame = dx_probe(NULL, dir, &hinfo, frames); + if (IS_ERR(frame)) + return PTR_ERR(frame); /* Add '.' and '..' from the htree header */ if (!start_hash && !start_minor_hash) { @@ -1227,8 +1202,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, buffer */ int num = 0; ext4_lblk_t nblocks; - int i, err = 0; - int namelen; + int i, namelen; *res_dir = NULL; sb = dir->i_sb; @@ -1258,17 +1232,13 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, goto restart; } if (is_dx(dir)) { - bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); + bh = ext4_dx_find_entry(dir, d_name, res_dir); /* * On success, or if the error was file not found, * return. Otherwise, fall back to doing a search the * old fashioned way. */ - if (err == -ENOENT) - return NULL; - if (err && err != ERR_BAD_DX_DIR) - return ERR_PTR(err); - if (bh) + if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR) return bh; dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " "falling back\n")); @@ -1298,10 +1268,10 @@ restart: break; } num++; - bh = ext4_getblk(NULL, dir, b++, 0, &err); - if (unlikely(err)) { + bh = ext4_getblk(NULL, dir, b++, 0); + if (unlikely(IS_ERR(bh))) { if (ra_max == 0) - return ERR_PTR(err); + return bh; break; } bh_use[ra_max] = bh; @@ -1366,7 +1336,7 @@ cleanup_and_exit: } static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir, int *err) + struct ext4_dir_entry_2 **res_dir) { struct super_block * sb = dir->i_sb; struct dx_hash_info hinfo; @@ -1375,25 +1345,23 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q ext4_lblk_t block; int retval; - if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) - return NULL; + frame = dx_probe(d_name, dir, &hinfo, frames); + if (IS_ERR(frame)) + return (struct buffer_head *) frame; do { block = dx_get_block(frame->at); bh = ext4_read_dirblock(dir, block, DIRENT); - if (IS_ERR(bh)) { - *err = PTR_ERR(bh); + if (IS_ERR(bh)) goto errout; - } + retval = search_dirblock(bh, dir, d_name, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); - if (retval == 1) { /* Success! */ - dx_release(frames); - return bh; - } + if (retval == 1) + goto success; brelse(bh); if (retval == -1) { - *err = ERR_BAD_DX_DIR; + bh = ERR_PTR(ERR_BAD_DX_DIR); goto errout; } @@ -1402,18 +1370,19 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q frames, NULL); if (retval < 0) { ext4_warning(sb, - "error reading index page in directory #%lu", - dir->i_ino); - *err = retval; + "error %d reading index page in directory #%lu", + retval, dir->i_ino); + bh = ERR_PTR(retval); goto errout; } } while (retval == 1); - *err = -ENOENT; + bh = NULL; errout: dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); - dx_release (frames); - return NULL; +success: + dx_release(frames); + return bh; } static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) @@ -1441,7 +1410,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi dentry); return ERR_PTR(-EIO); } - inode = ext4_iget(dir->i_sb, ino); + inode = ext4_iget_normal(dir->i_sb, ino); if (inode == ERR_PTR(-ESTALE)) { EXT4_ERROR_INODE(dir, "deleted inode referenced: %u", @@ -1474,7 +1443,7 @@ struct dentry *ext4_get_parent(struct dentry *child) return ERR_PTR(-EIO); } - return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino)); + return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino)); } /* @@ -1533,7 +1502,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize) */ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct buffer_head **bh,struct dx_frame *frame, - struct dx_hash_info *hinfo, int *error) + struct dx_hash_info *hinfo) { unsigned blocksize = dir->i_sb->s_blocksize; unsigned count, continued; @@ -1548,16 +1517,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, int csum_size = 0; int err = 0, i; - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); bh2 = ext4_append(handle, dir, &newblock); if (IS_ERR(bh2)) { brelse(*bh); *bh = NULL; - *error = PTR_ERR(bh2); - return NULL; + return (struct ext4_dir_entry_2 *) bh2; } BUFFER_TRACE(*bh, "get_write_access"); @@ -1617,8 +1584,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); /* Which block gets the new entry? */ - if (hinfo->hash >= hash2) - { + if (hinfo->hash >= hash2) { swap(*bh, bh2); de = de2; } @@ -1638,8 +1604,7 @@ journal_error: brelse(bh2); *bh = NULL; ext4_std_error(dir->i_sb, err); - *error = err; - return NULL; + return ERR_PTR(err); } int ext4_find_dest_de(struct inode *dir, struct inode *inode, @@ -1718,8 +1683,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, int csum_size = 0; int err; - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); if (!de) { @@ -1786,8 +1750,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct fake_dirent *fde; int csum_size = 0; - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); blocksize = dir->i_sb->s_blocksize; @@ -1862,8 +1825,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, ext4_handle_dirty_dx_node(handle, dir, frame->bh); ext4_handle_dirty_dirent_node(handle, dir, bh); - de = do_split(handle,dir, &bh, frame, &hinfo, &retval); - if (!de) { + de = do_split(handle,dir, &bh, frame, &hinfo); + if (IS_ERR(de)) { /* * Even if the block split failed, we have to properly write * out all the changes we did so far. Otherwise we can end up @@ -1871,7 +1834,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, */ ext4_mark_inode_dirty(handle, dir); dx_release(frames); - return retval; + return PTR_ERR(de); } dx_release(frames); @@ -1904,8 +1867,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, ext4_lblk_t block, blocks; int csum_size = 0; - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); sb = dir->i_sb; @@ -1982,9 +1944,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct ext4_dir_entry_2 *de; int err; - frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); - if (!frame) - return err; + frame = dx_probe(&dentry->d_name, dir, &hinfo, frames); + if (IS_ERR(frame)) + return PTR_ERR(frame); entries = frame->entries; at = frame->at; bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); @@ -2095,9 +2057,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, goto cleanup; } } - de = do_split(handle, dir, &bh, frame, &hinfo, &err); - if (!de) + de = do_split(handle, dir, &bh, frame, &hinfo); + if (IS_ERR(de)) { + err = PTR_ERR(de); goto cleanup; + } err = add_dirent_to_buf(handle, dentry, inode, de, bh); goto cleanup; @@ -2167,8 +2131,7 @@ static int ext4_delete_entry(handle_t *handle, return err; } - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); BUFFER_TRACE(bh, "get_write_access"); @@ -2387,8 +2350,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, int csum_size = 0; int err; - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { @@ -2403,10 +2365,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, dir_block = ext4_append(handle, inode, &block); if (IS_ERR(dir_block)) return PTR_ERR(dir_block); - BUFFER_TRACE(dir_block, "get_write_access"); - err = ext4_journal_get_write_access(handle, dir_block); - if (err) - goto out; de = (struct ext4_dir_entry_2 *)dir_block->b_data; ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); set_nlink(inode, 2); @@ -2573,7 +2531,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) int err = 0, rc; bool dirty = false; - if (!sbi->s_journal) + if (!sbi->s_journal || is_bad_inode(inode)) return 0; WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 1e43b905ff98..f298c60f907d 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb, { struct buffer_head *bh; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 0; bh = ext4_get_bitmap(sb, group_data->inode_bitmap); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 05c159218bc2..1eda6ab0ef9d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -70,7 +70,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb, static void ext4_clear_journal_err(struct super_block *sb, struct ext4_super_block *es); static int ext4_sync_fs(struct super_block *sb, int wait); -static int ext4_sync_fs_nojournal(struct super_block *sb, int wait); static int ext4_remount(struct super_block *sb, int *flags, char *data); static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); @@ -141,8 +140,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb, static int ext4_superblock_csum_verify(struct super_block *sb, struct ext4_super_block *es) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 1; return es->s_checksum == ext4_superblock_csum(sb, es); @@ -152,8 +150,7 @@ void ext4_superblock_csum_set(struct super_block *sb) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return; es->s_checksum = ext4_superblock_csum(sb, es); @@ -820,10 +817,9 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_counter_destroy(&sbi->s_extent_cache_cnt); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); #endif @@ -885,6 +881,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ext4_es_init_tree(&ei->i_es_tree); rwlock_init(&ei->i_es_lock); INIT_LIST_HEAD(&ei->i_es_lru); + ei->i_es_all_nr = 0; ei->i_es_lru_nr = 0; ei->i_touch_when = 0; ei->i_reserved_data_blocks = 0; @@ -1002,7 +999,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any" */ - inode = ext4_iget(sb, ino); + inode = ext4_iget_normal(sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); if (generation && inode->i_generation != generation) { @@ -1124,25 +1121,6 @@ static const struct super_operations ext4_sops = { .bdev_try_to_free_page = bdev_try_to_free_page, }; -static const struct super_operations ext4_nojournal_sops = { - .alloc_inode = ext4_alloc_inode, - .destroy_inode = ext4_destroy_inode, - .write_inode = ext4_write_inode, - .dirty_inode = ext4_dirty_inode, - .drop_inode = ext4_drop_inode, - .evict_inode = ext4_evict_inode, - .sync_fs = ext4_sync_fs_nojournal, - .put_super = ext4_put_super, - .statfs = ext4_statfs, - .remount_fs = ext4_remount, - .show_options = ext4_show_options, -#ifdef CONFIG_QUOTA - .quota_read = ext4_quota_read, - .quota_write = ext4_quota_write, -#endif - .bdev_try_to_free_page = bdev_try_to_free_page, -}; - static const struct export_operations ext4_export_ops = { .fh_to_dentry = ext4_fh_to_dentry, .fh_to_parent = ext4_fh_to_parent, @@ -1712,13 +1690,6 @@ static int parse_options(char *options, struct super_block *sb, "not specified"); return 0; } - } else { - if (sbi->s_jquota_fmt) { - ext4_msg(sb, KERN_ERR, "journaled quota format " - "specified with no journaling " - "enabled"); - return 0; - } } #endif if (test_opt(sb, DIOREAD_NOLOCK)) { @@ -2016,8 +1987,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, __u16 crc = 0; __le32 le_group = cpu_to_le32(block_group); - if ((sbi->s_es->s_feature_ro_compat & - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { + if (ext4_has_metadata_csum(sbi->s_sb)) { /* Use new metadata_csum algorithm */ __le16 save_csum; __u32 csum32; @@ -2035,6 +2005,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, } /* old crc16 code */ + if (!(sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) + return 0; + offset = offsetof(struct ext4_group_desc, bg_checksum); crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); @@ -2191,7 +2165,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { /* don't clear list on RO mount w/ errors */ if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { - jbd_debug(1, "Errors on filesystem, " + ext4_msg(sb, KERN_INFO, "Errors on filesystem, " "clearing orphan list.\n"); es->s_last_orphan = 0; } @@ -2207,7 +2181,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, /* Needed for iput() to work correctly and not trash data */ sb->s_flags |= MS_ACTIVE; /* Turn on quotas so that they are updated correctly */ - for (i = 0; i < MAXQUOTAS; i++) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (EXT4_SB(sb)->s_qf_names[i]) { int ret = ext4_quota_on_mount(sb, i); if (ret < 0) @@ -2263,7 +2237,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA /* Turn quotas off */ - for (i = 0; i < MAXQUOTAS; i++) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (sb_dqopt(sb)->files[i]) dquot_quota_off(sb, i); } @@ -2548,6 +2522,16 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, return count; } +static ssize_t es_ui_show(struct ext4_attr *a, + struct ext4_sb_info *sbi, char *buf) +{ + + unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) + + a->u.offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + static ssize_t reserved_clusters_show(struct ext4_attr *a, struct ext4_sb_info *sbi, char *buf) { @@ -2601,14 +2585,29 @@ static struct ext4_attr ext4_attr_##_name = { \ .offset = offsetof(struct ext4_sb_info, _elname),\ }, \ } + +#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ +static struct ext4_attr ext4_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .u = { \ + .offset = offsetof(struct ext4_super_block, _elname), \ + }, \ +} + #define EXT4_ATTR(name, mode, show, store) \ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) + +#define EXT4_RO_ATTR_ES_UI(name, elname) \ + EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) #define EXT4_RW_ATTR_SBI_UI(name, elname) \ EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) + #define ATTR_LIST(name) &ext4_attr_##name.attr #define EXT4_DEPRECATED_ATTR(_name, _val) \ static struct ext4_attr ext4_attr_##_name = { \ @@ -2641,6 +2640,9 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); +EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); +EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); +EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), @@ -2664,6 +2666,9 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(warning_ratelimit_burst), ATTR_LIST(msg_ratelimit_interval_ms), ATTR_LIST(msg_ratelimit_burst), + ATTR_LIST(errors_count), + ATTR_LIST(first_error_time), + ATTR_LIST(last_error_time), NULL, }; @@ -2723,9 +2728,25 @@ static void ext4_feat_release(struct kobject *kobj) complete(&ext4_feat->f_kobj_unregister); } +static ssize_t ext4_feat_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "supported\n"); +} + +/* + * We can not use ext4_attr_show/store because it relies on the kobject + * being embedded in the ext4_sb_info structure which is definitely not + * true in this case. + */ +static const struct sysfs_ops ext4_feat_ops = { + .show = ext4_feat_show, + .store = NULL, +}; + static struct kobj_type ext4_feat_ktype = { .default_attrs = ext4_feat_attrs, - .sysfs_ops = &ext4_attr_ops, + .sysfs_ops = &ext4_feat_ops, .release = ext4_feat_release, }; @@ -3179,8 +3200,7 @@ static int set_journal_csum_feature_set(struct super_block *sb) int compat, incompat; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_metadata_csum(sb)) { /* journal checksum v3 */ compat = 0; incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; @@ -3190,6 +3210,10 @@ static int set_journal_csum_feature_set(struct super_block *sb) incompat = 0; } + jbd2_journal_clear_features(sbi->s_journal, + JBD2_FEATURE_COMPAT_CHECKSUM, 0, + JBD2_FEATURE_INCOMPAT_CSUM_V3 | + JBD2_FEATURE_INCOMPAT_CSUM_V2); if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { ret = jbd2_journal_set_features(sbi->s_journal, compat, 0, @@ -3202,11 +3226,8 @@ static int set_journal_csum_feature_set(struct super_block *sb) jbd2_journal_clear_features(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); } else { - jbd2_journal_clear_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | - JBD2_FEATURE_INCOMPAT_CSUM_V3 | - JBD2_FEATURE_INCOMPAT_CSUM_V2); + jbd2_journal_clear_features(sbi->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); } return ret; @@ -3436,7 +3457,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) logical_sb_block = sb_block; } - if (!(bh = sb_bread(sb, logical_sb_block))) { + if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) { ext4_msg(sb, KERN_ERR, "unable to read superblock"); goto out_fail; } @@ -3487,8 +3508,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } /* Precompute checksum seed for all metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(sb)) sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, sizeof(es->s_uuid)); @@ -3519,8 +3539,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sb, ERRORS_CONT); else set_opt(sb, ERRORS_RO); - if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) - set_opt(sb, BLOCK_VALIDITY); + /* block_validity enabled by default; disable with noblock_validity */ + set_opt(sb, BLOCK_VALIDITY); if (def_mount_opts & EXT4_DEFM_DISCARD) set_opt(sb, DISCARD); @@ -3646,7 +3666,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) brelse(bh); logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; offset = do_div(logical_sb_block, blocksize); - bh = sb_bread(sb, logical_sb_block); + bh = sb_bread_unmovable(sb, logical_sb_block); if (!bh) { ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try"); @@ -3868,7 +3888,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread(sb, block); + sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); if (!sbi->s_group_desc[i]) { ext4_msg(sb, KERN_ERR, "can't read group descriptor %d", i); @@ -3890,13 +3910,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_err_report.data = (unsigned long) sb; /* Register extent status tree shrinker */ - ext4_es_register_shrinker(sbi); - - err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL); - if (err) { - ext4_msg(sb, KERN_ERR, "insufficient memory"); + if (ext4_es_register_shrinker(sbi)) goto failed_mount3; - } sbi->s_stripe = ext4_get_stripe_size(sbi); sbi->s_extent_max_zeroout_kb = 32; @@ -3904,11 +3919,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* * set up enough so that it can read an inode */ - if (!test_opt(sb, NOLOAD) && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) - sb->s_op = &ext4_sops; - else - sb->s_op = &ext4_nojournal_sops; + sb->s_op = &ext4_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; #ifdef CONFIG_QUOTA @@ -4229,10 +4240,9 @@ failed_mount_wq: jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; } -failed_mount3: ext4_es_unregister_shrinker(sbi); +failed_mount3: del_timer_sync(&sbi->s_err_report); - percpu_counter_destroy(&sbi->s_extent_cache_cnt); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: @@ -4247,7 +4257,7 @@ failed_mount: remove_proc_entry(sb->s_id, ext4_proc_root); } #ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); #endif ext4_blkdev_remove(sbi); @@ -4375,6 +4385,15 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, goto out_bdev; } + if ((le32_to_cpu(es->s_feature_ro_compat) & + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + es->s_checksum != ext4_superblock_csum(sb, es)) { + ext4_msg(sb, KERN_ERR, "external journal has " + "corrupt superblock"); + brelse(bh); + goto out_bdev; + } + if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { ext4_msg(sb, KERN_ERR, "journal UUID does not match"); brelse(bh); @@ -4677,15 +4696,19 @@ static int ext4_sync_fs(struct super_block *sb, int wait) * being sent at the end of the function. But we can skip it if * transaction_commit will do it for us. */ - target = jbd2_get_latest_transaction(sbi->s_journal); - if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && - !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) + if (sbi->s_journal) { + target = jbd2_get_latest_transaction(sbi->s_journal); + if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && + !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) + needs_barrier = true; + + if (jbd2_journal_start_commit(sbi->s_journal, &target)) { + if (wait) + ret = jbd2_log_wait_commit(sbi->s_journal, + target); + } + } else if (wait && test_opt(sb, BARRIER)) needs_barrier = true; - - if (jbd2_journal_start_commit(sbi->s_journal, &target)) { - if (wait) - ret = jbd2_log_wait_commit(sbi->s_journal, target); - } if (needs_barrier) { int err; err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); @@ -4696,19 +4719,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait) return ret; } -static int ext4_sync_fs_nojournal(struct super_block *sb, int wait) -{ - int ret = 0; - - trace_ext4_sync_fs(sb, wait); - flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq); - dquot_writeback_dquots(sb, -1); - if (wait && test_opt(sb, BARRIER)) - ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); - - return ret; -} - /* * LVM calls this function before a (read-only) snapshot is created. This * gives us a chance to flush the journal completely and mark the fs clean. @@ -4727,23 +4737,26 @@ static int ext4_freeze(struct super_block *sb) journal = EXT4_SB(sb)->s_journal; - /* Now we set up the journal barrier. */ - jbd2_journal_lock_updates(journal); + if (journal) { + /* Now we set up the journal barrier. */ + jbd2_journal_lock_updates(journal); - /* - * Don't clear the needs_recovery flag if we failed to flush - * the journal. - */ - error = jbd2_journal_flush(journal); - if (error < 0) - goto out; + /* + * Don't clear the needs_recovery flag if we failed to + * flush the journal. + */ + error = jbd2_journal_flush(journal); + if (error < 0) + goto out; + } /* Journal blocked and flushed, clear needs_recovery flag. */ EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); error = ext4_commit_super(sb, 1); out: - /* we rely on upper layer to stop further updates */ - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + if (journal) + /* we rely on upper layer to stop further updates */ + jbd2_journal_unlock_updates(journal); return error; } @@ -4774,7 +4787,7 @@ struct ext4_mount_options { u32 s_min_batch_time, s_max_batch_time; #ifdef CONFIG_QUOTA int s_jquota_fmt; - char *s_qf_names[MAXQUOTAS]; + char *s_qf_names[EXT4_MAXQUOTAS]; #endif }; @@ -4804,7 +4817,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) old_opts.s_max_batch_time = sbi->s_max_batch_time; #ifdef CONFIG_QUOTA old_opts.s_jquota_fmt = sbi->s_jquota_fmt; - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) if (sbi->s_qf_names[i]) { old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], GFP_KERNEL); @@ -4965,7 +4978,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) #ifdef CONFIG_QUOTA /* Release old quota file names */ - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(old_opts.s_qf_names[i]); if (enable_quota) { if (sb_any_quota_suspended(sb)) @@ -4994,7 +5007,7 @@ restore_opts: sbi->s_max_batch_time = old_opts.s_max_batch_time; #ifdef CONFIG_QUOTA sbi->s_jquota_fmt = old_opts.s_jquota_fmt; - for (i = 0; i < MAXQUOTAS; i++) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { kfree(sbi->s_qf_names[i]); sbi->s_qf_names[i] = old_opts.s_qf_names[i]; } @@ -5197,7 +5210,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, { int err; struct inode *qf_inode; - unsigned long qf_inums[MAXQUOTAS] = { + unsigned long qf_inums[EXT4_MAXQUOTAS] = { le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) }; @@ -5225,13 +5238,13 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, static int ext4_enable_quotas(struct super_block *sb) { int type, err = 0; - unsigned long qf_inums[MAXQUOTAS] = { + unsigned long qf_inums[EXT4_MAXQUOTAS] = { le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) }; sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; - for (type = 0; type < MAXQUOTAS; type++) { + for (type = 0; type < EXT4_MAXQUOTAS; type++) { if (qf_inums[type]) { err = ext4_quota_enable(sb, type, QFMT_VFS_V1, DQUOT_USAGE_ENABLED); @@ -5309,7 +5322,6 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, { struct inode *inode = sb_dqopt(sb)->files[type]; ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); - int err = 0; int offset = off & (sb->s_blocksize - 1); int tocopy; size_t toread; @@ -5324,9 +5336,9 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, while (toread > 0) { tocopy = sb->s_blocksize - offset < toread ? sb->s_blocksize - offset : toread; - bh = ext4_bread(NULL, inode, blk, 0, &err); - if (err) - return err; + bh = ext4_bread(NULL, inode, blk, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); if (!bh) /* A hole? */ memset(data, 0, tocopy); else @@ -5347,8 +5359,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, { struct inode *inode = sb_dqopt(sb)->files[type]; ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); - int err = 0; - int offset = off & (sb->s_blocksize - 1); + int err, offset = off & (sb->s_blocksize - 1); struct buffer_head *bh; handle_t *handle = journal_current_handle(); @@ -5369,14 +5380,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, return -EIO; } - bh = ext4_bread(handle, inode, blk, 1, &err); + bh = ext4_bread(handle, inode, blk, 1); + if (IS_ERR(bh)) + return PTR_ERR(bh); if (!bh) goto out; BUFFER_TRACE(bh, "get write access"); err = ext4_journal_get_write_access(handle, bh); if (err) { brelse(bh); - goto out; + return err; } lock_buffer(bh); memcpy(bh->b_data+offset, data, len); @@ -5385,8 +5398,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, err = ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); out: - if (err) - return err; if (inode->i_size < off + len) { i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e7387337060c..1e09fc77395c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -142,8 +142,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode, sector_t block_nr, struct ext4_xattr_header *hdr) { - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + if (ext4_has_metadata_csum(inode->i_sb) && (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) return 0; return 1; @@ -153,8 +152,7 @@ static void ext4_xattr_block_csum_set(struct inode *inode, sector_t block_nr, struct ext4_xattr_header *hdr) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return; hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); @@ -190,14 +188,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) } static int -ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) +ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, + void *value_start) { - while (!IS_LAST_ENTRY(entry)) { - struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry); + struct ext4_xattr_entry *e = entry; + + while (!IS_LAST_ENTRY(e)) { + struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); if ((void *)next >= end) return -EIO; - entry = next; + e = next; } + + while (!IS_LAST_ENTRY(entry)) { + if (entry->e_value_size != 0 && + (value_start + le16_to_cpu(entry->e_value_offs) < + (void *)e + sizeof(__u32) || + value_start + le16_to_cpu(entry->e_value_offs) + + le32_to_cpu(entry->e_value_size) > end)) + return -EIO; + entry = EXT4_XATTR_NEXT(entry); + } + return 0; } @@ -214,7 +226,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) return -EIO; if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) return -EIO; - error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); + error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, + bh->b_data); if (!error) set_buffer_verified(bh); return error; @@ -331,7 +344,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, header = IHDR(inode, raw_inode); entry = IFIRST(header); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = ext4_xattr_check_names(entry, end); + error = ext4_xattr_check_names(entry, end, entry); if (error) goto cleanup; error = ext4_xattr_find_entry(&entry, name_index, name, @@ -463,7 +476,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = ext4_xattr_check_names(IFIRST(header), end); + error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header)); if (error) goto cleanup; error = ext4_xattr_list_entries(dentry, IFIRST(header), @@ -899,14 +912,8 @@ inserted: if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; - /* - * take i_data_sem because we will test - * i_delalloc_reserved_flag in ext4_mb_new_blocks - */ - down_read(&EXT4_I(inode)->i_data_sem); block = ext4_new_meta_blocks(handle, inode, goal, 0, NULL, &error); - up_read((&EXT4_I(inode)->i_data_sem)); if (error) goto cleanup; @@ -986,7 +993,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, is->s.here = is->s.first; is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { - error = ext4_xattr_check_names(IFIRST(header), is->s.end); + error = ext4_xattr_check_names(IFIRST(header), is->s.end, + IFIRST(header)); if (error) return error; /* Find the named attribute. */ diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 06fe11e0abfa..aab8549591e7 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -886,7 +886,7 @@ journal_t * journal_init_inode (struct inode *inode) goto out_err; } - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize); if (!bh) { printk(KERN_ERR "%s: Cannot get buffer for journal superblock\n", diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 7f34f4716165..988b32ed4c87 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -96,15 +96,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_transaction == NULL && !buffer_locked(bh) && !buffer_dirty(bh) && !buffer_write_io_error(bh)) { - /* - * Get our reference so that bh cannot be freed before - * we unlock it - */ - get_bh(bh); JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __jbd2_journal_remove_checkpoint(jh) + 1; - BUFFER_TRACE(bh, "release"); - __brelse(bh); } return ret; } @@ -122,8 +115,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) nblocks = jbd2_space_needed(journal); while (jbd2_log_space_left(journal) < nblocks) { - if (journal->j_flags & JBD2_ABORT) - return; write_unlock(&journal->j_state_lock); mutex_lock(&journal->j_checkpoint_mutex); @@ -139,6 +130,10 @@ void __jbd2_log_wait_for_space(journal_t *journal) * trace for forensic evidence. */ write_lock(&journal->j_state_lock); + if (journal->j_flags & JBD2_ABORT) { + mutex_unlock(&journal->j_checkpoint_mutex); + return; + } spin_lock(&journal->j_list_lock); nblocks = jbd2_space_needed(journal); space_left = jbd2_log_space_left(journal); @@ -183,58 +178,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) } } -/* - * Clean up transaction's list of buffers submitted for io. - * We wait for any pending IO to complete and remove any clean - * buffers. Note that we take the buffers in the opposite ordering - * from the one in which they were submitted for IO. - * - * Return 0 on success, and return <0 if some buffers have failed - * to be written out. - * - * Called with j_list_lock held. - */ -static int __wait_cp_io(journal_t *journal, transaction_t *transaction) -{ - struct journal_head *jh; - struct buffer_head *bh; - tid_t this_tid; - int released = 0; - int ret = 0; - - this_tid = transaction->t_tid; -restart: - /* Did somebody clean up the transaction in the meanwhile? */ - if (journal->j_checkpoint_transactions != transaction || - transaction->t_tid != this_tid) - return ret; - while (!released && transaction->t_checkpoint_io_list) { - jh = transaction->t_checkpoint_io_list; - bh = jh2bh(jh); - get_bh(bh); - if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - spin_lock(&journal->j_list_lock); - goto restart; - } - if (unlikely(buffer_write_io_error(bh))) - ret = -EIO; - - /* - * Now in whatever state the buffer currently is, we know that - * it has been written out and so we can drop it from the list - */ - released = __jbd2_journal_remove_checkpoint(jh); - __brelse(bh); - } - - return ret; -} - static void __flush_batch(journal_t *journal, int *batch_count) { @@ -255,81 +198,6 @@ __flush_batch(journal_t *journal, int *batch_count) } /* - * Try to flush one buffer from the checkpoint list to disk. - * - * Return 1 if something happened which requires us to abort the current - * scan of the checkpoint list. Return <0 if the buffer has failed to - * be written out. - * - * Called with j_list_lock held and drops it if 1 is returned - */ -static int __process_buffer(journal_t *journal, struct journal_head *jh, - int *batch_count, transaction_t *transaction) -{ - struct buffer_head *bh = jh2bh(jh); - int ret = 0; - - if (buffer_locked(bh)) { - get_bh(bh); - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - ret = 1; - } else if (jh->b_transaction != NULL) { - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; - - transaction->t_chp_stats.cs_forced_to_close++; - spin_unlock(&journal->j_list_lock); - if (unlikely(journal->j_flags & JBD2_UNMOUNT)) - /* - * The journal thread is dead; so starting and - * waiting for a commit to finish will cause - * us to wait for a _very_ long time. - */ - printk(KERN_ERR "JBD2: %s: " - "Waiting for Godot: block %llu\n", - journal->j_devname, - (unsigned long long) bh->b_blocknr); - jbd2_log_start_commit(journal, tid); - jbd2_log_wait_commit(journal, tid); - ret = 1; - } else if (!buffer_dirty(bh)) { - ret = 1; - if (unlikely(buffer_write_io_error(bh))) - ret = -EIO; - get_bh(bh); - BUFFER_TRACE(bh, "remove from checkpoint"); - __jbd2_journal_remove_checkpoint(jh); - spin_unlock(&journal->j_list_lock); - __brelse(bh); - } else { - /* - * Important: we are about to write the buffer, and - * possibly block, while still holding the journal lock. - * We cannot afford to let the transaction logic start - * messing around with this buffer before we write it to - * disk, as that would break recoverability. - */ - BUFFER_TRACE(bh, "queue"); - get_bh(bh); - J_ASSERT_BH(bh, !buffer_jwrite(bh)); - journal->j_chkpt_bhs[*batch_count] = bh; - __buffer_relink_io(jh); - transaction->t_chp_stats.cs_written++; - (*batch_count)++; - if (*batch_count == JBD2_NR_BATCH) { - spin_unlock(&journal->j_list_lock); - __flush_batch(journal, batch_count); - ret = 1; - } - } - return ret; -} - -/* * Perform an actual checkpoint. We take the first transaction on the * list of transactions to be checkpointed and send all its buffers * to disk. We submit larger chunks of data at once. @@ -339,9 +207,11 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, */ int jbd2_log_do_checkpoint(journal_t *journal) { - transaction_t *transaction; - tid_t this_tid; - int result; + struct journal_head *jh; + struct buffer_head *bh; + transaction_t *transaction; + tid_t this_tid; + int result, batch_count = 0; jbd_debug(1, "Start checkpoint\n"); @@ -374,45 +244,117 @@ restart: * done (maybe it's a new transaction, but it fell at the same * address). */ - if (journal->j_checkpoint_transactions == transaction && - transaction->t_tid == this_tid) { - int batch_count = 0; - struct journal_head *jh; - int retry = 0, err; - - while (!retry && transaction->t_checkpoint_list) { - jh = transaction->t_checkpoint_list; - retry = __process_buffer(journal, jh, &batch_count, - transaction); - if (retry < 0 && !result) - result = retry; - if (!retry && (need_resched() || - spin_needbreak(&journal->j_list_lock))) { - spin_unlock(&journal->j_list_lock); - retry = 1; - break; - } + if (journal->j_checkpoint_transactions != transaction || + transaction->t_tid != this_tid) + goto out; + + /* checkpoint all of the transaction's buffers */ + while (transaction->t_checkpoint_list) { + jh = transaction->t_checkpoint_list; + bh = jh2bh(jh); + + if (buffer_locked(bh)) { + spin_unlock(&journal->j_list_lock); + get_bh(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + goto retry; } + if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; - if (batch_count) { - if (!retry) { - spin_unlock(&journal->j_list_lock); - retry = 1; - } - __flush_batch(journal, &batch_count); + transaction->t_chp_stats.cs_forced_to_close++; + spin_unlock(&journal->j_list_lock); + if (unlikely(journal->j_flags & JBD2_UNMOUNT)) + /* + * The journal thread is dead; so + * starting and waiting for a commit + * to finish will cause us to wait for + * a _very_ long time. + */ + printk(KERN_ERR + "JBD2: %s: Waiting for Godot: block %llu\n", + journal->j_devname, (unsigned long long) bh->b_blocknr); + + jbd2_log_start_commit(journal, tid); + jbd2_log_wait_commit(journal, tid); + goto retry; + } + if (!buffer_dirty(bh)) { + if (unlikely(buffer_write_io_error(bh)) && !result) + result = -EIO; + BUFFER_TRACE(bh, "remove from checkpoint"); + if (__jbd2_journal_remove_checkpoint(jh)) + /* The transaction was released; we're done */ + goto out; + continue; } + /* + * Important: we are about to write the buffer, and + * possibly block, while still holding the journal + * lock. We cannot afford to let the transaction + * logic start messing around with this buffer before + * we write it to disk, as that would break + * recoverability. + */ + BUFFER_TRACE(bh, "queue"); + get_bh(bh); + J_ASSERT_BH(bh, !buffer_jwrite(bh)); + journal->j_chkpt_bhs[batch_count++] = bh; + __buffer_relink_io(jh); + transaction->t_chp_stats.cs_written++; + if ((batch_count == JBD2_NR_BATCH) || + need_resched() || + spin_needbreak(&journal->j_list_lock)) + goto unlock_and_flush; + } - if (retry) { + if (batch_count) { + unlock_and_flush: + spin_unlock(&journal->j_list_lock); + retry: + if (batch_count) + __flush_batch(journal, &batch_count); spin_lock(&journal->j_list_lock); goto restart; + } + + /* + * Now we issued all of the transaction's buffers, let's deal + * with the buffers that are out for I/O. + */ +restart2: + /* Did somebody clean up the transaction in the meanwhile? */ + if (journal->j_checkpoint_transactions != transaction || + transaction->t_tid != this_tid) + goto out; + + while (transaction->t_checkpoint_io_list) { + jh = transaction->t_checkpoint_io_list; + bh = jh2bh(jh); + if (buffer_locked(bh)) { + spin_unlock(&journal->j_list_lock); + get_bh(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + spin_lock(&journal->j_list_lock); + goto restart2; } + if (unlikely(buffer_write_io_error(bh)) && !result) + result = -EIO; + /* - * Now we have cleaned up the first transaction's checkpoint - * list. Let's clean up the second one + * Now in whatever state the buffer currently is, we + * know that it has been written out and so we can + * drop it from the list */ - err = __wait_cp_io(journal, transaction); - if (!result) - result = err; + if (__jbd2_journal_remove_checkpoint(jh)) + break; } out: spin_unlock(&journal->j_list_lock); @@ -478,18 +420,16 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * Find all the written-back checkpoint buffers in the given list and * release them. * - * Called with the journal locked. * Called with j_list_lock held. - * Returns number of buffers reaped (for debug) + * Returns 1 if we freed the transaction, 0 otherwise. */ - -static int journal_clean_one_cp_list(struct journal_head *jh, int *released) +static int journal_clean_one_cp_list(struct journal_head *jh) { struct journal_head *last_jh; struct journal_head *next_jh = jh; - int ret, freed = 0; + int ret; + int freed = 0; - *released = 0; if (!jh) return 0; @@ -498,13 +438,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) jh = next_jh; next_jh = jh->b_cpnext; ret = __try_to_free_cp_buf(jh); - if (ret) { - freed++; - if (ret == 2) { - *released = 1; - return freed; - } - } + if (!ret) + return freed; + if (ret == 2) + return 1; + freed = 1; /* * This function only frees up some memory * if possible so we dont have an obligation @@ -523,49 +461,49 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) * * Find all the written-back checkpoint buffers in the journal and release them. * - * Called with the journal locked. * Called with j_list_lock held. - * Returns number of buffers reaped (for debug) */ - -int __jbd2_journal_clean_checkpoint_list(journal_t *journal) +void __jbd2_journal_clean_checkpoint_list(journal_t *journal) { transaction_t *transaction, *last_transaction, *next_transaction; - int ret = 0; - int released; + int ret; transaction = journal->j_checkpoint_transactions; if (!transaction) - goto out; + return; last_transaction = transaction->t_cpprev; next_transaction = transaction; do { transaction = next_transaction; next_transaction = transaction->t_cpnext; - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_list, &released); + ret = journal_clean_one_cp_list(transaction->t_checkpoint_list); /* * This function only frees up some memory if possible so we * dont have an obligation to finish processing. Bail out if * preemption requested: */ if (need_resched()) - goto out; - if (released) + return; + if (ret) continue; /* * It is essential that we are as careful as in the case of * t_checkpoint_list with removing the buffer from the list as * we can possibly see not yet submitted buffers on io_list */ - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list, &released); + ret = journal_clean_one_cp_list(transaction-> + t_checkpoint_io_list); if (need_resched()) - goto out; + return; + /* + * Stop scanning if we couldn't free the transaction. This + * avoids pointless scanning of transactions which still + * weren't checkpointed. + */ + if (!ret) + return; } while (transaction != last_transaction); -out: - return ret; } /* diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 19d74d86d99c..e4dc74713a43 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1237,7 +1237,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) goto out_err; } - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize); if (!bh) { printk(KERN_ERR "%s: Cannot get buffer for journal superblock\n", @@ -1522,14 +1522,6 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (jbd2_journal_has_csum_v2or3(journal) && - JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { - /* Can't have checksum v1 and v2 on at the same time! */ - printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " - "at the same time!\n"); - goto out; - } - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { /* Can't have checksum v2 and v3 at the same time! */ @@ -1538,6 +1530,14 @@ static int journal_get_superblock(journal_t *journal) goto out; } + if (jbd2_journal_has_csum_v2or3(journal) && + JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { + /* Can't have checksum v1 and v2 on at the same time! */ + printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " + "at the same time!\n"); + goto out; + } + if (!jbd2_verify_csum_type(journal, sb)) { printk(KERN_ERR "JBD2: Unknown checksum type\n"); goto out; diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 9b329b55ffe3..bcbef08a4d8f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -525,6 +525,7 @@ static int do_one_pass(journal_t *journal, !jbd2_descr_block_csum_verify(journal, bh->b_data)) { err = -EIO; + brelse(bh); goto failed; } diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 324329ceea1e..73b45225a7ca 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -175,12 +175,13 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); -struct buffer_head *__getblk(struct block_device *bdev, sector_t block, - unsigned size); +struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); -struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size); +struct buffer_head *__bread_gfp(struct block_device *, + sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -295,7 +296,13 @@ static inline void bforget(struct buffer_head *bh) static inline struct buffer_head * sb_bread(struct super_block *sb, sector_t block) { - return __bread(sb->s_bdev, block, sb->s_blocksize); + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); +} + +static inline struct buffer_head * +sb_bread_unmovable(struct super_block *sb, sector_t block) +{ + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0); } static inline void @@ -307,7 +314,7 @@ sb_breadahead(struct super_block *sb, sector_t block) static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { - return __getblk(sb->s_bdev, block, sb->s_blocksize); + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } static inline struct buffer_head * @@ -344,6 +351,36 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, 0); +} + +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); +} + +/** + * __bread() - reads a specified block and returns the bh + * @bdev: the block_device to read from + * @block: number of block + * @size: size (in bytes) to read + * + * Reads a specified block, and returns buffer head that contains it. + * The page cache is allocated from movable area so that it can be migrated. + * It returns NULL if the block was unreadable. + */ +static inline struct buffer_head * +__bread(struct block_device *bdev, sector_t block, unsigned size) +{ + return __bread_gfp(bdev, block, size, __GFP_MOVABLE); +} + extern int __set_page_dirty_buffers(struct page *page); #else /* CONFIG_BLOCK */ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0dae71e9971c..704b9a599b26 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1042,7 +1042,7 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); extern void jbd2_journal_commit_transaction(journal_t *); /* Checkpoint list management */ -int __jbd2_journal_clean_checkpoint_list(journal_t *journal); +void __jbd2_journal_clean_checkpoint_list(journal_t *journal); int __jbd2_journal_remove_checkpoint(struct journal_head *); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); diff --git a/include/linux/leds.h b/include/linux/leds.h index e43686472197..a57611d0c94e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -13,8 +13,8 @@ #define __LINUX_LEDS_H_INCLUDED #include <linux/list.h> -#include <linux/spinlock.h> #include <linux/rwsem.h> +#include <linux/spinlock.h> #include <linux/timer.h> #include <linux/workqueue.h> @@ -31,8 +31,8 @@ enum led_brightness { struct led_classdev { const char *name; - int brightness; - int max_brightness; + enum led_brightness brightness; + enum led_brightness max_brightness; int flags; /* Lower 16 bits reflect status */ @@ -140,6 +140,16 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev, */ extern void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness); +/** + * led_update_brightness - update LED brightness + * @led_cdev: the LED to query + * + * Get an LED's current brightness and update led_cdev->brightness + * member with the obtained value. + * + * Returns: 0 on success or negative error value on failure + */ +extern int led_update_brightness(struct led_classdev *led_cdev); /* * LED Triggers diff --git a/include/linux/mm.h b/include/linux/mm.h index 02d11ee7f19d..27eb1bfbe704 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1176,6 +1176,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); +void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d4f70a7fe876..ff4bd1b35246 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2369,7 +2369,7 @@ TRACE_EVENT(ext4_es_lookup_extent_exit, show_extent_status(__entry->found ? __entry->status : 0)) ); -TRACE_EVENT(ext4_es_shrink_enter, +DECLARE_EVENT_CLASS(ext4__es_shrink_enter, TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), TP_ARGS(sb, nr_to_scan, cache_cnt), @@ -2391,26 +2391,38 @@ TRACE_EVENT(ext4_es_shrink_enter, __entry->nr_to_scan, __entry->cache_cnt) ); -TRACE_EVENT(ext4_es_shrink_exit, - TP_PROTO(struct super_block *sb, int shrunk_nr, int cache_cnt), +DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_count, + TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), - TP_ARGS(sb, shrunk_nr, cache_cnt), + TP_ARGS(sb, nr_to_scan, cache_cnt) +); + +DEFINE_EVENT(ext4__es_shrink_enter, ext4_es_shrink_scan_enter, + TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt), + + TP_ARGS(sb, nr_to_scan, cache_cnt) +); + +TRACE_EVENT(ext4_es_shrink_scan_exit, + TP_PROTO(struct super_block *sb, int nr_shrunk, int cache_cnt), + + TP_ARGS(sb, nr_shrunk, cache_cnt), TP_STRUCT__entry( __field( dev_t, dev ) - __field( int, shrunk_nr ) + __field( int, nr_shrunk ) __field( int, cache_cnt ) ), TP_fast_assign( __entry->dev = sb->s_dev; - __entry->shrunk_nr = shrunk_nr; + __entry->nr_shrunk = nr_shrunk; __entry->cache_cnt = cache_cnt; ), - TP_printk("dev %d,%d shrunk_nr %d cache_cnt %d", + TP_printk("dev %d,%d nr_shrunk %d cache_cnt %d", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->shrunk_nr, __entry->cache_cnt) + __entry->nr_shrunk, __entry->cache_cnt) ); TRACE_EVENT(ext4_collapse_range, @@ -2438,6 +2450,37 @@ TRACE_EVENT(ext4_collapse_range, __entry->offset, __entry->len) ); +TRACE_EVENT(ext4_es_shrink, + TP_PROTO(struct super_block *sb, int nr_shrunk, u64 scan_time, + int skip_precached, int nr_skipped, int retried), + + TP_ARGS(sb, nr_shrunk, scan_time, skip_precached, nr_skipped, retried), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, nr_shrunk ) + __field( unsigned long long, scan_time ) + __field( int, skip_precached ) + __field( int, nr_skipped ) + __field( int, retried ) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->nr_shrunk = nr_shrunk; + __entry->scan_time = div_u64(scan_time, 1000); + __entry->skip_precached = skip_precached; + __entry->nr_skipped = nr_skipped; + __entry->retried = retried; + ), + + TP_printk("dev %d,%d nr_shrunk %d, scan_time %llu skip_precached %d " + "nr_skipped %d retried %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->nr_shrunk, + __entry->scan_time, __entry->skip_precached, + __entry->nr_skipped, __entry->retried) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ diff --git a/mm/truncate.c b/mm/truncate.c index 96d167372d89..261eaf6e5a19 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -20,6 +20,7 @@ #include <linux/buffer_head.h> /* grr. try_to_release_page, do_invalidatepage */ #include <linux/cleancache.h> +#include <linux/rmap.h> #include "internal.h" static void clear_exceptional_entry(struct address_space *mapping, @@ -719,12 +720,68 @@ EXPORT_SYMBOL(truncate_pagecache); */ void truncate_setsize(struct inode *inode, loff_t newsize) { + loff_t oldsize = inode->i_size; + i_size_write(inode, newsize); + if (newsize > oldsize) + pagecache_isize_extended(inode, oldsize, newsize); truncate_pagecache(inode, newsize); } EXPORT_SYMBOL(truncate_setsize); /** + * pagecache_isize_extended - update pagecache after extension of i_size + * @inode: inode for which i_size was extended + * @from: original inode size + * @to: new inode size + * + * Handle extension of inode size either caused by extending truncate or by + * write starting after current i_size. We mark the page straddling current + * i_size RO so that page_mkwrite() is called on the nearest write access to + * the page. This way filesystem can be sure that page_mkwrite() is called on + * the page before user writes to the page via mmap after the i_size has been + * changed. + * + * The function must be called after i_size is updated so that page fault + * coming after we unlock the page will already see the new i_size. + * The function must be called while we still hold i_mutex - this not only + * makes sure i_size is stable but also that userspace cannot observe new + * i_size value before we are prepared to store mmap writes at new inode size. + */ +void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to) +{ + int bsize = 1 << inode->i_blkbits; + loff_t rounded_from; + struct page *page; + pgoff_t index; + + WARN_ON(!mutex_is_locked(&inode->i_mutex)); + WARN_ON(to > inode->i_size); + + if (from >= to || bsize == PAGE_CACHE_SIZE) + return; + /* Page straddling @from will not have any hole block created? */ + rounded_from = round_up(from, bsize); + if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1))) + return; + + index = from >> PAGE_CACHE_SHIFT; + page = find_lock_page(inode->i_mapping, index); + /* Page not cached? Nothing to do */ + if (!page) + return; + /* + * See clear_page_dirty_for_io() for details why set_page_dirty() + * is needed. + */ + if (page_mkclean(page)) + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); +} +EXPORT_SYMBOL(pagecache_isize_extended); + +/** * truncate_pagecache_range - unmap and remove pagecache that is hole-punched * @inode: inode * @lstart: offset of beginning of hole |