diff options
198 files changed, 4241 insertions, 1579 deletions
diff --git a/Documentation/ABI/testing/sysfs-block-rssd b/Documentation/ABI/testing/sysfs-block-rssd index 679ce3543122..beef30c046b0 100644 --- a/Documentation/ABI/testing/sysfs-block-rssd +++ b/Documentation/ABI/testing/sysfs-block-rssd @@ -1,26 +1,5 @@ -What: /sys/block/rssd*/registers -Date: March 2012 -KernelVersion: 3.3 -Contact: Asai Thambi S P <asamymuthupa@micron.com> -Description: This is a read-only file. Dumps below driver information and - hardware registers. - - S ACTive - - Command Issue - - Completed - - PORT IRQ STAT - - HOST IRQ STAT - - Allocated - - Commands in Q - What: /sys/block/rssd*/status Date: April 2012 KernelVersion: 3.4 Contact: Asai Thambi S P <asamymuthupa@micron.com> Description: This is a read-only file. Indicates the status of the device. - -What: /sys/block/rssd*/flags -Date: May 2012 -KernelVersion: 3.5 -Contact: Asai Thambi S P <asamymuthupa@micron.com> -Description: This is a read-only file. Dumps the flags in port and driver - data structure diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index 32e48797a14f..9884681535ee 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt @@ -7,39 +7,39 @@ This target is read-only. Construction Parameters ======================= - <version> <dev> <hash_dev> <hash_start> + <version> <dev> <hash_dev> <data_block_size> <hash_block_size> <num_data_blocks> <hash_start_block> <algorithm> <digest> <salt> <version> - This is the version number of the on-disk format. + This is the type of the on-disk hash format. 0 is the original format used in the Chromium OS. - The salt is appended when hashing, digests are stored continuously and - the rest of the block is padded with zeros. + The salt is appended when hashing, digests are stored continuously and + the rest of the block is padded with zeros. 1 is the current format that should be used for new devices. - The salt is prepended when hashing and each digest is - padded with zeros to the power of two. + The salt is prepended when hashing and each digest is + padded with zeros to the power of two. <dev> - This is the device containing the data the integrity of which needs to be + This is the device containing data, the integrity of which needs to be checked. It may be specified as a path, like /dev/sdaX, or a device number, <major>:<minor>. <hash_dev> - This is the device that that supplies the hash tree data. It may be + This is the device that supplies the hash tree data. It may be specified similarly to the device path and may be the same device. If the - same device is used, the hash_start should be outside of the dm-verity - configured device size. + same device is used, the hash_start should be outside the configured + dm-verity device. <data_block_size> - The block size on a data device. Each block corresponds to one digest on - the hash device. + The block size on a data device in bytes. + Each block corresponds to one digest on the hash device. <hash_block_size> - The size of a hash block. + The size of a hash block in bytes. <num_data_blocks> The number of data blocks on the data device. Additional blocks are @@ -65,7 +65,7 @@ Construction Parameters Theory of operation =================== -dm-verity is meant to be setup as part of a verified boot path. This +dm-verity is meant to be set up as part of a verified boot path. This may be anything ranging from a boot using tboot or trustedgrub to just booting from a known-good device (like a USB drive or CD). @@ -73,20 +73,20 @@ When a dm-verity device is configured, it is expected that the caller has been authenticated in some way (cryptographic signatures, etc). After instantiation, all hashes will be verified on-demand during disk access. If they cannot be verified up to the root node of the -tree, the root hash, then the I/O will fail. This should identify +tree, the root hash, then the I/O will fail. This should detect tampering with any data on the device and the hash data. Cryptographic hashes are used to assert the integrity of the device on a -per-block basis. This allows for a lightweight hash computation on first read -into the page cache. Block hashes are stored linearly-aligned to the nearest -block the size of a page. +per-block basis. This allows for a lightweight hash computation on first read +into the page cache. Block hashes are stored linearly, aligned to the nearest +block size. Hash Tree --------- Each node in the tree is a cryptographic hash. If it is a leaf node, the hash -is of some block data on disk. If it is an intermediary node, then the hash is -of a number of child nodes. +of some data block on disk is calculated. If it is an intermediary node, +the hash of a number of child nodes is calculated. Each entry in the tree is a collection of neighboring nodes that fit in one block. The number is determined based on block_size and the size of the @@ -110,63 +110,23 @@ alg = sha256, num_blocks = 32768, block_size = 4096 On-disk format ============== -Below is the recommended on-disk format. The verity kernel code does not -read the on-disk header. It only reads the hash blocks which directly -follow the header. It is expected that a user-space tool will verify the -integrity of the verity_header and then call dmsetup with the correct -parameters. Alternatively, the header can be omitted and the dmsetup -parameters can be passed via the kernel command-line in a rooted chain -of trust where the command-line is verified. +The verity kernel code does not read the verity metadata on-disk header. +It only reads the hash blocks which directly follow the header. +It is expected that a user-space tool will verify the integrity of the +verity header. -The on-disk format is especially useful in cases where the hash blocks -are on a separate partition. The magic number allows easy identification -of the partition contents. Alternatively, the hash blocks can be stored -in the same partition as the data to be verified. In such a configuration -the filesystem on the partition would be sized a little smaller than -the full-partition, leaving room for the hash blocks. - -struct superblock { - uint8_t signature[8] - "verity\0\0"; - - uint8_t version; - 1 - current format - - uint8_t data_block_bits; - log2(data block size) - - uint8_t hash_block_bits; - log2(hash block size) - - uint8_t pad1[1]; - zero padding - - uint16_t salt_size; - big-endian salt size - - uint8_t pad2[2]; - zero padding - - uint32_t data_blocks_hi; - big-endian high 32 bits of the 64-bit number of data blocks - - uint32_t data_blocks_lo; - big-endian low 32 bits of the 64-bit number of data blocks - - uint8_t algorithm[16]; - cryptographic algorithm - - uint8_t salt[384]; - salt (the salt size is specified above) - - uint8_t pad3[88]; - zero padding to 512-byte boundary -} +Alternatively, the header can be omitted and the dmsetup parameters can +be passed via the kernel command-line in a rooted chain of trust where +the command-line is verified. Directly following the header (and with sector number padded to the next hash block boundary) are the hash blocks which are stored a depth at a time (starting from the root), sorted in order of increasing index. +The full specification of kernel parameters and on-disk metadata format +is available at the cryptsetup project's wiki page + http://code.google.com/p/cryptsetup/wiki/DMVerity + Status ====== V (for Valid) is returned if every check performed so far was valid. @@ -174,21 +134,22 @@ If any check failed, C (for Corruption) is returned. Example ======= - -Setup a device: - dmsetup create vroot --table \ - "0 2097152 "\ - "verity 1 /dev/sda1 /dev/sda2 4096 4096 2097152 1 "\ +Set up a device: + # dmsetup create vroot --readonly --table \ + "0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 "\ "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\ "1234000000000000000000000000000000000000000000000000000000000000" A command line tool veritysetup is available to compute or verify -the hash tree or activate the kernel driver. This is available from -the LVM2 upstream repository and may be supplied as a package called -device-mapper-verity-tools: - git://sources.redhat.com/git/lvm2 - http://sourceware.org/git/?p=lvm2.git - http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/verity?cvsroot=lvm2 - -veritysetup -a vroot /dev/sda1 /dev/sda2 \ - 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 +the hash tree or activate the kernel device. This is available from +the cryptsetup upstream repository http://code.google.com/p/cryptsetup/ +(as a libcryptsetup extension). + +Create hash on the device: + # veritysetup format /dev/sda1 /dev/sda2 + ... + Root hash: 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 + +Activate the device: + # veritysetup create vroot /dev/sda1 /dev/sda2 \ + 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 diff --git a/Documentation/prctl/no_new_privs.txt b/Documentation/prctl/no_new_privs.txt new file mode 100644 index 000000000000..cb705ec69abe --- /dev/null +++ b/Documentation/prctl/no_new_privs.txt @@ -0,0 +1,50 @@ +The execve system call can grant a newly-started program privileges that +its parent did not have. The most obvious examples are setuid/setgid +programs and file capabilities. To prevent the parent program from +gaining these privileges as well, the kernel and user code must be +careful to prevent the parent from doing anything that could subvert the +child. For example: + + - The dynamic loader handles LD_* environment variables differently if + a program is setuid. + + - chroot is disallowed to unprivileged processes, since it would allow + /etc/passwd to be replaced from the point of view of a process that + inherited chroot. + + - The exec code has special handling for ptrace. + +These are all ad-hoc fixes. The no_new_privs bit (since Linux 3.5) is a +new, generic mechanism to make it safe for a process to modify its +execution environment in a manner that persists across execve. Any task +can set no_new_privs. Once the bit is set, it is inherited across fork, +clone, and execve and cannot be unset. With no_new_privs set, execve +promises not to grant the privilege to do anything that could not have +been done without the execve call. For example, the setuid and setgid +bits will no longer change the uid or gid; file capabilities will not +add to the permitted set, and LSMs will not relax constraints after +execve. + +Note that no_new_privs does not prevent privilege changes that do not +involve execve. An appropriately privileged task can still call +setuid(2) and receive SCM_RIGHTS datagrams. + +There are two main use cases for no_new_privs so far: + + - Filters installed for the seccomp mode 2 sandbox persist across + execve and can change the behavior of newly-executed programs. + Unprivileged users are therefore only allowed to install such filters + if no_new_privs is set. + + - By itself, no_new_privs can be used to reduce the attack surface + available to an unprivileged user. If everything running with a + given uid has no_new_privs set, then that uid will be unable to + escalate its privileges by directly attacking setuid, setgid, and + fcap-using binaries; it will need to compromise something without the + no_new_privs bit set first. + +In the future, other potentially dangerous kernel features could become +available to unprivileged tasks if no_new_privs is set. In principle, +several options to unshare(2) and clone(2) would be safe when +no_new_privs is set, and no_new_privs + chroot is considerable less +dangerous than chroot by itself. diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt index f0ab5cf28fca..4a7b54bd37e8 100644 --- a/Documentation/stable_kernel_rules.txt +++ b/Documentation/stable_kernel_rules.txt @@ -12,6 +12,12 @@ Rules on what kind of patches are accepted, and which ones are not, into the marked CONFIG_BROKEN), an oops, a hang, data corruption, a real security issue, or some "oh, that's not good" issue. In short, something critical. + - Serious issues as reported by a user of a distribution kernel may also + be considered if they fix a notable performance or interactivity issue. + As these fixes are not as obvious and have a higher risk of a subtle + regression they should only be submitted by a distribution kernel + maintainer and include an addendum linking to a bugzilla entry if it + exists and additional information on the user-visible impact. - New device IDs and quirks are also accepted. - No "theoretical race condition" issues, unless an explanation of how the race can be exploited is also provided. diff --git a/MAINTAINERS b/MAINTAINERS index d350dae2ce1c..8da1373bd6d1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4655,8 +4655,8 @@ L: netfilter@vger.kernel.org L: coreteam@netfilter.org W: http://www.netfilter.org/ W: http://www.iptables.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-2.6.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next-2.6.git +T: git git://1984.lsi.us.es/nf +T: git git://1984.lsi.us.es/nf-next S: Supported F: include/linux/netfilter* F: include/linux/netfilter/ @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 5 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Saber-toothed Squirrel # *DOCUMENTATION* diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 43a31fb06318..36ff15bbfdd4 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -183,7 +183,9 @@ SECTIONS } #endif +#ifdef CONFIG_SMP PERCPU_SECTION(L1_CACHE_BYTES) +#endif #ifdef CONFIG_XIP_KERNEL __data_loc = ALIGN(4); /* location in binary */ diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 573be57d3d28..6f6d13f91e4c 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -212,7 +212,7 @@ config MACH_SMDKV310 select EXYNOS_DEV_SYSMMU select EXYNOS4_DEV_AHCI select SAMSUNG_DEV_KEYPAD - select EXYNOS4_DEV_DMA + select EXYNOS_DEV_DMA select SAMSUNG_DEV_PWM select EXYNOS4_DEV_USB_OHCI select EXYNOS4_SETUP_FIMD0 @@ -264,7 +264,7 @@ config MACH_UNIVERSAL_C210 select S5P_DEV_ONENAND select S5P_DEV_TV select EXYNOS_DEV_SYSMMU - select EXYNOS4_DEV_DMA + select EXYNOS_DEV_DMA select EXYNOS_DEV_DRM select EXYNOS4_SETUP_FIMD0 select EXYNOS4_SETUP_I2C1 @@ -303,7 +303,7 @@ config MACH_NURI select S5P_DEV_MFC select S5P_DEV_USB_EHCI select S5P_SETUP_MIPIPHY - select EXYNOS4_DEV_DMA + select EXYNOS_DEV_DMA select EXYNOS_DEV_DRM select EXYNOS4_SETUP_FIMC select EXYNOS4_SETUP_FIMD0 @@ -341,7 +341,7 @@ config MACH_ORIGEN select SAMSUNG_DEV_PWM select EXYNOS_DEV_DRM select EXYNOS_DEV_SYSMMU - select EXYNOS4_DEV_DMA + select EXYNOS_DEV_DMA select EXYNOS4_DEV_USB_OHCI select EXYNOS4_SETUP_FIMD0 select EXYNOS4_SETUP_SDHCI diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 17dc66a085a5..e1a17ac7b3b4 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -152,13 +152,14 @@ enum mx6q_clks { ssi2, ssi3, uart_ipg, uart_serial, usboh3, usdhc1, usdhc2, usdhc3, usdhc4, vdo_axi, vpu_axi, cko1, pll1_sys, pll2_bus, pll3_usb_otg, pll4_audio, pll5_video, pll6_mlb, pll7_usb_host, pll8_enet, ssi1_ipg, - ssi2_ipg, ssi3_ipg, clk_max + ssi2_ipg, ssi3_ipg, rom, + clk_max }; static struct clk *clk[clk_max]; static enum mx6q_clks const clks_init_on[] __initconst = { - mmdc_ch0_axi, mmdc_ch1_axi, + mmdc_ch0_axi, rom, }; int __init mx6q_clocks_init(void) @@ -364,6 +365,7 @@ int __init mx6q_clocks_init(void) clk[gpmi_bch] = imx_clk_gate2("gpmi_bch", "usdhc4", base + 0x78, 26); clk[gpmi_io] = imx_clk_gate2("gpmi_io", "enfc", base + 0x78, 28); clk[gpmi_apb] = imx_clk_gate2("gpmi_apb", "usdhc3", base + 0x78, 30); + clk[rom] = imx_clk_gate2("rom", "ahb", base + 0x7c, 0); clk[sata] = imx_clk_gate2("sata", "ipg", base + 0x7c, 4); clk[sdma] = imx_clk_gate2("sdma", "ahb", base + 0x7c, 6); clk[spba] = imx_clk_gate2("spba", "ipg", base + 0x7c, 12); diff --git a/arch/arm/mach-omap2/board-flash.c b/arch/arm/mach-omap2/board-flash.c index 70a81f900bb5..53c39d239d6e 100644 --- a/arch/arm/mach-omap2/board-flash.c +++ b/arch/arm/mach-omap2/board-flash.c @@ -97,11 +97,6 @@ __init board_onenand_init(struct mtd_partition *onenand_parts, gpmc_onenand_init(&board_onenand_data); } -#else -void -__init board_onenand_init(struct mtd_partition *nor_parts, u8 nr_parts, u8 cs) -{ -} #endif /* CONFIG_MTD_ONENAND_OMAP2 || CONFIG_MTD_ONENAND_OMAP2_MODULE */ #if defined(CONFIG_MTD_NAND_OMAP2) || \ diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c index e2b701e164f6..ba6f9a0a43e9 100644 --- a/arch/arm/mach-omap2/clock44xx_data.c +++ b/arch/arm/mach-omap2/clock44xx_data.c @@ -3417,9 +3417,12 @@ int __init omap4xxx_clk_init(void) if (cpu_is_omap443x()) { cpu_mask = RATE_IN_4430; cpu_clkflg = CK_443X; - } else if (cpu_is_omap446x()) { + } else if (cpu_is_omap446x() || cpu_is_omap447x()) { cpu_mask = RATE_IN_4460 | RATE_IN_4430; cpu_clkflg = CK_446X | CK_443X; + + if (cpu_is_omap447x()) + pr_warn("WARNING: OMAP4470 clock data incomplete!\n"); } else { return 0; } diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index 9e37026ef9dd..9bd135531d76 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -779,6 +779,7 @@ DT_MACHINE_START(ARMADILLO800EVA_DT, "armadillo800eva") .init_irq = r8a7740_init_irq, .handle_irq = shmobile_handle_irq_intc, .init_machine = eva_init, + .init_late = shmobile_init_late, .timer = &shmobile_timer, .dt_compat = eva_boards_compat_dt, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-kzm9d.c b/arch/arm/mach-shmobile/board-kzm9d.c index 7bc5e7d39f9b..6a33cf393428 100644 --- a/arch/arm/mach-shmobile/board-kzm9d.c +++ b/arch/arm/mach-shmobile/board-kzm9d.c @@ -80,6 +80,7 @@ DT_MACHINE_START(KZM9D_DT, "kzm9d") .init_irq = emev2_init_irq, .handle_irq = gic_handle_irq, .init_machine = kzm9d_add_standard_devices, + .init_late = shmobile_init_late, .timer = &shmobile_timer, .dt_compat = kzm9d_boards_compat_dt, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c index d8e33b682832..c0ae815e7beb 100644 --- a/arch/arm/mach-shmobile/board-kzm9g.c +++ b/arch/arm/mach-shmobile/board-kzm9g.c @@ -455,6 +455,7 @@ DT_MACHINE_START(KZM9G_DT, "kzm9g") .init_irq = sh73a0_init_irq, .handle_irq = gic_handle_irq, .init_machine = kzm_init, + .init_late = shmobile_init_late, .timer = &shmobile_timer, .dt_compat = kzm9g_boards_compat_dt, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index b577f7c44678..150122a44630 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -1512,6 +1512,9 @@ static void __init mackerel_init(void) gpio_request(GPIO_FN_SDHID0_1, NULL); gpio_request(GPIO_FN_SDHID0_0, NULL); + /* SDHI0 PORT172 card-detect IRQ26 */ + gpio_request(GPIO_FN_IRQ26_172, NULL); + #if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) /* enable SDHI1 */ gpio_request(GPIO_FN_SDHICMD1, NULL); diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c index 472d1f5361e5..3946c4ba2aa8 100644 --- a/arch/arm/mach-shmobile/clock-sh73a0.c +++ b/arch/arm/mach-shmobile/clock-sh73a0.c @@ -475,9 +475,9 @@ static struct clk *late_main_clks[] = { enum { MSTP001, MSTP129, MSTP128, MSTP127, MSTP126, MSTP125, MSTP118, MSTP116, MSTP100, - MSTP219, + MSTP219, MSTP218, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, - MSTP331, MSTP329, MSTP325, MSTP323, MSTP318, + MSTP331, MSTP329, MSTP325, MSTP323, MSTP314, MSTP313, MSTP312, MSTP311, MSTP303, MSTP302, MSTP301, MSTP300, MSTP411, MSTP410, MSTP403, @@ -497,6 +497,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP116] = MSTP(&div4_clks[DIV4_HP], SMSTPCR1, 16, 0), /* IIC0 */ [MSTP100] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 0, 0), /* LCDC0 */ [MSTP219] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 19, 0), /* SCIFA7 */ + [MSTP218] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* SY-DMAC */ [MSTP207] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 7, 0), /* SCIFA5 */ [MSTP206] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 6, 0), /* SCIFB */ [MSTP204] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 4, 0), /* SCIFA0 */ @@ -508,7 +509,6 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP329] = MSTP(&r_clk, SMSTPCR3, 29, 0), /* CMT10 */ [MSTP325] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 25, 0), /* IrDA */ [MSTP323] = MSTP(&div4_clks[DIV4_HP], SMSTPCR3, 23, 0), /* IIC1 */ - [MSTP318] = MSTP(&div4_clks[DIV4_HP], SMSTPCR3, 18, 0), /* SY-DMAC */ [MSTP314] = MSTP(&div6_clks[DIV6_SDHI0], SMSTPCR3, 14, 0), /* SDHI0 */ [MSTP313] = MSTP(&div6_clks[DIV6_SDHI1], SMSTPCR3, 13, 0), /* SDHI1 */ [MSTP312] = MSTP(&div4_clks[DIV4_HP], SMSTPCR3, 12, 0), /* MMCIF0 */ @@ -552,6 +552,7 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("i2c-sh_mobile.0", &mstp_clks[MSTP116]), /* I2C0 */ CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */ CLKDEV_DEV_ID("sh-sci.7", &mstp_clks[MSTP219]), /* SCIFA7 */ + CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), /* SY-DMAC */ CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */ CLKDEV_DEV_ID("sh-sci.8", &mstp_clks[MSTP206]), /* SCIFB */ CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), /* SCIFA0 */ @@ -563,7 +564,6 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */ CLKDEV_DEV_ID("sh_irda.0", &mstp_clks[MSTP325]), /* IrDA */ CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* I2C1 */ - CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP318]), /* SY-DMAC */ CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP314]), /* SDHI0 */ CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP313]), /* SDHI1 */ CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[MSTP312]), /* MMCIF0 */ diff --git a/arch/arm/mach-shmobile/intc-r8a7779.c b/arch/arm/mach-shmobile/intc-r8a7779.c index 550b23df4fd4..f04fad4ec4fb 100644 --- a/arch/arm/mach-shmobile/intc-r8a7779.c +++ b/arch/arm/mach-shmobile/intc-r8a7779.c @@ -35,6 +35,9 @@ #define INT2SMSKCR3 0xfe7822ac #define INT2SMSKCR4 0xfe7822b0 +#define INT2NTSR0 0xfe700060 +#define INT2NTSR1 0xfe700064 + static int r8a7779_set_wake(struct irq_data *data, unsigned int on) { return 0; /* always allow wakeup */ @@ -49,6 +52,10 @@ void __init r8a7779_init_irq(void) gic_init(0, 29, gic_dist_base, gic_cpu_base); gic_arch_extn.irq_set_wake = r8a7779_set_wake; + /* route all interrupts to ARM */ + __raw_writel(0xffffffff, INT2NTSR0); + __raw_writel(0x3fffffff, INT2NTSR1); + /* unmask all known interrupts in INTCS2 */ __raw_writel(0xfffffff0, INT2SMSKCR0); __raw_writel(0xfff7ffff, INT2SMSKCR1); diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c index bacdd667e3b1..e859fcdb3d58 100644 --- a/arch/arm/mach-shmobile/platsmp.c +++ b/arch/arm/mach-shmobile/platsmp.c @@ -25,7 +25,12 @@ #define is_sh73a0() (machine_is_ag5evm() || machine_is_kota2() || \ of_machine_is_compatible("renesas,sh73a0")) #define is_r8a7779() machine_is_marzen() + +#ifdef CONFIG_ARCH_EMEV2 #define is_emev2() of_machine_is_compatible("renesas,emev2") +#else +#define is_emev2() (0) +#endif static unsigned int __init shmobile_smp_get_core_count(void) { diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c index 6a4bd582c028..fafce9ce8218 100644 --- a/arch/arm/mach-shmobile/setup-sh7372.c +++ b/arch/arm/mach-shmobile/setup-sh7372.c @@ -484,7 +484,7 @@ static const struct sh_dmae_slave_config sh7372_dmae_slaves[] = { }, }; -#define SH7372_CHCLR 0x220 +#define SH7372_CHCLR (0x220 - 0x20) static const struct sh_dmae_channel sh7372_dmae_channels[] = { { diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e5dad60b558b..cf4528d51774 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -791,6 +791,79 @@ void __init iotable_init(struct map_desc *io_desc, int nr) } } +#ifndef CONFIG_ARM_LPAE + +/* + * The Linux PMD is made of two consecutive section entries covering 2MB + * (see definition in include/asm/pgtable-2level.h). However a call to + * create_mapping() may optimize static mappings by using individual + * 1MB section mappings. This leaves the actual PMD potentially half + * initialized if the top or bottom section entry isn't used, leaving it + * open to problems if a subsequent ioremap() or vmalloc() tries to use + * the virtual space left free by that unused section entry. + * + * Let's avoid the issue by inserting dummy vm entries covering the unused + * PMD halves once the static mappings are in place. + */ + +static void __init pmd_empty_section_gap(unsigned long addr) +{ + struct vm_struct *vm; + + vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm)); + vm->addr = (void *)addr; + vm->size = SECTION_SIZE; + vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; + vm->caller = pmd_empty_section_gap; + vm_area_add_early(vm); +} + +static void __init fill_pmd_gaps(void) +{ + struct vm_struct *vm; + unsigned long addr, next = 0; + pmd_t *pmd; + + /* we're still single threaded hence no lock needed here */ + for (vm = vmlist; vm; vm = vm->next) { + if (!(vm->flags & VM_ARM_STATIC_MAPPING)) + continue; + addr = (unsigned long)vm->addr; + if (addr < next) + continue; + + /* + * Check if this vm starts on an odd section boundary. + * If so and the first section entry for this PMD is free + * then we block the corresponding virtual address. + */ + if ((addr & ~PMD_MASK) == SECTION_SIZE) { + pmd = pmd_off_k(addr); + if (pmd_none(*pmd)) + pmd_empty_section_gap(addr & PMD_MASK); + } + + /* + * Then check if this vm ends on an odd section boundary. + * If so and the second section entry for this PMD is empty + * then we block the corresponding virtual address. + */ + addr += vm->size; + if ((addr & ~PMD_MASK) == SECTION_SIZE) { + pmd = pmd_off_k(addr) + 1; + if (pmd_none(*pmd)) + pmd_empty_section_gap(addr); + } + + /* no need to look at any vm entry until we hit the next PMD */ + next = (addr + PMD_SIZE - 1) & PMD_MASK; + } +} + +#else +#define fill_pmd_gaps() do { } while (0) +#endif + static void * __initdata vmalloc_min = (void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET); @@ -1072,6 +1145,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc) */ if (mdesc->map_io) mdesc->map_io(); + fill_pmd_gaps(); /* * Finally flush the caches and tlb to ensure that we're in a diff --git a/arch/arm/plat-samsung/include/plat/map-s3c.h b/arch/arm/plat-samsung/include/plat/map-s3c.h index 7d048759b772..c0c70a895ca8 100644 --- a/arch/arm/plat-samsung/include/plat/map-s3c.h +++ b/arch/arm/plat-samsung/include/plat/map-s3c.h @@ -22,7 +22,7 @@ #define S3C24XX_VA_WATCHDOG S3C_VA_WATCHDOG #define S3C2412_VA_SSMC S3C_ADDR_CPU(0x00000000) -#define S3C2412_VA_EBI S3C_ADDR_CPU(0x00010000) +#define S3C2412_VA_EBI S3C_ADDR_CPU(0x00100000) #define S3C2410_PA_UART (0x50000000) #define S3C24XX_PA_UART S3C2410_PA_UART diff --git a/arch/arm/plat-samsung/include/plat/watchdog-reset.h b/arch/arm/plat-samsung/include/plat/watchdog-reset.h index f19aff19205c..bc4db9b04e36 100644 --- a/arch/arm/plat-samsung/include/plat/watchdog-reset.h +++ b/arch/arm/plat-samsung/include/plat/watchdog-reset.h @@ -25,7 +25,7 @@ static inline void arch_wdt_reset(void) __raw_writel(0, S3C2410_WTCON); /* disable watchdog, to be safe */ - if (s3c2410_wdtclk) + if (!IS_ERR(s3c2410_wdtclk)) clk_enable(s3c2410_wdtclk); /* put initial values into count and data */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 32b394f3b854..6eb75b80488c 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -103,6 +103,11 @@ static inline void hard_irq_disable(void) /* include/linux/interrupt.h needs hard_irq_disable to be a macro */ #define hard_irq_disable hard_irq_disable +static inline bool lazy_irq_pending(void) +{ + return !!(get_paca()->irq_happened & ~PACA_IRQ_HARD_DIS); +} + /* * This is called by asynchronous interrupts to conditionally * re-enable hard interrupts when soft-disabled after having diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index ed1718feb9d9..5971c85df136 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -558,27 +558,54 @@ _GLOBAL(ret_from_except_lite) mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ -#ifdef CONFIG_PREEMPT clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ - li r0,_TIF_NEED_RESCHED /* bits to check */ ld r3,_MSR(r1) ld r4,TI_FLAGS(r9) - /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */ - rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING - and. r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */ - bne do_work - -#else /* !CONFIG_PREEMPT */ - ld r3,_MSR(r1) /* Returning to user mode? */ andi. r3,r3,MSR_PR - beq restore /* if not, just restore regs and return */ + beq resume_kernel /* Check current_thread_info()->flags */ + andi. r0,r4,_TIF_USER_WORK_MASK + beq restore + + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + bl .restore_interrupts + bl .schedule + b .ret_from_except_lite + +1: bl .save_nvgprs + bl .restore_interrupts + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_notify_resume + b .ret_from_except + +resume_kernel: +#ifdef CONFIG_PREEMPT + /* Check if we need to preempt */ + andi. r0,r4,_TIF_NEED_RESCHED + beq+ restore + /* Check that preempt_count() == 0 and interrupts are enabled */ + lwz r8,TI_PREEMPT(r9) + cmpwi cr1,r8,0 + ld r0,SOFTE(r1) + cmpdi r0,0 + crandc eq,cr1*4+eq,eq + bne restore + + /* + * Here we are preempting the current task. We want to make + * sure we are soft-disabled first + */ + SOFT_DISABLE_INTS(r3,r4) +1: bl .preempt_schedule_irq + + /* Re-test flags and eventually loop */ clrrdi r9,r1,THREAD_SHIFT ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_USER_WORK_MASK - bne do_work -#endif /* !CONFIG_PREEMPT */ + andi. r0,r4,_TIF_NEED_RESCHED + bne 1b +#endif /* CONFIG_PREEMPT */ .globl fast_exc_return_irq fast_exc_return_irq: @@ -759,50 +786,6 @@ restore_check_irq_replay: #endif /* CONFIG_PPC_BOOK3E */ 1: b .ret_from_except /* What else to do here ? */ - - -3: -do_work: -#ifdef CONFIG_PREEMPT - andi. r0,r3,MSR_PR /* Returning to user mode? */ - bne user_work - /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) - cmpwi cr1,r8,0 - ld r0,SOFTE(r1) - cmpdi r0,0 - crandc eq,cr1*4+eq,eq - bne restore - - /* - * Here we are preempting the current task. We want to make - * sure we are soft-disabled first - */ - SOFT_DISABLE_INTS(r3,r4) -1: bl .preempt_schedule_irq - - /* Re-test flags and eventually loop */ - clrrdi r9,r1,THREAD_SHIFT - ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED - bne 1b - b restore - -user_work: -#endif /* CONFIG_PREEMPT */ - - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f - bl .restore_interrupts - bl .schedule - b .ret_from_except_lite - -1: bl .save_nvgprs - bl .restore_interrupts - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_notify_resume - b .ret_from_except - unrecov_restore: addi r3,r1,STACK_FRAME_OVERHEAD bl .unrecoverable_exception diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 7835a5e1ea5f..1b415027ec0e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -277,7 +277,7 @@ EXPORT_SYMBOL(arch_local_irq_restore); * NOTE: This is called with interrupts hard disabled but not marked * as such in paca->irq_happened, so we need to resync this. */ -void restore_interrupts(void) +void notrace restore_interrupts(void) { if (irqs_disabled()) { local_paca->irq_happened |= PACA_IRQ_HARD_DIS; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 1b488e5305c5..0794a3017b1b 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1312,7 +1312,7 @@ static struct opal_secondary_data { extern char opal_secondary_entry; -static void prom_query_opal(void) +static void __init prom_query_opal(void) { long rc; @@ -1436,7 +1436,7 @@ static void __init prom_opal_hold_cpus(void) prom_debug("prom_opal_hold_cpus: end...\n"); } -static void prom_opal_takeover(void) +static void __init prom_opal_takeover(void) { struct opal_secondary_data *data = &RELOC(opal_secondary_data); struct opal_takeover_args *args = &data->args; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index a84aafce2a12..a1044f43becd 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -810,7 +810,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) lwz r3,VCORE_NAPPING_THREADS(r5) lwz r4,VCPU_PTID(r9) li r0,1 - sldi r0,r0,r4 + sld r0,r0,r4 andc. r3,r3,r0 /* no sense IPI'ing ourselves */ beq 43f mulli r4,r4,PACA_SIZE /* get paca for thread 0 */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b6edbb3b4a54..6e8f677f5646 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -635,7 +635,7 @@ static inline int __init read_usm_ranges(const u32 **usm) */ static void __init parse_drconf_memory(struct device_node *memory) { - const u32 *dm, *usm; + const u32 *uninitialized_var(dm), *usm; unsigned int n, rc, ranges, is_kexec_kdump = 0; unsigned long lmb_size, base, size, sz; int nid; diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S index 55ba3855a97f..7d3a3b5619a2 100644 --- a/arch/powerpc/net/bpf_jit_64.S +++ b/arch/powerpc/net/bpf_jit_64.S @@ -105,6 +105,7 @@ sk_load_byte_msh_positive_offset: mr r4, r_addr; \ li r6, SIZE; \ bl skb_copy_bits; \ + nop; \ /* R3 = 0 on success */ \ addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ ld r0, 16(r1); \ @@ -156,6 +157,7 @@ bpf_slow_path_byte_msh: mr r4, r_addr; \ li r5, SIZE; \ bl bpf_internal_load_pointer_neg_helper; \ + nop; \ /* R3 != 0 on success */ \ addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ ld r0, 16(r1); \ diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 0915b1ad66ce..2d311c0caf8e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -106,7 +106,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, tcep++; } - if (tbl->it_type == TCE_PCI_SWINV_CREATE) + if (tbl->it_type & TCE_PCI_SWINV_CREATE) tce_invalidate_pSeries_sw(tbl, tces, tcep - 1); return 0; } @@ -121,7 +121,7 @@ static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) while (npages--) *(tcep++) = 0; - if (tbl->it_type == TCE_PCI_SWINV_FREE) + if (tbl->it_type & TCE_PCI_SWINV_FREE) tce_invalidate_pSeries_sw(tbl, tces, tcep - 1); } diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index 41a34bc4a9a2..e61483e8e960 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -106,7 +106,7 @@ static void check_and_cede_processor(void) * we first hard disable then check. */ hard_irq_disable(); - if (get_paca()->irq_happened == 0) + if (!lazy_irq_pending()) cede_processor(); } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0f3ab06d2222..eab3492a45c5 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -971,7 +971,7 @@ static int cpu_cmd(void) /* print cpus waiting or in xmon */ printf("cpus stopped:"); count = 0; - for (cpu = 0; cpu < NR_CPUS; ++cpu) { + for_each_possible_cpu(cpu) { if (cpumask_test_cpu(cpu, &cpus_in_xmon)) { if (count == 0) printf(" %x", cpu); diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index daeca56211e3..673ac9b63d6b 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -38,7 +38,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { int err = 0; - bool ia32 = is_ia32_task(); + bool ia32 = test_thread_flag(TIF_IA32); if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 340ee49961a6..f91e80f4f180 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -176,7 +176,7 @@ #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ +#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ #define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */ /* Virtualization flags: Linux defined, word 8 */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8afb69319815..b2297e58c6ed 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -422,12 +422,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, return 0; } - if (intsrc->source_irq == 0 && intsrc->global_irq == 2) { + if (intsrc->source_irq == 0) { if (acpi_skip_timer_override) { - printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); + printk(PREFIX "BIOS IRQ0 override ignored.\n"); return 0; } - if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { + + if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity + && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK; printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n"); } @@ -1334,17 +1336,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d) } /* - * Force ignoring BIOS IRQ0 pin2 override + * Force ignoring BIOS IRQ0 override */ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) { - /* - * The ati_ixp4x0_rev() early PCI quirk should have set - * the acpi_skip_timer_override flag already: - */ if (!acpi_skip_timer_override) { - WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n"); - pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", + pr_notice("%s detected: Ignoring BIOS IRQ0 override\n", d->ident); acpi_skip_timer_override = 1; } @@ -1438,7 +1435,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { * is enabled. This input is incorrectly designated the * ISA IRQ 0 via an interrupt source override even though * it is wired to the output of the master 8259A and INTIN0 - * is not connected at all. Force ignoring BIOS IRQ0 pin2 + * is not connected at all. Force ignoring BIOS IRQ0 * override in that cases. */ { @@ -1473,6 +1470,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), }, }, + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "FUJITSU SIEMENS", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"), + }, + }, {} }; diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl index dfea390e1608..c7b3fe2d72e0 100644 --- a/arch/x86/kernel/cpu/mkcapflags.pl +++ b/arch/x86/kernel/cpu/mkcapflags.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/perl -w # # Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h # @@ -11,22 +11,35 @@ open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; print OUT "#include <asm/cpufeature.h>\n\n"; print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; +%features = (); +$err = 0; + while (defined($line = <IN>)) { if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) { $macro = $1; - $feature = $2; + $feature = "\L$2"; $tail = $3; if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { - $feature = $1; + $feature = "\L$1"; } - if ($feature ne '') { - printf OUT "\t%-32s = \"%s\",\n", - "[$macro]", "\L$feature"; + next if ($feature eq ''); + + if ($features{$feature}++) { + print STDERR "$in: duplicate feature name: $feature\n"; + $err++; } + printf OUT "\t%-32s = \"%s\",\n", "[$macro]", $feature; } } print OUT "};\n"; close(IN); close(OUT); + +if ($err) { + unlink($out); + exit(1); +} + +exit(0); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index addf9e82a7f2..ee8e9abc859f 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { - { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, + { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 }, { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8bfb6146f753..3f61904365cf 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -444,12 +444,12 @@ void kgdb_roundup_cpus(unsigned long flags) /** * kgdb_arch_handle_exception - Handle architecture specific GDB packets. - * @vector: The error vector of the exception that happened. + * @e_vector: The error vector of the exception that happened. * @signo: The signal number of the exception that happened. * @err_code: The error code of the exception that happened. - * @remcom_in_buffer: The buffer of the packet we have read. - * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. - * @regs: The &struct pt_regs of the current process. + * @remcomInBuffer: The buffer of the packet we have read. + * @remcomOutBuffer: The buffer of %BUFMAX bytes to write a packet into. + * @linux_regs: The &struct pt_regs of the current process. * * This function MUST handle the 'c' and 's' command packets, * as well packets to set / remove a hardware breakpoint, if used. diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 25b48edb847c..5de92f1abd76 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -451,6 +451,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), }, }, + { /* Handle problems with rebooting on the Precision M6600. */ + .callback = set_pci_reboot, + .ident = "Dell OptiPlex 990", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), + }, + }, { } }; diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 459b58a8a15c..25b7ae8d058a 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL(csum_partial_copy_to_user); * @src: source address * @dst: destination address * @len: number of bytes to be copied. - * @isum: initial sum that is added into the result (32bit unfolded) + * @sum: initial sum that is added into the result (32bit unfolded) * * Returns an 32bit unfolded checksum of the buffer. */ diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 02cf6335e9bd..e7dee617358e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -125,12 +125,8 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) blkg->pd[i] = pd; pd->blkg = blkg; - } - - /* invoke per-policy init */ - for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkcg_policy *pol = blkcg_policy[i]; + /* invoke per-policy init */ if (blkcg_policy_enabled(blkg->q, pol)) pol->pd_init_fn(blkg); } @@ -245,10 +241,9 @@ EXPORT_SYMBOL_GPL(blkg_lookup_create); static void blkg_destroy(struct blkcg_gq *blkg) { - struct request_queue *q = blkg->q; struct blkcg *blkcg = blkg->blkcg; - lockdep_assert_held(q->queue_lock); + lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); /* Something wrong if we are trying to remove same group twice */ diff --git a/block/blk-core.c b/block/blk-core.c index 3c923a7aeb56..93eb3e4f88ce 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -361,9 +361,10 @@ EXPORT_SYMBOL(blk_put_queue); */ void blk_drain_queue(struct request_queue *q, bool drain_all) { + int i; + while (true) { bool drain = false; - int i; spin_lock_irq(q->queue_lock); @@ -408,6 +409,18 @@ void blk_drain_queue(struct request_queue *q, bool drain_all) break; msleep(10); } + + /* + * With queue marked dead, any woken up waiter will fail the + * allocation path, so the wakeup chaining is lost and we're + * left with hung waiters. We need to wake up those waiters. + */ + if (q->request_fn) { + spin_lock_irq(q->queue_lock); + for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++) + wake_up_all(&q->rq.wait[i]); + spin_unlock_irq(q->queue_lock); + } } /** @@ -467,7 +480,6 @@ void blk_cleanup_queue(struct request_queue *q) /* mark @q DEAD, no new request or merges will be allowed afterwards */ mutex_lock(&q->sysfs_lock); queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); - spin_lock_irq(lock); /* @@ -485,10 +497,6 @@ void blk_cleanup_queue(struct request_queue *q) queue_flag_set(QUEUE_FLAG_NOMERGES, q); queue_flag_set(QUEUE_FLAG_NOXMERGES, q); queue_flag_set(QUEUE_FLAG_DEAD, q); - - if (q->queue_lock != &q->__queue_lock) - q->queue_lock = &q->__queue_lock; - spin_unlock_irq(lock); mutex_unlock(&q->sysfs_lock); @@ -499,6 +507,11 @@ void blk_cleanup_queue(struct request_queue *q) del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); blk_sync_queue(q); + spin_lock_irq(lock); + if (q->queue_lock != &q->__queue_lock) + q->queue_lock = &q->__queue_lock; + spin_unlock_irq(lock); + /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 780354888958..6e4744cbfb56 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -197,44 +197,3 @@ void blk_add_timer(struct request *req) mod_timer(&q->timeout, expiry); } -/** - * blk_abort_queue -- Abort all request on given queue - * @queue: pointer to queue - * - */ -void blk_abort_queue(struct request_queue *q) -{ - unsigned long flags; - struct request *rq, *tmp; - LIST_HEAD(list); - - /* - * Not a request based block device, nothing to abort - */ - if (!q->request_fn) - return; - - spin_lock_irqsave(q->queue_lock, flags); - - elv_abort_queue(q); - - /* - * Splice entries to local list, to avoid deadlocking if entries - * get readded to the timeout list by error handling - */ - list_splice_init(&q->timeout_list, &list); - - list_for_each_entry_safe(rq, tmp, &list, timeout_list) - blk_abort_request(rq); - - /* - * Occasionally, blk_abort_request() will return without - * deleting the element from the list. Make sure we add those back - * instead of leaving them on the local stack list. - */ - list_splice(&list, &q->timeout_list); - - spin_unlock_irqrestore(q->queue_lock, flags); - -} -EXPORT_SYMBOL_GPL(blk_abort_queue); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 673c977cc2bf..fb52df9744f5 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -17,8 +17,6 @@ #include "blk.h" #include "blk-cgroup.h" -static struct blkcg_policy blkcg_policy_cfq __maybe_unused; - /* * tunables */ @@ -418,11 +416,6 @@ static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd) return pd ? container_of(pd, struct cfq_group, pd) : NULL; } -static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) -{ - return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); -} - static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg) { return pd_to_blkg(&cfqg->pd); @@ -572,6 +565,13 @@ static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { } #ifdef CONFIG_CFQ_GROUP_IOSCHED +static struct blkcg_policy blkcg_policy_cfq; + +static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) +{ + return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); +} + static inline void cfqg_get(struct cfq_group *cfqg) { return blkg_get(cfqg_to_blkg(cfqg)); @@ -3951,10 +3951,11 @@ static void cfq_exit_queue(struct elevator_queue *e) cfq_shutdown_timer_wq(cfqd); -#ifndef CONFIG_CFQ_GROUP_IOSCHED +#ifdef CONFIG_CFQ_GROUP_IOSCHED + blkcg_deactivate_policy(q, &blkcg_policy_cfq); +#else kfree(cfqd->root_group); #endif - blkcg_deactivate_policy(q, &blkcg_policy_cfq); kfree(cfqd); } @@ -4194,14 +4195,15 @@ static int __init cfq_init(void) #ifdef CONFIG_CFQ_GROUP_IOSCHED if (!cfq_group_idle) cfq_group_idle = 1; -#else - cfq_group_idle = 0; -#endif ret = blkcg_policy_register(&blkcg_policy_cfq); if (ret) return ret; +#else + cfq_group_idle = 0; +#endif + ret = -ENOMEM; cfq_pool = KMEM_CACHE(cfq_queue, 0); if (!cfq_pool) goto err_pol_unreg; @@ -4215,13 +4217,17 @@ static int __init cfq_init(void) err_free_pool: kmem_cache_destroy(cfq_pool); err_pol_unreg: +#ifdef CONFIG_CFQ_GROUP_IOSCHED blkcg_policy_unregister(&blkcg_policy_cfq); +#endif return ret; } static void __exit cfq_exit(void) { +#ifdef CONFIG_CFQ_GROUP_IOSCHED blkcg_policy_unregister(&blkcg_policy_cfq); +#endif elv_unregister(&iosched_cfq); kmem_cache_destroy(cfq_pool); } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 260fa80ef575..9a87daa6f4fb 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -721,11 +721,14 @@ int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd) break; } + if (capable(CAP_SYS_RAWIO)) + return 0; + /* In particular, rule out all resets and host-specific ioctls. */ printk_ratelimited(KERN_WARNING "%s: sending ioctl %x to a partition!\n", current->comm, cmd); - return capable(CAP_SYS_RAWIO) ? 0 : -ENOIOCTLCMD; + return -ENOIOCTLCMD; } EXPORT_SYMBOL(scsi_verify_blk_ioctl); diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index a43fa1a57d57..1502c50273b5 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -36,6 +36,7 @@ #define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator" #define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80 static DEFINE_MUTEX(isolated_cpus_lock); +static DEFINE_MUTEX(round_robin_lock); static unsigned long power_saving_mwait_eax; @@ -107,7 +108,7 @@ static void round_robin_cpu(unsigned int tsk_index) if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) return; - mutex_lock(&isolated_cpus_lock); + mutex_lock(&round_robin_lock); cpumask_clear(tmp); for_each_cpu(cpu, pad_busy_cpus) cpumask_or(tmp, tmp, topology_thread_cpumask(cpu)); @@ -116,7 +117,7 @@ static void round_robin_cpu(unsigned int tsk_index) if (cpumask_empty(tmp)) cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus); if (cpumask_empty(tmp)) { - mutex_unlock(&isolated_cpus_lock); + mutex_unlock(&round_robin_lock); return; } for_each_cpu(cpu, tmp) { @@ -131,7 +132,7 @@ static void round_robin_cpu(unsigned int tsk_index) tsk_in_cpu[tsk_index] = preferred_cpu; cpumask_set_cpu(preferred_cpu, pad_busy_cpus); cpu_weight[preferred_cpu]++; - mutex_unlock(&isolated_cpus_lock); + mutex_unlock(&round_robin_lock); set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu)); } diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 5577762daee1..6686b1eaf13e 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -243,7 +243,7 @@ static int pre_map_gar_callback(struct apei_exec_context *ctx, u8 ins = entry->instruction; if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER) - return acpi_os_map_generic_address(&entry->register_region); + return apei_map_generic_address(&entry->register_region); return 0; } @@ -276,7 +276,7 @@ static int post_unmap_gar_callback(struct apei_exec_context *ctx, u8 ins = entry->instruction; if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER) - acpi_os_unmap_generic_address(&entry->register_region); + apei_unmap_generic_address(&entry->register_region); return 0; } @@ -606,6 +606,19 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr, return 0; } +int apei_map_generic_address(struct acpi_generic_address *reg) +{ + int rc; + u32 access_bit_width; + u64 address; + + rc = apei_check_gar(reg, &address, &access_bit_width); + if (rc) + return rc; + return acpi_os_map_generic_address(reg); +} +EXPORT_SYMBOL_GPL(apei_map_generic_address); + /* read GAR in interrupt (including NMI) or process context */ int apei_read(u64 *val, struct acpi_generic_address *reg) { diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index cca240a33038..f220d642136e 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -7,6 +7,8 @@ #define APEI_INTERNAL_H #include <linux/cper.h> +#include <linux/acpi.h> +#include <linux/acpi_io.h> struct apei_exec_context; @@ -68,6 +70,13 @@ static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 actio /* IP has been set in instruction function */ #define APEI_EXEC_SET_IP 1 +int apei_map_generic_address(struct acpi_generic_address *reg); + +static inline void apei_unmap_generic_address(struct acpi_generic_address *reg) +{ + acpi_os_unmap_generic_address(reg); +} + int apei_read(u64 *val, struct acpi_generic_address *reg); int apei_write(u64 val, struct acpi_generic_address *reg); diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 9b3cac0abecc..1599566ed1fe 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -301,7 +301,7 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) if (!ghes) return ERR_PTR(-ENOMEM); ghes->generic = generic; - rc = acpi_os_map_generic_address(&generic->error_status_address); + rc = apei_map_generic_address(&generic->error_status_address); if (rc) goto err_free; error_block_length = generic->error_block_length; @@ -321,7 +321,7 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) return ghes; err_unmap: - acpi_os_unmap_generic_address(&generic->error_status_address); + apei_unmap_generic_address(&generic->error_status_address); err_free: kfree(ghes); return ERR_PTR(rc); @@ -330,7 +330,7 @@ err_free: static void ghes_fini(struct ghes *ghes) { kfree(ghes->estatus); - acpi_os_unmap_generic_address(&ghes->generic->error_status_address); + apei_unmap_generic_address(&ghes->generic->error_status_address); } enum { diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index f3decb30223f..47a8caa89dbe 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -224,6 +224,7 @@ static void lapic_timer_state_broadcast(struct acpi_processor *pr, /* * Suspend / resume control */ +static int acpi_idle_suspend; static u32 saved_bm_rld; static void acpi_idle_bm_rld_save(void) @@ -242,13 +243,21 @@ static void acpi_idle_bm_rld_restore(void) int acpi_processor_suspend(struct acpi_device * device, pm_message_t state) { + if (acpi_idle_suspend == 1) + return 0; + acpi_idle_bm_rld_save(); + acpi_idle_suspend = 1; return 0; } int acpi_processor_resume(struct acpi_device * device) { + if (acpi_idle_suspend == 0) + return 0; + acpi_idle_bm_rld_restore(); + acpi_idle_suspend = 0; return 0; } @@ -754,6 +763,12 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, local_irq_disable(); + if (acpi_idle_suspend) { + local_irq_enable(); + cpu_relax(); + return -EBUSY; + } + lapic_timer_state_broadcast(pr, cx, 1); kt1 = ktime_get_real(); acpi_idle_do_entry(cx); @@ -823,6 +838,12 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, local_irq_disable(); + if (acpi_idle_suspend) { + local_irq_enable(); + cpu_relax(); + return -EBUSY; + } + if (cx->entry_method != ACPI_CSTATE_FFH) { current_thread_info()->status &= ~TS_POLLING; /* @@ -907,14 +928,21 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, drv, drv->safe_state_index); } else { local_irq_disable(); - acpi_safe_halt(); + if (!acpi_idle_suspend) + acpi_safe_halt(); local_irq_enable(); - return -EINVAL; + return -EBUSY; } } local_irq_disable(); + if (acpi_idle_suspend) { + local_irq_enable(); + cpu_relax(); + return -EBUSY; + } + if (cx->entry_method != ACPI_CSTATE_FFH) { current_thread_info()->status &= ~TS_POLLING; /* diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 9f66181c814e..240a24400976 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -173,7 +173,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp) { int result = 0; - if (!strncmp(val, "enable", strlen("enable") - 1)) { + if (!strncmp(val, "enable", strlen("enable"))) { result = acpi_debug_trace(trace_method_name, trace_debug_level, trace_debug_layer, 0); if (result) @@ -181,7 +181,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp) goto exit; } - if (!strncmp(val, "disable", strlen("disable") - 1)) { + if (!strncmp(val, "disable", strlen("disable"))) { int name = 0; result = acpi_debug_trace((char *)&name, trace_debug_level, trace_debug_layer, 0); diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index a576575617d7..1e0a9e17c31d 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -558,6 +558,8 @@ acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag) union acpi_object arg0 = { ACPI_TYPE_INTEGER }; struct acpi_object_list args = { 1, &arg0 }; + if (!video->cap._DOS) + return 0; if (bios_flag < 0 || bios_flag > 3 || lcd_flag < 0 || lcd_flag > 1) return -EINVAL; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index e0fb5b0435a3..9cb845e49334 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1031,7 +1031,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dpm_wait_for_children(dev, async); if (async_error) - return 0; + goto Complete; pm_runtime_get_noresume(dev); if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) @@ -1040,7 +1040,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (pm_wakeup_pending()) { pm_runtime_put_sync(dev); async_error = -EBUSY; - return 0; + goto Complete; } device_lock(dev); @@ -1097,6 +1097,8 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) } device_unlock(dev); + + Complete: complete_all(&dev->power.completion); if (error) { diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index b5c5ff53cb57..fcb956bb4b4c 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1475,10 +1475,17 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi first_word = 0; spin_lock_irq(&b->bm_lock); } - /* last page (respectively only page, for first page == last page) */ last_word = MLPP(el >> LN2_BPL); - bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); + + /* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1. (or multiples). + * ==> e = 32767, el = 32768, last_page = 2, + * and now last_word = 0. + * We do not want to touch last_page in this case, + * as we did not allocate it, it is not present in bitmap->bm_pages. + */ + if (last_word) + bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word); /* possibly trailing bits. * example: (e & 63) == 63, el will be e+1. diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 9c5c84946b05..8e93a6ac9bb6 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -472,12 +472,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + if (req->rq_state & RQ_LOCAL_ABORTED) { + _req_may_be_done(req, m); + break; + } __drbd_chk_io_error(mdev, false); goto_queue_for_net_read: + D_ASSERT(!(req->rq_state & RQ_NET_MASK)); + /* no point in retrying if there is no good remote data, * or we have no connection. */ if (mdev->state.pdsk != D_UP_TO_DATE) { @@ -765,6 +770,40 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); } +static void maybe_pull_ahead(struct drbd_conf *mdev) +{ + int congested = 0; + + /* If I don't even have good local storage, we can not reasonably try + * to pull ahead of the peer. We also need the local reference to make + * sure mdev->act_log is there. + * Note: caller has to make sure that net_conf is there. + */ + if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) + return; + + if (mdev->net_conf->cong_fill && + atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) { + dev_info(DEV, "Congestion-fill threshold reached\n"); + congested = 1; + } + + if (mdev->act_log->used >= mdev->net_conf->cong_extents) { + dev_info(DEV, "Congestion-extents threshold reached\n"); + congested = 1; + } + + if (congested) { + queue_barrier(mdev); /* last barrier, after mirrored writes */ + + if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) + _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); + else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ + _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); + } + put_ldev(mdev); +} + static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { const int rw = bio_rw(bio); @@ -972,29 +1011,8 @@ allocate_barrier: _req_mod(req, queue_for_send_oos); if (remote && - mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) { - int congested = 0; - - if (mdev->net_conf->cong_fill && - atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) { - dev_info(DEV, "Congestion-fill threshold reached\n"); - congested = 1; - } - - if (mdev->act_log->used >= mdev->net_conf->cong_extents) { - dev_info(DEV, "Congestion-extents threshold reached\n"); - congested = 1; - } - - if (congested) { - queue_barrier(mdev); /* last barrier, after mirrored writes */ - - if (mdev->net_conf->on_congestion == OC_PULL_AHEAD) - _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); - else /*mdev->net_conf->on_congestion == OC_DISCONNECT */ - _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); - } - } + mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) + maybe_pull_ahead(mdev); spin_unlock_irq(&mdev->req_lock); kfree(b); /* if someone else has beaten us to it... */ diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index cce7df367b79..553f43a90953 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -671,6 +671,7 @@ static void __reschedule_timeout(int drive, const char *message) if (drive == current_reqD) drive = current_drive; + __cancel_delayed_work(&fd_timeout); if (drive < 0 || drive >= N_DRIVE) { delay = 20UL * HZ; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 264bc77dcb91..a8fddeb3d638 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -37,6 +37,7 @@ #include <linux/kthread.h> #include <../drivers/ata/ahci.h> #include <linux/export.h> +#include <linux/debugfs.h> #include "mtip32xx.h" #define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32) @@ -85,6 +86,7 @@ static int instance; * allocated in mtip_init(). */ static int mtip_major; +static struct dentry *dfs_parent; static DEFINE_SPINLOCK(rssd_index_lock); static DEFINE_IDA(rssd_index_ida); @@ -2546,7 +2548,7 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd, } /* - * Sysfs register/status dump. + * Sysfs status dump. * * @dev Pointer to the device structure, passed by the kernrel. * @attr Pointer to the device_attribute structure passed by the kernel. @@ -2555,45 +2557,68 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd, * return value * The size, in bytes, of the data copied into buf. */ -static ssize_t mtip_hw_show_registers(struct device *dev, +static ssize_t mtip_hw_show_status(struct device *dev, struct device_attribute *attr, char *buf) { - u32 group_allocated; struct driver_data *dd = dev_to_disk(dev)->private_data; int size = 0; + + if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag)) + size += sprintf(buf, "%s", "thermal_shutdown\n"); + else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag)) + size += sprintf(buf, "%s", "write_protect\n"); + else + size += sprintf(buf, "%s", "online\n"); + + return size; +} + +static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL); + +static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, + size_t len, loff_t *offset) +{ + struct driver_data *dd = (struct driver_data *)f->private_data; + char buf[MTIP_DFS_MAX_BUF_SIZE]; + u32 group_allocated; + int size = *offset; int n; - size += sprintf(&buf[size], "Hardware\n--------\n"); - size += sprintf(&buf[size], "S ACTive : [ 0x"); + if (!len || size) + return 0; + + if (size < 0) + return -EINVAL; + + size += sprintf(&buf[size], "H/ S ACTive : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) size += sprintf(&buf[size], "%08X ", readl(dd->port->s_active[n])); size += sprintf(&buf[size], "]\n"); - size += sprintf(&buf[size], "Command Issue : [ 0x"); + size += sprintf(&buf[size], "H/ Command Issue : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) size += sprintf(&buf[size], "%08X ", readl(dd->port->cmd_issue[n])); size += sprintf(&buf[size], "]\n"); - size += sprintf(&buf[size], "Completed : [ 0x"); + size += sprintf(&buf[size], "H/ Completed : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) size += sprintf(&buf[size], "%08X ", readl(dd->port->completed[n])); size += sprintf(&buf[size], "]\n"); - size += sprintf(&buf[size], "PORT IRQ STAT : [ 0x%08X ]\n", + size += sprintf(&buf[size], "H/ PORT IRQ STAT : [ 0x%08X ]\n", readl(dd->port->mmio + PORT_IRQ_STAT)); - size += sprintf(&buf[size], "HOST IRQ STAT : [ 0x%08X ]\n", + size += sprintf(&buf[size], "H/ HOST IRQ STAT : [ 0x%08X ]\n", readl(dd->mmio + HOST_IRQ_STAT)); size += sprintf(&buf[size], "\n"); - size += sprintf(&buf[size], "Local\n-----\n"); - size += sprintf(&buf[size], "Allocated : [ 0x"); + size += sprintf(&buf[size], "L/ Allocated : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) { if (sizeof(long) > sizeof(u32)) @@ -2605,7 +2630,7 @@ static ssize_t mtip_hw_show_registers(struct device *dev, } size += sprintf(&buf[size], "]\n"); - size += sprintf(&buf[size], "Commands in Q: [ 0x"); + size += sprintf(&buf[size], "L/ Commands in Q : [ 0x"); for (n = dd->slot_groups-1; n >= 0; n--) { if (sizeof(long) > sizeof(u32)) @@ -2617,44 +2642,53 @@ static ssize_t mtip_hw_show_registers(struct device *dev, } size += sprintf(&buf[size], "]\n"); - return size; + *offset = size <= len ? size : len; + size = copy_to_user(ubuf, buf, *offset); + if (size) + return -EFAULT; + + return *offset; } -static ssize_t mtip_hw_show_status(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, + size_t len, loff_t *offset) { - struct driver_data *dd = dev_to_disk(dev)->private_data; - int size = 0; + struct driver_data *dd = (struct driver_data *)f->private_data; + char buf[MTIP_DFS_MAX_BUF_SIZE]; + int size = *offset; - if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag)) - size += sprintf(buf, "%s", "thermal_shutdown\n"); - else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag)) - size += sprintf(buf, "%s", "write_protect\n"); - else - size += sprintf(buf, "%s", "online\n"); - - return size; -} + if (!len || size) + return 0; -static ssize_t mtip_hw_show_flags(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct driver_data *dd = dev_to_disk(dev)->private_data; - int size = 0; + if (size < 0) + return -EINVAL; - size += sprintf(&buf[size], "Flag in port struct : [ %08lX ]\n", + size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n", dd->port->flags); - size += sprintf(&buf[size], "Flag in dd struct : [ %08lX ]\n", + size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n", dd->dd_flag); - return size; + *offset = size <= len ? size : len; + size = copy_to_user(ubuf, buf, *offset); + if (size) + return -EFAULT; + + return *offset; } -static DEVICE_ATTR(registers, S_IRUGO, mtip_hw_show_registers, NULL); -static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL); -static DEVICE_ATTR(flags, S_IRUGO, mtip_hw_show_flags, NULL); +static const struct file_operations mtip_regs_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mtip_hw_read_registers, + .llseek = no_llseek, +}; + +static const struct file_operations mtip_flags_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = mtip_hw_read_flags, + .llseek = no_llseek, +}; /* * Create the sysfs related attributes. @@ -2671,15 +2705,9 @@ static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj) if (!kobj || !dd) return -EINVAL; - if (sysfs_create_file(kobj, &dev_attr_registers.attr)) - dev_warn(&dd->pdev->dev, - "Error creating 'registers' sysfs entry\n"); if (sysfs_create_file(kobj, &dev_attr_status.attr)) dev_warn(&dd->pdev->dev, "Error creating 'status' sysfs entry\n"); - if (sysfs_create_file(kobj, &dev_attr_flags.attr)) - dev_warn(&dd->pdev->dev, - "Error creating 'flags' sysfs entry\n"); return 0; } @@ -2698,13 +2726,39 @@ static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj) if (!kobj || !dd) return -EINVAL; - sysfs_remove_file(kobj, &dev_attr_registers.attr); sysfs_remove_file(kobj, &dev_attr_status.attr); - sysfs_remove_file(kobj, &dev_attr_flags.attr); return 0; } +static int mtip_hw_debugfs_init(struct driver_data *dd) +{ + if (!dfs_parent) + return -1; + + dd->dfs_node = debugfs_create_dir(dd->disk->disk_name, dfs_parent); + if (IS_ERR_OR_NULL(dd->dfs_node)) { + dev_warn(&dd->pdev->dev, + "Error creating node %s under debugfs\n", + dd->disk->disk_name); + dd->dfs_node = NULL; + return -1; + } + + debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd, + &mtip_flags_fops); + debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd, + &mtip_regs_fops); + + return 0; +} + +static void mtip_hw_debugfs_exit(struct driver_data *dd) +{ + debugfs_remove_recursive(dd->dfs_node); +} + + /* * Perform any init/resume time hardware setup * @@ -3730,6 +3784,7 @@ skip_create_disk: mtip_hw_sysfs_init(dd, kobj); kobject_put(kobj); } + mtip_hw_debugfs_init(dd); if (dd->mtip_svc_handler) { set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); @@ -3755,6 +3810,8 @@ start_service_thread: return rv; kthread_run_error: + mtip_hw_debugfs_exit(dd); + /* Delete our gendisk. This also removes the device from /dev */ del_gendisk(dd->disk); @@ -3805,6 +3862,7 @@ static int mtip_block_remove(struct driver_data *dd) kobject_put(kobj); } } + mtip_hw_debugfs_exit(dd); /* * Delete our gendisk structure. This also removes the device @@ -4152,10 +4210,20 @@ static int __init mtip_init(void) } mtip_major = error; + if (!dfs_parent) { + dfs_parent = debugfs_create_dir("rssd", NULL); + if (IS_ERR_OR_NULL(dfs_parent)) { + printk(KERN_WARNING "Error creating debugfs parent\n"); + dfs_parent = NULL; + } + } + /* Register our PCI operations. */ error = pci_register_driver(&mtip_pci_driver); - if (error) + if (error) { + debugfs_remove(dfs_parent); unregister_blkdev(mtip_major, MTIP_DRV_NAME); + } return error; } @@ -4172,6 +4240,8 @@ static int __init mtip_init(void) */ static void __exit mtip_exit(void) { + debugfs_remove_recursive(dfs_parent); + /* Release the allocated major block device number. */ unregister_blkdev(mtip_major, MTIP_DRV_NAME); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index b2c88da26b2a..f51fc23d17bb 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -26,7 +26,6 @@ #include <linux/ata.h> #include <linux/interrupt.h> #include <linux/genhd.h> -#include <linux/version.h> /* Offset of Subsystem Device ID in pci confoguration space */ #define PCI_SUBSYSTEM_DEVICEID 0x2E @@ -111,6 +110,8 @@ #define dbg_printk(format, arg...) #endif +#define MTIP_DFS_MAX_BUF_SIZE 1024 + #define __force_bit2int (unsigned int __force) enum { @@ -447,6 +448,8 @@ struct driver_data { unsigned long dd_flag; /* NOTE: use atomic bit operations on this */ struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ + + struct dentry *dfs_node; }; #endif diff --git a/drivers/block/umem.c b/drivers/block/umem.c index aa2712060bfb..9a72277a31df 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -513,6 +513,44 @@ static void process_page(unsigned long data) } } +struct mm_plug_cb { + struct blk_plug_cb cb; + struct cardinfo *card; +}; + +static void mm_unplug(struct blk_plug_cb *cb) +{ + struct mm_plug_cb *mmcb = container_of(cb, struct mm_plug_cb, cb); + + spin_lock_irq(&mmcb->card->lock); + activate(mmcb->card); + spin_unlock_irq(&mmcb->card->lock); + kfree(mmcb); +} + +static int mm_check_plugged(struct cardinfo *card) +{ + struct blk_plug *plug = current->plug; + struct mm_plug_cb *mmcb; + + if (!plug) + return 0; + + list_for_each_entry(mmcb, &plug->cb_list, cb.list) { + if (mmcb->cb.callback == mm_unplug && mmcb->card == card) + return 1; + } + /* Not currently on the callback list */ + mmcb = kmalloc(sizeof(*mmcb), GFP_ATOMIC); + if (!mmcb) + return 0; + + mmcb->card = card; + mmcb->cb.callback = mm_unplug; + list_add(&mmcb->cb.list, &plug->cb_list); + return 1; +} + static void mm_make_request(struct request_queue *q, struct bio *bio) { struct cardinfo *card = q->queuedata; @@ -523,6 +561,8 @@ static void mm_make_request(struct request_queue *q, struct bio *bio) *card->biotail = bio; bio->bi_next = NULL; card->biotail = &bio->bi_next; + if (bio->bi_rw & REQ_SYNC || !mm_check_plugged(card)) + activate(card); spin_unlock_irq(&card->lock); return; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 773cf27dc23f..9ad3b5ec1dc1 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -257,6 +257,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, break; case BLKIF_OP_DISCARD: dst->u.discard.flag = src->u.discard.flag; + dst->u.discard.id = src->u.discard.id; dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; @@ -287,6 +288,7 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, break; case BLKIF_OP_DISCARD: dst->u.discard.flag = src->u.discard.flag; + dst->u.discard.id = src->u.discard.id; dst->u.discard.sector_number = src->u.discard.sector_number; dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 60eed4bdd2e4..e4fb3374dcd2 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -141,14 +141,36 @@ static int get_id_from_freelist(struct blkfront_info *info) return free; } -static void add_id_to_freelist(struct blkfront_info *info, +static int add_id_to_freelist(struct blkfront_info *info, unsigned long id) { + if (info->shadow[id].req.u.rw.id != id) + return -EINVAL; + if (info->shadow[id].request == NULL) + return -EINVAL; info->shadow[id].req.u.rw.id = info->shadow_free; info->shadow[id].request = NULL; info->shadow_free = id; + return 0; } +static const char *op_name(int op) +{ + static const char *const names[] = { + [BLKIF_OP_READ] = "read", + [BLKIF_OP_WRITE] = "write", + [BLKIF_OP_WRITE_BARRIER] = "barrier", + [BLKIF_OP_FLUSH_DISKCACHE] = "flush", + [BLKIF_OP_DISCARD] = "discard" }; + + if (op < 0 || op >= ARRAY_SIZE(names)) + return "unknown"; + + if (!names[op]) + return "reserved"; + + return names[op]; +} static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) { unsigned int end = minor + nr; @@ -746,20 +768,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; + /* + * The backend has messed up and given us an id that we would + * never have given to it (we stamp it up to BLK_RING_SIZE - + * look in get_id_from_freelist. + */ + if (id >= BLK_RING_SIZE) { + WARN(1, "%s: response to %s has incorrect id (%ld)\n", + info->gd->disk_name, op_name(bret->operation), id); + /* We can't safely get the 'struct request' as + * the id is busted. */ + continue; + } req = info->shadow[id].request; if (bret->operation != BLKIF_OP_DISCARD) blkif_completion(&info->shadow[id]); - add_id_to_freelist(info, id); + if (add_id_to_freelist(info, id)) { + WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", + info->gd->disk_name, op_name(bret->operation), id); + continue; + } error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; - printk(KERN_WARNING "blkfront: %s: discard op failed\n", - info->gd->disk_name); + printk(KERN_WARNING "blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret->operation)); error = -EOPNOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; @@ -771,18 +809,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { - printk(KERN_WARNING "blkfront: %s: write %s op failed\n", - info->flush_op == BLKIF_OP_WRITE_BARRIER ? - "barrier" : "flush disk cache", - info->gd->disk_name); + printk(KERN_WARNING "blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret->operation)); error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && info->shadow[id].req.u.rw.nr_segments == 0)) { - printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", - info->flush_op == BLKIF_OP_WRITE_BARRIER ? - "barrier" : "flush disk cache", - info->gd->disk_name); + printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", + info->gd->disk_name, op_name(bret->operation)); error = -EOPNOTSUPP; } if (unlikely(error)) { diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index dcbe05616090..9a1eb0cfa95f 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1067,26 +1067,24 @@ static int __clk_set_parent(struct clk *clk, struct clk *parent) old_parent = clk->parent; - /* find index of new parent clock using cached parent ptrs */ - if (clk->parents) - for (i = 0; i < clk->num_parents; i++) - if (clk->parents[i] == parent) - break; - else + if (!clk->parents) clk->parents = kzalloc((sizeof(struct clk*) * clk->num_parents), GFP_KERNEL); /* - * find index of new parent clock using string name comparison - * also try to cache the parent to avoid future calls to __clk_lookup + * find index of new parent clock using cached parent ptrs, + * or if not yet cached, use string name comparison and cache + * them now to avoid future calls to __clk_lookup. */ - if (i == clk->num_parents) - for (i = 0; i < clk->num_parents; i++) - if (!strcmp(clk->parent_names[i], parent->name)) { - if (clk->parents) - clk->parents[i] = __clk_lookup(parent->name); - break; - } + for (i = 0; i < clk->num_parents; i++) { + if (clk->parents && clk->parents[i] == parent) + break; + else if (!strcmp(clk->parent_names[i], parent->name)) { + if (clk->parents) + clk->parents[i] = __clk_lookup(parent->name); + break; + } + } if (i == clk->num_parents) { pr_debug("%s: clock %s is not a possible parent of clock %s\n", diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 5873e481e5d2..a8743c399e83 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1039,6 +1039,24 @@ mode_in_range(const struct drm_display_mode *mode, struct edid *edid, return true; } +static bool valid_inferred_mode(const struct drm_connector *connector, + const struct drm_display_mode *mode) +{ + struct drm_display_mode *m; + bool ok = false; + + list_for_each_entry(m, &connector->probed_modes, head) { + if (mode->hdisplay == m->hdisplay && + mode->vdisplay == m->vdisplay && + drm_mode_vrefresh(mode) == drm_mode_vrefresh(m)) + return false; /* duplicated */ + if (mode->hdisplay <= m->hdisplay && + mode->vdisplay <= m->vdisplay) + ok = true; + } + return ok; +} + static int drm_dmt_modes_for_range(struct drm_connector *connector, struct edid *edid, struct detailed_timing *timing) @@ -1048,7 +1066,8 @@ drm_dmt_modes_for_range(struct drm_connector *connector, struct edid *edid, struct drm_device *dev = connector->dev; for (i = 0; i < drm_num_dmt_modes; i++) { - if (mode_in_range(drm_dmt_modes + i, edid, timing)) { + if (mode_in_range(drm_dmt_modes + i, edid, timing) && + valid_inferred_mode(connector, drm_dmt_modes + i)) { newmode = drm_mode_duplicate(dev, &drm_dmt_modes[i]); if (newmode) { drm_mode_probed_add(connector, newmode); @@ -1088,7 +1107,8 @@ drm_gtf_modes_for_range(struct drm_connector *connector, struct edid *edid, return modes; fixup_mode_1366x768(newmode); - if (!mode_in_range(newmode, edid, timing)) { + if (!mode_in_range(newmode, edid, timing) || + !valid_inferred_mode(connector, newmode)) { drm_mode_destroy(dev, newmode); continue; } @@ -1116,7 +1136,8 @@ drm_cvt_modes_for_range(struct drm_connector *connector, struct edid *edid, return modes; fixup_mode_1366x768(newmode); - if (!mode_in_range(newmode, edid, timing)) { + if (!mode_in_range(newmode, edid, timing) || + !valid_inferred_mode(connector, newmode)) { drm_mode_destroy(dev, newmode); continue; } diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index f94792626b94..36822b924eb1 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1401,6 +1401,27 @@ i915_mtrr_setup(struct drm_i915_private *dev_priv, unsigned long base, } } +static void i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) +{ + struct apertures_struct *ap; + struct pci_dev *pdev = dev_priv->dev->pdev; + bool primary; + + ap = alloc_apertures(1); + if (!ap) + return; + + ap->ranges[0].base = dev_priv->dev->agp->base; + ap->ranges[0].size = + dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; + primary = + pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; + + remove_conflicting_framebuffers(ap, "inteldrmfb", primary); + + kfree(ap); +} + /** * i915_driver_load - setup chip and create an initial config * @dev: DRM device @@ -1446,6 +1467,15 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto free_priv; } + dev_priv->mm.gtt = intel_gtt_get(); + if (!dev_priv->mm.gtt) { + DRM_ERROR("Failed to initialize GTT\n"); + ret = -ENODEV; + goto put_bridge; + } + + i915_kick_out_firmware_fb(dev_priv); + pci_set_master(dev->pdev); /* overlay on gen2 is broken and can't address above 1G */ @@ -1471,13 +1501,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto put_bridge; } - dev_priv->mm.gtt = intel_gtt_get(); - if (!dev_priv->mm.gtt) { - DRM_ERROR("Failed to initialize GTT\n"); - ret = -ENODEV; - goto out_rmmap; - } - aperture_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; dev_priv->mm.gtt_mapping = diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 59d44937dd9f..84b648a7ddd8 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -289,8 +289,9 @@ int radeon_vm_manager_init(struct radeon_device *rdev) rdev->vm_manager.enabled = false; /* mark first vm as always in use, it's the system one */ + /* allocate enough for 2 full VM pts */ r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - rdev->vm_manager.max_pfn * 8, + rdev->vm_manager.max_pfn * 8 * 2, RADEON_GEM_DOMAIN_VRAM); if (r) { dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", @@ -633,7 +634,15 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) mutex_init(&vm->mutex); INIT_LIST_HEAD(&vm->list); INIT_LIST_HEAD(&vm->va); - vm->last_pfn = 0; + /* SI requires equal sized PTs for all VMs, so always set + * last_pfn to max_pfn. cayman allows variable sized + * pts so we can grow then as needed. Once we switch + * to two level pts we can unify this again. + */ + if (rdev->family >= CHIP_TAHITI) + vm->last_pfn = rdev->vm_manager.max_pfn; + else + vm->last_pfn = 0; /* map the ib pool buffer at 0 in virtual address space, set * read only */ diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index f28bd4b7ef98..21ec9f5653ce 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -292,6 +292,7 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { + struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_busy *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -317,13 +318,14 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, break; } drm_gem_object_unreference_unlocked(gobj); - r = radeon_gem_handle_lockup(robj->rdev, r); + r = radeon_gem_handle_lockup(rdev, r); return r; } int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { + struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_wait_idle *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -336,10 +338,10 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, robj = gem_to_radeon_bo(gobj); r = radeon_bo_wait(robj, NULL, false); /* callback hw specific functions if any */ - if (robj->rdev->asic->ioctl_wait_idle) - robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj); + if (rdev->asic->ioctl_wait_idle) + robj->rdev->asic->ioctl_wait_idle(rdev, robj); drm_gem_object_unreference_unlocked(gobj); - r = radeon_gem_handle_lockup(robj->rdev, r); + r = radeon_gem_handle_lockup(rdev, r); return r; } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index c7b61f16ecfd..0b0279291a73 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2365,12 +2365,12 @@ int si_pcie_gart_enable(struct radeon_device *rdev) WREG32(0x15DC, 0); /* empty context1-15 */ - /* FIXME start with 1G, once using 2 level pt switch to full + /* FIXME start with 4G, once using 2 level pt switch to full * vm size space */ /* set vm size, must be a multiple of 4 */ WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); - WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE); + WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn); for (i = 1; i < 16; i++) { if (i < 8) WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 7f1feb2f467a..637c51c11b44 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -693,7 +693,7 @@ static void __cpuinit get_core_online(unsigned int cpu) * sensors. We check this bit only, all the early CPUs * without thermal sensors will be filtered out. */ - if (!cpu_has(c, X86_FEATURE_DTS)) + if (!cpu_has(c, X86_FEATURE_DTHERM)) return; if (!pdev) { @@ -794,7 +794,7 @@ static struct notifier_block coretemp_cpu_notifier __refdata = { }; static const struct x86_cpu_id coretemp_ids[] = { - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTS }, + { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM }, {} }; MODULE_DEVICE_TABLE(x86cpu, coretemp_ids); diff --git a/drivers/ieee802154/at86rf230.c b/drivers/ieee802154/at86rf230.c index 4d033d4c4ddc..902e38bb382f 100644 --- a/drivers/ieee802154/at86rf230.c +++ b/drivers/ieee802154/at86rf230.c @@ -585,7 +585,6 @@ err: static int at86rf230_rx(struct at86rf230_local *lp) { u8 len = 128, lqi = 0; - int rc; struct sk_buff *skb; skb = alloc_skb(len, GFP_KERNEL); @@ -607,7 +606,7 @@ static int at86rf230_rx(struct at86rf230_local *lp) ieee802154_rx_irqsafe(lp->dev, skb, lqi); - dev_dbg(&lp->spi->dev, "READ_FBUF: %d %d %x\n", rc, len, lqi); + dev_dbg(&lp->spi->dev, "READ_FBUF: %d %x\n", len, lqi); return 0; err: diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 740dcc065cf2..77b6b182778a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1374,7 +1374,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) goto reject; } dst = &rt->dst; - l2t = t3_l2t_get(tdev, dst, NULL); + l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1942,7 +1942,8 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail3; } ep->dst = &rt->dst; - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL); + ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, + &cm_id->remote_addr.sin_addr.s_addr); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3530c41fcd1f..8a3a2037b005 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -718,26 +718,53 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, return ret; } +struct mlx4_ib_steering { + struct list_head list; + u64 reg_id; + union ib_gid gid; +}; + static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); + u64 reg_id; + struct mlx4_ib_steering *ib_steering = NULL; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL); + if (!ib_steering) + return -ENOMEM; + } - err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, - !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), - MLX4_PROT_IB_IPV6); + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, + !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), + MLX4_PROT_IB_IPV6, ®_id); if (err) - return err; + goto err_malloc; err = add_gid_entry(ibqp, gid); if (err) goto err_add; + if (ib_steering) { + memcpy(ib_steering->gid.raw, gid->raw, 16); + ib_steering->reg_id = reg_id; + mutex_lock(&mqp->mutex); + list_add(&ib_steering->list, &mqp->steering_rules); + mutex_unlock(&mqp->mutex); + } return 0; err_add: - mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + MLX4_PROT_IB_IPV6, reg_id); +err_malloc: + kfree(ib_steering); + return err; } @@ -765,9 +792,30 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) u8 mac[6]; struct net_device *ndev; struct mlx4_ib_gid_entry *ge; + u64 reg_id = 0; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + struct mlx4_ib_steering *ib_steering; + + mutex_lock(&mqp->mutex); + list_for_each_entry(ib_steering, &mqp->steering_rules, list) { + if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) { + list_del(&ib_steering->list); + break; + } + } + mutex_unlock(&mqp->mutex); + if (&ib_steering->list == &mqp->steering_rules) { + pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n"); + return -EINVAL; + } + reg_id = ib_steering->reg_id; + kfree(ib_steering); + } - err = mlx4_multicast_detach(mdev->dev, - &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); + err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + MLX4_PROT_IB_IPV6, reg_id); if (err) return err; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ff36655d23d3..42df4f7a6a5b 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -163,6 +163,7 @@ struct mlx4_ib_qp { u8 state; int mlx_type; struct list_head gid_list; + struct list_head steering_rules; }; struct mlx4_ib_srq { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8d4ed24aef93..6af19f6c2b11 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -495,6 +495,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); + INIT_LIST_HEAD(&qp->steering_rules); qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3974c290b667..bbee4b2d7a13 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -715,7 +715,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); if (likely(skb_dst(skb))) { - n = dst_get_neighbour_noref(skb_dst(skb)); + n = dst_neigh_lookup_skb(skb_dst(skb), skb); if (!n) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -797,6 +797,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) } } unlock: + if (n) + neigh_release(n); rcu_read_unlock(); return NETDEV_TX_OK; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 20ebc6fd1bb9..7cecb16d3d48 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -658,9 +658,15 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) { struct ipoib_dev_priv *priv = netdev_priv(dev); + struct dst_entry *dst = skb_dst(skb); struct ipoib_mcast *mcast; + struct neighbour *n; unsigned long flags; + n = NULL; + if (dst) + n = dst_neigh_lookup_skb(dst, skb); + spin_lock_irqsave(&priv->lock, flags); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) || @@ -715,29 +721,28 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) out: if (mcast && mcast->ah) { - struct dst_entry *dst = skb_dst(skb); - struct neighbour *n = NULL; - - rcu_read_lock(); - if (dst) - n = dst_get_neighbour_noref(dst); - if (n && !*to_ipoib_neigh(n)) { - struct ipoib_neigh *neigh = ipoib_neigh_alloc(n, - skb->dev); - - if (neigh) { - kref_get(&mcast->ah->ref); - neigh->ah = mcast->ah; - list_add_tail(&neigh->list, &mcast->neigh_list); + if (n) { + if (!*to_ipoib_neigh(n)) { + struct ipoib_neigh *neigh; + + neigh = ipoib_neigh_alloc(n, skb->dev); + if (neigh) { + kref_get(&mcast->ah->ref); + neigh->ah = mcast->ah; + list_add_tail(&neigh->list, + &mcast->neigh_list); + } } + neigh_release(n); } - rcu_read_unlock(); spin_unlock_irqrestore(&priv->lock, flags); ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); return; } unlock: + if (n) + neigh_release(n); spin_unlock_irqrestore(&priv->lock, flags); } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 37fdaf81bd1f..ce59824fb414 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2292,6 +2292,13 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct if (r) return r; + r = dm_pool_commit_metadata(pool->pmd); + if (r) { + DMERR("%s: dm_pool_commit_metadata() failed, error = %d", + __func__, r); + return r; + } + r = dm_pool_reserve_metadata_snap(pool->pmd); if (r) DMWARN("reserve_metadata_snap message failed."); diff --git a/drivers/md/md.c b/drivers/md/md.c index 1c2f9048e1ae..a4c219e3c859 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5784,8 +5784,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) super_types[mddev->major_version]. validate_super(mddev, rdev); if ((info->state & (1<<MD_DISK_SYNC)) && - (!test_bit(In_sync, &rdev->flags) || - rdev->raid_disk != info->raid_disk)) { + rdev->raid_disk != info->raid_disk) { /* This was a hot-add request, but events doesn't * match, so reject it. */ @@ -6751,7 +6750,7 @@ struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev thread->tsk = kthread_run(md_thread, thread, "%s_%s", mdname(thread->mddev), - name ?: mddev->pers->name); + name); if (IS_ERR(thread->tsk)) { kfree(thread); return NULL; @@ -7298,6 +7297,7 @@ void md_do_sync(struct mddev *mddev) int skipped = 0; struct md_rdev *rdev; char *desc; + struct blk_plug plug; /* just incase thread restarts... */ if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) @@ -7447,6 +7447,7 @@ void md_do_sync(struct mddev *mddev) } mddev->curr_resync_completed = j; + blk_start_plug(&plug); while (j < max_sectors) { sector_t sectors; @@ -7552,6 +7553,7 @@ void md_do_sync(struct mddev *mddev) * this also signals 'finished resyncing' to md_stop */ out: + blk_finish_plug(&plug); wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); /* tell personality that we are finished */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 9339e67fcc79..61a1833ebaf3 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -474,7 +474,8 @@ static int multipath_run (struct mddev *mddev) } { - mddev->thread = md_register_thread(multipathd, mddev, NULL); + mddev->thread = md_register_thread(multipathd, mddev, + "multipath"); if (!mddev->thread) { printk(KERN_ERR "multipath: couldn't allocate thread" " for %s\n", mdname(mddev)); diff --git a/drivers/md/persistent-data/dm-space-map-checker.c b/drivers/md/persistent-data/dm-space-map-checker.c index 50ed53bf4aa2..fc90c11620ad 100644 --- a/drivers/md/persistent-data/dm-space-map-checker.c +++ b/drivers/md/persistent-data/dm-space-map-checker.c @@ -8,6 +8,7 @@ #include <linux/device-mapper.h> #include <linux/export.h> +#include <linux/vmalloc.h> #ifdef CONFIG_DM_DEBUG_SPACE_MAPS @@ -89,13 +90,23 @@ static int ca_create(struct count_array *ca, struct dm_space_map *sm) ca->nr = nr_blocks; ca->nr_free = nr_blocks; - ca->counts = kzalloc(sizeof(*ca->counts) * nr_blocks, GFP_KERNEL); - if (!ca->counts) - return -ENOMEM; + + if (!nr_blocks) + ca->counts = NULL; + else { + ca->counts = vzalloc(sizeof(*ca->counts) * nr_blocks); + if (!ca->counts) + return -ENOMEM; + } return 0; } +static void ca_destroy(struct count_array *ca) +{ + vfree(ca->counts); +} + static int ca_load(struct count_array *ca, struct dm_space_map *sm) { int r; @@ -126,12 +137,14 @@ static int ca_load(struct count_array *ca, struct dm_space_map *sm) static int ca_extend(struct count_array *ca, dm_block_t extra_blocks) { dm_block_t nr_blocks = ca->nr + extra_blocks; - uint32_t *counts = kzalloc(sizeof(*counts) * nr_blocks, GFP_KERNEL); + uint32_t *counts = vzalloc(sizeof(*counts) * nr_blocks); if (!counts) return -ENOMEM; - memcpy(counts, ca->counts, sizeof(*counts) * ca->nr); - kfree(ca->counts); + if (ca->counts) { + memcpy(counts, ca->counts, sizeof(*counts) * ca->nr); + ca_destroy(ca); + } ca->nr = nr_blocks; ca->nr_free += extra_blocks; ca->counts = counts; @@ -151,11 +164,6 @@ static int ca_commit(struct count_array *old, struct count_array *new) return 0; } -static void ca_destroy(struct count_array *ca) -{ - kfree(ca->counts); -} - /*----------------------------------------------------------------*/ struct sm_checker { @@ -343,25 +351,25 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) int r; struct sm_checker *smc; - if (!sm) - return NULL; + if (IS_ERR_OR_NULL(sm)) + return ERR_PTR(-EINVAL); smc = kmalloc(sizeof(*smc), GFP_KERNEL); if (!smc) - return NULL; + return ERR_PTR(-ENOMEM); memcpy(&smc->sm, &ops_, sizeof(smc->sm)); r = ca_create(&smc->old_counts, sm); if (r) { kfree(smc); - return NULL; + return ERR_PTR(r); } r = ca_create(&smc->counts, sm); if (r) { ca_destroy(&smc->old_counts); kfree(smc); - return NULL; + return ERR_PTR(r); } smc->real_sm = sm; @@ -371,7 +379,7 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) ca_destroy(&smc->counts); ca_destroy(&smc->old_counts); kfree(smc); - return NULL; + return ERR_PTR(r); } r = ca_commit(&smc->old_counts, &smc->counts); @@ -379,7 +387,7 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) ca_destroy(&smc->counts); ca_destroy(&smc->old_counts); kfree(smc); - return NULL; + return ERR_PTR(r); } return &smc->sm; @@ -391,25 +399,25 @@ struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) int r; struct sm_checker *smc; - if (!sm) - return NULL; + if (IS_ERR_OR_NULL(sm)) + return ERR_PTR(-EINVAL); smc = kmalloc(sizeof(*smc), GFP_KERNEL); if (!smc) - return NULL; + return ERR_PTR(-ENOMEM); memcpy(&smc->sm, &ops_, sizeof(smc->sm)); r = ca_create(&smc->old_counts, sm); if (r) { kfree(smc); - return NULL; + return ERR_PTR(r); } r = ca_create(&smc->counts, sm); if (r) { ca_destroy(&smc->old_counts); kfree(smc); - return NULL; + return ERR_PTR(r); } smc->real_sm = sm; diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index fc469ba9f627..3d0ed5332883 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -290,7 +290,16 @@ struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, dm_block_t nr_blocks) { struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks); - return dm_sm_checker_create_fresh(sm); + struct dm_space_map *smc; + + if (IS_ERR_OR_NULL(sm)) + return sm; + + smc = dm_sm_checker_create_fresh(sm); + if (IS_ERR(smc)) + dm_sm_destroy(sm); + + return smc; } EXPORT_SYMBOL_GPL(dm_sm_disk_create); diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 400fe144c0cd..e5604b32d91f 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -138,6 +138,9 @@ EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone); void dm_tm_destroy(struct dm_transaction_manager *tm) { + if (!tm->is_clone) + wipe_shadow_table(tm); + kfree(tm); } EXPORT_SYMBOL_GPL(dm_tm_destroy); @@ -344,8 +347,10 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, } *sm = dm_sm_checker_create(inner); - if (!*sm) + if (IS_ERR(*sm)) { + r = PTR_ERR(*sm); goto bad2; + } } else { r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location, @@ -364,8 +369,10 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, } *sm = dm_sm_checker_create(inner); - if (!*sm) + if (IS_ERR(*sm)) { + r = PTR_ERR(*sm); goto bad2; + } } return 0; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a9c7981ddd24..8c2754f835ef 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -517,8 +517,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect int bad_sectors; int disk = start_disk + i; - if (disk >= conf->raid_disks) - disk -= conf->raid_disks; + if (disk >= conf->raid_disks * 2) + disk -= conf->raid_disks * 2; rdev = rcu_dereference(conf->mirrors[disk].rdev); if (r1_bio->bios[disk] == IO_BLOCKED @@ -883,7 +883,6 @@ static void make_request(struct mddev *mddev, struct bio * bio) const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); struct md_rdev *blocked_rdev; - int plugged; int first_clone; int sectors_handled; int max_sectors; @@ -1034,7 +1033,6 @@ read_again: * the bad blocks. Each set of writes gets it's own r1bio * with a set of bios attached. */ - plugged = mddev_check_plugged(mddev); disks = conf->raid_disks * 2; retry_write: @@ -1191,6 +1189,8 @@ read_again: bio_list_add(&conf->pending_bio_list, mbio); conf->pending_count++; spin_unlock_irqrestore(&conf->device_lock, flags); + if (!mddev_check_plugged(mddev)) + md_wakeup_thread(mddev->thread); } /* Mustn't call r1_bio_write_done before this next test, * as it could result in the bio being freed. @@ -1213,9 +1213,6 @@ read_again: /* In case raid1d snuck in to freeze_array */ wake_up(&conf->wait_barrier); - - if (do_sync || !bitmap || !plugged) - md_wakeup_thread(mddev->thread); } static void status(struct seq_file *seq, struct mddev *mddev) @@ -2621,7 +2618,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) goto abort; } err = -ENOMEM; - conf->thread = md_register_thread(raid1d, mddev, NULL); + conf->thread = md_register_thread(raid1d, mddev, "raid1"); if (!conf->thread) { printk(KERN_ERR "md/raid1:%s: couldn't allocate thread\n", diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 99ae6068e456..8da6282254c3 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1039,7 +1039,6 @@ static void make_request(struct mddev *mddev, struct bio * bio) const unsigned long do_fua = (bio->bi_rw & REQ_FUA); unsigned long flags; struct md_rdev *blocked_rdev; - int plugged; int sectors_handled; int max_sectors; int sectors; @@ -1239,7 +1238,6 @@ read_again: * of r10_bios is recored in bio->bi_phys_segments just as with * the read case. */ - plugged = mddev_check_plugged(mddev); r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ raid10_find_phys(conf, r10_bio); @@ -1396,6 +1394,8 @@ retry_write: bio_list_add(&conf->pending_bio_list, mbio); conf->pending_count++; spin_unlock_irqrestore(&conf->device_lock, flags); + if (!mddev_check_plugged(mddev)) + md_wakeup_thread(mddev->thread); if (!r10_bio->devs[i].repl_bio) continue; @@ -1423,6 +1423,8 @@ retry_write: bio_list_add(&conf->pending_bio_list, mbio); conf->pending_count++; spin_unlock_irqrestore(&conf->device_lock, flags); + if (!mddev_check_plugged(mddev)) + md_wakeup_thread(mddev->thread); } /* Don't remove the bias on 'remaining' (one_write_done) until @@ -1448,9 +1450,6 @@ retry_write: /* In case raid10d snuck in to freeze_array */ wake_up(&conf->wait_barrier); - - if (do_sync || !mddev->bitmap || !plugged) - md_wakeup_thread(mddev->thread); } static void status(struct seq_file *seq, struct mddev *mddev) @@ -2310,7 +2309,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 if (r10_sync_page_io(rdev, r10_bio->devs[sl].addr + sect, - s<<9, conf->tmppage, WRITE) + s, conf->tmppage, WRITE) == 0) { /* Well, this device is dead */ printk(KERN_NOTICE @@ -2349,7 +2348,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 switch (r10_sync_page_io(rdev, r10_bio->devs[sl].addr + sect, - s<<9, conf->tmppage, + s, conf->tmppage, READ)) { case 0: /* Well, this device is dead */ @@ -2512,7 +2511,7 @@ read_more: slot = r10_bio->read_slot; printk_ratelimited( KERN_ERR - "md/raid10:%s: %s: redirecting" + "md/raid10:%s: %s: redirecting " "sector %llu to another mirror\n", mdname(mddev), bdevname(rdev->bdev, b), @@ -2661,7 +2660,8 @@ static void raid10d(struct mddev *mddev) blk_start_plug(&plug); for (;;) { - flush_pending_writes(conf); + if (atomic_read(&mddev->plug_cnt) == 0) + flush_pending_writes(conf); spin_lock_irqsave(&conf->device_lock, flags); if (list_empty(head)) { @@ -2890,6 +2890,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* want to reconstruct this device */ rb2 = r10_bio; sect = raid10_find_virt(conf, sector_nr, i); + if (sect >= mddev->resync_max_sectors) { + /* last stripe is not complete - don't + * try to recover this sector. + */ + continue; + } /* Unless we are doing a full sync, or a replacement * we only need to recover the block if it is set in * the bitmap @@ -3421,7 +3427,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) spin_lock_init(&conf->resync_lock); init_waitqueue_head(&conf->wait_barrier); - conf->thread = md_register_thread(raid10d, mddev, NULL); + conf->thread = md_register_thread(raid10d, mddev, "raid10"); if (!conf->thread) goto out; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d26767246d26..04348d76bb30 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) BUG_ON(!list_empty(&sh->lru)); BUG_ON(atomic_read(&conf->active_stripes)==0); if (test_bit(STRIPE_HANDLE, &sh->state)) { - if (test_bit(STRIPE_DELAYED, &sh->state)) + if (test_bit(STRIPE_DELAYED, &sh->state) && + !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) list_add_tail(&sh->lru, &conf->delayed_list); else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && sh->bm_seq - conf->seq_write > 0) list_add_tail(&sh->lru, &conf->bitmap_list); else { + clear_bit(STRIPE_DELAYED, &sh->state); clear_bit(STRIPE_BIT_DELAY, &sh->state); list_add_tail(&sh->lru, &conf->handle_list); } @@ -606,6 +608,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) * a chance*/ md_check_recovery(conf->mddev); } + /* + * Because md_wait_for_blocked_rdev + * will dec nr_pending, we must + * increment it first. + */ + atomic_inc(&rdev->nr_pending); md_wait_for_blocked_rdev(rdev, conf->mddev); } else { /* Acknowledged bad block - skip the write */ @@ -1737,6 +1745,7 @@ static void raid5_end_read_request(struct bio * bi, int error) } else { const char *bdn = bdevname(rdev->bdev, b); int retry = 0; + int set_bad = 0; clear_bit(R5_UPTODATE, &sh->dev[i].flags); atomic_inc(&rdev->read_errors); @@ -1748,7 +1757,8 @@ static void raid5_end_read_request(struct bio * bi, int error) mdname(conf->mddev), (unsigned long long)s, bdn); - else if (conf->mddev->degraded >= conf->max_degraded) + else if (conf->mddev->degraded >= conf->max_degraded) { + set_bad = 1; printk_ratelimited( KERN_WARNING "md/raid:%s: read error not correctable " @@ -1756,8 +1766,9 @@ static void raid5_end_read_request(struct bio * bi, int error) mdname(conf->mddev), (unsigned long long)s, bdn); - else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) + } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) { /* Oh, no!!! */ + set_bad = 1; printk_ratelimited( KERN_WARNING "md/raid:%s: read error NOT corrected!! " @@ -1765,7 +1776,7 @@ static void raid5_end_read_request(struct bio * bi, int error) mdname(conf->mddev), (unsigned long long)s, bdn); - else if (atomic_read(&rdev->read_errors) + } else if (atomic_read(&rdev->read_errors) > conf->max_nr_stripes) printk(KERN_WARNING "md/raid:%s: Too many read errors, failing device %s.\n", @@ -1777,7 +1788,11 @@ static void raid5_end_read_request(struct bio * bi, int error) else { clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); - md_error(conf->mddev, rdev); + if (!(set_bad + && test_bit(In_sync, &rdev->flags) + && rdev_set_badblocks( + rdev, sh->sector, STRIPE_SECTORS, 0))) + md_error(conf->mddev, rdev); } } rdev_dec_pending(rdev, conf->mddev); @@ -3582,8 +3597,18 @@ static void handle_stripe(struct stripe_head *sh) finish: /* wait for this device to become unblocked */ - if (conf->mddev->external && unlikely(s.blocked_rdev)) - md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); + if (unlikely(s.blocked_rdev)) { + if (conf->mddev->external) + md_wait_for_blocked_rdev(s.blocked_rdev, + conf->mddev); + else + /* Internal metadata will immediately + * be written by raid5d, so we don't + * need to wait here. + */ + rdev_dec_pending(s.blocked_rdev, + conf->mddev); + } if (s.handle_bad_blocks) for (i = disks; i--; ) { @@ -3881,8 +3906,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) raid_bio->bi_next = (void*)rdev; align_bi->bi_bdev = rdev->bdev; align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); - /* No reshape active, so we can trust rdev->data_offset */ - align_bi->bi_sector += rdev->data_offset; if (!bio_fits_rdev(align_bi) || is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, @@ -3893,6 +3916,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) return 0; } + /* No reshape active, so we can trust rdev->data_offset */ + align_bi->bi_sector += rdev->data_offset; + spin_lock_irq(&conf->device_lock); wait_event_lock_irq(conf->wait_for_stripe, conf->quiesce == 0, @@ -3971,7 +3997,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) struct stripe_head *sh; const int rw = bio_data_dir(bi); int remaining; - int plugged; if (unlikely(bi->bi_rw & REQ_FLUSH)) { md_flush_request(mddev, bi); @@ -3990,7 +4015,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) bi->bi_next = NULL; bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ - plugged = mddev_check_plugged(mddev); for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { DEFINE_WAIT(w); int previous; @@ -4092,6 +4116,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) if ((bi->bi_rw & REQ_SYNC) && !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); + mddev_check_plugged(mddev); release_stripe(sh); } else { /* cannot get stripe for read-ahead, just give-up */ @@ -4099,10 +4124,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) finish_wait(&conf->wait_for_overlap, &w); break; } - } - if (!plugged) - md_wakeup_thread(mddev->thread); spin_lock_irq(&conf->device_lock); remaining = raid5_dec_bi_phys_segments(bi); @@ -4823,6 +4845,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) int raid_disk, memory, max_disks; struct md_rdev *rdev; struct disk_info *disk; + char pers_name[6]; if (mddev->new_level != 5 && mddev->new_level != 4 @@ -4946,7 +4969,8 @@ static struct r5conf *setup_conf(struct mddev *mddev) printk(KERN_INFO "md/raid:%s: allocated %dkB\n", mdname(mddev), memory); - conf->thread = md_register_thread(raid5d, mddev, NULL); + sprintf(pers_name, "raid%d", mddev->new_level); + conf->thread = md_register_thread(raid5d, mddev, pers_name); if (!conf->thread) { printk(KERN_ERR "md/raid:%s: couldn't allocate thread.\n", @@ -5465,10 +5489,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) if (rdev->saved_raid_disk >= 0 && rdev->saved_raid_disk >= first && conf->disks[rdev->saved_raid_disk].rdev == NULL) - disk = rdev->saved_raid_disk; - else - disk = first; - for ( ; disk <= last ; disk++) { + first = rdev->saved_raid_disk; + + for (disk = first; disk <= last; disk++) { p = conf->disks + disk; if (p->rdev == NULL) { clear_bit(In_sync, &rdev->flags); @@ -5477,8 +5500,11 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) if (rdev->saved_raid_disk != disk) conf->fullsync = 1; rcu_assign_pointer(p->rdev, rdev); - break; + goto out; } + } + for (disk = first; disk <= last; disk++) { + p = conf->disks + disk; if (test_bit(WantReplacement, &p->rdev->flags) && p->replacement == NULL) { clear_bit(In_sync, &rdev->flags); @@ -5490,6 +5516,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) break; } } +out: print_raid5_conf(conf); return err; } diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 9eb7624639b9..1901da153312 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6250,7 +6250,7 @@ bnx2_enable_msix(struct bnx2 *bp, int msix_vecs) static int bnx2_setup_int_mode(struct bnx2 *bp, int dis_msi) { - int cpus = num_online_cpus(); + int cpus = netif_get_num_default_rss_queues(); int msix_vecs; if (!bp->num_req_rx_rings) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index daa894bd772a..53659f321d51 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -822,7 +822,8 @@ static inline int bnx2x_calc_num_queues(struct bnx2x *bp) { return num_queues ? min_t(int, num_queues, BNX2X_MAX_QUEUES(bp)) : - min_t(int, num_online_cpus(), BNX2X_MAX_QUEUES(bp)); + min_t(int, netif_get_num_default_rss_queues(), + BNX2X_MAX_QUEUES(bp)); } static inline void bnx2x_clear_sge_mask_next_elems(struct bnx2x_fastpath *fp) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index e47ff8be1d7b..6cbab0369738 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -9908,7 +9908,7 @@ static bool tg3_enable_msix(struct tg3 *tp) int i, rc; struct msix_entry msix_ent[tp->irq_max]; - tp->irq_cnt = num_online_cpus(); + tp->irq_cnt = netif_get_num_default_rss_queues(); if (tp->irq_cnt > 1) { /* We want as many rx rings enabled as there are cpus. * In multiqueue MSI-X mode, the first MSI-X vector diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index abb6ce7c1b7e..9b0874957df5 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -3050,7 +3050,7 @@ static struct pci_error_handlers t3_err_handler = { static void set_nqsets(struct adapter *adap) { int i, j = 0; - int num_cpus = num_online_cpus(); + int num_cpus = netif_get_num_default_rss_queues(); int hwports = adap->params.nports; int nqsets = adap->msix_nvectors - 1; diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index 55cf72af69ce..2dbbcbb450d3 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -62,7 +62,9 @@ static const unsigned int MAX_ATIDS = 64 * 1024; static const unsigned int ATID_BASE = 0x10000; static void cxgb_neigh_update(struct neighbour *neigh); -static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new); +static void cxgb_redirect(struct dst_entry *old, struct neighbour *old_neigh, + struct dst_entry *new, struct neighbour *new_neigh, + const void *daddr); static inline int offload_activated(struct t3cdev *tdev) { @@ -968,8 +970,10 @@ static int nb_callback(struct notifier_block *self, unsigned long event, } case (NETEVENT_REDIRECT):{ struct netevent_redirect *nr = ctx; - cxgb_redirect(nr->old, nr->new); - cxgb_neigh_update(dst_get_neighbour_noref(nr->new)); + cxgb_redirect(nr->old, nr->old_neigh, + nr->new, nr->new_neigh, + nr->daddr); + cxgb_neigh_update(nr->new_neigh); break; } default: @@ -1107,10 +1111,11 @@ static void set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e) tdev->send(tdev, skb); } -static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new) +static void cxgb_redirect(struct dst_entry *old, struct neighbour *old_neigh, + struct dst_entry *new, struct neighbour *new_neigh, + const void *daddr) { struct net_device *olddev, *newdev; - struct neighbour *n; struct tid_info *ti; struct t3cdev *tdev; u32 tid; @@ -1118,15 +1123,8 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new) struct l2t_entry *e; struct t3c_tid_entry *te; - n = dst_get_neighbour_noref(old); - if (!n) - return; - olddev = n->dev; - - n = dst_get_neighbour_noref(new); - if (!n) - return; - newdev = n->dev; + olddev = old_neigh->dev; + newdev = new_neigh->dev; if (!is_offloading(olddev)) return; @@ -1144,7 +1142,7 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new) } /* Add new L2T entry */ - e = t3_l2t_get(tdev, new, newdev); + e = t3_l2t_get(tdev, new, newdev, daddr); if (!e) { printk(KERN_ERR "%s: couldn't allocate new l2t entry!\n", __func__); diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.c b/drivers/net/ethernet/chelsio/cxgb3/l2t.c index 3fa3c8833ed7..8d53438638b2 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.c @@ -299,7 +299,7 @@ static inline void reuse_entry(struct l2t_entry *e, struct neighbour *neigh) } struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst, - struct net_device *dev) + struct net_device *dev, const void *daddr) { struct l2t_entry *e = NULL; struct neighbour *neigh; @@ -311,7 +311,7 @@ struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst, int smt_idx; rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); + neigh = dst_neigh_lookup(dst, daddr); if (!neigh) goto done_rcu; @@ -360,6 +360,8 @@ struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst, done_unlock: write_unlock_bh(&d->lock); done_rcu: + if (neigh) + neigh_release(neigh); rcu_read_unlock(); return e; } diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.h b/drivers/net/ethernet/chelsio/cxgb3/l2t.h index c4e864369751..8cffcdfd5678 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/l2t.h +++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.h @@ -110,7 +110,7 @@ static inline void set_arp_failure_handler(struct sk_buff *skb, void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e); void t3_l2t_update(struct t3cdev *dev, struct neighbour *neigh); struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst, - struct net_device *dev); + struct net_device *dev, const void *daddr); int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb, struct l2t_entry *e); void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e1f96fbb48c1..5ed49af23d6a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3493,8 +3493,8 @@ static void __devinit cfg_queues(struct adapter *adap) */ if (n10g) q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g; - if (q10g > num_online_cpus()) - q10g = num_online_cpus(); + if (q10g > netif_get_num_default_rss_queues()) + q10g = netif_get_num_default_rss_queues(); for_each_port(adap, i) { struct port_info *pi = adap2pinfo(adap, i); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index edce7af97c87..2141bd784751 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2172,12 +2172,14 @@ static void be_msix_disable(struct be_adapter *adapter) static uint be_num_rss_want(struct be_adapter *adapter) { + u32 num = 0; if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) && !sriov_want(adapter) && be_physfn(adapter) && - !be_is_mc(adapter)) - return (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS; - else - return 0; + !be_is_mc(adapter)) { + num = (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS; + num = min_t(u32, num, (u32)netif_get_num_default_rss_queues()); + } + return num; } static void be_msix_enable(struct be_adapter *adapter) diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index 351a4097b2ba..76edbc1be33b 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -103,6 +103,7 @@ #define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ #define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ #define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ +#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ #define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ #define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index ba86b3f8a404..a166efc2fead 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -496,7 +496,7 @@ static void e1000_receive_skb(struct e1000_adapter *adapter, * @sk_buff: socket buffer with received data **/ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, - __le16 csum, struct sk_buff *skb) + struct sk_buff *skb) { u16 status = (u16)status_err; u8 errors = (u8)(status_err >> 24); @@ -511,8 +511,8 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, if (status & E1000_RXD_STAT_IXSM) return; - /* TCP/UDP checksum error bit is set */ - if (errors & E1000_RXD_ERR_TCPE) { + /* TCP/UDP checksum error bit or IP checksum error bit is set */ + if (errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) { /* let the stack verify checksum errors */ adapter->hw_csum_err++; return; @@ -523,19 +523,7 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err, return; /* It must be a TCP or UDP packet with a valid checksum */ - if (status & E1000_RXD_STAT_TCPCS) { - /* TCP checksum is good */ - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else { - /* - * IP fragment with UDP payload - * Hardware complements the payload checksum, so we undo it - * and then put the value in host order for further stack use. - */ - __sum16 sum = (__force __sum16)swab16((__force u16)csum); - skb->csum = csum_unfold(~sum); - skb->ip_summed = CHECKSUM_COMPLETE; - } + skb->ip_summed = CHECKSUM_UNNECESSARY; adapter->hw_csum_good++; } @@ -954,8 +942,7 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done, skb_put(skb, length); /* Receive Checksum Offload */ - e1000_rx_checksum(adapter, staterr, - rx_desc->wb.lower.hi_dword.csum_ip.csum, skb); + e1000_rx_checksum(adapter, staterr, skb); e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb); @@ -1341,8 +1328,7 @@ copydone: total_rx_bytes += skb->len; total_rx_packets++; - e1000_rx_checksum(adapter, staterr, - rx_desc->wb.lower.hi_dword.csum_ip.csum, skb); + e1000_rx_checksum(adapter, staterr, skb); e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb); @@ -1512,9 +1498,8 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_ring *rx_ring, int *work_done, } } - /* Receive Checksum Offload XXX recompute due to CRC strip? */ - e1000_rx_checksum(adapter, staterr, - rx_desc->wb.lower.hi_dword.csum_ip.csum, skb); + /* Receive Checksum Offload */ + e1000_rx_checksum(adapter, staterr, skb); e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb); @@ -3098,19 +3083,10 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) /* Enable Receive Checksum Offload for TCP and UDP */ rxcsum = er32(RXCSUM); - if (adapter->netdev->features & NETIF_F_RXCSUM) { + if (adapter->netdev->features & NETIF_F_RXCSUM) rxcsum |= E1000_RXCSUM_TUOFL; - - /* - * IPv4 payload checksum for UDP fragments must be - * used in conjunction with packet-split. - */ - if (adapter->rx_ps_pages) - rxcsum |= E1000_RXCSUM_IPPCSE; - } else { + else rxcsum &= ~E1000_RXCSUM_TUOFL; - /* no need to clear IPPCSE as it defaults to 0 */ - } ew32(RXCSUM, rxcsum); if (adapter->hw.mac.type == e1000_pch2lan) { @@ -5241,22 +5217,10 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; /* Jumbo frame support */ - if (max_frame > ETH_FRAME_LEN + ETH_FCS_LEN) { - if (!(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) { - e_err("Jumbo Frames not supported.\n"); - return -EINVAL; - } - - /* - * IP payload checksum (enabled with jumbos/packet-split when - * Rx checksum is enabled) and generation of RSS hash is - * mutually exclusive in the hardware. - */ - if ((netdev->features & NETIF_F_RXCSUM) && - (netdev->features & NETIF_F_RXHASH)) { - e_err("Jumbo frames cannot be enabled when both receive checksum offload and receive hashing are enabled. Disable one of the receive offload features before enabling jumbos.\n"); - return -EINVAL; - } + if ((max_frame > ETH_FRAME_LEN + ETH_FCS_LEN) && + !(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) { + e_err("Jumbo Frames not supported.\n"); + return -EINVAL; } /* Supported frame sizes */ @@ -6030,17 +5994,6 @@ static int e1000_set_features(struct net_device *netdev, NETIF_F_RXALL))) return 0; - /* - * IP payload checksum (enabled with jumbos/packet-split when Rx - * checksum is enabled) and generation of RSS hash is mutually - * exclusive in the hardware. - */ - if (adapter->rx_ps_pages && - (features & NETIF_F_RXCSUM) && (features & NETIF_F_RXHASH)) { - e_err("Enabling both receive checksum offload and receive hashing is not possible with jumbo frames. Disable jumbos or enable only one of the receive offload features.\n"); - return -EINVAL; - } - if (changed & NETIF_F_RXFCS) { if (features & NETIF_F_RXFCS) { adapter->flags2 &= ~FLAG2_CRC_STRIPPING; diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c index 8ce67064b9c5..90eef07943f4 100644 --- a/drivers/net/ethernet/intel/igbvf/ethtool.c +++ b/drivers/net/ethernet/intel/igbvf/ethtool.c @@ -357,21 +357,28 @@ static int igbvf_set_coalesce(struct net_device *netdev, struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - if ((ec->rx_coalesce_usecs > IGBVF_MAX_ITR_USECS) || - ((ec->rx_coalesce_usecs > 3) && - (ec->rx_coalesce_usecs < IGBVF_MIN_ITR_USECS)) || - (ec->rx_coalesce_usecs == 2)) - return -EINVAL; - - /* convert to rate of irq's per second */ - if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3) { + if ((ec->rx_coalesce_usecs >= IGBVF_MIN_ITR_USECS) && + (ec->rx_coalesce_usecs <= IGBVF_MAX_ITR_USECS)) { + adapter->current_itr = ec->rx_coalesce_usecs << 2; + adapter->requested_itr = 1000000000 / + (adapter->current_itr * 256); + } else if ((ec->rx_coalesce_usecs == 3) || + (ec->rx_coalesce_usecs == 2)) { adapter->current_itr = IGBVF_START_ITR; adapter->requested_itr = ec->rx_coalesce_usecs; - } else { - adapter->current_itr = ec->rx_coalesce_usecs << 2; + } else if (ec->rx_coalesce_usecs == 0) { + /* + * The user's desire is to turn off interrupt throttling + * altogether, but due to HW limitations, we can't do that. + * Instead we set a very small value in EITR, which would + * allow ~967k interrupts per second, but allow the adapter's + * internal clocking to still function properly. + */ + adapter->current_itr = 4; adapter->requested_itr = 1000000000 / (adapter->current_itr * 256); - } + } else + return -EINVAL; writel(adapter->current_itr, hw->hw_addr + adapter->rx_ring->itr_register); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 842c8ce9494e..7e94987d030c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1080,6 +1080,25 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = NULL }, + /* flow steering commands */ + { + .opcode = MLX4_QP_FLOW_STEERING_ATTACH, + .has_inbox = true, + .has_outbox = false, + .out_is_imm = true, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QP_FLOW_STEERING_ATTACH_wrapper + }, + { + .opcode = MLX4_QP_FLOW_STEERING_DETACH, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper + }, }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 72901ce2b088..dd6a77b21149 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -38,6 +38,10 @@ #include "mlx4_en.h" #include "en_port.h" +#define EN_ETHTOOL_QP_ATTACH (1ull << 63) +#define EN_ETHTOOL_MAC_MASK 0xffffffffffffULL +#define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff) +#define EN_ETHTOOL_WORD_MASK cpu_to_be32(0xffffffff) static void mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) @@ -599,16 +603,369 @@ static int mlx4_en_set_rxfh_indir(struct net_device *dev, return err; } +#define all_zeros_or_all_ones(field) \ + ((field) == 0 || (field) == (__force typeof(field))-1) + +static int mlx4_en_validate_flow(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_usrip4_spec *l3_mask; + struct ethtool_tcpip4_spec *l4_mask; + struct ethhdr *eth_mask; + u64 full_mac = ~0ull; + u64 zero_mac = 0; + + if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + switch (cmd->fs.flow_type & ~FLOW_EXT) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + if (cmd->fs.m_u.tcp_ip4_spec.tos) + return -EINVAL; + l4_mask = &cmd->fs.m_u.tcp_ip4_spec; + /* don't allow mask which isn't all 0 or 1 */ + if (!all_zeros_or_all_ones(l4_mask->ip4src) || + !all_zeros_or_all_ones(l4_mask->ip4dst) || + !all_zeros_or_all_ones(l4_mask->psrc) || + !all_zeros_or_all_ones(l4_mask->pdst)) + return -EINVAL; + break; + case IP_USER_FLOW: + l3_mask = &cmd->fs.m_u.usr_ip4_spec; + if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto || + cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 || + (!l3_mask->ip4src && !l3_mask->ip4dst) || + !all_zeros_or_all_ones(l3_mask->ip4src) || + !all_zeros_or_all_ones(l3_mask->ip4dst)) + return -EINVAL; + break; + case ETHER_FLOW: + eth_mask = &cmd->fs.m_u.ether_spec; + /* source mac mask must not be set */ + if (memcmp(eth_mask->h_source, &zero_mac, ETH_ALEN)) + return -EINVAL; + + /* dest mac mask must be ff:ff:ff:ff:ff:ff */ + if (memcmp(eth_mask->h_dest, &full_mac, ETH_ALEN)) + return -EINVAL; + + if (!all_zeros_or_all_ones(eth_mask->h_proto)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + if ((cmd->fs.flow_type & FLOW_EXT)) { + if (cmd->fs.m_ext.vlan_etype || + !(cmd->fs.m_ext.vlan_tci == 0 || + cmd->fs.m_ext.vlan_tci == cpu_to_be16(0xfff))) + return -EINVAL; + } + + return 0; +} + +static int add_ip_rule(struct mlx4_en_priv *priv, + struct ethtool_rxnfc *cmd, + struct list_head *list_h) +{ + struct mlx4_spec_list *spec_l3; + struct ethtool_usrip4_spec *l3_mask = &cmd->fs.m_u.usr_ip4_spec; + + spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL); + if (!spec_l3) { + en_err(priv, "Fail to alloc ethtool rule.\n"); + return -ENOMEM; + } + + spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; + spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src; + if (l3_mask->ip4src) + spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst; + if (l3_mask->ip4dst) + spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK; + list_add_tail(&spec_l3->list, list_h); + + return 0; +} + +static int add_tcp_udp_rule(struct mlx4_en_priv *priv, + struct ethtool_rxnfc *cmd, + struct list_head *list_h, int proto) +{ + struct mlx4_spec_list *spec_l3; + struct mlx4_spec_list *spec_l4; + struct ethtool_tcpip4_spec *l4_mask = &cmd->fs.m_u.tcp_ip4_spec; + + spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL); + spec_l4 = kzalloc(sizeof *spec_l4, GFP_KERNEL); + if (!spec_l4 || !spec_l3) { + en_err(priv, "Fail to alloc ethtool rule.\n"); + kfree(spec_l3); + kfree(spec_l4); + return -ENOMEM; + } + + spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4; + + if (proto == TCP_V4_FLOW) { + spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP; + spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst; + spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc; + spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst; + } else { + spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP; + spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src; + spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst; + spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc; + spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst; + } + + if (l4_mask->ip4src) + spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK; + if (l4_mask->ip4dst) + spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK; + + if (l4_mask->psrc) + spec_l4->tcp_udp.src_port_msk = EN_ETHTOOL_SHORT_MASK; + if (l4_mask->pdst) + spec_l4->tcp_udp.dst_port_msk = EN_ETHTOOL_SHORT_MASK; + + list_add_tail(&spec_l3->list, list_h); + list_add_tail(&spec_l4->list, list_h); + + return 0; +} + +static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev, + struct ethtool_rxnfc *cmd, + struct list_head *rule_list_h) +{ + int err; + u64 mac; + __be64 be_mac; + struct ethhdr *eth_spec; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_spec_list *spec_l2; + __be64 mac_msk = cpu_to_be64(EN_ETHTOOL_MAC_MASK << 16); + + err = mlx4_en_validate_flow(dev, cmd); + if (err) + return err; + + spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL); + if (!spec_l2) + return -ENOMEM; + + mac = priv->mac & EN_ETHTOOL_MAC_MASK; + be_mac = cpu_to_be64(mac << 16); + + spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN); + if ((cmd->fs.flow_type & ~FLOW_EXT) != ETHER_FLOW) + memcpy(spec_l2->eth.dst_mac, &be_mac, ETH_ALEN); + + if ((cmd->fs.flow_type & FLOW_EXT) && cmd->fs.m_ext.vlan_tci) { + spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci; + spec_l2->eth.vlan_id_msk = cpu_to_be16(0xfff); + } + + list_add_tail(&spec_l2->list, rule_list_h); + + switch (cmd->fs.flow_type & ~FLOW_EXT) { + case ETHER_FLOW: + eth_spec = &cmd->fs.h_u.ether_spec; + memcpy(&spec_l2->eth.dst_mac, eth_spec->h_dest, ETH_ALEN); + spec_l2->eth.ether_type = eth_spec->h_proto; + if (eth_spec->h_proto) + spec_l2->eth.ether_type_enable = 1; + break; + case IP_USER_FLOW: + err = add_ip_rule(priv, cmd, rule_list_h); + break; + case TCP_V4_FLOW: + err = add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW); + break; + case UDP_V4_FLOW: + err = add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW); + break; + } + + return err; +} + +static int mlx4_en_flow_replace(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + int err; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct ethtool_flow_id *loc_rule; + struct mlx4_spec_list *spec, *tmp_spec; + u32 qpn; + u64 reg_id; + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_PROMISC_NONE, + }; + + rule.port = priv->port; + rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location; + INIT_LIST_HEAD(&rule.list); + + /* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */ + if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC) + qpn = priv->drop_qp.qpn; + else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) { + qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1); + } else { + if (cmd->fs.ring_cookie >= priv->rx_ring_num) { + en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist.\n", + cmd->fs.ring_cookie); + return -EINVAL; + } + qpn = priv->rss_map.qps[cmd->fs.ring_cookie].qpn; + if (!qpn) { + en_warn(priv, "rxnfc: RX ring (%llu) is inactive.\n", + cmd->fs.ring_cookie); + return -EINVAL; + } + } + rule.qpn = qpn; + err = mlx4_en_ethtool_to_net_trans_rule(dev, cmd, &rule.list); + if (err) + goto out_free_list; + + loc_rule = &priv->ethtool_rules[cmd->fs.location]; + if (loc_rule->id) { + err = mlx4_flow_detach(priv->mdev->dev, loc_rule->id); + if (err) { + en_err(priv, "Fail to detach network rule at location %d. registration id = %llx\n", + cmd->fs.location, loc_rule->id); + goto out_free_list; + } + loc_rule->id = 0; + memset(&loc_rule->flow_spec, 0, + sizeof(struct ethtool_rx_flow_spec)); + } + err = mlx4_flow_attach(priv->mdev->dev, &rule, ®_id); + if (err) { + en_err(priv, "Fail to attach network rule at location %d.\n", + cmd->fs.location); + goto out_free_list; + } + loc_rule->id = reg_id; + memcpy(&loc_rule->flow_spec, &cmd->fs, + sizeof(struct ethtool_rx_flow_spec)); + +out_free_list: + list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) { + list_del(&spec->list); + kfree(spec); + } + return err; +} + +static int mlx4_en_flow_detach(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct ethtool_flow_id *rule; + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + rule = &priv->ethtool_rules[cmd->fs.location]; + if (!rule->id) { + err = -ENOENT; + goto out; + } + + err = mlx4_flow_detach(priv->mdev->dev, rule->id); + if (err) { + en_err(priv, "Fail to detach network rule at location %d. registration id = 0x%llx\n", + cmd->fs.location, rule->id); + goto out; + } + rule->id = 0; + memset(&rule->flow_spec, 0, sizeof(struct ethtool_rx_flow_spec)); +out: + return err; + +} + +static int mlx4_en_get_flow(struct net_device *dev, struct ethtool_rxnfc *cmd, + int loc) +{ + int err = 0; + struct ethtool_flow_id *rule; + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (loc < 0 || loc >= MAX_NUM_OF_FS_RULES) + return -EINVAL; + + rule = &priv->ethtool_rules[loc]; + if (rule->id) + memcpy(&cmd->fs, &rule->flow_spec, + sizeof(struct ethtool_rx_flow_spec)); + else + err = -ENOENT; + + return err; +} + +static int mlx4_en_get_num_flows(struct mlx4_en_priv *priv) +{ + + int i, res = 0; + for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) { + if (priv->ethtool_rules[i].id) + res++; + } + return res; + +} + static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; int err = 0; + int i = 0, priority = 0; + + if ((cmd->cmd == ETHTOOL_GRXCLSRLCNT || + cmd->cmd == ETHTOOL_GRXCLSRULE || + cmd->cmd == ETHTOOL_GRXCLSRLALL) && + mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) + return -EINVAL; switch (cmd->cmd) { case ETHTOOL_GRXRINGS: cmd->data = priv->rx_ring_num; break; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = mlx4_en_get_num_flows(priv); + break; + case ETHTOOL_GRXCLSRULE: + err = mlx4_en_get_flow(dev, cmd, cmd->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) { + err = mlx4_en_get_flow(dev, cmd, i); + if (!err) + rule_locs[priority++] = i; + i++; + } + err = 0; + break; default: err = -EOPNOTSUPP; break; @@ -617,6 +974,30 @@ static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, return err; } +static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + + if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) + return -EINVAL; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx4_en_flow_replace(dev, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx4_en_flow_detach(dev, cmd); + break; + default: + en_warn(priv, "Unsupported ethtool command. (%d)\n", cmd->cmd); + return -EINVAL; + } + + return err; +} + const struct ethtool_ops mlx4_en_ethtool_ops = { .get_drvinfo = mlx4_en_get_drvinfo, .get_settings = mlx4_en_get_settings, @@ -637,6 +1018,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_ringparam = mlx4_en_get_ringparam, .set_ringparam = mlx4_en_set_ringparam, .get_rxnfc = mlx4_en_get_rxnfc, + .set_rxnfc = mlx4_en_set_rxnfc, .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size, .get_rxfh_indir = mlx4_en_get_rxfh_indir, .set_rxfh_indir = mlx4_en_set_rxfh_indir, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 073b85b45fc5..94375a8c6d42 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -170,33 +170,81 @@ static void mlx4_en_do_set_mac(struct work_struct *work) static void mlx4_en_clear_list(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_mc_list *tmp, *mc_to_del; - kfree(priv->mc_addrs); - priv->mc_addrs = NULL; - priv->mc_addrs_cnt = 0; + list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) { + list_del(&mc_to_del->list); + kfree(mc_to_del); + } } static void mlx4_en_cache_mclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct netdev_hw_addr *ha; - char *mc_addrs; - int mc_addrs_cnt = netdev_mc_count(dev); - int i; + struct mlx4_en_mc_list *tmp; - mc_addrs = kmalloc(mc_addrs_cnt * ETH_ALEN, GFP_ATOMIC); - if (!mc_addrs) { - en_err(priv, "failed to allocate multicast list\n"); - return; - } - i = 0; - netdev_for_each_mc_addr(ha, dev) - memcpy(mc_addrs + i++ * ETH_ALEN, ha->addr, ETH_ALEN); mlx4_en_clear_list(dev); - priv->mc_addrs = mc_addrs; - priv->mc_addrs_cnt = mc_addrs_cnt; + netdev_for_each_mc_addr(ha, dev) { + tmp = kzalloc(sizeof(struct mlx4_en_mc_list), GFP_ATOMIC); + if (!tmp) { + en_err(priv, "failed to allocate multicast list\n"); + mlx4_en_clear_list(dev); + return; + } + memcpy(tmp->addr, ha->addr, ETH_ALEN); + list_add_tail(&tmp->list, &priv->mc_list); + } } +static void update_mclist_flags(struct mlx4_en_priv *priv, + struct list_head *dst, + struct list_head *src) +{ + struct mlx4_en_mc_list *dst_tmp, *src_tmp, *new_mc; + bool found; + + /* Find all the entries that should be removed from dst, + * These are the entries that are not found in src + */ + list_for_each_entry(dst_tmp, dst, list) { + found = false; + list_for_each_entry(src_tmp, src, list) { + if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { + found = true; + break; + } + } + if (!found) + dst_tmp->action = MCLIST_REM; + } + + /* Add entries that exist in src but not in dst + * mark them as need to add + */ + list_for_each_entry(src_tmp, src, list) { + found = false; + list_for_each_entry(dst_tmp, dst, list) { + if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { + dst_tmp->action = MCLIST_NONE; + found = true; + break; + } + } + if (!found) { + new_mc = kmalloc(sizeof(struct mlx4_en_mc_list), + GFP_KERNEL); + if (!new_mc) { + en_err(priv, "Failed to allocate current multicast list\n"); + return; + } + memcpy(new_mc, src_tmp, + sizeof(struct mlx4_en_mc_list)); + new_mc->action = MCLIST_ADD; + list_add_tail(&new_mc->list, dst); + } + } +} static void mlx4_en_set_multicast(struct net_device *dev) { @@ -214,9 +262,10 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) mcast_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; + struct mlx4_en_mc_list *mclist, *tmp; u64 mcast_addr = 0; u8 mc_list[16] = {0}; - int err; + int err = 0; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { @@ -251,16 +300,46 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) priv->flags |= MLX4_EN_FLAG_PROMISC; /* Enable promiscouos mode */ - if (!(mdev->dev->caps.flags & - MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, - priv->base_qpn, 1); - else - err = mlx4_unicast_promisc_add(mdev->dev, priv->base_qpn, + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_add(mdev->dev, + priv->port, + priv->base_qpn, + MLX4_FS_PROMISC_UPLINK); + if (err) + en_err(priv, "Failed enabling promiscuous mode\n"); + priv->flags |= MLX4_EN_FLAG_MC_PROMISC; + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_unicast_promisc_add(mdev->dev, + priv->base_qpn, priv->port); - if (err) - en_err(priv, "Failed enabling " - "promiscuous mode\n"); + if (err) + en_err(priv, "Failed enabling unicast promiscuous mode\n"); + + /* Add the default qp number as multicast + * promisc + */ + if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { + err = mlx4_multicast_promisc_add(mdev->dev, + priv->base_qpn, + priv->port); + if (err) + en_err(priv, "Failed enabling multicast promiscuous mode\n"); + priv->flags |= MLX4_EN_FLAG_MC_PROMISC; + } + break; + + case MLX4_STEERING_MODE_A0: + err = mlx4_SET_PORT_qpn_calc(mdev->dev, + priv->port, + priv->base_qpn, + 1); + if (err) + en_err(priv, "Failed enabling promiscuous mode\n"); + break; + } /* Disable port multicast filter (unconditionally) */ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, @@ -269,15 +348,6 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) en_err(priv, "Failed disabling " "multicast filter\n"); - /* Add the default qp number as multicast promisc */ - if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { - err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, - priv->port); - if (err) - en_err(priv, "Failed entering multicast promisc mode\n"); - priv->flags |= MLX4_EN_FLAG_MC_PROMISC; - } - /* Disable port VLAN filter */ err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) @@ -296,22 +366,40 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ - if (!(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, - priv->base_qpn, 0); - else - err = mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_PROMISC_UPLINK); + if (err) + en_err(priv, "Failed disabling promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_unicast_promisc_remove(mdev->dev, + priv->base_qpn, priv->port); - if (err) - en_err(priv, "Failed disabling promiscuous mode\n"); + if (err) + en_err(priv, "Failed disabling unicast promiscuous mode\n"); + /* Disable Multicast promisc */ + if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { + err = mlx4_multicast_promisc_remove(mdev->dev, + priv->base_qpn, + priv->port); + if (err) + en_err(priv, "Failed disabling multicast promiscuous mode\n"); + priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + } + break; - /* Disable Multicast promisc */ - if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { - err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, - priv->port); + case MLX4_STEERING_MODE_A0: + err = mlx4_SET_PORT_qpn_calc(mdev->dev, + priv->port, + priv->base_qpn, 0); if (err) - en_err(priv, "Failed disabling multicast promiscuous mode\n"); - priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; + en_err(priv, "Failed disabling promiscuous mode\n"); + break; } /* Enable port VLAN filter */ @@ -329,18 +417,46 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) /* Add the default qp number as multicast promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { - err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, - priv->port); + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_add(mdev->dev, + priv->port, + priv->base_qpn, + MLX4_FS_PROMISC_ALL_MULTI); + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_multicast_promisc_add(mdev->dev, + priv->base_qpn, + priv->port); + break; + + case MLX4_STEERING_MODE_A0: + break; + } if (err) en_err(priv, "Failed entering multicast promisc mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; } } else { - int i; /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { - err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, - priv->port); + switch (mdev->dev->caps.steering_mode) { + case MLX4_STEERING_MODE_DEVICE_MANAGED: + err = mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_PROMISC_ALL_MULTI); + break; + + case MLX4_STEERING_MODE_B0: + err = mlx4_multicast_promisc_remove(mdev->dev, + priv->base_qpn, + priv->port); + break; + + case MLX4_STEERING_MODE_A0: + break; + } if (err) en_err(priv, "Failed disabling multicast promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; @@ -351,13 +467,6 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) if (err) en_err(priv, "Failed disabling multicast filter\n"); - /* Detach our qp from all the multicast addresses */ - for (i = 0; i < priv->mc_addrs_cnt; i++) { - memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN); - mc_list[5] = priv->port; - mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, - mc_list, MLX4_PROT_ETH); - } /* Flush mcast filter and init it with broadcast address */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST, 1, MLX4_MCAST_CONFIG); @@ -367,13 +476,8 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) netif_tx_lock_bh(dev); mlx4_en_cache_mclist(dev); netif_tx_unlock_bh(dev); - for (i = 0; i < priv->mc_addrs_cnt; i++) { - mcast_addr = - mlx4_en_mac_to_u64(priv->mc_addrs + i * ETH_ALEN); - memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN); - mc_list[5] = priv->port; - mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, - mc_list, 0, MLX4_PROT_ETH); + list_for_each_entry(mclist, &priv->mc_list, list) { + mcast_addr = mlx4_en_mac_to_u64(mclist->addr); mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, mcast_addr, 0, MLX4_MCAST_CONFIG); } @@ -381,6 +485,42 @@ static void mlx4_en_do_set_multicast(struct work_struct *work) 0, MLX4_MCAST_ENABLE); if (err) en_err(priv, "Failed enabling multicast filter\n"); + + update_mclist_flags(priv, &priv->curr_list, &priv->mc_list); + list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { + if (mclist->action == MCLIST_REM) { + /* detach this address and delete from list */ + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + mc_list[5] = priv->port; + err = mlx4_multicast_detach(mdev->dev, + &priv->rss_map.indir_qp, + mc_list, + MLX4_PROT_ETH, + mclist->reg_id); + if (err) + en_err(priv, "Fail to detach multicast address\n"); + + /* remove from list */ + list_del(&mclist->list); + kfree(mclist); + } + + if (mclist->action == MCLIST_ADD) { + /* attach the address */ + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); + /* needed for B0 steering support */ + mc_list[5] = priv->port; + err = mlx4_multicast_attach(mdev->dev, + &priv->rss_map.indir_qp, + mc_list, + priv->port, 0, + MLX4_PROT_ETH, + &mclist->reg_id); + if (err) + en_err(priv, "Fail to attach multicast address\n"); + + } + } } out: mutex_unlock(&mdev->state_lock); @@ -605,6 +745,9 @@ int mlx4_en_start_port(struct net_device *dev) return 0; } + INIT_LIST_HEAD(&priv->mc_list); + INIT_LIST_HEAD(&priv->curr_list); + /* Calculate Rx buf size */ dev->mtu = min(dev->mtu, priv->max_mtu); mlx4_en_calc_rx_buf(dev); @@ -653,6 +796,10 @@ int mlx4_en_start_port(struct net_device *dev) goto mac_err; } + err = mlx4_en_create_drop_qp(priv); + if (err) + goto rss_err; + /* Configure tx cq's and rings */ for (i = 0; i < priv->tx_ring_num; i++) { /* Configure cq */ @@ -720,13 +867,23 @@ int mlx4_en_start_port(struct net_device *dev) /* Attach rx QP to bradcast address */ memset(&mc_list[10], 0xff, ETH_ALEN); - mc_list[5] = priv->port; + mc_list[5] = priv->port; /* needed for B0 steering support */ if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, - 0, MLX4_PROT_ETH)) + priv->port, 0, MLX4_PROT_ETH, + &priv->broadcast_id)) mlx4_warn(mdev, "Failed Attaching Broadcast\n"); /* Must redo promiscuous mode setup. */ priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_PROMISC_UPLINK); + mlx4_flow_steer_promisc_remove(mdev->dev, + priv->port, + MLX4_FS_PROMISC_ALL_MULTI); + } /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->mcast_task); @@ -742,7 +899,8 @@ tx_err: mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]); mlx4_en_deactivate_cq(priv, &priv->tx_cq[tx_index]); } - + mlx4_en_destroy_drop_qp(priv); +rss_err: mlx4_en_release_rss_steer(priv); mac_err: mlx4_put_eth_qp(mdev->dev, priv->port, priv->mac, priv->base_qpn); @@ -760,6 +918,7 @@ void mlx4_en_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_mc_list *mclist, *tmp; int i; u8 mc_list[16] = {0}; @@ -778,19 +937,26 @@ void mlx4_en_stop_port(struct net_device *dev) /* Detach All multicasts */ memset(&mc_list[10], 0xff, ETH_ALEN); - mc_list[5] = priv->port; + mc_list[5] = priv->port; /* needed for B0 steering support */ mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, - MLX4_PROT_ETH); - for (i = 0; i < priv->mc_addrs_cnt; i++) { - memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN); + MLX4_PROT_ETH, priv->broadcast_id); + list_for_each_entry(mclist, &priv->curr_list, list) { + memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, - mc_list, MLX4_PROT_ETH); + mc_list, MLX4_PROT_ETH, mclist->reg_id); } mlx4_en_clear_list(dev); + list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { + list_del(&mclist->list); + kfree(mclist); + } + /* Flush multicast filter */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); + mlx4_en_destroy_drop_qp(priv); + /* Free TX Rings */ for (i = 0; i < priv->tx_ring_num; i++) { mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index d49a7ac3187d..a04cbf767eb5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -844,6 +844,36 @@ out: return err; } +int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) +{ + int err; + u32 qpn; + + err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn); + if (err) { + en_err(priv, "Failed reserving drop qpn\n"); + return err; + } + err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); + if (err) { + en_err(priv, "Failed allocating drop qp\n"); + mlx4_qp_release_range(priv->mdev->dev, qpn, 1); + return err; + } + + return 0; +} + +void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv) +{ + u32 qpn; + + qpn = priv->drop_qp.qpn; + mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp); + mlx4_qp_free(priv->mdev->dev, &priv->drop_qp); + mlx4_qp_release_range(priv->mdev->dev, qpn, 1); +} + /* Allocate rx qp's and configure them according to rss map */ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) { diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9c83bb8151ea..1d70657058a5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -123,7 +123,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) static const char * const fname[] = { [0] = "RSS support", [1] = "RSS Toeplitz Hash Function support", - [2] = "RSS XOR Hash Function support" + [2] = "RSS XOR Hash Function support", + [3] = "Device manage flow steering support" }; int i; @@ -391,6 +392,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66 #define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68 +#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 +#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84 @@ -474,6 +477,12 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->num_ports = field & 0xf; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET); dev_cap->max_msg_sz = 1 << (field & 0x1f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN; + dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f; + MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET); + dev_cap->fs_max_num_qp_per_entry = field; MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET); dev_cap->stat_rate_support = stat_rate; MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); @@ -1061,6 +1070,15 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) #define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18) #define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) +#define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6 +#define INIT_HCA_FS_PARAM_OFFSET 0x1d0 +#define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) +#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12) +#define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b) +#define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21) +#define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22) +#define INIT_HCA_FS_IB_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x25) +#define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26) #define INIT_HCA_TPT_OFFSET 0x0f0 #define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) #define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) @@ -1119,14 +1137,44 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET); - /* multicast attributes */ - - MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); - MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); - MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) - MLX4_PUT(inbox, (u8) (1 << 3), INIT_HCA_UC_STEERING_OFFSET); - MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + /* steering attributes */ + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= + cpu_to_be32(1 << + INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN); + + MLX4_PUT(inbox, param->mc_base, INIT_HCA_FS_BASE_OFFSET); + MLX4_PUT(inbox, param->log_mc_entry_sz, + INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_table_sz, + INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + /* Enable Ethernet flow steering + * with udp unicast and tcp unicast + */ + MLX4_PUT(inbox, param->fs_hash_enable_bits, + INIT_HCA_FS_ETH_BITS_OFFSET); + MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, + INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET); + /* Enable IPoIB flow steering + * with udp unicast and tcp unicast + */ + MLX4_PUT(inbox, param->fs_hash_enable_bits, + INIT_HCA_FS_IB_BITS_OFFSET); + MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, + INIT_HCA_FS_IB_NUM_ADDRS_OFFSET); + } else { + MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); + MLX4_PUT(inbox, param->log_mc_entry_sz, + INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_hash_sz, + INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + MLX4_PUT(inbox, param->log_mc_table_sz, + INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0) + MLX4_PUT(inbox, (u8) (1 << 3), + INIT_HCA_UC_STEERING_OFFSET); + } /* TPT attributes */ @@ -1188,15 +1236,24 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); - /* multicast attributes */ + /* steering attributes */ + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { - MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); - MLX4_GET(param->log_mc_entry_sz, outbox, - INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); - MLX4_GET(param->log_mc_hash_sz, outbox, - INIT_HCA_LOG_MC_HASH_SZ_OFFSET); - MLX4_GET(param->log_mc_table_sz, outbox, - INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); + MLX4_GET(param->log_mc_entry_sz, outbox, + INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); + MLX4_GET(param->log_mc_table_sz, outbox, + INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + } else { + MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); + MLX4_GET(param->log_mc_entry_sz, outbox, + INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + MLX4_GET(param->log_mc_hash_sz, outbox, + INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + MLX4_GET(param->log_mc_table_sz, outbox, + INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + } /* TPT attributes */ diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 64c0399e4b78..83fcbbf1b169 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -78,6 +78,8 @@ struct mlx4_dev_cap { u16 wavelength[MLX4_MAX_PORTS + 1]; u64 trans_code[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; + int fs_log_max_ucast_qp_range_size; + int fs_max_num_qp_per_entry; u64 flags; u64 flags2; int reserved_uars; @@ -165,6 +167,7 @@ struct mlx4_init_hca_param { u8 log_mpt_sz; u8 log_uar_sz; u8 uar_page_sz; /* log pg sz in 4k chunks */ + u8 fs_hash_enable_bits; }; struct mlx4_init_ib_param { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a0313de122de..42645166bae2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -41,6 +41,7 @@ #include <linux/slab.h> #include <linux/io-mapping.h> #include <linux/delay.h> +#include <linux/netdevice.h> #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> @@ -90,7 +91,9 @@ module_param_named(log_num_mgm_entry_size, MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " of qp per mcg, for example:" " 10 gives 248.range: 9<=" - " log_num_mgm_entry_size <= 12"); + " log_num_mgm_entry_size <= 12." + " Not in use with device managed" + " flow steering"); #define MLX4_VF (1 << 0) @@ -243,7 +246,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_srqs = dev_cap->reserved_srqs; dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; - dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); /* * Subtract 1 from the limit because we need to allocate a * spare CQE so the HCA HW can tell the difference between an @@ -274,6 +276,28 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) { + dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; + dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; + dev->caps.fs_log_max_ucast_qp_range_size = + dev_cap->fs_log_max_ucast_qp_range_size; + } else { + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) { + dev->caps.steering_mode = MLX4_STEERING_MODE_B0; + } else { + dev->caps.steering_mode = MLX4_STEERING_MODE_A0; + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || + dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) + mlx4_warn(dev, "Must have UC_STEER and MC_STEER flags " + "set to use B0 steering. Falling back to A0 steering mode.\n"); + } + dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); + } + mlx4_dbg(dev, "Steering mode is: %s\n", + mlx4_steering_mode_str(dev->caps.steering_mode)); + /* Sense port always allowed on supported devices for ConnectX1 and 2 */ if (dev->pdev->device != 0x1003) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; @@ -967,9 +991,11 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, } /* - * It's not strictly required, but for simplicity just map the - * whole multicast group table now. The table isn't very big - * and it's a lot easier than trying to track ref counts. + * For flow steering device managed mode it is required to use + * mlx4_init_icm_table. For B0 steering mode it's not strictly + * required, but for simplicity just map the whole multicast + * group table now. The table isn't very big and it's a lot + * easier than trying to track ref counts. */ err = mlx4_init_icm_table(dev, &priv->mcg_table.table, init_hca->mc_base, @@ -1205,7 +1231,26 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto err_stop_fw; } + priv->fs_hash_mode = MLX4_FS_L2_HASH; + + switch (priv->fs_hash_mode) { + case MLX4_FS_L2_HASH: + init_hca.fs_hash_enable_bits = 0; + break; + + case MLX4_FS_L2_L3_L4_HASH: + /* Enable flow steering with + * udp unicast and tcp unicast + */ + init_hca.fs_hash_enable_bits = + MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN; + break; + } + profile = default_profile; + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) + profile.num_mcg = MLX4_FS_NUM_MCG; icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca); @@ -1539,8 +1584,8 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; int nreq = min_t(int, dev->caps.num_ports * - min_t(int, num_online_cpus() + 1, MAX_MSIX_P_PORT) - + MSIX_LEGACY_SZ, MAX_MSIX); + min_t(int, netif_get_num_default_rss_queues() + 1, + MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX); int err; int i; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index f4a8f98e402a..bc62f536ffae 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -41,6 +41,7 @@ #define MGM_QPN_MASK 0x00FFFFFF #define MGM_BLCK_LB_BIT 30 +#define MLX4_MAC_MASK 0xffffffffffffULL static const u8 zero_gid[16]; /* automatically initialized to 0 */ @@ -54,7 +55,12 @@ struct mlx4_mgm { int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { - return min((1 << mlx4_log_num_mgm_entry_size), MLX4_MAX_MGM_ENTRY_SIZE); + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) + return 1 << MLX4_FS_MGM_LOG_ENTRY_SIZE; + else + return min((1 << mlx4_log_num_mgm_entry_size), + MLX4_MAX_MGM_ENTRY_SIZE); } int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) @@ -62,6 +68,35 @@ int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) return 4 * (mlx4_get_mgm_entry_size(dev) / 16 - 2); } +static int mlx4_QP_FLOW_STEERING_ATTACH(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *mailbox, + u32 size, + u64 *reg_id) +{ + u64 imm; + int err = 0; + + err = mlx4_cmd_imm(dev, mailbox->dma, &imm, size, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + return err; + *reg_id = imm; + + return err; +} + +static int mlx4_QP_FLOW_STEERING_DETACH(struct mlx4_dev *dev, u64 regid) +{ + int err = 0; + + err = mlx4_cmd(dev, regid, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + return err; +} + static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index, struct mlx4_cmd_mailbox *mailbox) { @@ -614,6 +649,311 @@ static int find_entry(struct mlx4_dev *dev, u8 port, return err; } +struct mlx4_net_trans_rule_hw_ctrl { + __be32 ctrl; + __be32 vf_vep_port; + __be32 qpn; + __be32 reserved; +}; + +static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl, + struct mlx4_net_trans_rule_hw_ctrl *hw) +{ + static const u8 __promisc_mode[] = { + [MLX4_FS_PROMISC_NONE] = 0x0, + [MLX4_FS_PROMISC_UPLINK] = 0x1, + [MLX4_FS_PROMISC_FUNCTION_PORT] = 0x2, + [MLX4_FS_PROMISC_ALL_MULTI] = 0x3, + }; + + u32 dw = 0; + + dw = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; + dw |= ctrl->exclusive ? (1 << 2) : 0; + dw |= ctrl->allow_loopback ? (1 << 3) : 0; + dw |= __promisc_mode[ctrl->promisc_mode] << 8; + dw |= ctrl->priority << 16; + + hw->ctrl = cpu_to_be32(dw); + hw->vf_vep_port = cpu_to_be32(ctrl->port); + hw->qpn = cpu_to_be32(ctrl->qpn); +} + +struct mlx4_net_trans_rule_hw_ib { + u8 size; + u8 rsvd1; + __be16 id; + u32 rsvd2; + __be32 qpn; + __be32 qpn_mask; + u8 dst_gid[16]; + u8 dst_gid_msk[16]; +} __packed; + +struct mlx4_net_trans_rule_hw_eth { + u8 size; + u8 rsvd; + __be16 id; + u8 rsvd1[6]; + u8 dst_mac[6]; + u16 rsvd2; + u8 dst_mac_msk[6]; + u16 rsvd3; + u8 src_mac[6]; + u16 rsvd4; + u8 src_mac_msk[6]; + u8 rsvd5; + u8 ether_type_enable; + __be16 ether_type; + __be16 vlan_id_msk; + __be16 vlan_id; +} __packed; + +struct mlx4_net_trans_rule_hw_tcp_udp { + u8 size; + u8 rsvd; + __be16 id; + __be16 rsvd1[3]; + __be16 dst_port; + __be16 rsvd2; + __be16 dst_port_msk; + __be16 rsvd3; + __be16 src_port; + __be16 rsvd4; + __be16 src_port_msk; +} __packed; + +struct mlx4_net_trans_rule_hw_ipv4 { + u8 size; + u8 rsvd; + __be16 id; + __be32 rsvd1; + __be32 dst_ip; + __be32 dst_ip_msk; + __be32 src_ip; + __be32 src_ip_msk; +} __packed; + +struct _rule_hw { + union { + struct { + u8 size; + u8 rsvd; + __be16 id; + }; + struct mlx4_net_trans_rule_hw_eth eth; + struct mlx4_net_trans_rule_hw_ib ib; + struct mlx4_net_trans_rule_hw_ipv4 ipv4; + struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp; + }; +}; + +static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, + struct _rule_hw *rule_hw) +{ + static const u16 __sw_id_hw[] = { + [MLX4_NET_TRANS_RULE_ID_ETH] = 0xE001, + [MLX4_NET_TRANS_RULE_ID_IB] = 0xE005, + [MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003, + [MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002, + [MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004, + [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006 + }; + + static const size_t __rule_hw_sz[] = { + [MLX4_NET_TRANS_RULE_ID_ETH] = + sizeof(struct mlx4_net_trans_rule_hw_eth), + [MLX4_NET_TRANS_RULE_ID_IB] = + sizeof(struct mlx4_net_trans_rule_hw_ib), + [MLX4_NET_TRANS_RULE_ID_IPV6] = 0, + [MLX4_NET_TRANS_RULE_ID_IPV4] = + sizeof(struct mlx4_net_trans_rule_hw_ipv4), + [MLX4_NET_TRANS_RULE_ID_TCP] = + sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), + [MLX4_NET_TRANS_RULE_ID_UDP] = + sizeof(struct mlx4_net_trans_rule_hw_tcp_udp) + }; + if (spec->id > MLX4_NET_TRANS_RULE_NUM) { + mlx4_err(dev, "Invalid network rule id. id = %d\n", spec->id); + return -EINVAL; + } + memset(rule_hw, 0, __rule_hw_sz[spec->id]); + rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]); + rule_hw->size = __rule_hw_sz[spec->id] >> 2; + + switch (spec->id) { + case MLX4_NET_TRANS_RULE_ID_ETH: + memcpy(rule_hw->eth.dst_mac, spec->eth.dst_mac, ETH_ALEN); + memcpy(rule_hw->eth.dst_mac_msk, spec->eth.dst_mac_msk, + ETH_ALEN); + memcpy(rule_hw->eth.src_mac, spec->eth.src_mac, ETH_ALEN); + memcpy(rule_hw->eth.src_mac_msk, spec->eth.src_mac_msk, + ETH_ALEN); + if (spec->eth.ether_type_enable) { + rule_hw->eth.ether_type_enable = 1; + rule_hw->eth.ether_type = spec->eth.ether_type; + } + rule_hw->eth.vlan_id = spec->eth.vlan_id; + rule_hw->eth.vlan_id_msk = spec->eth.vlan_id_msk; + break; + + case MLX4_NET_TRANS_RULE_ID_IB: + rule_hw->ib.qpn = spec->ib.r_qpn; + rule_hw->ib.qpn_mask = spec->ib.qpn_msk; + memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16); + memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16); + break; + + case MLX4_NET_TRANS_RULE_ID_IPV6: + return -EOPNOTSUPP; + + case MLX4_NET_TRANS_RULE_ID_IPV4: + rule_hw->ipv4.src_ip = spec->ipv4.src_ip; + rule_hw->ipv4.src_ip_msk = spec->ipv4.src_ip_msk; + rule_hw->ipv4.dst_ip = spec->ipv4.dst_ip; + rule_hw->ipv4.dst_ip_msk = spec->ipv4.dst_ip_msk; + break; + + case MLX4_NET_TRANS_RULE_ID_TCP: + case MLX4_NET_TRANS_RULE_ID_UDP: + rule_hw->tcp_udp.dst_port = spec->tcp_udp.dst_port; + rule_hw->tcp_udp.dst_port_msk = spec->tcp_udp.dst_port_msk; + rule_hw->tcp_udp.src_port = spec->tcp_udp.src_port; + rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk; + break; + + default: + return -EINVAL; + } + + return __rule_hw_sz[spec->id]; +} + +static void mlx4_err_rule(struct mlx4_dev *dev, char *str, + struct mlx4_net_trans_rule *rule) +{ +#define BUF_SIZE 256 + struct mlx4_spec_list *cur; + char buf[BUF_SIZE]; + int len = 0; + + mlx4_err(dev, "%s", str); + len += snprintf(buf + len, BUF_SIZE - len, + "port = %d prio = 0x%x qp = 0x%x ", + rule->port, rule->priority, rule->qpn); + + list_for_each_entry(cur, &rule->list, list) { + switch (cur->id) { + case MLX4_NET_TRANS_RULE_ID_ETH: + len += snprintf(buf + len, BUF_SIZE - len, + "dmac = %pM ", &cur->eth.dst_mac); + if (cur->eth.ether_type) + len += snprintf(buf + len, BUF_SIZE - len, + "ethertype = 0x%x ", + be16_to_cpu(cur->eth.ether_type)); + if (cur->eth.vlan_id) + len += snprintf(buf + len, BUF_SIZE - len, + "vlan-id = %d ", + be16_to_cpu(cur->eth.vlan_id)); + break; + + case MLX4_NET_TRANS_RULE_ID_IPV4: + if (cur->ipv4.src_ip) + len += snprintf(buf + len, BUF_SIZE - len, + "src-ip = %pI4 ", + &cur->ipv4.src_ip); + if (cur->ipv4.dst_ip) + len += snprintf(buf + len, BUF_SIZE - len, + "dst-ip = %pI4 ", + &cur->ipv4.dst_ip); + break; + + case MLX4_NET_TRANS_RULE_ID_TCP: + case MLX4_NET_TRANS_RULE_ID_UDP: + if (cur->tcp_udp.src_port) + len += snprintf(buf + len, BUF_SIZE - len, + "src-port = %d ", + be16_to_cpu(cur->tcp_udp.src_port)); + if (cur->tcp_udp.dst_port) + len += snprintf(buf + len, BUF_SIZE - len, + "dst-port = %d ", + be16_to_cpu(cur->tcp_udp.dst_port)); + break; + + case MLX4_NET_TRANS_RULE_ID_IB: + len += snprintf(buf + len, BUF_SIZE - len, + "dst-gid = %pI6\n", cur->ib.dst_gid); + len += snprintf(buf + len, BUF_SIZE - len, + "dst-gid-mask = %pI6\n", + cur->ib.dst_gid_msk); + break; + + case MLX4_NET_TRANS_RULE_ID_IPV6: + break; + + default: + break; + } + } + len += snprintf(buf + len, BUF_SIZE - len, "\n"); + mlx4_err(dev, "%s", buf); + + if (len >= BUF_SIZE) + mlx4_err(dev, "Network rule error message was truncated, print buffer is too small.\n"); +} + +int mlx4_flow_attach(struct mlx4_dev *dev, + struct mlx4_net_trans_rule *rule, u64 *reg_id) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_spec_list *cur; + u32 size = 0; + int ret; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memset(mailbox->buf, 0, sizeof(struct mlx4_net_trans_rule_hw_ctrl)); + trans_rule_ctrl_to_hw(rule, mailbox->buf); + + size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); + + list_for_each_entry(cur, &rule->list, list) { + ret = parse_trans_rule(dev, cur, mailbox->buf + size); + if (ret < 0) { + mlx4_free_cmd_mailbox(dev, mailbox); + return -EINVAL; + } + size += ret; + } + + ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id); + if (ret == -ENOMEM) + mlx4_err_rule(dev, + "mcg table is full. Fail to register network rule.\n", + rule); + else if (ret) + mlx4_err_rule(dev, "Fail to register network rule.\n", rule); + + mlx4_free_cmd_mailbox(dev, mailbox); + + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_flow_attach); + +int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id) +{ + int err; + + err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id); + if (err) + mlx4_err(dev, "Fail to detach network rule. registration id = 0x%llx\n", + reg_id); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_flow_detach); + int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot, enum mlx4_steer_type steer) @@ -866,49 +1206,159 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, } int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - int block_mcast_loopback, enum mlx4_protocol prot) + u8 port, int block_mcast_loopback, + enum mlx4_protocol prot, u64 *reg_id) { - if (prot == MLX4_PROT_ETH && - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)) - return 0; - if (prot == MLX4_PROT_ETH) - gid[7] |= (MLX4_MC_STEER << 1); + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_A0: + if (prot == MLX4_PROT_ETH) + return 0; + + case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) + gid[7] |= (MLX4_MC_STEER << 1); + + if (mlx4_is_mfunc(dev)) + return mlx4_QP_ATTACH(dev, qp, gid, 1, + block_mcast_loopback, prot); + return mlx4_qp_attach_common(dev, qp, gid, + block_mcast_loopback, prot, + MLX4_MC_STEER); + + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + struct mlx4_spec_list spec = { {NULL} }; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .promisc_mode = MLX4_FS_PROMISC_NONE, + .priority = MLX4_DOMAIN_NIC, + }; + + rule.allow_loopback = ~block_mcast_loopback; + rule.port = port; + rule.qpn = qp->qpn; + INIT_LIST_HEAD(&rule.list); + + switch (prot) { + case MLX4_PROT_ETH: + spec.id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec.eth.dst_mac, &gid[10], ETH_ALEN); + memcpy(spec.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + break; - if (mlx4_is_mfunc(dev)) - return mlx4_QP_ATTACH(dev, qp, gid, 1, - block_mcast_loopback, prot); + case MLX4_PROT_IB_IPV6: + spec.id = MLX4_NET_TRANS_RULE_ID_IB; + memcpy(spec.ib.dst_gid, gid, 16); + memset(&spec.ib.dst_gid_msk, 0xff, 16); + break; + default: + return -EINVAL; + } + list_add_tail(&spec.list, &rule.list); - return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback, - prot, MLX4_MC_STEER); + return mlx4_flow_attach(dev, &rule, reg_id); + } + + default: + return -EINVAL; + } } EXPORT_SYMBOL_GPL(mlx4_multicast_attach); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - enum mlx4_protocol prot) + enum mlx4_protocol prot, u64 reg_id) { - if (prot == MLX4_PROT_ETH && - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)) - return 0; + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_A0: + if (prot == MLX4_PROT_ETH) + return 0; - if (prot == MLX4_PROT_ETH) - gid[7] |= (MLX4_MC_STEER << 1); + case MLX4_STEERING_MODE_B0: + if (prot == MLX4_PROT_ETH) + gid[7] |= (MLX4_MC_STEER << 1); - if (mlx4_is_mfunc(dev)) - return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); + if (mlx4_is_mfunc(dev)) + return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); + + return mlx4_qp_detach_common(dev, qp, gid, prot, + MLX4_MC_STEER); + + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return mlx4_flow_detach(dev, reg_id); - return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_MC_STEER); + default: + return -EINVAL; + } } EXPORT_SYMBOL_GPL(mlx4_multicast_detach); +int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, + u32 qpn, enum mlx4_net_trans_promisc_mode mode) +{ + struct mlx4_net_trans_rule rule; + u64 *regid_p; + + switch (mode) { + case MLX4_FS_PROMISC_UPLINK: + case MLX4_FS_PROMISC_FUNCTION_PORT: + regid_p = &dev->regid_promisc_array[port]; + break; + case MLX4_FS_PROMISC_ALL_MULTI: + regid_p = &dev->regid_allmulti_array[port]; + break; + default: + return -1; + } + + if (*regid_p != 0) + return -1; + + rule.promisc_mode = mode; + rule.port = port; + rule.qpn = qpn; + INIT_LIST_HEAD(&rule.list); + mlx4_err(dev, "going promisc on %x\n", port); + + return mlx4_flow_attach(dev, &rule, regid_p); +} +EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_add); + +int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, + enum mlx4_net_trans_promisc_mode mode) +{ + int ret; + u64 *regid_p; + + switch (mode) { + case MLX4_FS_PROMISC_UPLINK: + case MLX4_FS_PROMISC_FUNCTION_PORT: + regid_p = &dev->regid_promisc_array[port]; + break; + case MLX4_FS_PROMISC_ALL_MULTI: + regid_p = &dev->regid_allmulti_array[port]; + break; + default: + return -1; + } + + if (*regid_p == 0) + return -1; + + ret = mlx4_flow_detach(dev, *regid_p); + if (ret == 0) + *regid_p = 0; + + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_remove); + int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot) { - if (prot == MLX4_PROT_ETH && - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - return 0; - if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_UC_STEER << 1); @@ -924,10 +1374,6 @@ EXPORT_SYMBOL_GPL(mlx4_unicast_attach); int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot) { - if (prot == MLX4_PROT_ETH && - !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - return 0; - if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_UC_STEER << 1); @@ -968,9 +1414,6 @@ static int mlx4_PROMISC(struct mlx4_dev *dev, u32 qpn, int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) { - if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)) - return 0; - if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 1, port); @@ -980,9 +1423,6 @@ EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add); int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) { - if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)) - return 0; - if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 0, port); @@ -992,9 +1432,6 @@ EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove); int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) { - if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - return 0; - if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 1, port); @@ -1004,9 +1441,6 @@ EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add); int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) { - if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) - return 0; - if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 0, port); @@ -1019,6 +1453,10 @@ int mlx4_init_mcg_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int err; + /* No need for mcg_table when fw managed the mcg table*/ + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) + return 0; err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms, dev->caps.num_amgms - 1, 0, 0); if (err) @@ -1031,5 +1469,7 @@ int mlx4_init_mcg_table(struct mlx4_dev *dev) void mlx4_cleanup_mcg_table(struct mlx4_dev *dev) { - mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap); + if (dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED) + mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e5d20220762c..d2c436b10fbf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -39,6 +39,7 @@ #include <linux/mutex.h> #include <linux/radix-tree.h> +#include <linux/rbtree.h> #include <linux/timer.h> #include <linux/semaphore.h> #include <linux/workqueue.h> @@ -53,6 +54,17 @@ #define DRV_VERSION "1.1" #define DRV_RELDATE "Dec, 2011" +#define MLX4_FS_UDP_UC_EN (1 << 1) +#define MLX4_FS_TCP_UC_EN (1 << 2) +#define MLX4_FS_NUM_OF_L2_ADDR 8 +#define MLX4_FS_MGM_LOG_ENTRY_SIZE 7 +#define MLX4_FS_NUM_MCG (1 << 17) + +enum { + MLX4_FS_L2_HASH = 0, + MLX4_FS_L2_L3_L4_HASH, +}; + #define MLX4_NUM_UP 8 #define MLX4_NUM_TC 8 #define MLX4_RATELIMIT_UNITS 3 /* 100 Mbps */ @@ -137,6 +149,7 @@ enum mlx4_resource { RES_VLAN, RES_EQ, RES_COUNTER, + RES_FS_RULE, MLX4_NUM_OF_RESOURCE_TYPE }; @@ -509,7 +522,7 @@ struct slave_list { struct mlx4_resource_tracker { spinlock_t lock; /* tree for each resources */ - struct radix_tree_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE]; + struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE]; /* num_of_slave's lists, one per slave */ struct slave_list *slave_list; }; @@ -703,6 +716,7 @@ struct mlx4_set_port_rqp_calc_context { struct mlx4_mac_entry { u64 mac; + u64 reg_id; }; struct mlx4_port_info { @@ -776,6 +790,7 @@ struct mlx4_priv { struct mutex bf_mutex; struct io_mapping *bf_mapping; int reserved_mtts; + int fs_hash_mode; }; static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) @@ -1032,7 +1047,7 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port); /* resource tracker functions*/ int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource resource_type, - int resource_id, int *slave); + u64 resource_id, int *slave); void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id); int mlx4_init_resource_tracker(struct mlx4_dev *dev); @@ -1117,6 +1132,16 @@ int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_get_mgm_entry_size(struct mlx4_dev *dev); int mlx4_get_qp_per_mgm(struct mlx4_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 225c20d47900..a12632150b34 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -75,6 +75,7 @@ #define STAMP_SHIFT 31 #define STAMP_VAL 0x7fffffff #define STATS_DELAY (HZ / 4) +#define MAX_NUM_OF_FS_RULES 256 /* Typical TSO descriptor with 16 gather entries is 352 bytes... */ #define MAX_DESC_SIZE 512 @@ -404,6 +405,19 @@ struct mlx4_en_perf_stats { #define NUM_PERF_COUNTERS 6 }; +enum mlx4_en_mclist_act { + MCLIST_NONE, + MCLIST_REM, + MCLIST_ADD, +}; + +struct mlx4_en_mc_list { + struct list_head list; + enum mlx4_en_mclist_act action; + u8 addr[ETH_ALEN]; + u64 reg_id; +}; + struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; @@ -422,6 +436,11 @@ struct mlx4_en_frag_info { #endif +struct ethtool_flow_id { + struct ethtool_rx_flow_spec flow_spec; + u64 id; +}; + struct mlx4_en_priv { struct mlx4_en_dev *mdev; struct mlx4_en_port_profile *prof; @@ -431,6 +450,7 @@ struct mlx4_en_priv { struct net_device_stats ret_stats; struct mlx4_en_port_state port_state; spinlock_t stats_lock; + struct ethtool_flow_id ethtool_rules[MAX_NUM_OF_FS_RULES]; unsigned long last_moder_packets[MAX_RX_RINGS]; unsigned long last_moder_tx_packets; @@ -480,6 +500,7 @@ struct mlx4_en_priv { struct mlx4_en_rx_ring rx_ring[MAX_RX_RINGS]; struct mlx4_en_cq *tx_cq; struct mlx4_en_cq rx_cq[MAX_RX_RINGS]; + struct mlx4_qp drop_qp; struct work_struct mcast_task; struct work_struct mac_task; struct work_struct watchdog_task; @@ -489,8 +510,9 @@ struct mlx4_en_priv { struct mlx4_en_pkt_stats pkstats; struct mlx4_en_port_stats port_stats; u64 stats_bitmap; - char *mc_addrs; - int mc_addrs_cnt; + struct list_head mc_list; + struct list_head curr_list; + u64 broadcast_id; struct mlx4_en_stat_out_mbox hw_stats; int vids[128]; bool wol; @@ -565,6 +587,8 @@ void mlx4_en_unmap_buffer(struct mlx4_buf *buf); void mlx4_en_calc_rx_buf(struct net_device *dev); int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); +int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv); +void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv); int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring); void mlx4_en_rx_irq(struct mlx4_cq *mcq); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index a8fb52992c64..a51d1b9bf1d1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -75,21 +75,54 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table) table->total = 0; } -static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) +static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, + u64 mac, int *qpn, u64 *reg_id) { - struct mlx4_qp qp; - u8 gid[16] = {0}; __be64 be_mac; int err; - qp.qpn = *qpn; - - mac &= 0xffffffffffffULL; + mac &= MLX4_MAC_MASK; be_mac = cpu_to_be64(mac << 16); - memcpy(&gid[10], &be_mac, ETH_ALEN); - gid[5] = port; - err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: { + struct mlx4_qp qp; + u8 gid[16] = {0}; + + qp.qpn = *qpn; + memcpy(&gid[10], &be_mac, ETH_ALEN); + gid[5] = port; + + err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); + break; + } + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + struct mlx4_spec_list spec_eth = { {NULL} }; + __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); + + struct mlx4_net_trans_rule rule = { + .queue_mode = MLX4_NET_TRANS_Q_FIFO, + .exclusive = 0, + .allow_loopback = 1, + .promisc_mode = MLX4_FS_PROMISC_NONE, + .priority = MLX4_DOMAIN_NIC, + }; + + rule.port = port; + rule.qpn = *qpn; + INIT_LIST_HEAD(&rule.list); + + spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(spec_eth.eth.dst_mac, &be_mac, ETH_ALEN); + memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); + list_add_tail(&spec_eth.list, &rule.list); + + err = mlx4_flow_attach(dev, &rule, reg_id); + break; + } + default: + return -EINVAL; + } if (err) mlx4_warn(dev, "Failed Attaching Unicast\n"); @@ -97,19 +130,30 @@ static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) } static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port, - u64 mac, int qpn) + u64 mac, int qpn, u64 reg_id) { - struct mlx4_qp qp; - u8 gid[16] = {0}; - __be64 be_mac; + switch (dev->caps.steering_mode) { + case MLX4_STEERING_MODE_B0: { + struct mlx4_qp qp; + u8 gid[16] = {0}; + __be64 be_mac; - qp.qpn = qpn; - mac &= 0xffffffffffffULL; - be_mac = cpu_to_be64(mac << 16); - memcpy(&gid[10], &be_mac, ETH_ALEN); - gid[5] = port; + qp.qpn = qpn; + mac &= MLX4_MAC_MASK; + be_mac = cpu_to_be64(mac << 16); + memcpy(&gid[10], &be_mac, ETH_ALEN); + gid[5] = port; - mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); + mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); + break; + } + case MLX4_STEERING_MODE_DEVICE_MANAGED: { + mlx4_flow_detach(dev, reg_id); + break; + } + default: + mlx4_err(dev, "Invalid steering mode.\n"); + } } static int validate_index(struct mlx4_dev *dev, @@ -144,6 +188,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) struct mlx4_mac_entry *entry; int index = 0; int err = 0; + u64 reg_id; mlx4_dbg(dev, "Registering MAC: 0x%llx for adding\n", (unsigned long long) mac); @@ -155,7 +200,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) return err; } - if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) { + if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { *qpn = info->base_qpn + index; return 0; } @@ -167,7 +212,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) goto qp_err; } - err = mlx4_uc_steer_add(dev, port, mac, qpn); + err = mlx4_uc_steer_add(dev, port, mac, qpn, ®_id); if (err) goto steer_err; @@ -177,6 +222,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn) goto alloc_err; } entry->mac = mac; + entry->reg_id = reg_id; err = radix_tree_insert(&info->mac_tree, *qpn, entry); if (err) goto insert_err; @@ -186,7 +232,7 @@ insert_err: kfree(entry); alloc_err: - mlx4_uc_steer_release(dev, port, mac, *qpn); + mlx4_uc_steer_release(dev, port, mac, *qpn, reg_id); steer_err: mlx4_qp_release_range(dev, *qpn, 1); @@ -206,13 +252,14 @@ void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn) (unsigned long long) mac); mlx4_unregister_mac(dev, port, mac); - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) { + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { entry = radix_tree_lookup(&info->mac_tree, qpn); if (entry) { mlx4_dbg(dev, "Releasing qp: port %d, mac 0x%llx," " qpn %d\n", port, (unsigned long long) mac, qpn); - mlx4_uc_steer_release(dev, port, entry->mac, qpn); + mlx4_uc_steer_release(dev, port, entry->mac, + qpn, entry->reg_id); mlx4_qp_release_range(dev, qpn, 1); radix_tree_delete(&info->mac_tree, qpn); kfree(entry); @@ -359,15 +406,18 @@ int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac) int index = qpn - info->base_qpn; int err = 0; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) { + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) { entry = radix_tree_lookup(&info->mac_tree, qpn); if (!entry) return -EINVAL; - mlx4_uc_steer_release(dev, port, entry->mac, qpn); + mlx4_uc_steer_release(dev, port, entry->mac, + qpn, entry->reg_id); mlx4_unregister_mac(dev, port, entry->mac); entry->mac = new_mac; + entry->reg_id = 0; mlx4_register_mac(dev, port, new_mac); - err = mlx4_uc_steer_add(dev, port, entry->mac, &qpn); + err = mlx4_uc_steer_add(dev, port, entry->mac, + &qpn, &entry->reg_id); return err; } @@ -803,8 +853,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ? MCAST_DIRECT : MCAST_DEFAULT; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER && - dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) + if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) return 0; mailbox = mlx4_alloc_cmd_mailbox(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c index b83bc928d52a..9ee4725363d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/profile.c +++ b/drivers/net/ethernet/mellanox/mlx4/profile.c @@ -237,13 +237,19 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, init_hca->mtt_base = profile[i].start; break; case MLX4_RES_MCG: - dev->caps.num_mgms = profile[i].num >> 1; - dev->caps.num_amgms = profile[i].num >> 1; init_hca->mc_base = profile[i].start; init_hca->log_mc_entry_sz = ilog2(mlx4_get_mgm_entry_size(dev)); init_hca->log_mc_table_sz = profile[i].log_num; - init_hca->log_mc_hash_sz = profile[i].log_num - 1; + if (dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + dev->caps.num_mgms = profile[i].num; + } else { + init_hca->log_mc_hash_sz = + profile[i].log_num - 1; + dev->caps.num_mgms = profile[i].num >> 1; + dev->caps.num_amgms = profile[i].num >> 1; + } break; default: break; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 766b8c5a235e..c3fa91986190 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -57,7 +57,8 @@ struct mac_res { struct res_common { struct list_head list; - u32 res_id; + struct rb_node node; + u64 res_id; int owner; int state; int from_state; @@ -189,6 +190,58 @@ struct res_xrcdn { int port; }; +enum res_fs_rule_states { + RES_FS_RULE_BUSY = RES_ANY_BUSY, + RES_FS_RULE_ALLOCATED, +}; + +struct res_fs_rule { + struct res_common com; +}; + +static void *res_tracker_lookup(struct rb_root *root, u64 res_id) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct res_common *res = container_of(node, struct res_common, + node); + + if (res_id < res->res_id) + node = node->rb_left; + else if (res_id > res->res_id) + node = node->rb_right; + else + return res; + } + return NULL; +} + +static int res_tracker_insert(struct rb_root *root, struct res_common *res) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct res_common *this = container_of(*new, struct res_common, + node); + + parent = *new; + if (res->res_id < this->res_id) + new = &((*new)->rb_left); + else if (res->res_id > this->res_id) + new = &((*new)->rb_right); + else + return -EEXIST; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&res->node, parent, new); + rb_insert_color(&res->node, root); + + return 0; +} + /* For Debug uses */ static const char *ResourceType(enum mlx4_resource rt) { @@ -201,6 +254,7 @@ static const char *ResourceType(enum mlx4_resource rt) case RES_MAC: return "RES_MAC"; case RES_EQ: return "RES_EQ"; case RES_COUNTER: return "RES_COUNTER"; + case RES_FS_RULE: return "RES_FS_RULE"; case RES_XRCD: return "RES_XRCD"; default: return "Unknown resource type !!!"; }; @@ -228,8 +282,7 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) mlx4_dbg(dev, "Started init_resource_tracker: %ld slaves\n", dev->num_slaves); for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) - INIT_RADIX_TREE(&priv->mfunc.master.res_tracker.res_tree[i], - GFP_ATOMIC|__GFP_NOWARN); + priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT; spin_lock_init(&priv->mfunc.master.res_tracker.lock); return 0 ; @@ -277,11 +330,11 @@ static void *find_res(struct mlx4_dev *dev, int res_id, { struct mlx4_priv *priv = mlx4_priv(dev); - return radix_tree_lookup(&priv->mfunc.master.res_tracker.res_tree[type], - res_id); + return res_tracker_lookup(&priv->mfunc.master.res_tracker.res_tree[type], + res_id); } -static int get_res(struct mlx4_dev *dev, int slave, int res_id, +static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, enum mlx4_resource type, void *res) { @@ -307,7 +360,7 @@ static int get_res(struct mlx4_dev *dev, int slave, int res_id, r->from_state = r->state; r->state = RES_ANY_BUSY; - mlx4_dbg(dev, "res %s id 0x%x to busy\n", + mlx4_dbg(dev, "res %s id 0x%llx to busy\n", ResourceType(type), r->res_id); if (res) @@ -320,7 +373,7 @@ exit: int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource type, - int res_id, int *slave) + u64 res_id, int *slave) { struct res_common *r; @@ -341,7 +394,7 @@ int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, return err; } -static void put_res(struct mlx4_dev *dev, int slave, int res_id, +static void put_res(struct mlx4_dev *dev, int slave, u64 res_id, enum mlx4_resource type) { struct res_common *r; @@ -473,7 +526,21 @@ static struct res_common *alloc_xrcdn_tr(int id) return &ret->com; } -static struct res_common *alloc_tr(int id, enum mlx4_resource type, int slave, +static struct res_common *alloc_fs_rule_tr(u64 id) +{ + struct res_fs_rule *ret; + + ret = kzalloc(sizeof *ret, GFP_KERNEL); + if (!ret) + return NULL; + + ret->com.res_id = id; + ret->com.state = RES_FS_RULE_ALLOCATED; + + return &ret->com; +} + +static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, int extra) { struct res_common *ret; @@ -506,6 +573,9 @@ static struct res_common *alloc_tr(int id, enum mlx4_resource type, int slave, case RES_XRCD: ret = alloc_xrcdn_tr(id); break; + case RES_FS_RULE: + ret = alloc_fs_rule_tr(id); + break; default: return NULL; } @@ -515,7 +585,7 @@ static struct res_common *alloc_tr(int id, enum mlx4_resource type, int slave, return ret; } -static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count, +static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { int i; @@ -523,7 +593,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count, struct mlx4_priv *priv = mlx4_priv(dev); struct res_common **res_arr; struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; - struct radix_tree_root *root = &tracker->res_tree[type]; + struct rb_root *root = &tracker->res_tree[type]; res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL); if (!res_arr) @@ -546,7 +616,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count, err = -EEXIST; goto undo; } - err = radix_tree_insert(root, base + i, res_arr[i]); + err = res_tracker_insert(root, res_arr[i]); if (err) goto undo; list_add_tail(&res_arr[i]->list, @@ -559,7 +629,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count, undo: for (--i; i >= base; --i) - radix_tree_delete(&tracker->res_tree[type], i); + rb_erase(&res_arr[i]->node, root); spin_unlock_irq(mlx4_tlock(dev)); @@ -638,6 +708,16 @@ static int remove_xrcdn_ok(struct res_xrcdn *res) return 0; } +static int remove_fs_rule_ok(struct res_fs_rule *res) +{ + if (res->com.state == RES_FS_RULE_BUSY) + return -EBUSY; + else if (res->com.state != RES_FS_RULE_ALLOCATED) + return -EPERM; + + return 0; +} + static int remove_cq_ok(struct res_cq *res) { if (res->com.state == RES_CQ_BUSY) @@ -679,15 +759,17 @@ static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra) return remove_counter_ok((struct res_counter *)res); case RES_XRCD: return remove_xrcdn_ok((struct res_xrcdn *)res); + case RES_FS_RULE: + return remove_fs_rule_ok((struct res_fs_rule *)res); default: return -EINVAL; } } -static int rem_res_range(struct mlx4_dev *dev, int slave, int base, int count, +static int rem_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, enum mlx4_resource type, int extra) { - int i; + u64 i; int err; struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; @@ -695,7 +777,7 @@ static int rem_res_range(struct mlx4_dev *dev, int slave, int base, int count, spin_lock_irq(mlx4_tlock(dev)); for (i = base; i < base + count; ++i) { - r = radix_tree_lookup(&tracker->res_tree[type], i); + r = res_tracker_lookup(&tracker->res_tree[type], i); if (!r) { err = -ENOENT; goto out; @@ -710,8 +792,8 @@ static int rem_res_range(struct mlx4_dev *dev, int slave, int base, int count, } for (i = base; i < base + count; ++i) { - r = radix_tree_lookup(&tracker->res_tree[type], i); - radix_tree_delete(&tracker->res_tree[type], i); + r = res_tracker_lookup(&tracker->res_tree[type], i); + rb_erase(&r->node, &tracker->res_tree[type]); list_del(&r->list); kfree(r); } @@ -733,7 +815,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, int err = 0; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[RES_QP], qpn); + r = res_tracker_lookup(&tracker->res_tree[RES_QP], qpn); if (!r) err = -ENOENT; else if (r->com.owner != slave) @@ -741,7 +823,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, else { switch (state) { case RES_QP_BUSY: - mlx4_dbg(dev, "%s: failed RES_QP, 0x%x\n", + mlx4_dbg(dev, "%s: failed RES_QP, 0x%llx\n", __func__, r->com.res_id); err = -EBUSY; break; @@ -750,7 +832,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, if (r->com.state == RES_QP_MAPPED && !alloc) break; - mlx4_dbg(dev, "failed RES_QP, 0x%x\n", r->com.res_id); + mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", r->com.res_id); err = -EINVAL; break; @@ -759,7 +841,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn, r->com.state == RES_QP_HW) break; else { - mlx4_dbg(dev, "failed RES_QP, 0x%x\n", + mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", r->com.res_id); err = -EINVAL; } @@ -797,7 +879,7 @@ static int mr_res_start_move_to(struct mlx4_dev *dev, int slave, int index, int err = 0; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[RES_MPT], index); + r = res_tracker_lookup(&tracker->res_tree[RES_MPT], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) @@ -850,7 +932,7 @@ static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, int err = 0; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[RES_EQ], index); + r = res_tracker_lookup(&tracker->res_tree[RES_EQ], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) @@ -898,7 +980,7 @@ static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn, int err; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[RES_CQ], cqn); + r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn); if (!r) err = -ENOENT; else if (r->com.owner != slave) @@ -952,7 +1034,7 @@ static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, int err = 0; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[RES_SRQ], index); + r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index); if (!r) err = -ENOENT; else if (r->com.owner != slave) @@ -1001,7 +1083,7 @@ static void res_abort_move(struct mlx4_dev *dev, int slave, struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[type], id); + r = res_tracker_lookup(&tracker->res_tree[type], id); if (r && (r->owner == slave)) r->state = r->from_state; spin_unlock_irq(mlx4_tlock(dev)); @@ -1015,7 +1097,7 @@ static void res_end_move(struct mlx4_dev *dev, int slave, struct res_common *r; spin_lock_irq(mlx4_tlock(dev)); - r = radix_tree_lookup(&tracker->res_tree[type], id); + r = res_tracker_lookup(&tracker->res_tree[type], id); if (r && (r->owner == slave)) r->state = r->to_state; spin_unlock_irq(mlx4_tlock(dev)); @@ -2695,6 +2777,60 @@ ex_put: return err; } +int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + + if (dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED) + return -EOPNOTSUPP; + + err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param, + vhcr->in_modifier, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + return err; + + err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0); + if (err) { + mlx4_err(dev, "Fail to add flow steering resources.\n "); + /* detach rule*/ + mlx4_cmd(dev, vhcr->out_param, 0, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + } + return err; +} + +int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + + if (dev->caps.steering_mode != + MLX4_STEERING_MODE_DEVICE_MANAGED) + return -EOPNOTSUPP; + + err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); + if (err) { + mlx4_err(dev, "Fail to remove flow steering resources.\n "); + return err; + } + + err = mlx4_cmd(dev, vhcr->in_param, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + return err; +} + enum { BUSY_MAX_RETRIES = 10 }; @@ -2751,7 +2887,7 @@ static int _move_all_busy(struct mlx4_dev *dev, int slave, if (r->state == RES_ANY_BUSY) { if (print) mlx4_dbg(dev, - "%s id 0x%x is busy\n", + "%s id 0x%llx is busy\n", ResourceType(type), r->res_id); ++busy; @@ -2817,8 +2953,8 @@ static void rem_slave_qps(struct mlx4_dev *dev, int slave) switch (state) { case RES_QP_RESERVED: spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_QP], - qp->com.res_id); + rb_erase(&qp->com.node, + &tracker->res_tree[RES_QP]); list_del(&qp->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(qp); @@ -2888,8 +3024,8 @@ static void rem_slave_srqs(struct mlx4_dev *dev, int slave) case RES_SRQ_ALLOCATED: __mlx4_srq_free_icm(dev, srqn); spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_SRQ], - srqn); + rb_erase(&srq->com.node, + &tracker->res_tree[RES_SRQ]); list_del(&srq->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(srq); @@ -2954,8 +3090,8 @@ static void rem_slave_cqs(struct mlx4_dev *dev, int slave) case RES_CQ_ALLOCATED: __mlx4_cq_free_icm(dev, cqn); spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_CQ], - cqn); + rb_erase(&cq->com.node, + &tracker->res_tree[RES_CQ]); list_del(&cq->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(cq); @@ -3017,8 +3153,8 @@ static void rem_slave_mrs(struct mlx4_dev *dev, int slave) case RES_MPT_RESERVED: __mlx4_mr_release(dev, mpt->key); spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_MPT], - mptn); + rb_erase(&mpt->com.node, + &tracker->res_tree[RES_MPT]); list_del(&mpt->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(mpt); @@ -3086,8 +3222,8 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave) __mlx4_free_mtt_range(dev, base, mtt->order); spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_MTT], - base); + rb_erase(&mtt->com.node, + &tracker->res_tree[RES_MTT]); list_del(&mtt->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(mtt); @@ -3104,6 +3240,58 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave) spin_unlock_irq(mlx4_tlock(dev)); } +static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_resource_tracker *tracker = + &priv->mfunc.master.res_tracker; + struct list_head *fs_rule_list = + &tracker->slave_list[slave].res_list[RES_FS_RULE]; + struct res_fs_rule *fs_rule; + struct res_fs_rule *tmp; + int state; + u64 base; + int err; + + err = move_all_busy(dev, slave, RES_FS_RULE); + if (err) + mlx4_warn(dev, "rem_slave_fs_rule: Could not move all mtts to busy for slave %d\n", + slave); + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(fs_rule, tmp, fs_rule_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (fs_rule->com.owner == slave) { + base = fs_rule->com.res_id; + state = fs_rule->com.from_state; + while (state != 0) { + switch (state) { + case RES_FS_RULE_ALLOCATED: + /* detach rule */ + err = mlx4_cmd(dev, base, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + spin_lock_irq(mlx4_tlock(dev)); + rb_erase(&fs_rule->com.node, + &tracker->res_tree[RES_FS_RULE]); + list_del(&fs_rule->com.list); + spin_unlock_irq(mlx4_tlock(dev)); + kfree(fs_rule); + state = 0; + break; + + default: + state = 0; + } + } + } + spin_lock_irq(mlx4_tlock(dev)); + } + spin_unlock_irq(mlx4_tlock(dev)); +} + static void rem_slave_eqs(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -3133,8 +3321,8 @@ static void rem_slave_eqs(struct mlx4_dev *dev, int slave) switch (state) { case RES_EQ_RESERVED: spin_lock_irq(mlx4_tlock(dev)); - radix_tree_delete(&tracker->res_tree[RES_EQ], - eqn); + rb_erase(&eq->com.node, + &tracker->res_tree[RES_EQ]); list_del(&eq->com.list); spin_unlock_irq(mlx4_tlock(dev)); kfree(eq); @@ -3191,7 +3379,8 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave) list_for_each_entry_safe(counter, tmp, counter_list, com.list) { if (counter->com.owner == slave) { index = counter->com.res_id; - radix_tree_delete(&tracker->res_tree[RES_COUNTER], index); + rb_erase(&counter->com.node, + &tracker->res_tree[RES_COUNTER]); list_del(&counter->com.list); kfree(counter); __mlx4_counter_free(dev, index); @@ -3220,7 +3409,7 @@ static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave) list_for_each_entry_safe(xrcd, tmp, xrcdn_list, com.list) { if (xrcd->com.owner == slave) { xrcdn = xrcd->com.res_id; - radix_tree_delete(&tracker->res_tree[RES_XRCD], xrcdn); + rb_erase(&xrcd->com.node, &tracker->res_tree[RES_XRCD]); list_del(&xrcd->com.list); kfree(xrcd); __mlx4_xrcd_free(dev, xrcdn); @@ -3244,5 +3433,6 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) rem_slave_mtts(dev, slave); rem_slave_counters(dev, slave); rem_slave_xrcdns(dev, slave); + rem_slave_fs_rule(dev, slave); mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); } diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 90153fc983cb..fa85cf1353fd 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3775,7 +3775,7 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp) mgp->num_slices = 1; msix_cap = pci_find_capability(pdev, PCI_CAP_ID_MSIX); - ncpus = num_online_cpus(); + ncpus = netif_get_num_default_rss_queues(); if (myri10ge_max_slices == 1 || msix_cap == 0 || (myri10ge_max_slices == -1 && ncpus < 2)) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 2578eb1f025d..2fd1edbc5e0e 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3687,7 +3687,8 @@ static int __devinit vxge_config_vpaths( return 0; if (!driver_config->g_no_cpus) - driver_config->g_no_cpus = num_online_cpus(); + driver_config->g_no_cpus = + netif_get_num_default_rss_queues(); driver_config->vpath_per_dev = driver_config->g_no_cpus >> 1; if (!driver_config->vpath_per_dev) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 09d8d33171df..3c3499d928b9 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -4649,7 +4649,7 @@ static int __devinit qlge_probe(struct pci_dev *pdev, int err = 0; ndev = alloc_etherdev_mq(sizeof(struct ql_adapter), - min(MAX_CPUS, (int)num_online_cpus())); + min(MAX_CPUS, netif_get_num_default_rss_queues())); if (!ndev) return -ENOMEM; diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index d1827e887f4e..9acc026f62e8 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1256,7 +1256,6 @@ static void __devexit r6040_remove_one(struct pci_dev *pdev) kfree(lp->mii_bus->irq); mdiobus_free(lp->mii_bus); netif_napi_del(&lp->napi); - pci_set_drvdata(pdev, NULL); pci_iounmap(pdev, lp->base); pci_release_regions(pdev); free_netdev(dev); diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c index 3ae80eccd0ef..6564c32d3af0 100644 --- a/drivers/net/usb/asix.c +++ b/drivers/net/usb/asix.c @@ -358,14 +358,30 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, padlen = ((skb->len + 4) & (dev->maxpacket - 1)) ? 0 : 4; - if ((!skb_cloned(skb)) && - ((headroom + tailroom) >= (4 + padlen))) { - if ((headroom < 4) || (tailroom < padlen)) { + /* We need to push 4 bytes in front of frame (packet_len) + * and maybe add 4 bytes after the end (if padlen is 4) + * + * Avoid skb_copy_expand() expensive call, using following rules : + * - We are allowed to push 4 bytes in headroom if skb_header_cloned() + * is false (and if we have 4 bytes of headroom) + * - We are allowed to put 4 bytes at tail if skb_cloned() + * is false (and if we have 4 bytes of tailroom) + * + * TCP packets for example are cloned, but skb_header_release() + * was called in tcp stack, allowing us to use headroom for our needs. + */ + if (!skb_header_cloned(skb) && + !(padlen && skb_cloned(skb)) && + headroom + tailroom >= 4 + padlen) { + /* following should not happen, but better be safe */ + if (headroom < 4 || + tailroom < padlen) { skb->data = memmove(skb->head + 4, skb->data, skb->len); skb_set_tail_pointer(skb, skb->len); } } else { struct sk_buff *skb2; + skb2 = skb_copy_expand(skb, 4, padlen, flags); dev_kfree_skb_any(skb); skb = skb2; @@ -373,8 +389,8 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, return NULL; } + packet_len = ((skb->len ^ 0x0000ffff) << 16) + skb->len; skb_push(skb, 4); - packet_len = (((skb->len - 4) ^ 0x0000ffff) << 16) + (skb->len - 4); cpu_to_le32s(&packet_len); skb_copy_to_linear_data(skb, &packet_len, sizeof(packet_len)); @@ -880,6 +896,8 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->netdev_ops = &ax88172_netdev_ops; dev->net->ethtool_ops = &ax88172_ethtool_ops; + dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */ + dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */ asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET); asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, @@ -1075,6 +1093,8 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->netdev_ops = &ax88772_netdev_ops; dev->net->ethtool_ops = &ax88772_ethtool_ops; + dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */ + dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */ embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0); diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c index da14432806c6..efc4b7f308cf 100644 --- a/drivers/oprofile/oprofile_perf.c +++ b/drivers/oprofile/oprofile_perf.c @@ -25,7 +25,7 @@ static int oprofile_perf_enabled; static DEFINE_MUTEX(oprofile_perf_mutex); static struct op_counter_config *counter_config; -static struct perf_event **perf_events[nr_cpumask_bits]; +static struct perf_event **perf_events[NR_CPUS]; static int num_counters; /* diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 7be5e9775691..73ac63d901c9 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2700,10 +2700,11 @@ int inline qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb) rcu_read_lock(); dst = skb_dst(skb); if (dst) - n = dst_get_neighbour_noref(dst); + n = dst_neigh_lookup_skb(dst, skb); if (n) { cast_type = n->type; rcu_read_unlock(); + neigh_release(n); if ((cast_type == RTN_BROADCAST) || (cast_type == RTN_MULTICAST) || (cast_type == RTN_ANYCAST)) diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c index 36739da8bc15..49692a1ac44a 100644 --- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c +++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c @@ -966,7 +966,8 @@ static int init_act_open(struct cxgbi_sock *csk) csk->saddr.sin_addr.s_addr = chba->ipv4addr; csk->rss_qid = 0; - csk->l2t = t3_l2t_get(t3dev, dst, ndev); + csk->l2t = t3_l2t_get(t3dev, dst, ndev, + &csk->daddr.sin_addr.s_addr); if (!csk->l2t) { pr_err("NO l2t available.\n"); return -EINVAL; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index 5a4a3bfc60cf..cc9a06897f34 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1142,7 +1142,7 @@ static int init_act_open(struct cxgbi_sock *csk) cxgbi_sock_set_flag(csk, CTPF_HAS_ATID); cxgbi_sock_get(csk); - n = dst_get_neighbour_noref(csk->dst); + n = dst_neigh_lookup(csk->dst, &csk->daddr.sin_addr.s_addr); if (!n) { pr_err("%s, can't get neighbour of csk->dst.\n", ndev->name); goto rel_resource; @@ -1182,9 +1182,12 @@ static int init_act_open(struct cxgbi_sock *csk) cxgbi_sock_set_state(csk, CTP_ACTIVE_OPEN); send_act_open_req(csk, skb, csk->l2t); + neigh_release(n); return 0; rel_resource: + if (n) + neigh_release(n); if (skb) __kfree_skb(skb); return -EINVAL; diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index d9253db1d0e2..b44c1cff3114 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -494,7 +494,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr) goto err_out; } dst = &rt->dst; - n = dst_get_neighbour_noref(dst); + n = dst_neigh_lookup(dst, &daddr->sin_addr.s_addr); if (!n) { err = -ENODEV; goto rel_rt; @@ -506,7 +506,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr) &daddr->sin_addr.s_addr, ntohs(daddr->sin_port), ndev->name); err = -ENETUNREACH; - goto rel_rt; + goto rel_neigh; } if (ndev->flags & IFF_LOOPBACK) { @@ -521,7 +521,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr) pr_info("dst %pI4, %s, NOT cxgbi device.\n", &daddr->sin_addr.s_addr, ndev->name); err = -ENETUNREACH; - goto rel_rt; + goto rel_neigh; } log_debug(1 << CXGBI_DBG_SOCK, "route to %pI4 :%u, ndev p#%d,%s, cdev 0x%p.\n", @@ -531,7 +531,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr) csk = cxgbi_sock_create(cdev); if (!csk) { err = -ENOMEM; - goto rel_rt; + goto rel_neigh; } csk->cdev = cdev; csk->port_id = port; @@ -541,9 +541,13 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr) csk->daddr.sin_port = daddr->sin_port; csk->daddr.sin_family = daddr->sin_family; csk->saddr.sin_addr.s_addr = fl4.saddr; + neigh_release(n); return csk; +rel_neigh: + neigh_release(n); + rel_rt: ip_rt_put(rt); if (csk) diff --git a/fs/splice.c b/fs/splice.c index c9f1318a3b82..7bf08fa22ec9 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -273,13 +273,16 @@ void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) * Check if we need to grow the arrays holding pages and partial page * descriptions. */ -int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) +int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { - if (pipe->buffers <= PIPE_DEF_BUFFERS) + unsigned int buffers = ACCESS_ONCE(pipe->buffers); + + spd->nr_pages_max = buffers; + if (buffers <= PIPE_DEF_BUFFERS) return 0; - spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); - spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); + spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL); + spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL); if (spd->pages && spd->partial) return 0; @@ -289,10 +292,9 @@ int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) return -ENOMEM; } -void splice_shrink_spd(struct pipe_inode_info *pipe, - struct splice_pipe_desc *spd) +void splice_shrink_spd(struct splice_pipe_desc *spd) { - if (pipe->buffers <= PIPE_DEF_BUFFERS) + if (spd->nr_pages_max <= PIPE_DEF_BUFFERS) return; kfree(spd->pages); @@ -315,6 +317,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &page_cache_pipe_buf_ops, .spd_release = spd_release_page, @@ -326,7 +329,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, index = *ppos >> PAGE_CACHE_SHIFT; loff = *ppos & ~PAGE_CACHE_MASK; req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - nr_pages = min(req_pages, pipe->buffers); + nr_pages = min(req_pages, spd.nr_pages_max); /* * Lookup the (hopefully) full range of pages we need. @@ -497,7 +500,7 @@ fill_it: if (spd.nr_pages) error = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return error; } @@ -598,6 +601,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &default_pipe_buf_ops, .spd_release = spd_release_page, @@ -608,8 +612,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, res = -ENOMEM; vec = __vec; - if (pipe->buffers > PIPE_DEF_BUFFERS) { - vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); + if (spd.nr_pages_max > PIPE_DEF_BUFFERS) { + vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL); if (!vec) goto shrink_ret; } @@ -617,7 +621,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, offset = *ppos & ~PAGE_CACHE_MASK; nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { + for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) { struct page *page; page = alloc_page(GFP_USER); @@ -665,7 +669,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, shrink_ret: if (vec != __vec) kfree(vec); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return res; err: @@ -1614,6 +1618,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &user_page_pipe_buf_ops, .spd_release = spd_release_page, @@ -1629,13 +1634,13 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, spd.partial, false, - pipe->buffers); + spd.nr_pages_max); if (spd.nr_pages <= 0) ret = spd.nr_pages; else ret = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return ret; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba43f408baa3..07954b05b86c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -827,7 +827,6 @@ extern bool __blk_end_request_err(struct request *rq, int error); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -extern void blk_abort_queue(struct request_queue *); extern void blk_unprep_request(struct request *); /* diff --git a/include/linux/irq.h b/include/linux/irq.h index 61f5cec031e0..a5261e3d2e3c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -301,8 +301,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_pm_shutdown: function called from core code on shutdown once per chip * @irq_print_chip: optional to print special chip info in show_interrupts * @flags: chip specific flags - * - * @release: release function solely used by UML */ struct irq_chip { const char *name; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 1f3860a8a109..260695186256 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -154,6 +154,10 @@ enum { /* set port opcode modifiers */ MLX4_SET_PORT_PRIO2TC = 0x8, MLX4_SET_PORT_SCHEDULER = 0x9, + + /* register/delete flow steering network rules */ + MLX4_QP_FLOW_STEERING_ATTACH = 0x65, + MLX4_QP_FLOW_STEERING_DETACH = 0x66, }; enum { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6a8f002b8ed3..6f0d133cc7ad 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -70,6 +70,36 @@ enum { MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1) }; +/* Driver supports 3 diffrent device methods to manage traffic steering: + * -device managed - High level API for ib and eth flow steering. FW is + * managing flow steering tables. + * - B0 steering mode - Common low level API for ib and (if supported) eth. + * - A0 steering mode - Limited low level API for eth. In case of IB, + * B0 mode is in use. + */ +enum { + MLX4_STEERING_MODE_A0, + MLX4_STEERING_MODE_B0, + MLX4_STEERING_MODE_DEVICE_MANAGED +}; + +static inline const char *mlx4_steering_mode_str(int steering_mode) +{ + switch (steering_mode) { + case MLX4_STEERING_MODE_A0: + return "A0 steering"; + + case MLX4_STEERING_MODE_B0: + return "B0 steering"; + + case MLX4_STEERING_MODE_DEVICE_MANAGED: + return "Device managed flow steering"; + + default: + return "Unrecognize steering mode"; + } +} + enum { MLX4_DEV_CAP_FLAG_RC = 1LL << 0, MLX4_DEV_CAP_FLAG_UC = 1LL << 1, @@ -102,7 +132,8 @@ enum { enum { MLX4_DEV_CAP_FLAG2_RSS = 1LL << 0, MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1, - MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2 + MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2, + MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3 }; #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) @@ -295,6 +326,8 @@ struct mlx4_caps { int num_amgms; int reserved_mcgs; int num_qp_per_mgm; + int steering_mode; + int fs_log_max_ucast_qp_range_size; int num_pds; int reserved_pds; int max_xrcds; @@ -509,6 +542,8 @@ struct mlx4_dev { u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; int num_vfs; + u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; + u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; }; struct mlx4_init_port_param { @@ -623,9 +658,99 @@ int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot); int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - int block_mcast_loopback, enum mlx4_protocol protocol); + u8 port, int block_mcast_loopback, + enum mlx4_protocol protocol, u64 *reg_id); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], - enum mlx4_protocol protocol); + enum mlx4_protocol protocol, u64 reg_id); + +enum { + MLX4_DOMAIN_UVERBS = 0x1000, + MLX4_DOMAIN_ETHTOOL = 0x2000, + MLX4_DOMAIN_RFS = 0x3000, + MLX4_DOMAIN_NIC = 0x5000, +}; + +enum mlx4_net_trans_rule_id { + MLX4_NET_TRANS_RULE_ID_ETH = 0, + MLX4_NET_TRANS_RULE_ID_IB, + MLX4_NET_TRANS_RULE_ID_IPV6, + MLX4_NET_TRANS_RULE_ID_IPV4, + MLX4_NET_TRANS_RULE_ID_TCP, + MLX4_NET_TRANS_RULE_ID_UDP, + MLX4_NET_TRANS_RULE_NUM, /* should be last */ +}; + +enum mlx4_net_trans_promisc_mode { + MLX4_FS_PROMISC_NONE = 0, + MLX4_FS_PROMISC_UPLINK, + /* For future use. Not implemented yet */ + MLX4_FS_PROMISC_FUNCTION_PORT, + MLX4_FS_PROMISC_ALL_MULTI, +}; + +struct mlx4_spec_eth { + u8 dst_mac[6]; + u8 dst_mac_msk[6]; + u8 src_mac[6]; + u8 src_mac_msk[6]; + u8 ether_type_enable; + __be16 ether_type; + __be16 vlan_id_msk; + __be16 vlan_id; +}; + +struct mlx4_spec_tcp_udp { + __be16 dst_port; + __be16 dst_port_msk; + __be16 src_port; + __be16 src_port_msk; +}; + +struct mlx4_spec_ipv4 { + __be32 dst_ip; + __be32 dst_ip_msk; + __be32 src_ip; + __be32 src_ip_msk; +}; + +struct mlx4_spec_ib { + __be32 r_qpn; + __be32 qpn_msk; + u8 dst_gid[16]; + u8 dst_gid_msk[16]; +}; + +struct mlx4_spec_list { + struct list_head list; + enum mlx4_net_trans_rule_id id; + union { + struct mlx4_spec_eth eth; + struct mlx4_spec_ib ib; + struct mlx4_spec_ipv4 ipv4; + struct mlx4_spec_tcp_udp tcp_udp; + }; +}; + +enum mlx4_net_trans_hw_rule_queue { + MLX4_NET_TRANS_Q_FIFO, + MLX4_NET_TRANS_Q_LIFO, +}; + +struct mlx4_net_trans_rule { + struct list_head list; + enum mlx4_net_trans_hw_rule_queue queue_mode; + bool exclusive; + bool allow_loopback; + enum mlx4_net_trans_promisc_mode promisc_mode; + u8 port; + u16 priority; + u32 qpn; +}; + +int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, + enum mlx4_net_trans_promisc_mode mode); +int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, + enum mlx4_net_trans_promisc_mode mode); int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); @@ -668,4 +793,8 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); +int mlx4_flow_attach(struct mlx4_dev *dev, + struct mlx4_net_trans_rule *rule, u64 *reg_id); +int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); + #endif /* MLX4_DEVICE_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2c2ecea28a1b..ab0251d541ab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2119,6 +2119,9 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev, #endif } +#define DEFAULT_MAX_NUM_RSS_QUEUES (8) +extern int netif_get_num_default_rss_queues(void); + /* Use this variant when it is known for sure that it * is executing from hardware interrupt context or with hardware interrupts * disabled. diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 768883370080..f649f7423ca2 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -7,6 +7,8 @@ enum cntl_msg_types { IPCTNL_MSG_CT_GET, IPCTNL_MSG_CT_DELETE, IPCTNL_MSG_CT_GET_CTRZERO, + IPCTNL_MSG_CT_GET_STATS_CPU, + IPCTNL_MSG_CT_GET_STATS, IPCTNL_MSG_MAX }; @@ -15,6 +17,7 @@ enum ctnl_exp_msg_types { IPCTNL_MSG_EXP_NEW, IPCTNL_MSG_EXP_GET, IPCTNL_MSG_EXP_DELETE, + IPCTNL_MSG_EXP_GET_STATS_CPU, IPCTNL_MSG_EXP_MAX }; @@ -203,4 +206,39 @@ enum ctattr_secctx { }; #define CTA_SECCTX_MAX (__CTA_SECCTX_MAX - 1) +enum ctattr_stats_cpu { + CTA_STATS_UNSPEC, + CTA_STATS_SEARCHED, + CTA_STATS_FOUND, + CTA_STATS_NEW, + CTA_STATS_INVALID, + CTA_STATS_IGNORE, + CTA_STATS_DELETE, + CTA_STATS_DELETE_LIST, + CTA_STATS_INSERT, + CTA_STATS_INSERT_FAILED, + CTA_STATS_DROP, + CTA_STATS_EARLY_DROP, + CTA_STATS_ERROR, + CTA_STATS_SEARCH_RESTART, + __CTA_STATS_MAX, +}; +#define CTA_STATS_MAX (__CTA_STATS_MAX - 1) + +enum ctattr_stats_global { + CTA_STATS_GLOBAL_UNSPEC, + CTA_STATS_GLOBAL_ENTRIES, + __CTA_STATS_GLOBAL_MAX, +}; +#define CTA_STATS_GLOBAL_MAX (__CTA_STATS_GLOBAL_MAX - 1) + +enum ctattr_expect_stats { + CTA_STATS_EXP_UNSPEC, + CTA_STATS_EXP_NEW, + CTA_STATS_EXP_CREATE, + CTA_STATS_EXP_DELETE, + __CTA_STATS_EXP_MAX, +}; +#define CTA_STATS_EXP_MAX (__CTA_STATS_EXP_MAX - 1) + #endif /* _IPCONNTRACK_NETLINK_H */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index e0d8fd8d4d24..3b1c1360aedf 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -95,5 +95,6 @@ enum nfqnl_attr_config { /* Flags for NFQA_CFG_FLAGS */ #define NFQA_CFG_F_FAIL_OPEN (1 << 0) #define NFQA_CFG_F_CONNTRACK (1 << 1) +#define NFQA_CFG_F_MAX (1 << 2) #endif /* _NFNETLINK_QUEUE_H */ diff --git a/include/linux/splice.h b/include/linux/splice.h index 26e5b613deda..09a545a7dfa3 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -51,7 +51,8 @@ struct partial_page { struct splice_pipe_desc { struct page **pages; /* page map */ struct partial_page *partial; /* pages[] may not be contig */ - int nr_pages; /* number of pages in map */ + int nr_pages; /* number of populated pages in map */ + unsigned int nr_pages_max; /* pages[] & partial[] arrays size */ unsigned int flags; /* splice flags */ const struct pipe_buf_operations *ops;/* ops associated with output pipe */ void (*spd_release)(struct splice_pipe_desc *, unsigned int); @@ -85,9 +86,8 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, /* * for dynamic pipe sizing */ -extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *); -extern void splice_shrink_spd(struct pipe_inode_info *, - struct splice_pipe_desc *); +extern int splice_grow_spd(const struct pipe_inode_info *, struct splice_pipe_desc *); +extern void splice_shrink_spd(struct splice_pipe_desc *); extern void spd_release_page(struct splice_pipe_desc *, unsigned int); extern const struct pipe_buf_operations page_cache_pipe_buf_ops; diff --git a/include/net/arp.h b/include/net/arp.h index 4a1f3fb562eb..4617d9841132 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -15,24 +15,34 @@ static inline u32 arp_hashfn(u32 key, const struct net_device *dev, u32 hash_rnd return val * hash_rnd; } -static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) +static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { - struct neigh_hash_table *nht; + struct neigh_hash_table *nht = rcu_dereference_bh(arp_tbl.nht); struct neighbour *n; u32 hash_val; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(arp_tbl.nht); + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + key = 0; + hash_val = arp_hashfn(key, dev, nht->hash_rnd[0]) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; n = rcu_dereference_bh(n->next)) { - if (n->dev == dev && *(u32 *)n->primary_key == key) { - if (!atomic_inc_not_zero(&n->refcnt)) - n = NULL; - break; - } + if (n->dev == dev && *(u32 *)n->primary_key == key) + return n; } + + return NULL; +} + +static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv4_neigh_lookup_noref(dev, key); + if (n && !atomic_inc_not_zero(&n->refcnt)) + n = NULL; rcu_read_unlock_bh(); return n; diff --git a/include/net/dn_route.h b/include/net/dn_route.h index c507e05d172f..4f7d6a182381 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -67,6 +67,8 @@ extern void dn_rt_cache_flush(int delay); struct dn_route { struct dst_entry dst; + struct neighbour *n; + struct flowidn fld; __le16 rt_saddr; diff --git a/include/net/dst.h b/include/net/dst.h index f0bf3b8d5911..b2634e446613 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -42,7 +42,7 @@ struct dst_entry { struct dst_entry *from; }; struct dst_entry *path; - struct neighbour __rcu *_neighbour; + void *__pad0; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; #else @@ -51,7 +51,7 @@ struct dst_entry { int (*input)(struct sk_buff *); int (*output)(struct sk_buff *); - int flags; + unsigned short flags; #define DST_HOST 0x0001 #define DST_NOXFRM 0x0002 #define DST_NOPOLICY 0x0004 @@ -62,6 +62,8 @@ struct dst_entry { #define DST_FAKE_RTABLE 0x0080 #define DST_XFRM_TUNNEL 0x0100 + unsigned short pending_confirm; + short error; short obsolete; unsigned short header_len; /* more space at head required */ @@ -94,21 +96,6 @@ struct dst_entry { }; }; -static inline struct neighbour *dst_get_neighbour_noref(struct dst_entry *dst) -{ - return rcu_dereference(dst->_neighbour); -} - -static inline struct neighbour *dst_get_neighbour_noref_raw(struct dst_entry *dst) -{ - return rcu_dereference_raw(dst->_neighbour); -} - -static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh) -{ - rcu_assign_pointer(dst->_neighbour, neigh); -} - extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); extern const u32 dst_default_metrics[RTAX_MAX]; @@ -371,7 +358,8 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) extern int dst_discard(struct sk_buff *skb); extern void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, int flags); + int initial_ref, int initial_obsolete, + unsigned short flags); extern void __dst_free(struct dst_entry *dst); extern struct dst_entry *dst_destroy(struct dst_entry *dst); @@ -395,19 +383,35 @@ static inline void dst_rcu_free(struct rcu_head *head) static inline void dst_confirm(struct dst_entry *dst) { - if (dst) { - struct neighbour *n; + dst->pending_confirm = 1; +} + +static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, + struct sk_buff *skb) +{ + struct hh_cache *hh; - rcu_read_lock(); - n = dst_get_neighbour_noref(dst); - neigh_confirm(n); - rcu_read_unlock(); + if (unlikely(dst->pending_confirm)) { + n->confirmed = jiffies; + dst->pending_confirm = 0; } + + hh = &n->hh; + if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) + return neigh_hh_output(hh, skb); + else + return n->output(n, skb); } static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - return dst->ops->neigh_lookup(dst, daddr); + return dst->ops->neigh_lookup(dst, NULL, daddr); +} + +static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, + struct sk_buff *skb) +{ + return dst->ops->neigh_lookup(dst, skb, NULL); } static inline void dst_link_failure(struct sk_buff *skb) diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 3682a0a076c1..4badc86e45d1 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -26,7 +26,9 @@ struct dst_ops { void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, u32 mtu); int (*local_out)(struct sk_buff *skb); - struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, const void *daddr); + struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr); struct kmem_cache *kmem_cachep; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a192f7807659..0fedbd8d747a 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -86,6 +86,8 @@ struct fib6_table; struct rt6_info { struct dst_entry dst; + struct neighbour *n; + /* * Tail elements of dst_entry (__refcnt etc.) * and these elements (rarely used in hot path) are in diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3dc7c96bbeab..539c6721f810 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -220,11 +220,33 @@ extern void __net_exit fib4_rules_exit(struct net *net); extern u32 fib_rules_tclass(const struct fib_result *res); #endif -extern int fib_lookup(struct net *n, struct flowi4 *flp, struct fib_result *res); - extern struct fib_table *fib_new_table(struct net *net, u32 id); extern struct fib_table *fib_get_table(struct net *net, u32 id); +extern int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res); + +static inline int fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res) +{ + if (!net->ipv4.fib_has_custom_rules) { + if (net->ipv4.fib_local && + !fib_table_lookup(net->ipv4.fib_local, flp, res, + FIB_LOOKUP_NOREF)) + return 0; + if (net->ipv4.fib_main && + !fib_table_lookup(net->ipv4.fib_main, flp, res, + FIB_LOOKUP_NOREF)) + return 0; + if (net->ipv4.fib_default && + !fib_table_lookup(net->ipv4.fib_default, flp, res, + FIB_LOOKUP_NOREF)) + return 0; + return -ENETUNREACH; + } + return __fib_lookup(net, flp, res); +} + #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ @@ -236,9 +258,15 @@ extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, struct in_device *idev, u32 *itag); extern void fib_select_default(struct fib_result *res); #ifdef CONFIG_IP_ROUTE_CLASSID -extern int fib_num_tclassid_users; +static inline int fib_num_tclassid_users(struct net *net) +{ + return net->ipv4.fib_num_tclassid_users; +} #else -#define fib_num_tclassid_users 0 +static inline int fib_num_tclassid_users(struct net *net) +{ + return 0; +} #endif /* Exported by fib_semantics.c */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 6cdfeedb650b..344d8988842a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -202,9 +202,16 @@ extern struct neighbour * neigh_lookup(struct neigh_table *tbl, extern struct neighbour * neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey); -extern struct neighbour * neigh_create(struct neigh_table *tbl, +extern struct neighbour * __neigh_create(struct neigh_table *tbl, + const void *pkey, + struct net_device *dev, + bool want_ref); +static inline struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, - struct net_device *dev); + struct net_device *dev) +{ + return __neigh_create(tbl, pkey, dev, true); +} extern void neigh_destroy(struct neighbour *neigh); extern int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); extern int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, @@ -302,12 +309,6 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh) #define neigh_hold(n) atomic_inc(&(n)->refcnt) -static inline void neigh_confirm(struct neighbour *neigh) -{ - if (neigh) - neigh->confirmed = jiffies; -} - static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; @@ -351,15 +352,6 @@ static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) return dev_queue_xmit(skb); } -static inline int neigh_output(struct neighbour *n, struct sk_buff *skb) -{ - struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) - return neigh_hh_output(hh, skb); - else - return n->output(n, skb); -} - static inline struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat) { diff --git a/include/net/netevent.h b/include/net/netevent.h index 086f8a5b59dc..3ce4988c9c08 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -12,10 +12,14 @@ */ struct dst_entry; +struct neighbour; struct netevent_redirect { struct dst_entry *old; + struct neighbour *old_neigh; struct dst_entry *new; + struct neighbour *new_neigh; + const void *daddr; }; enum netevent_notif_type { diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 81c52b5205f2..c3be4aef6bf7 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -97,7 +97,10 @@ struct nf_conntrack_l4proto { #endif int *net_id; /* Init l4proto pernet data */ - int (*init_net)(struct net *net); + int (*init_net)(struct net *net, u_int16_t proto); + + /* Return the per-net protocol part. */ + struct nf_proto_net *(*get_net_proto)(struct net *net); /* Protocol name */ const char *name; @@ -124,6 +127,14 @@ extern int nf_conntrack_l4proto_register(struct net *net, extern void nf_conntrack_l4proto_unregister(struct net *net, struct nf_conntrack_l4proto *proto); +static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn) +{ +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + kfree(pn->ctl_compat_table); + pn->ctl_compat_table = NULL; +#endif +} + /* Generic netlink helpers */ extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 227f0cd9d3f6..599e48fa97cb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -11,6 +11,7 @@ struct ctl_table_header; struct ipv4_devconf; struct fib_rules_ops; struct hlist_head; +struct fib_table; struct sock; struct netns_ipv4 { @@ -24,6 +25,13 @@ struct netns_ipv4 { struct ipv4_devconf *devconf_dflt; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; + bool fib_has_custom_rules; + struct fib_table *fib_local; + struct fib_table *fib_main; + struct fib_table *fib_default; +#endif +#ifdef CONFIG_IP_ROUTE_CLASSID + int fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; struct sock *fibnl; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e4652fe58958..fecdf31816f2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -912,6 +912,9 @@ struct sctp_transport { /* Is this structure kfree()able? */ malloced:1; + /* Has this transport moved the ctsn since we last sacked */ + __u32 sack_generation; + struct flowi fl; /* This is the peer's IP address and port. */ @@ -1584,6 +1587,7 @@ struct sctp_association { */ __u8 sack_needed; /* Do we need to sack the peer? */ __u32 sack_cnt; + __u32 sack_generation; /* These are capabilities which our peer advertised. */ __u8 ecn_capable:1, /* Can peer do ECN? */ diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index e7728bc14ccf..2c5d2b4d5d1e 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -117,7 +117,8 @@ void sctp_tsnmap_free(struct sctp_tsnmap *map); int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn); /* Mark this TSN as seen. */ -int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn); +int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn, + struct sctp_transport *trans); /* Mark this TSN and all lower as seen. */ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn); diff --git a/kernel/printk.c b/kernel/printk.c index a2276b916769..dba18211685e 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -193,12 +193,19 @@ static int console_may_schedule; * separated by ',', and find the message after the ';' character. */ +enum log_flags { + LOG_DEFAULT = 0, + LOG_NOCONS = 1, /* already flushed, do not print to console */ +}; + struct log { u64 ts_nsec; /* timestamp in nanoseconds */ u16 len; /* length of entire record */ u16 text_len; /* length of text buffer */ u16 dict_len; /* length of dictionary buffer */ - u16 level; /* syslog level + facility */ + u8 facility; /* syslog facility */ + u8 flags:5; /* internal record flags */ + u8 level:3; /* syslog level */ }; /* @@ -286,6 +293,7 @@ static u32 log_next(u32 idx) /* insert record into the buffer, discard old ones, update heads */ static void log_store(int facility, int level, + enum log_flags flags, u64 ts_nsec, const char *dict, u16 dict_len, const char *text, u16 text_len) { @@ -329,8 +337,13 @@ static void log_store(int facility, int level, msg->text_len = text_len; memcpy(log_dict(msg), dict, dict_len); msg->dict_len = dict_len; - msg->level = (facility << 3) | (level & 7); - msg->ts_nsec = local_clock(); + msg->facility = facility; + msg->level = level & 7; + msg->flags = flags & 0x1f; + if (ts_nsec > 0) + msg->ts_nsec = ts_nsec; + else + msg->ts_nsec = local_clock(); memset(log_dict(msg) + dict_len, 0, pad_len); msg->len = sizeof(struct log) + text_len + dict_len + pad_len; @@ -446,7 +459,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, ts_usec = msg->ts_nsec; do_div(ts_usec, 1000); len = sprintf(user->buf, "%u,%llu,%llu;", - msg->level, user->seq, ts_usec); + (msg->facility << 3) | msg->level, user->seq, ts_usec); /* escape non-printable characters */ for (i = 0; i < msg->text_len; i++) { @@ -787,6 +800,21 @@ static bool printk_time; #endif module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); +static size_t print_time(u64 ts, char *buf) +{ + unsigned long rem_nsec; + + if (!printk_time) + return 0; + + if (!buf) + return 15; + + rem_nsec = do_div(ts, 1000000000); + return sprintf(buf, "[%5lu.%06lu] ", + (unsigned long)ts, rem_nsec / 1000); +} + static size_t print_prefix(const struct log *msg, bool syslog, char *buf) { size_t len = 0; @@ -803,18 +831,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf) } } - if (printk_time) { - if (buf) { - unsigned long long ts = msg->ts_nsec; - unsigned long rem_nsec = do_div(ts, 1000000000); - - len += sprintf(buf + len, "[%5lu.%06lu] ", - (unsigned long) ts, rem_nsec / 1000); - } else { - len += 15; - } - } - + len += print_time(msg->ts_nsec, buf ? buf + len : NULL); return len; } @@ -862,28 +879,49 @@ static int syslog_print(char __user *buf, int size) { char *text; struct log *msg; - int len; + int len = 0; text = kmalloc(LOG_LINE_MAX, GFP_KERNEL); if (!text) return -ENOMEM; - raw_spin_lock_irq(&logbuf_lock); - if (syslog_seq < log_first_seq) { - /* messages are gone, move to first one */ - syslog_seq = log_first_seq; - syslog_idx = log_first_idx; - } - msg = log_from_idx(syslog_idx); - len = msg_print_text(msg, true, text, LOG_LINE_MAX); - syslog_idx = log_next(syslog_idx); - syslog_seq++; - raw_spin_unlock_irq(&logbuf_lock); + while (size > 0) { + size_t n; - if (len > size) - len = -EINVAL; - else if (len > 0 && copy_to_user(buf, text, len)) - len = -EFAULT; + raw_spin_lock_irq(&logbuf_lock); + if (syslog_seq < log_first_seq) { + /* messages are gone, move to first one */ + syslog_seq = log_first_seq; + syslog_idx = log_first_idx; + } + if (syslog_seq == log_next_seq) { + raw_spin_unlock_irq(&logbuf_lock); + break; + } + msg = log_from_idx(syslog_idx); + n = msg_print_text(msg, true, text, LOG_LINE_MAX); + if (n <= size) { + syslog_idx = log_next(syslog_idx); + syslog_seq++; + } else + n = 0; + raw_spin_unlock_irq(&logbuf_lock); + + if (!n) + break; + + len += n; + size -= n; + buf += n; + n = copy_to_user(buf - n, text, n); + + if (n) { + len -= n; + if (!len) + len = -EFAULT; + break; + } + } kfree(text); return len; @@ -1040,6 +1078,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) /* Clear ring buffer */ case SYSLOG_ACTION_CLEAR: syslog_print_all(NULL, 0, true); + break; /* Disable logging to console */ case SYSLOG_ACTION_CONSOLE_OFF: if (saved_console_loglevel == -1) @@ -1272,15 +1311,92 @@ static inline void printk_delay(void) } } +/* + * Continuation lines are buffered, and not committed to the record buffer + * until the line is complete, or a race forces it. The line fragments + * though, are printed immediately to the consoles to ensure everything has + * reached the console in case of a kernel crash. + */ +static struct cont { + char buf[LOG_LINE_MAX]; + size_t len; /* length == 0 means unused buffer */ + size_t cons; /* bytes written to console */ + struct task_struct *owner; /* task of first print*/ + u64 ts_nsec; /* time of first print */ + u8 level; /* log level of first message */ + u8 facility; /* log level of first message */ + bool flushed:1; /* buffer sealed and committed */ +} cont; + +static void cont_flush(void) +{ + if (cont.flushed) + return; + if (cont.len == 0) + return; + + log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec, + NULL, 0, cont.buf, cont.len); + + cont.flushed = true; +} + +static bool cont_add(int facility, int level, const char *text, size_t len) +{ + if (cont.len && cont.flushed) + return false; + + if (cont.len + len > sizeof(cont.buf)) { + cont_flush(); + return false; + } + + if (!cont.len) { + cont.facility = facility; + cont.level = level; + cont.owner = current; + cont.ts_nsec = local_clock(); + cont.cons = 0; + cont.flushed = false; + } + + memcpy(cont.buf + cont.len, text, len); + cont.len += len; + return true; +} + +static size_t cont_print_text(char *text, size_t size) +{ + size_t textlen = 0; + size_t len; + + if (cont.cons == 0) { + textlen += print_time(cont.ts_nsec, text); + size -= textlen; + } + + len = cont.len - cont.cons; + if (len > 0) { + if (len+1 > size) + len = size-1; + memcpy(text + textlen, cont.buf + cont.cons, len); + textlen += len; + cont.cons = cont.len; + } + + if (cont.flushed) { + text[textlen++] = '\n'; + /* got everything, release buffer */ + cont.len = 0; + } + return textlen; +} + asmlinkage int vprintk_emit(int facility, int level, const char *dict, size_t dictlen, const char *fmt, va_list args) { static int recursion_bug; - static char cont_buf[LOG_LINE_MAX]; - static size_t cont_len; - static int cont_level; - static struct task_struct *cont_task; static char textbuf[LOG_LINE_MAX]; char *text = textbuf; size_t text_len; @@ -1326,7 +1442,8 @@ asmlinkage int vprintk_emit(int facility, int level, recursion_bug = 0; printed_len += strlen(recursion_msg); /* emit KERN_CRIT message */ - log_store(0, 2, NULL, 0, recursion_msg, printed_len); + log_store(0, 2, LOG_DEFAULT, 0, + NULL, 0, recursion_msg, printed_len); } /* @@ -1364,55 +1481,37 @@ asmlinkage int vprintk_emit(int facility, int level, } if (!newline) { - if (cont_len && (prefix || cont_task != current)) { - /* - * Flush earlier buffer, which is either from a - * different thread, or when we got a new prefix. - */ - log_store(facility, cont_level, NULL, 0, cont_buf, cont_len); - cont_len = 0; - } - - if (!cont_len) { - cont_level = level; - cont_task = current; - } + /* + * Flush the conflicting buffer. An earlier newline was missing, + * or another task also prints continuation lines. + */ + if (cont.len && (prefix || cont.owner != current)) + cont_flush(); - /* buffer or append to earlier buffer from the same thread */ - if (cont_len + text_len > sizeof(cont_buf)) - text_len = sizeof(cont_buf) - cont_len; - memcpy(cont_buf + cont_len, text, text_len); - cont_len += text_len; + /* buffer line if possible, otherwise store it right away */ + if (!cont_add(facility, level, text, text_len)) + log_store(facility, level, LOG_DEFAULT, 0, + dict, dictlen, text, text_len); } else { - if (cont_len && cont_task == current) { - if (prefix) { - /* - * New prefix from the same thread; flush. We - * either got no earlier newline, or we race - * with an interrupt. - */ - log_store(facility, cont_level, - NULL, 0, cont_buf, cont_len); - cont_len = 0; - } + bool stored = false; - /* append to the earlier buffer and flush */ - if (cont_len + text_len > sizeof(cont_buf)) - text_len = sizeof(cont_buf) - cont_len; - memcpy(cont_buf + cont_len, text, text_len); - cont_len += text_len; - log_store(facility, cont_level, - NULL, 0, cont_buf, cont_len); - cont_len = 0; - cont_task = NULL; - printed_len = cont_len; - } else { - /* ordinary single and terminated line */ - log_store(facility, level, - dict, dictlen, text, text_len); - printed_len = text_len; + /* + * If an earlier newline was missing and it was the same task, + * either merge it with the current buffer and flush, or if + * there was a race with interrupts (prefix == true) then just + * flush it out and store this line separately. + */ + if (cont.len && cont.owner == current) { + if (!prefix) + stored = cont_add(facility, level, text, text_len); + cont_flush(); } + + if (!stored) + log_store(facility, level, LOG_DEFAULT, 0, + dict, dictlen, text, text_len); } + printed_len += text_len; /* * Try to acquire and then immediately release the console semaphore. @@ -1499,11 +1598,18 @@ EXPORT_SYMBOL(printk); #else #define LOG_LINE_MAX 0 +static struct cont { + size_t len; + size_t cons; + u8 level; + bool flushed:1; +} cont; static struct log *log_from_idx(u32 idx) { return NULL; } static u32 log_next(u32 idx) { return 0; } static void call_console_drivers(int level, const char *text, size_t len) {} static size_t msg_print_text(const struct log *msg, bool syslog, char *buf, size_t size) { return 0; } +static size_t cont_print_text(char *text, size_t size) { return 0; } #endif /* CONFIG_PRINTK */ @@ -1795,6 +1901,7 @@ static u32 console_idx; */ void console_unlock(void) { + static char text[LOG_LINE_MAX]; static u64 seen_seq; unsigned long flags; bool wake_klogd = false; @@ -1807,10 +1914,23 @@ void console_unlock(void) console_may_schedule = 0; + /* flush buffered message fragment immediately to console */ + raw_spin_lock_irqsave(&logbuf_lock, flags); + if (cont.len && (cont.cons < cont.len || cont.flushed)) { + size_t len; + + len = cont_print_text(text, sizeof(text)); + raw_spin_unlock(&logbuf_lock); + stop_critical_timings(); + call_console_drivers(cont.level, text, len); + start_critical_timings(); + local_irq_restore(flags); + } else + raw_spin_unlock_irqrestore(&logbuf_lock, flags); + again: for (;;) { struct log *msg; - static char text[LOG_LINE_MAX]; size_t len; int level; @@ -1825,13 +1945,22 @@ again: console_seq = log_first_seq; console_idx = log_first_idx; } - +skip: if (console_seq == log_next_seq) break; msg = log_from_idx(console_idx); - level = msg->level & 7; + if (msg->flags & LOG_NOCONS) { + /* + * Skip record we have buffered and already printed + * directly to the console when we received it. + */ + console_idx = log_next(console_idx); + console_seq++; + goto skip; + } + level = msg->level; len = msg_print_text(msg, false, text, sizeof(text)); console_idx = log_next(console_idx); @@ -2409,7 +2538,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); * kmsg_dump_get_buffer - copy kmsg log lines * @dumper: registered kmsg dumper * @syslog: include the "<4>" prefixes - * @line: buffer to copy the line to + * @buf: buffer to copy the line to * @size: maximum size of the buffer * @len: length of line placed into buffer * diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 3b0f1337f75b..38ecdda3f55f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1530,7 +1530,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; struct rcu_head *next, *list, **tail; - int bl, count, count_lazy; + int bl, count, count_lazy, i; /* If no callbacks are ready, just return.*/ if (!cpu_has_callbacks_ready_to_invoke(rdp)) { @@ -1553,9 +1553,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; *rdp->nxttail[RCU_DONE_TAIL] = NULL; tail = rdp->nxttail[RCU_DONE_TAIL]; - for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) - if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) - rdp->nxttail[count] = &rdp->nxtlist; + for (i = RCU_NEXT_SIZE - 1; i >= 0; i--) + if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) + rdp->nxttail[i] = &rdp->nxtlist; local_irq_restore(flags); /* Invoke callbacks. */ @@ -1583,9 +1583,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) if (list != NULL) { *tail = rdp->nxtlist; rdp->nxtlist = list; - for (count = 0; count < RCU_NEXT_SIZE; count++) - if (&rdp->nxtlist == rdp->nxttail[count]) - rdp->nxttail[count] = tail; + for (i = 0; i < RCU_NEXT_SIZE; i++) + if (&rdp->nxtlist == rdp->nxttail[i]) + rdp->nxttail[i] = tail; else break; } diff --git a/kernel/relay.c b/kernel/relay.c index ab56a1764d4d..e8cd2027abbd 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1235,6 +1235,7 @@ static ssize_t subbuf_splice_actor(struct file *in, struct splice_pipe_desc spd = { .pages = pages, .nr_pages = 0, + .nr_pages_max = PIPE_DEF_BUFFERS, .partial = partial, .flags = flags, .ops = &relay_pipe_buf_ops, @@ -1302,8 +1303,8 @@ static ssize_t subbuf_splice_actor(struct file *in, ret += padding; out: - splice_shrink_spd(pipe, &spd); - return ret; + splice_shrink_spd(&spd); + return ret; } static ssize_t relay_file_splice_read(struct file *in, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 49249c28690d..a7fa0702be1c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3609,6 +3609,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, .pages = pages_def, .partial = partial_def, .nr_pages = 0, /* This gets updated below. */ + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &tracing_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, @@ -3680,7 +3681,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, ret = splice_to_pipe(pipe, &spd); out: - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return ret; out_err: @@ -4231,6 +4232,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages_def, .partial = partial_def, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &buffer_pipe_buf_ops, .spd_release = buffer_spd_release, @@ -4318,7 +4320,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } ret = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); out: return ret; } diff --git a/mm/shmem.c b/mm/shmem.c index a15a466d0d1d..4ce02e0673db 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1594,6 +1594,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &page_cache_pipe_buf_ops, .spd_release = spd_release_page, @@ -1682,7 +1683,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, if (spd.nr_pages) error = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); if (error > 0) { *ppos += error; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 20fa719889ee..b98d3d78ca7f 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -120,7 +120,9 @@ static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) return NULL; } -static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { return NULL; } @@ -373,19 +375,29 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) if (!skb->dev) goto free_skb; dst = skb_dst(skb); - neigh = dst_get_neighbour_noref(dst); - if (neigh->hh.hh_len) { - neigh_hh_bridge(&neigh->hh, skb); - skb->dev = nf_bridge->physindev; - return br_handle_frame_finish(skb); - } else { - /* the neighbour function below overwrites the complete - * MAC header, so we save the Ethernet source address and - * protocol number. */ - skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); - /* tell br_dev_xmit to continue with forwarding */ - nf_bridge->mask |= BRNF_BRIDGED_DNAT; - return neigh->output(neigh, skb); + neigh = dst_neigh_lookup_skb(dst, skb); + if (neigh) { + int ret; + + if (neigh->hh.hh_len) { + neigh_hh_bridge(&neigh->hh, skb); + skb->dev = nf_bridge->physindev; + ret = br_handle_frame_finish(skb); + } else { + /* the neighbour function below overwrites the complete + * MAC header, so we save the Ethernet source address and + * protocol number. + */ + skb_copy_from_linear_data_offset(skb, + -(ETH_HLEN-ETH_ALEN), + skb->nf_bridge->data, + ETH_HLEN-ETH_ALEN); + /* tell br_dev_xmit to continue with forwarding */ + nf_bridge->mask |= BRNF_BRIDGED_DNAT; + ret = neigh->output(neigh, skb); + } + neigh_release(neigh); + return ret; } free_skb: kfree_skb(skb); diff --git a/net/core/dev.c b/net/core/dev.c index ed674e212b7a..69f7a1a393d8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1793,6 +1793,17 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq) EXPORT_SYMBOL(netif_set_real_num_rx_queues); #endif +/* netif_get_num_default_rss_queues - default number of RSS queues + * + * This routine should set an upper limit on the number of RSS queues + * used by default by multiqueue devices. + */ +int netif_get_num_default_rss_queues() +{ + return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus()); +} +EXPORT_SYMBOL(netif_get_num_default_rss_queues); + static inline void __netif_reschedule(struct Qdisc *q) { struct softnet_data *sd; diff --git a/net/core/dst.c b/net/core/dst.c index 43d94cedbf7c..07bacff84aa4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -152,7 +152,7 @@ EXPORT_SYMBOL(dst_discard); const u32 dst_default_metrics[RTAX_MAX]; void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, int flags) + int initial_ref, int initial_obsolete, unsigned short flags) { struct dst_entry *dst; @@ -171,7 +171,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - RCU_INIT_POINTER(dst->_neighbour, NULL); #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -188,6 +187,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; + dst->pending_confirm = 0; dst->next = NULL; if (!(flags & DST_NOCOUNT)) dst_entries_add(ops, 1); @@ -224,19 +224,12 @@ EXPORT_SYMBOL(__dst_free); struct dst_entry *dst_destroy(struct dst_entry * dst) { struct dst_entry *child; - struct neighbour *neigh; smp_rmb(); again: - neigh = rcu_dereference_protected(dst->_neighbour, 1); child = dst->child; - if (neigh) { - RCU_INIT_POINTER(dst->_neighbour, NULL); - neigh_release(neigh); - } - if (!(dst->flags & DST_NOCOUNT)) dst_entries_add(dst->ops, -1); @@ -360,19 +353,9 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { - struct neighbour *neigh; - dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); - if (neigh && neigh->dev == dev) { - neigh->dev = dst->dev; - dev_hold(dst->dev); - dev_put(dev); - } - rcu_read_unlock(); } } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d81d026138f0..117afaf51268 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -474,8 +474,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, } EXPORT_SYMBOL(neigh_lookup_nodev); -struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, - struct net_device *dev) +struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, + struct net_device *dev, bool want_ref) { u32 hash_val; int key_len = tbl->key_len; @@ -535,14 +535,16 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, n1 = rcu_dereference_protected(n1->next, lockdep_is_held(&tbl->lock))) { if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { - neigh_hold(n1); + if (want_ref) + neigh_hold(n1); rc = n1; goto out_tbl_unlock; } } n->dead = 0; - neigh_hold(n); + if (want_ref) + neigh_hold(n); rcu_assign_pointer(n->next, rcu_dereference_protected(nht->hash_buckets[hash_val], lockdep_is_held(&tbl->lock))); @@ -558,7 +560,7 @@ out_neigh_release: neigh_release(n); goto out; } -EXPORT_SYMBOL(neigh_create); +EXPORT_SYMBOL(__neigh_create); static u32 pneigh_hash(const void *pkey, int key_len) { @@ -1199,10 +1201,23 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, write_unlock_bh(&neigh->lock); rcu_read_lock(); - /* On shaper/eql skb->dst->neighbour != neigh :( */ - if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL) - n1 = n2; + + /* Why not just use 'neigh' as-is? The problem is that + * things such as shaper, eql, and sch_teql can end up + * using alternative, different, neigh objects to output + * the packet in the output path. So what we need to do + * here is re-lookup the top-level neigh in the path so + * we can reinject the packet there. + */ + n2 = NULL; + if (dst) { + n2 = dst_neigh_lookup_skb(dst, skb); + if (n2) + n1 = n2; + } n1->output(n1, skb); + if (n2) + neigh_release(n2); rcu_read_unlock(); write_lock_bh(&neigh->lock); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b21522ed0e1..5a789a807ec3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1755,6 +1755,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = MAX_SKB_FRAGS, .flags = flags, .ops = &sock_pipe_buf_ops, .spd_release = sock_spd_release, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9991be083ad0..02162cfa5048 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -239,7 +239,6 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; - struct ipv6_txoptions *opt = NULL; struct in6_addr *final_p, final; struct flowi6 fl6; int err = -1; @@ -255,9 +254,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl6.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - opt = np->opt; - final_p = fl6_update_dst(&fl6, opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { @@ -274,13 +272,11 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, &ireq6->loc_addr, &ireq6->rmt_addr); fl6.daddr = ireq6->rmt_addr; - err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); + err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); err = net_xmit_eval(err); } done: - if (opt != NULL && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); return err; } @@ -475,7 +471,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; - struct ipv6_txoptions *opt; if (skb->protocol == htons(ETH_P_IP)) { /* @@ -520,7 +515,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, return newsk; } - opt = np->opt; if (sk_acceptq_is_full(sk)) goto out_overflow; @@ -532,7 +526,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; fl6.daddr = ireq6->rmt_addr; - final_p = fl6_update_dst(&fl6, opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); fl6.saddr = ireq6->loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = inet_rsk(req)->rmt_port; @@ -597,11 +591,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ - if (opt != NULL) { - newnp->opt = ipv6_dup_options(newsk, opt); - if (opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); - } + if (np->opt != NULL) + newnp->opt = ipv6_dup_options(newsk, np->opt); inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt != NULL) @@ -627,8 +618,6 @@ out_nonewsk: dst_release(dst); out: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - if (opt != NULL && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); return NULL; } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 8e9a35b17df4..3aede1b459fd 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -202,7 +202,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = dst_get_neighbour_noref(dst); + struct neighbour *neigh = rt->n; struct net_device *dev = neigh->dev; char mac_addr[ETH_ALEN]; unsigned int seq; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2493ed5bfecd..6e74b3f110bc 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -114,10 +114,13 @@ static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); static unsigned int dn_dst_mtu(const struct dst_entry *dst); static void dn_dst_destroy(struct dst_entry *); +static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); -static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr); +static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr); static int dn_route_input(struct sk_buff *); static void dn_run_flush(unsigned long dummy); @@ -138,6 +141,7 @@ static struct dst_ops dn_dst_ops = { .mtu = dn_dst_mtu, .cow_metrics = dst_cow_metrics_generic, .destroy = dn_dst_destroy, + .ifdown = dn_dst_ifdown, .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, @@ -146,9 +150,27 @@ static struct dst_ops dn_dst_ops = { static void dn_dst_destroy(struct dst_entry *dst) { + struct dn_route *rt = (struct dn_route *) dst; + + if (rt->n) + neigh_release(rt->n); dst_destroy_metrics_generic(dst); } +static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) +{ + if (how) { + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *n = rt->n; + + if (n && n->dev == dev) { + n->dev = dev_net(dev)->loopback_dev; + dev_hold(n->dev); + dev_put(dev); + } + } +} + static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) { __u16 tmp = (__u16 __force)(src ^ dst); @@ -244,7 +266,8 @@ static int dn_dst_gc(struct dst_ops *ops) */ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { - struct neighbour *n = dst_get_neighbour_noref(dst); + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *n = rt->n; u32 min_mtu = 230; struct dn_dev *dn; @@ -713,7 +736,8 @@ out: static int dn_to_neigh_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *n = dst_get_neighbour_noref(dst); + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *n = rt->n; return n->output(n, skb); } @@ -727,7 +751,7 @@ static int dn_output(struct sk_buff *skb) int err = -EINVAL; - if (dst_get_neighbour_noref(dst) == NULL) + if (rt->n == NULL) goto error; skb->dev = dev; @@ -828,7 +852,9 @@ static unsigned int dn_dst_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev); } @@ -848,11 +874,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) } rt->rt_type = res->type; - if (dev != NULL && dst_get_neighbour_noref(&rt->dst) == NULL) { + if (dev != NULL && rt->n == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - dst_set_neighbour(&rt->dst, n); + rt->n = n; } if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) @@ -1159,7 +1185,7 @@ make_route: rt->rt_dst_map = fld.daddr; rt->rt_src_map = fld.saddr; - dst_set_neighbour(&rt->dst, neigh); + rt->n = neigh; neigh = NULL; rt->dst.lastuse = jiffies; @@ -1429,7 +1455,7 @@ make_route: rt->fld.flowidn_iif = in_dev->ifindex; rt->fld.flowidn_mark = fld.flowidn_mark; - dst_set_neighbour(&rt->dst, neigh); + rt->n = neigh; rt->dst.lastuse = jiffies; rt->dst.output = dn_rt_bug; switch (res.type) { diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index cd5007f3a569..f4070e54d1a1 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -55,7 +55,6 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/netdevice.h> -#include <linux/etherdevice.h> #include <net/af_ieee802154.h> #include <net/ieee802154.h> #include <net/ieee802154_netdev.h> @@ -936,6 +935,19 @@ drop: return -EINVAL; } +static int lowpan_set_address(struct net_device *dev, void *p) +{ + struct sockaddr *sa = p; + + if (netif_running(dev)) + return -EBUSY; + + /* TODO: validate addr */ + memcpy(dev->dev_addr, sa->sa_data, dev->addr_len); + + return 0; +} + static int lowpan_get_mac_header_length(struct sk_buff *skb) { /* @@ -1078,7 +1090,7 @@ static struct header_ops lowpan_header_ops = { static const struct net_device_ops lowpan_netdev_ops = { .ndo_start_xmit = lowpan_xmit, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = lowpan_set_address, }; static struct ieee802154_mlme_ops lowpan_mlme = { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3e11ea225dad..81f85716a894 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -86,6 +86,24 @@ struct fib_table *fib_new_table(struct net *net, u32 id) tb = fib_trie_table(id); if (!tb) return NULL; + + switch (id) { + case RT_TABLE_LOCAL: + net->ipv4.fib_local = tb; + break; + + case RT_TABLE_MAIN: + net->ipv4.fib_main = tb; + break; + + case RT_TABLE_DEFAULT: + net->ipv4.fib_default = tb; + break; + + default: + break; + } + h = id & (FIB_TABLE_HASHSZ - 1); hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); return tb; @@ -218,10 +236,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) return inet_select_addr(dev, ip_hdr(skb)->saddr, scope); } -#ifdef CONFIG_IP_ROUTE_CLASSID -int fib_num_tclassid_users __read_mostly; -#endif - /* Given (packet source, input interface) and optional (dst, oif, tos): * - (main) check, that source is valid i.e. not broadcast or our local * address. @@ -312,7 +326,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - if (!r && !fib_num_tclassid_users) { + if (!r && !fib_num_tclassid_users(dev_net(dev))) { *itag = 0; return 0; } @@ -1134,6 +1148,9 @@ static int __net_init fib_net_init(struct net *net) { int error; +#ifdef CONFIG_IP_ROUTE_CLASSID + net->ipv4.fib_num_tclassid_users = 0; +#endif error = ip_fib_net_init(net); if (error < 0) goto out; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b23fd952c84f..c06da93b0b70 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -54,7 +54,7 @@ u32 fib_rules_tclass(const struct fib_result *res) } #endif -int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) +int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { struct fib_lookup_arg arg = { .result = res, @@ -67,7 +67,7 @@ int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) return err; } -EXPORT_SYMBOL_GPL(fib_lookup); +EXPORT_SYMBOL_GPL(__fib_lookup); static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) @@ -172,7 +172,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (tb[FRA_FLOW]) { rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); if (rule4->tclassid) - fib_num_tclassid_users++; + net->ipv4.fib_num_tclassid_users++; } #endif @@ -182,6 +182,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->dstmask = inet_make_mask(rule4->dst_len); rule4->tos = frh->tos; + net->ipv4.fib_has_custom_rules = true; err = 0; errout: return err; @@ -189,12 +190,14 @@ errout: static void fib4_rule_delete(struct fib_rule *rule) { + struct net *net = rule->fr_net; #ifdef CONFIG_IP_ROUTE_CLASSID struct fib4_rule *rule4 = (struct fib4_rule *) rule; if (rule4->tclassid) - fib_num_tclassid_users--; + net->ipv4.fib_num_tclassid_users--; #endif + net->ipv4.fib_has_custom_rules = true; } static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, @@ -309,6 +312,7 @@ int __net_init fib4_rules_init(struct net *net) if (err < 0) goto fail; net->ipv4.rules_ops = ops; + net->ipv4.fib_has_custom_rules = false; return 0; fail: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c46c20b6b0b6..ae301c897a19 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -166,7 +166,7 @@ void free_fib_info(struct fib_info *fi) #ifdef CONFIG_IP_ROUTE_CLASSID change_nexthops(fi) { if (nexthop_nh->nh_tclassid) - fib_num_tclassid_users--; + fi->fib_net->ipv4.fib_num_tclassid_users--; } endfor_nexthops(fi); #endif call_rcu(&fi->rcu, free_fib_info_rcu); @@ -428,7 +428,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, nla = nla_find(attrs, attrlen, RTA_FLOW); nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; if (nexthop_nh->nh_tclassid) - fib_num_tclassid_users++; + fi->fib_net->ipv4.fib_num_tclassid_users++; #endif } @@ -824,7 +824,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; if (nh->nh_tclassid) - fib_num_tclassid_users++; + fi->fib_net->ipv4.fib_num_tclassid_users++; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 766dfe56885a..a19d6471a318 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -242,6 +242,15 @@ void ip_options_fragment(struct sk_buff *skb) opt->ts_needtime = 0; } +/* helper used by ip_options_compile() to call fib_compute_spec_dst() + * at most one time. + */ +static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb) +{ + if (*spec_dst == htonl(INADDR_ANY)) + *spec_dst = fib_compute_spec_dst(skb); +} + /* * Verify options and fill pointers in struct options. * Caller should clear *opt, and set opt->data. @@ -251,14 +260,15 @@ void ip_options_fragment(struct sk_buff *skb) int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb) { - __be32 spec_dst = (__force __be32) 0; + __be32 spec_dst = htonl(INADDR_ANY); unsigned char *pp_ptr = NULL; + struct rtable *rt = NULL; unsigned char *optptr; unsigned char *iph; int optlen, l; if (skb != NULL) { - spec_dst = fib_compute_spec_dst(skb); + rt = skb_rtable(skb); optptr = (unsigned char *)&(ip_hdr(skb)[1]); } else optptr = opt->__data; @@ -330,7 +340,8 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - if (skb) { + if (rt) { + spec_dst_fill(&spec_dst, skb); memcpy(&optptr[optptr[2]-1], &spec_dst, 4); opt->is_changed = 1; } @@ -372,7 +383,8 @@ int ip_options_compile(struct net *net, goto error; } opt->ts = optptr - iph; - if (skb) { + if (rt) { + spec_dst_fill(&spec_dst, skb); memcpy(&optptr[optptr[2]-1], &spec_dst, 4); timeptr = &optptr[optptr[2]+3]; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 2630900e480a..cc52679790b2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -170,6 +170,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; + u32 nexthop; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); @@ -191,15 +192,18 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } - rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); + rcu_read_lock_bh(); + nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr; + neigh = __ipv4_neigh_lookup_noref(dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (neigh) { - int res = neigh_output(neigh, skb); + int res = dst_neigh_output(dst, neigh, skb); - rcu_read_unlock(); + rcu_read_unlock_bh(); return res; } - rcu_read_unlock(); + rcu_read_unlock_bh(); net_dbg_ratelimited("%s: No header cache and no neighbour!\n", __func__); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 041923cb67ad..5241d997ab75 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -337,34 +337,62 @@ static struct ctl_table icmp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -static int icmp_init_net(struct net *net) +static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct nf_icmp_net *in) { - struct nf_icmp_net *in = icmp_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)in; - in->timeout = nf_ct_icmp_timeout; - #ifdef CONFIG_SYSCTL pn->ctl_table = kmemdup(icmp_sysctl_table, sizeof(icmp_sysctl_table), GFP_KERNEL); if (!pn->ctl_table) return -ENOMEM; + pn->ctl_table[0].data = &in->timeout; +#endif + return 0; +} + +static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, + struct nf_icmp_net *in) +{ +#ifdef CONFIG_SYSCTL #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, sizeof(icmp_compat_sysctl_table), GFP_KERNEL); - if (!pn->ctl_compat_table) { - kfree(pn->ctl_table); - pn->ctl_table = NULL; + if (!pn->ctl_compat_table) return -ENOMEM; - } + pn->ctl_compat_table[0].data = &in->timeout; #endif #endif return 0; } +static int icmp_init_net(struct net *net, u_int16_t proto) +{ + int ret; + struct nf_icmp_net *in = icmp_pernet(net); + struct nf_proto_net *pn = &in->pn; + + in->timeout = nf_ct_icmp_timeout; + + ret = icmp_kmemdup_compat_sysctl_table(pn, in); + if (ret < 0) + return ret; + + ret = icmp_kmemdup_sysctl_table(pn, in); + if (ret < 0) + nf_ct_kfree_compat_sysctl_table(pn); + + return ret; +} + +static struct nf_proto_net *icmp_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, @@ -395,4 +423,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = icmp_init_net, + .get_net_proto = icmp_get_net_proto, }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a5afc715558..72e88c208025 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -188,7 +188,9 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } -static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr); +static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, @@ -418,13 +420,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) "HHUptod\tSpecDst"); else { struct rtable *r = v; - struct neighbour *n; - int len, HHUptod; - - rcu_read_lock(); - n = dst_get_neighbour_noref(&r->dst); - HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; - rcu_read_unlock(); + int len; seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", @@ -438,9 +434,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + dst_metric(&r->dst, RTAX_RTTVAR)), r->rt_key_tos, - -1, - HHUptod, - 0, &len); + -1, 0, 0, &len); seq_printf(seq, "%*s\n", 127 - len, ""); } @@ -1096,20 +1090,20 @@ static int slow_chain_length(const struct rtable *head) return length >> FRACT_BITS; } -static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { - static const __be32 inaddr_any = 0; struct net_device *dev = dst->dev; const __be32 *pkey = daddr; const struct rtable *rt; struct neighbour *n; rt = (const struct rtable *) dst; - - if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) - pkey = &inaddr_any; - else if (rt->rt_gateway) + if (rt->rt_gateway) pkey = (const __be32 *) &rt->rt_gateway; + else if (skb) + pkey = &ip_hdr(skb)->daddr; n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); if (n) @@ -1117,16 +1111,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo return neigh_create(&arp_tbl, pkey, dev); } -static int rt_bind_neighbour(struct rtable *rt) -{ - struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); - if (IS_ERR(n)) - return PTR_ERR(n); - dst_set_neighbour(&rt->dst, n); - - return 0; -} - static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt, struct sk_buff *skb, int ifindex) { @@ -1135,7 +1119,6 @@ static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt, unsigned long now; u32 min_score; int chain_length; - int attempts = !in_softirq(); restart: chain_length = 0; @@ -1162,15 +1145,6 @@ restart: */ rt->dst.flags |= DST_NOCACHE; - if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { - int err = rt_bind_neighbour(rt); - if (err) { - net_warn_ratelimited("Neighbour table failure & not caching routes\n"); - ip_rt_put(rt); - return ERR_PTR(err); - } - } - goto skip_hashing; } @@ -1253,40 +1227,6 @@ restart: } } - /* Try to bind route to arp only if it is output - route or unicast forwarding path. - */ - if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { - int err = rt_bind_neighbour(rt); - if (err) { - spin_unlock_bh(rt_hash_lock_addr(hash)); - - if (err != -ENOBUFS) { - rt_drop(rt); - return ERR_PTR(err); - } - - /* Neighbour tables are full and nothing - can be released. Try to shrink route cache, - it is most likely it holds some neighbour records. - */ - if (attempts-- > 0) { - int saved_elasticity = ip_rt_gc_elasticity; - int saved_int = ip_rt_gc_min_interval; - ip_rt_gc_elasticity = 1; - ip_rt_gc_min_interval = 0; - rt_garbage_collect(&ipv4_dst_ops); - ip_rt_gc_min_interval = saved_int; - ip_rt_gc_elasticity = saved_elasticity; - goto restart; - } - - net_warn_ratelimited("Neighbour table overflow\n"); - rt_drop(rt); - return ERR_PTR(-ENOBUFS); - } - } - rt->dst.rt_next = rt_hash_table[hash].chain; /* @@ -1394,26 +1334,24 @@ static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; - struct neighbour *n, *old_n; + struct neighbour *n; dst_confirm(&rt->dst); rt->rt_gateway = peer->redirect_learned.a4; - n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); - if (IS_ERR(n)) { + n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway); + if (!n) { rt->rt_gateway = orig_gw; return; } - old_n = xchg(&rt->dst._neighbour, n); - if (old_n) - neigh_release(old_n); if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); } else { rt->rt_flags |= RTCF_REDIRECTED; call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } + neigh_release(n); } /* called in rcu_read_lock() section */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8416f8a68e65..ca0d0e7c9778 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -740,13 +740,13 @@ void tcp_update_metrics(struct sock *sk) if (sysctl_tcp_nometrics_save) return; - dst_confirm(dst); - if (dst && (dst->flags & DST_HOST)) { const struct inet_connection_sock *icsk = inet_csk(sk); int m; unsigned long rtt; + dst_confirm(dst); + if (icsk->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. @@ -3869,9 +3869,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_cong_avoid(sk, ack, prior_in_flight); } - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) - dst_confirm(__sk_dst_get(sk)); - + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { + struct dst_entry *dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); + } return 1; no_queue: @@ -6140,9 +6142,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, case TCP_FIN_WAIT1: if (tp->snd_una == tp->write_seq) { + struct dst_entry *dst; + tcp_set_state(sk, TCP_FIN_WAIT2); sk->sk_shutdown |= SEND_SHUTDOWN; - dst_confirm(__sk_dst_get(sk)); + + dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); if (!sock_flag(sk, SOCK_DEAD)) /* Wake up lingering close() */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a233a7ccbc3a..c6af5963a202 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -88,6 +88,7 @@ static int ip6_finish_output2(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct neighbour *neigh; + struct rt6_info *rt; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -123,9 +124,10 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); + rt = (struct rt6_info *) dst; + neigh = rt->n; if (neigh) { - int res = neigh_output(neigh, skb); + int res = dst_neigh_output(dst, neigh, skb); rcu_read_unlock(); return res; @@ -944,6 +946,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct net *net = sock_net(sk); #ifdef CONFIG_IPV6_OPTIMISTIC_DAD struct neighbour *n; + struct rt6_info *rt; #endif int err; @@ -972,7 +975,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry of the nexthop router */ rcu_read_lock(); - n = dst_get_neighbour_noref(*dst); + rt = (struct rt6_info *) *dst; + n = rt->n; if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 63ed0121836c..2d54b2061d68 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -333,22 +333,36 @@ static struct ctl_table icmpv6_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static int icmpv6_init_net(struct net *net) +static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct nf_icmp_net *in) { - struct nf_icmp_net *in = icmpv6_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)in; - in->timeout = nf_ct_icmpv6_timeout; #ifdef CONFIG_SYSCTL pn->ctl_table = kmemdup(icmpv6_sysctl_table, sizeof(icmpv6_sysctl_table), GFP_KERNEL); if (!pn->ctl_table) return -ENOMEM; + pn->ctl_table[0].data = &in->timeout; #endif return 0; } +static int icmpv6_init_net(struct net *net, u_int16_t proto) +{ + struct nf_icmp_net *in = icmpv6_pernet(net); + struct nf_proto_net *pn = &in->pn; + + in->timeout = nf_ct_icmpv6_timeout; + + return icmpv6_kmemdup_sysctl_table(pn, in); +} + +static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.icmpv6.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, @@ -377,4 +391,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = icmpv6_init_net, + .get_net_proto = icmpv6_get_net_proto, }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c518e4ec0cea..6cc6c881f54f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -120,21 +120,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } -static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr) +static inline const void *choose_neigh_daddr(struct rt6_info *rt, + struct sk_buff *skb, + const void *daddr) { struct in6_addr *p = &rt->rt6i_gateway; if (!ipv6_addr_any(p)) return (const void *) p; + else if (skb) + return &ipv6_hdr(skb)->daddr; return daddr; } -static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { struct rt6_info *rt = (struct rt6_info *) dst; struct neighbour *n; - daddr = choose_neigh_daddr(rt, daddr); + daddr = choose_neigh_daddr(rt, skb, daddr); n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); if (n) return n; @@ -149,7 +155,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) if (IS_ERR(n)) return PTR_ERR(n); } - dst_set_neighbour(&rt->dst, n); + rt->n = n; return 0; } @@ -267,7 +273,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, 0, 0, flags); if (rt) { - memset(&rt->rt6i_table, 0, + memset(&rt->n, 0, sizeof(*rt) - sizeof(struct dst_entry)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); } @@ -279,6 +285,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + if (rt->n) + neigh_release(rt->n); + if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -329,12 +338,19 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct net_device *loopback_dev = dev_net(dev)->loopback_dev; - if (dev != loopback_dev && idev && idev->dev == dev) { - struct inet6_dev *loopback_idev = - in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; - in6_dev_put(idev); + if (dev != loopback_dev) { + if (idev && idev->dev == dev) { + struct inet6_dev *loopback_idev = + in6_dev_get(loopback_dev); + if (loopback_idev) { + rt->rt6i_idev = loopback_idev; + in6_dev_put(idev); + } + } + if (rt->n && rt->n->dev == dev) { + rt->n->dev = loopback_dev; + dev_hold(loopback_dev); + dev_put(dev); } } } @@ -424,7 +440,7 @@ static void rt6_probe(struct rt6_info *rt) * to no more than one per minute. */ rcu_read_lock(); - neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL; + neigh = rt ? rt->n : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) goto out; read_lock_bh(&neigh->lock); @@ -471,7 +487,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) int m; rcu_read_lock(); - neigh = dst_get_neighbour_noref(&rt->dst); + neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -818,7 +834,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, if (rt) { rt->rt6i_flags |= RTF_CACHE; - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst))); + rt->n = neigh_clone(ort->n); } return rt; } @@ -852,7 +868,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -1162,7 +1178,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (neigh) neigh_hold(neigh); else { - neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr); + neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); if (IS_ERR(neigh)) { in6_dev_put(idev); dst_free(&rt->dst); @@ -1172,7 +1188,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; - dst_set_neighbour(&rt->dst, neigh); + rt->n = neigh; atomic_set(&rt->dst.__refcnt, 1); rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; @@ -1681,6 +1697,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, struct rt6_info *rt, *nrt = NULL; struct netevent_redirect netevent; struct net *net = dev_net(neigh->dev); + struct neighbour *old_neigh; rt = ip6_route_redirect(dest, src, saddr, neigh->dev); @@ -1708,7 +1725,8 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == dst_get_neighbour_noref_raw(&rt->dst)) + old_neigh = rt->n; + if (neigh == old_neigh) goto out; nrt = ip6_rt_copy(rt, dest); @@ -1720,13 +1738,16 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, nrt->rt6i_flags &= ~RTF_GATEWAY; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); + nrt->n = neigh_clone(neigh); if (ip6_ins_rt(nrt)) goto out; netevent.old = &rt->dst; + netevent.old_neigh = old_neigh; netevent.new = &nrt->dst; + netevent.new_neigh = neigh; + netevent.daddr = dest; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags & RTF_CACHE) { @@ -2431,7 +2452,7 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; rcu_read_lock(); - n = dst_get_neighbour_noref(&rt->dst); + n = rt->n; if (n) { if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { rcu_read_unlock(); @@ -2655,7 +2676,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) seq_puts(m, "00000000000000000000000000000000 00 "); #endif rcu_read_lock(); - n = dst_get_neighbour_noref(&rt->dst); + n = rt->n; if (n) { seq_printf(m, "%pi6", n->primary_key); } else { diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8e951d8d3b81..7bf3cc427c28 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -21,9 +21,6 @@ #include <net/ipv6.h> #include <net/tcp.h> -extern int sysctl_tcp_syncookies; -extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; - #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9c06eafaf695..6cc67ed6c2e6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -486,7 +486,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; - struct ipv6_txoptions *opt = np->opt; int err = -ENOMEM; /* First, grab a route. */ @@ -500,13 +499,11 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, fl6->daddr = treq->rmt_addr; skb_set_queue_mapping(skb, queue_mapping); - err = ip6_xmit(sk, skb, fl6, opt, np->tclass); + err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); } done: - if (opt && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); return err; } @@ -1229,7 +1226,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; - struct ipv6_txoptions *opt; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif @@ -1290,7 +1286,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } treq = inet6_rsk(req); - opt = np->opt; if (sk_acceptq_is_full(sk)) goto out_overflow; @@ -1359,11 +1354,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, but we make one more one thing there: reattach optmem to newsk. */ - if (opt) { - newnp->opt = ipv6_dup_options(newsk, opt); - if (opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); - } + if (np->opt) + newnp->opt = ipv6_dup_options(newsk, np->opt); inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt) @@ -1410,8 +1402,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, out_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: - if (opt && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); out: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d7494845efbf..bb02038b822b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -103,6 +103,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ + xdst->u.rt6.n = neigh_clone(rt->n); xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 819c342f5b30..9730882697aa 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -640,6 +640,14 @@ find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) } static int +ip_set_none(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + return -EOPNOTSUPP; +} + +static int ip_set_create(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) @@ -1539,6 +1547,10 @@ nlmsg_failure: } static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { + [IPSET_CMD_NONE] = { + .call = ip_set_none, + .attr_count = IPSET_ATTR_CMD_MAX, + }, [IPSET_CMD_CREATE] = { .call = ip_set_create, .attr_count = IPSET_ATTR_CMD_MAX, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index ee863943c826..d5d3607ae7bc 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -38,30 +38,6 @@ struct iface_node { #define iface_data(n) (rb_entry(n, struct iface_node, node)->iface) -static inline long -ifname_compare(const char *_a, const char *_b) -{ - const long *a = (const long *)_a; - const long *b = (const long *)_b; - - BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); - if (a[0] != b[0]) - return a[0] - b[0]; - if (IFNAMSIZ > sizeof(long)) { - if (a[1] != b[1]) - return a[1] - b[1]; - } - if (IFNAMSIZ > 2 * sizeof(long)) { - if (a[2] != b[2]) - return a[2] - b[2]; - } - if (IFNAMSIZ > 3 * sizeof(long)) { - if (a[3] != b[3]) - return a[3] - b[3]; - } - return 0; -} - static void rbtree_destroy(struct rb_root *root) { @@ -99,7 +75,7 @@ iface_test(struct rb_root *root, const char **iface) while (n) { const char *d = iface_data(n); - long res = ifname_compare(*iface, d); + int res = strcmp(*iface, d); if (res < 0) n = n->rb_left; @@ -121,7 +97,7 @@ iface_add(struct rb_root *root, const char **iface) while (*n) { char *ifname = iface_data(*n); - long res = ifname_compare(*iface, ifname); + int res = strcmp(*iface, ifname); p = *n; if (res < 0) @@ -366,7 +342,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface4_elem data = { .cidr = HOST_MASK }; u32 ip = 0, ip_to, last; u32 timeout = h->timeout; - char iface[IFNAMSIZ] = {}; + char iface[IFNAMSIZ]; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -663,7 +639,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem data = { .cidr = HOST_MASK }; u32 timeout = h->timeout; - char iface[IFNAMSIZ] = {}; + char iface[IFNAMSIZ]; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index dd811b8dd97c..d43e3c122f7b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -76,19 +76,19 @@ static void __ip_vs_del_service(struct ip_vs_service *svc); #ifdef CONFIG_IP_VS_IPV6 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ -static int __ip_vs_addr_is_local_v6(struct net *net, - const struct in6_addr *addr) +static bool __ip_vs_addr_is_local_v6(struct net *net, + const struct in6_addr *addr) { - struct rt6_info *rt; struct flowi6 fl6 = { .daddr = *addr, }; + struct dst_entry *dst = ip6_route_output(net, NULL, &fl6); + bool is_local; - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); - if (rt && rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) - return 1; + is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK); - return 0; + dst_release(dst); + return is_local; } #endif diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b9b8f4ac7a36..14f67a2cbcb5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -4,7 +4,7 @@ * (C) 2001 by Jay Schulist <jschlst@samba.org> * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org> * (C) 2003 by Patrick Mchardy <kaber@trash.net> - * (C) 2005-2011 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2005-2012 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial connection tracking via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -1627,6 +1627,155 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; } +static int +ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + __u16 cpu, const struct ip_conntrack_stat *st) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0, event; + + event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(cpu); + + if (nla_put_be32(skb, CTA_STATS_SEARCHED, htonl(st->searched)) || + nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || + nla_put_be32(skb, CTA_STATS_NEW, htonl(st->new)) || + nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || + nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) || + nla_put_be32(skb, CTA_STATS_DELETE, htonl(st->delete)) || + nla_put_be32(skb, CTA_STATS_DELETE_LIST, htonl(st->delete_list)) || + nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) || + nla_put_be32(skb, CTA_STATS_INSERT_FAILED, + htonl(st->insert_failed)) || + nla_put_be32(skb, CTA_STATS_DROP, htonl(st->drop)) || + nla_put_be32(skb, CTA_STATS_EARLY_DROP, htonl(st->early_drop)) || + nla_put_be32(skb, CTA_STATS_ERROR, htonl(st->error)) || + nla_put_be32(skb, CTA_STATS_SEARCH_RESTART, + htonl(st->search_restart))) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return skb->len; + +nla_put_failure: +nlmsg_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int cpu; + struct net *net = sock_net(skb->sk); + + if (cb->args[0] == nr_cpu_ids) + return 0; + + for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) { + const struct ip_conntrack_stat *st; + + if (!cpu_possible(cpu)) + continue; + + st = per_cpu_ptr(net->ct.stat, cpu); + if (ctnetlink_ct_stat_cpu_fill_info(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + cpu, st) < 0) + break; + } + cb->args[0] = cpu; + + return skb->len; +} + +static int +ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_ct_stat_cpu_dump, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } + + return 0; +} + +static int +ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + struct net *net) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int nr_conntracks = atomic_read(&net->ct.count); + + event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks))) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return skb->len; + +nla_put_failure: +nlmsg_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + struct sk_buff *skb2; + int err; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + sock_net(skb->sk)); + if (err <= 0) + goto free; + + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) + goto out; + + return 0; + +free: + kfree_skb(skb2); +out: + /* this avoids a loop in nfnetlink. */ + return err == -EAGAIN ? -ENOBUFS : err; +} + #ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT static size_t ctnetlink_nfqueue_build_size(const struct nf_conn *ct) @@ -2440,6 +2589,79 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; } +static int +ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int cpu, + const struct ip_conntrack_stat *st) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0, event; + + event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(cpu); + + if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) || + nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) || + nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete))) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return skb->len; + +nla_put_failure: +nlmsg_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int cpu; + struct net *net = sock_net(skb->sk); + + if (cb->args[0] == nr_cpu_ids) + return 0; + + for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) { + const struct ip_conntrack_stat *st; + + if (!cpu_possible(cpu)) + continue; + + st = per_cpu_ptr(net->ct.stat, cpu); + if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + cpu, st) < 0) + break; + } + cb->args[0] = cpu; + + return skb->len; +} + +static int +ctnetlink_stat_exp_cpu(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_exp_stat_cpu_dump, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } + + return 0; +} + #ifdef CONFIG_NF_CONNTRACK_EVENTS static struct nf_ct_event_notifier ctnl_notifier = { .fcn = ctnetlink_conntrack_event, @@ -2463,6 +2685,8 @@ static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, .attr_count = CTA_MAX, .policy = ct_nla_policy }, + [IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu }, + [IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct }, }; static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { @@ -2475,6 +2699,7 @@ static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, .attr_count = CTA_EXPECT_MAX, .policy = exp_nla_policy }, + [IPCTNL_MSG_EXP_GET_STATS_CPU] = { .call = ctnetlink_stat_exp_cpu }, }; static const struct nfnetlink_subsystem ctnl_subsys = { diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 1ea919450fc3..0dc63854390f 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -39,16 +39,13 @@ static int nf_ct_register_sysctl(struct net *net, struct ctl_table_header **header, const char *path, - struct ctl_table *table, - unsigned int *users) + struct ctl_table *table) { if (*header == NULL) { *header = register_net_sysctl(net, path, table); if (*header == NULL) return -ENOMEM; } - if (users != NULL) - (*users)++; return 0; } @@ -56,9 +53,9 @@ nf_ct_register_sysctl(struct net *net, static void nf_ct_unregister_sysctl(struct ctl_table_header **header, struct ctl_table **table, - unsigned int *users) + unsigned int users) { - if (users != NULL && --*users > 0) + if (users > 0) return; unregister_net_sysctl_table(*header); @@ -191,8 +188,7 @@ static int nf_ct_l3proto_register_sysctl(struct net *net, err = nf_ct_register_sysctl(net, &in->ctl_table_header, l3proto->ctl_table_path, - in->ctl_table, - NULL); + in->ctl_table); if (err < 0) { kfree(in->ctl_table); in->ctl_table = NULL; @@ -213,7 +209,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct net *net, if (in->ctl_table_header != NULL) nf_ct_unregister_sysctl(&in->ctl_table_header, &in->ctl_table, - NULL); + 0); #endif } @@ -253,18 +249,23 @@ int nf_conntrack_l3proto_register(struct net *net, { int ret = 0; - if (net == &init_net) - ret = nf_conntrack_l3proto_register_net(proto); + if (proto->init_net) { + ret = proto->init_net(net); + if (ret < 0) + return ret; + } + ret = nf_ct_l3proto_register_sysctl(net, proto); if (ret < 0) return ret; - if (proto->init_net) { - ret = proto->init_net(net); + if (net == &init_net) { + ret = nf_conntrack_l3proto_register_net(proto); if (ret < 0) - return ret; + nf_ct_l3proto_unregister_sysctl(net, proto); } - return nf_ct_l3proto_register_sysctl(net, proto); + + return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); @@ -302,93 +303,77 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) { - switch (l4proto->l4proto) { - case IPPROTO_TCP: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp; - case IPPROTO_UDP: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.udp; - case IPPROTO_ICMP: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmp; - case IPPROTO_ICMPV6: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmpv6; - case 255: /* l4proto_generic */ - return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; - default: - if (l4proto->net_id) - return net_generic(net, *l4proto->net_id); - else - return NULL; + if (l4proto->get_net_proto) { + /* statically built-in protocols use static per-net */ + return l4proto->get_net_proto(net); + } else if (l4proto->net_id) { + /* ... and loadable protocols use dynamic per-net */ + return net_generic(net, *l4proto->net_id); } return NULL; } static int nf_ct_l4proto_register_sysctl(struct net *net, + struct nf_proto_net *pn, struct nf_conntrack_l4proto *l4proto) { int err = 0; - struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); - if (pn == NULL) - return 0; #ifdef CONFIG_SYSCTL if (pn->ctl_table != NULL) { err = nf_ct_register_sysctl(net, &pn->ctl_table_header, "net/netfilter", - pn->ctl_table, - &pn->users); + pn->ctl_table); if (err < 0) { if (!pn->users) { kfree(pn->ctl_table); pn->ctl_table = NULL; } - goto out; } } #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) { + if (err < 0) { + nf_ct_kfree_compat_sysctl_table(pn); + goto out; + } err = nf_ct_register_sysctl(net, &pn->ctl_compat_header, "net/ipv4/netfilter", - pn->ctl_compat_table, - NULL); + pn->ctl_compat_table); if (err == 0) goto out; - kfree(pn->ctl_compat_table); - pn->ctl_compat_table = NULL; + nf_ct_kfree_compat_sysctl_table(pn); nf_ct_unregister_sysctl(&pn->ctl_table_header, &pn->ctl_table, - &pn->users); + pn->users); } -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ out: +#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ return err; } static void nf_ct_l4proto_unregister_sysctl(struct net *net, + struct nf_proto_net *pn, struct nf_conntrack_l4proto *l4proto) { - struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); - if (pn == NULL) - return; #ifdef CONFIG_SYSCTL if (pn->ctl_table_header != NULL) nf_ct_unregister_sysctl(&pn->ctl_table_header, &pn->ctl_table, - &pn->users); + pn->users); #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL) nf_ct_unregister_sysctl(&pn->ctl_compat_header, &pn->ctl_compat_table, - NULL); + 0); #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ -#else - pn->users--; #endif /* CONFIG_SYSCTL */ } @@ -454,19 +439,33 @@ int nf_conntrack_l4proto_register(struct net *net, struct nf_conntrack_l4proto *l4proto) { int ret = 0; - if (net == &init_net) - ret = nf_conntrack_l4proto_register_net(l4proto); + struct nf_proto_net *pn = NULL; - if (ret < 0) - return ret; + if (l4proto->init_net) { + ret = l4proto->init_net(net, l4proto->l3proto); + if (ret < 0) + goto out; + } - if (l4proto->init_net) - ret = l4proto->init_net(net); + pn = nf_ct_l4proto_net(net, l4proto); + if (pn == NULL) + goto out; + ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto); if (ret < 0) - return ret; + goto out; - return nf_ct_l4proto_register_sysctl(net, l4proto); + if (net == &init_net) { + ret = nf_conntrack_l4proto_register_net(l4proto); + if (ret < 0) { + nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); + goto out; + } + } + + pn->users++; +out: + return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); @@ -490,10 +489,18 @@ nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) void nf_conntrack_l4proto_unregister(struct net *net, struct nf_conntrack_l4proto *l4proto) { + struct nf_proto_net *pn = NULL; + if (net == &init_net) nf_conntrack_l4proto_unregister_net(l4proto); - nf_ct_l4proto_unregister_sysctl(net, l4proto); + pn = nf_ct_l4proto_net(net, l4proto); + if (pn == NULL) + return; + + pn->users--; + nf_ct_l4proto_unregister_sysctl(net, pn, l4proto); + /* Remove all contrack entries for this protocol */ rtnl_lock(); nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); @@ -505,10 +512,15 @@ int nf_conntrack_proto_init(struct net *net) { unsigned int i; int err; - err = nf_conntrack_l4proto_generic.init_net(net); + struct nf_proto_net *pn = nf_ct_l4proto_net(net, + &nf_conntrack_l4proto_generic); + + err = nf_conntrack_l4proto_generic.init_net(net, + nf_conntrack_l4proto_generic.l3proto); if (err < 0) return err; err = nf_ct_l4proto_register_sysctl(net, + pn, &nf_conntrack_l4proto_generic); if (err < 0) return err; @@ -518,13 +530,20 @@ int nf_conntrack_proto_init(struct net *net) rcu_assign_pointer(nf_ct_l3protos[i], &nf_conntrack_l3proto_generic); } + + pn->users++; return 0; } void nf_conntrack_proto_fini(struct net *net) { unsigned int i; + struct nf_proto_net *pn = nf_ct_l4proto_net(net, + &nf_conntrack_l4proto_generic); + + pn->users--; nf_ct_l4proto_unregister_sysctl(net, + pn, &nf_conntrack_l4proto_generic); if (net == &init_net) { /* free l3proto protocol tables */ diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index c33f76af913f..6535326cf07c 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -387,7 +387,7 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = /* this module per-net specifics */ static int dccp_net_id __read_mostly; struct dccp_net { - struct nf_proto_net np; + struct nf_proto_net pn; int dccp_loose; unsigned int dccp_timeout[CT_DCCP_MAX + 1]; }; @@ -815,16 +815,37 @@ static struct ctl_table dccp_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static int dccp_init_net(struct net *net) +static int dccp_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct dccp_net *dn) { - struct dccp_net *dn = dccp_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)dn; - #ifdef CONFIG_SYSCTL - if (!pn->ctl_table) { -#else - if (!pn->users++) { + if (pn->ctl_table) + return 0; + + pn->ctl_table = kmemdup(dccp_sysctl_table, + sizeof(dccp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + + pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST]; + pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND]; + pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN]; + pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN]; + pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ]; + pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING]; + pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT]; + pn->ctl_table[7].data = &dn->dccp_loose; #endif + return 0; +} + +static int dccp_init_net(struct net *net, u_int16_t proto) +{ + struct dccp_net *dn = dccp_pernet(net); + struct nf_proto_net *pn = &dn->pn; + + if (!pn->users) { /* default values */ dn->dccp_loose = 1; dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; @@ -834,24 +855,9 @@ static int dccp_init_net(struct net *net) dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; -#ifdef CONFIG_SYSCTL - pn->ctl_table = kmemdup(dccp_sysctl_table, - sizeof(dccp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST]; - pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND]; - pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN]; - pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN]; - pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ]; - pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING]; - pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT]; - pn->ctl_table[7].data = &dn->dccp_loose; -#endif } - return 0; + + return dccp_kmemdup_sysctl_table(pn, dn); } static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index bb0e74fe0fae..d25f29377648 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -135,34 +135,62 @@ static struct ctl_table generic_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -static int generic_init_net(struct net *net) +static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct nf_generic_net *gn) { - struct nf_generic_net *gn = generic_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)gn; - gn->timeout = nf_ct_generic_timeout; #ifdef CONFIG_SYSCTL pn->ctl_table = kmemdup(generic_sysctl_table, sizeof(generic_sysctl_table), GFP_KERNEL); if (!pn->ctl_table) return -ENOMEM; + pn->ctl_table[0].data = &gn->timeout; +#endif + return 0; +} +static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, + struct nf_generic_net *gn) +{ +#ifdef CONFIG_SYSCTL #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table, sizeof(generic_compat_sysctl_table), GFP_KERNEL); - if (!pn->ctl_compat_table) { - kfree(pn->ctl_table); - pn->ctl_table = NULL; + if (!pn->ctl_compat_table) return -ENOMEM; - } + pn->ctl_compat_table[0].data = &gn->timeout; #endif #endif return 0; } +static int generic_init_net(struct net *net, u_int16_t proto) +{ + int ret; + struct nf_generic_net *gn = generic_pernet(net); + struct nf_proto_net *pn = &gn->pn; + + gn->timeout = nf_ct_generic_timeout; + + ret = generic_kmemdup_compat_sysctl_table(pn, gn); + if (ret < 0) + return ret; + + ret = generic_kmemdup_sysctl_table(pn, gn); + if (ret < 0) + nf_ct_kfree_compat_sysctl_table(pn); + + return ret; +} + +static struct nf_proto_net *generic_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.generic.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, @@ -184,4 +212,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = generic_init_net, + .get_net_proto = generic_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 5cac41c2fa09..b09b7af7f6f8 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -348,7 +348,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { }; #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -static int gre_init_net(struct net *net) +static int gre_init_net(struct net *net, u_int16_t proto) { struct netns_proto_gre *net_gre = gre_pernet(net); int i; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 8fb0582ad397..c746d61f83ed 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -707,23 +707,10 @@ static struct ctl_table sctp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif -static void sctp_init_net_data(struct sctp_net *sn) -{ - int i; -#ifdef CONFIG_SYSCTL - if (!sn->pn.ctl_table) { -#else - if (!sn->pn.users++) { -#endif - for (i = 0; i < SCTP_CONNTRACK_MAX; i++) - sn->timeouts[i] = sctp_timeouts[i]; - } -} - -static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn) +static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct sctp_net *sn) { #ifdef CONFIG_SYSCTL - struct sctp_net *sn = (struct sctp_net *)pn; if (pn->ctl_table) return 0; @@ -744,11 +731,11 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn) return 0; } -static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) +static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, + struct sctp_net *sn) { #ifdef CONFIG_SYSCTL #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - struct sctp_net *sn = (struct sctp_net *)pn; pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table, sizeof(sctp_compat_sysctl_table), GFP_KERNEL); @@ -767,41 +754,33 @@ static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) return 0; } -static int sctpv4_init_net(struct net *net) +static int sctp_init_net(struct net *net, u_int16_t proto) { int ret; struct sctp_net *sn = sctp_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)sn; + struct nf_proto_net *pn = &sn->pn; - sctp_init_net_data(sn); + if (!pn->users) { + int i; - ret = sctp_kmemdup_compat_sysctl_table(pn); - if (ret < 0) - return ret; + for (i = 0; i < SCTP_CONNTRACK_MAX; i++) + sn->timeouts[i] = sctp_timeouts[i]; + } - ret = sctp_kmemdup_sysctl_table(pn); + if (proto == AF_INET) { + ret = sctp_kmemdup_compat_sysctl_table(pn, sn); + if (ret < 0) + return ret; -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (ret < 0) { + ret = sctp_kmemdup_sysctl_table(pn, sn); + if (ret < 0) + nf_ct_kfree_compat_sysctl_table(pn); + } else + ret = sctp_kmemdup_sysctl_table(pn, sn); - kfree(pn->ctl_compat_table); - pn->ctl_compat_table = NULL; - } -#endif -#endif return ret; } -static int sctpv6_init_net(struct net *net) -{ - struct sctp_net *sn = sctp_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)sn; - - sctp_init_net_data(sn); - return sctp_kmemdup_sysctl_table(pn); -} - static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, @@ -833,7 +812,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .net_id = &sctp_net_id, - .init_net = sctpv4_init_net, + .init_net = sctp_init_net, }; static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { @@ -867,7 +846,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #endif .net_id = &sctp_net_id, - .init_net = sctpv6_init_net, + .init_net = sctp_init_net, }; static int sctp_net_init(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 99caa1304477..a5ac11ebef33 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -821,7 +821,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, static unsigned int *tcp_get_timeouts(struct net *net) { - return tcp_timeouts; + return tcp_pernet(net)->timeouts; } /* Returns verdict for packet, or -1 for invalid. */ @@ -1533,11 +1533,10 @@ static struct ctl_table tcp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn) +static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct nf_tcp_net *tn) { #ifdef CONFIG_SYSCTL - struct nf_tcp_net *tn = (struct nf_tcp_net *)pn; - if (pn->ctl_table) return 0; @@ -1564,11 +1563,11 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn) return 0; } -static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) +static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, + struct nf_tcp_net *tn) { #ifdef CONFIG_SYSCTL #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - struct nf_tcp_net *tn = (struct nf_tcp_net *)pn; pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table, sizeof(tcp_compat_sysctl_table), GFP_KERNEL); @@ -1593,18 +1592,15 @@ static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) return 0; } -static int tcpv4_init_net(struct net *net) +static int tcp_init_net(struct net *net, u_int16_t proto) { - int i; - int ret = 0; + int ret; struct nf_tcp_net *tn = tcp_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)tn; + struct nf_proto_net *pn = &tn->pn; + + if (!pn->users) { + int i; -#ifdef CONFIG_SYSCTL - if (!pn->ctl_table) { -#else - if (!pn->users++) { -#endif for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) tn->timeouts[i] = tcp_timeouts[i]; @@ -1613,43 +1609,23 @@ static int tcpv4_init_net(struct net *net) tn->tcp_max_retrans = nf_ct_tcp_max_retrans; } - ret = tcp_kmemdup_compat_sysctl_table(pn); - - if (ret < 0) - return ret; + if (proto == AF_INET) { + ret = tcp_kmemdup_compat_sysctl_table(pn, tn); + if (ret < 0) + return ret; - ret = tcp_kmemdup_sysctl_table(pn); + ret = tcp_kmemdup_sysctl_table(pn, tn); + if (ret < 0) + nf_ct_kfree_compat_sysctl_table(pn); + } else + ret = tcp_kmemdup_sysctl_table(pn, tn); -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (ret < 0) { - kfree(pn->ctl_compat_table); - pn->ctl_compat_table = NULL; - } -#endif -#endif return ret; } -static int tcpv6_init_net(struct net *net) +static struct nf_proto_net *tcp_get_net_proto(struct net *net) { - int i; - struct nf_tcp_net *tn = tcp_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)tn; - -#ifdef CONFIG_SYSCTL - if (!pn->ctl_table) { -#else - if (!pn->users++) { -#endif - for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) - tn->timeouts[i] = tcp_timeouts[i]; - tn->tcp_loose = nf_ct_tcp_loose; - tn->tcp_be_liberal = nf_ct_tcp_be_liberal; - tn->tcp_max_retrans = nf_ct_tcp_max_retrans; - } - - return tcp_kmemdup_sysctl_table(pn); + return &net->ct.nf_ct_proto.tcp.pn; } struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = @@ -1684,7 +1660,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .nla_policy = tcp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .init_net = tcpv4_init_net, + .init_net = tcp_init_net, + .get_net_proto = tcp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); @@ -1720,6 +1697,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .nla_policy = tcp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .init_net = tcpv6_init_net, + .init_net = tcp_init_net, + .get_net_proto = tcp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index a83cf93545cd..59623cc56e8d 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -235,10 +235,10 @@ static struct ctl_table udp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn) +static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct nf_udp_net *un) { #ifdef CONFIG_SYSCTL - struct nf_udp_net *un = (struct nf_udp_net *)pn; if (pn->ctl_table) return 0; pn->ctl_table = kmemdup(udp_sysctl_table, @@ -252,11 +252,11 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn) return 0; } -static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) +static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, + struct nf_udp_net *un) { #ifdef CONFIG_SYSCTL #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - struct nf_udp_net *un = (struct nf_udp_net *)pn; pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table, sizeof(udp_compat_sysctl_table), GFP_KERNEL); @@ -270,50 +270,36 @@ static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) return 0; } -static void udp_init_net_data(struct nf_udp_net *un) +static int udp_init_net(struct net *net, u_int16_t proto) { - int i; -#ifdef CONFIG_SYSCTL - if (!un->pn.ctl_table) { -#else - if (!un->pn.users++) { -#endif + int ret; + struct nf_udp_net *un = udp_pernet(net); + struct nf_proto_net *pn = &un->pn; + + if (!pn->users) { + int i; + for (i = 0; i < UDP_CT_MAX; i++) un->timeouts[i] = udp_timeouts[i]; } -} - -static int udpv4_init_net(struct net *net) -{ - int ret; - struct nf_udp_net *un = udp_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)un; - udp_init_net_data(un); + if (proto == AF_INET) { + ret = udp_kmemdup_compat_sysctl_table(pn, un); + if (ret < 0) + return ret; - ret = udp_kmemdup_compat_sysctl_table(pn); - if (ret < 0) - return ret; + ret = udp_kmemdup_sysctl_table(pn, un); + if (ret < 0) + nf_ct_kfree_compat_sysctl_table(pn); + } else + ret = udp_kmemdup_sysctl_table(pn, un); - ret = udp_kmemdup_sysctl_table(pn); -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (ret < 0) { - kfree(pn->ctl_compat_table); - pn->ctl_compat_table = NULL; - } -#endif -#endif return ret; } -static int udpv6_init_net(struct net *net) +static struct nf_proto_net *udp_get_net_proto(struct net *net) { - struct nf_udp_net *un = udp_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)un; - - udp_init_net_data(un); - return udp_kmemdup_sysctl_table(pn); + return &net->ct.nf_ct_proto.udp.pn; } struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = @@ -343,7 +329,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .init_net = udpv4_init_net, + .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); @@ -374,6 +361,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .init_net = udpv6_init_net, + .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index b32e700f8dde..4b66df209286 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -234,29 +234,38 @@ static struct ctl_table udplite_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static int udplite_init_net(struct net *net) +static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct udplite_net *un) { - int i; - struct udplite_net *un = udplite_pernet(net); - struct nf_proto_net *pn = (struct nf_proto_net *)un; #ifdef CONFIG_SYSCTL - if (!pn->ctl_table) { -#else - if (!pn->users++) { + if (pn->ctl_table) + return 0; + + pn->ctl_table = kmemdup(udplite_sysctl_table, + sizeof(udplite_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + + pn->ctl_table[0].data = &un->timeouts[UDPLITE_CT_UNREPLIED]; + pn->ctl_table[1].data = &un->timeouts[UDPLITE_CT_REPLIED]; #endif + return 0; +} + +static int udplite_init_net(struct net *net, u_int16_t proto) +{ + struct udplite_net *un = udplite_pernet(net); + struct nf_proto_net *pn = &un->pn; + + if (!pn->users) { + int i; + for (i = 0 ; i < UDPLITE_CT_MAX; i++) un->timeouts[i] = udplite_timeouts[i]; -#ifdef CONFIG_SYSCTL - pn->ctl_table = kmemdup(udplite_sysctl_table, - sizeof(udplite_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - pn->ctl_table[0].data = &un->timeouts[UDPLITE_CT_UNREPLIED]; - pn->ctl_table[1].data = &un->timeouts[UDPLITE_CT_REPLIED]; -#endif } - return 0; + + return udplite_kmemdup_sysctl_table(pn, un); } static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 5a2132b97fe9..a26503342e71 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -178,8 +178,10 @@ replay: err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, ss->cb[cb_id].policy); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); return err; + } if (nc->call_rcu) { err = nc->call_rcu(net->nfnl, skb, nlh, @@ -193,9 +195,11 @@ replay: lockdep_is_held(&nfnl_mutex)) != ss || nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; - else + else if (nc->call) err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda); + else + err = -EINVAL; nfnl_unlock(); } if (err == -EAGAIN) diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index a0b64920039d..c0496a55ad0c 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -910,6 +910,11 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS])); mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK])); + if (flags >= NFQA_CFG_F_MAX) { + ret = -EOPNOTSUPP; + goto err_out_unlock; + } + spin_lock_bh(&queue->lock); queue->flags &= ~mask; queue->flags |= flags & mask; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index ca0c29695d51..474167162947 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -67,7 +67,6 @@ struct teql_master { struct teql_sched_data { struct Qdisc *next; struct teql_master *m; - struct neighbour *ncache; struct sk_buff_head q; }; @@ -134,7 +133,6 @@ teql_reset(struct Qdisc *sch) skb_queue_purge(&dat->q); sch->q.qlen = 0; - teql_neigh_release(xchg(&dat->ncache, NULL)); } static void @@ -166,7 +164,6 @@ teql_destroy(struct Qdisc *sch) } } skb_queue_purge(&dat->q); - teql_neigh_release(xchg(&dat->ncache, NULL)); break; } @@ -225,21 +222,25 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev, struct netdev_queue *txq, - struct neighbour *mn) + struct dst_entry *dst) { - struct teql_sched_data *q = qdisc_priv(txq->qdisc); - struct neighbour *n = q->ncache; + struct neighbour *n; + int err = 0; - if (mn->tbl == NULL) - return -EINVAL; - if (n && n->tbl == mn->tbl && - memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) { - atomic_inc(&n->refcnt); - } else { - n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev); - if (IS_ERR(n)) - return PTR_ERR(n); + n = dst_neigh_lookup_skb(dst, skb); + if (!n) + return -ENOENT; + + if (dst->dev != dev) { + struct neighbour *mn; + + mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev); + neigh_release(n); + if (IS_ERR(mn)) + return PTR_ERR(mn); + n = mn; } + if (neigh_event_send(n, skb_res) == 0) { int err; char haddr[MAX_ADDR_LEN]; @@ -248,15 +249,13 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, NULL, skb->len); - if (err < 0) { - neigh_release(n); - return -EINVAL; - } - teql_neigh_release(xchg(&q->ncache, n)); - return 0; + if (err < 0) + err = -EINVAL; + } else { + err = (skb_res == NULL) ? -EAGAIN : 1; } neigh_release(n); - return (skb_res == NULL) ? -EAGAIN : 1; + return err; } static inline int teql_resolve(struct sk_buff *skb, @@ -265,7 +264,6 @@ static inline int teql_resolve(struct sk_buff *skb, struct netdev_queue *txq) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *mn; int res; if (txq->qdisc == &noop_qdisc) @@ -275,8 +273,7 @@ static inline int teql_resolve(struct sk_buff *skb, return 0; rcu_read_lock(); - mn = dst_get_neighbour_noref(dst); - res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0; + res = __teql_resolve(skb, skb_res, dev, txq, dst); rcu_read_unlock(); return res; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5bc9ab161b37..b16517ee1aaf 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -271,6 +271,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a */ asoc->peer.sack_needed = 1; asoc->peer.sack_cnt = 0; + asoc->peer.sack_generation = 1; /* Assume that the peer will tell us if he recognizes ASCONF * as part of INIT exchange. diff --git a/net/sctp/output.c b/net/sctp/output.c index f1b7d4bb591e..6ae47acaaec6 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -248,6 +248,11 @@ static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, /* If the SACK timer is running, we have a pending SACK */ if (timer_pending(timer)) { struct sctp_chunk *sack; + + if (pkt->transport->sack_generation != + pkt->transport->asoc->peer.sack_generation) + return retval; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (sack) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index a85eeeb55dd0..b6de71efb140 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -734,8 +734,10 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) int len; __u32 ctsn; __u16 num_gabs, num_dup_tsns; + struct sctp_association *aptr = (struct sctp_association *)asoc; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; + struct sctp_transport *trans; memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); @@ -805,6 +807,20 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns, sctp_tsnmap_get_dups(map)); + /* Once we have a sack generated, check to see what our sack + * generation is, if its 0, reset the transports to 0, and reset + * the association generation to 1 + * + * The idea is that zero is never used as a valid generation for the + * association so no transport will match after a wrap event like this, + * Until the next sack + */ + if (++aptr->peer.sack_generation == 0) { + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) + trans->sack_generation = 0; + aptr->peer.sack_generation = 1; + } nodata: return retval; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c96d1a81cf42..8716da1a8592 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1268,7 +1268,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_REPORT_TSN: /* Record the arrival of a TSN. */ error = sctp_tsnmap_mark(&asoc->peer.tsn_map, - cmd->obj.u32); + cmd->obj.u32, NULL); break; case SCTP_CMD_REPORT_FWDTSN: diff --git a/net/sctp/transport.c b/net/sctp/transport.c index b026ba0c6992..1dcceb6e0ce6 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -68,6 +68,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); memset(&peer->saddr, 0, sizeof(union sctp_addr)); + peer->sack_generation = 0; + /* From 6.3.1 RTO Calculation: * * C1) Until an RTT measurement has been made for a packet sent to the diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index f1e40cebc981..b5fb7c409023 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -114,7 +114,8 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn) /* Mark this TSN as seen. */ -int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) +int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, + struct sctp_transport *trans) { u16 gap; @@ -133,6 +134,9 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) */ map->max_tsn_seen++; map->cumulative_tsn_ack_point++; + if (trans) + trans->sack_generation = + trans->asoc->peer.sack_generation; map->base_tsn++; } else { /* Either we already have a gap, or about to record a gap, so diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8a84017834c2..33d894776192 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -715,7 +715,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, * can mark it as received so the tsn_map is updated correctly. */ if (sctp_tsnmap_mark(&asoc->peer.tsn_map, - ntohl(chunk->subh.data_hdr->tsn))) + ntohl(chunk->subh.data_hdr->tsn), + chunk->transport)) goto fail_mark; /* First calculate the padding, so we don't inadvertently diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index f2d1de7f2ffb..f5a6a4f4faf7 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1051,7 +1051,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, if (chunk && (freed >= needed)) { __u32 tsn; tsn = ntohl(chunk->subh.data_hdr->tsn); - sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn); + sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); sctp_ulpq_tail_data(ulpq, chunk, gfp); sctp_ulpq_partial_delivery(ulpq, chunk, gfp); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 890b03f8d877..62d0dac8f780 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1014,9 +1014,6 @@ static void xs_udp_data_ready(struct sock *sk, int len) UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS); - /* Something worked... */ - dst_confirm(skb_dst(skb)); - xprt_adjust_cwnd(task, copied); xprt_complete_rqst(task, copied); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ccfbd328a69d..6e97855b5842 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1500,9 +1500,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (!dev) goto free_dst; - /* Copy neighbour for reachability confirmation */ - dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour_noref(dst))); - xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); xfrm_init_pmtu(dst_prev); @@ -2404,9 +2401,11 @@ static unsigned int xfrm_mtu(const struct dst_entry *dst) return mtu ? : dst_mtu(dst->path); } -static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { - return dst_neigh_lookup(dst->path, daddr); + return dst->path->ops->neigh_lookup(dst, skb, daddr); } int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) diff --git a/security/security.c b/security/security.c index 3efc9b12aef4..860aeb349cb3 100644 --- a/security/security.c +++ b/security/security.c @@ -23,6 +23,7 @@ #include <linux/mman.h> #include <linux/mount.h> #include <linux/personality.h> +#include <linux/backing-dev.h> #include <net/flow.h> #define MAX_LSM_EVM_XATTR 2 diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5ccf10a4d593..aa4c25e0f327 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6688,6 +6688,31 @@ static const struct alc_model_fixup alc662_fixup_models[] = { {} }; +static void alc662_fill_coef(struct hda_codec *codec) +{ + int val, coef; + + coef = alc_get_coef0(codec); + + switch (codec->vendor_id) { + case 0x10ec0662: + if ((coef & 0x00f0) == 0x0030) { + val = alc_read_coef_idx(codec, 0x4); /* EAPD Ctrl */ + alc_write_coef_idx(codec, 0x4, val & ~(1<<10)); + } + break; + case 0x10ec0272: + case 0x10ec0273: + case 0x10ec0663: + case 0x10ec0665: + case 0x10ec0670: + case 0x10ec0671: + case 0x10ec0672: + val = alc_read_coef_idx(codec, 0xd); /* EAPD Ctrl */ + alc_write_coef_idx(codec, 0xd, val | (1<<14)); + break; + } +} /* */ @@ -6707,6 +6732,9 @@ static int patch_alc662(struct hda_codec *codec) alc_fix_pll_init(codec, 0x20, 0x04, 15); + spec->init_hook = alc662_fill_coef; + alc662_fill_coef(codec); + alc_pick_fixup(codec, alc662_fixup_models, alc662_fixup_tbl, alc662_fixups); alc_apply_fixup(codec, ALC_FIXUP_ACT_PRE_PROBE); diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 64d2a4fa34b2..e9b62b5ea637 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -935,9 +935,7 @@ static int aic3x_hw_params(struct snd_pcm_substream *substream, } found: - data = snd_soc_read(codec, AIC3X_PLL_PROGA_REG); - snd_soc_write(codec, AIC3X_PLL_PROGA_REG, - data | (pll_p << PLLP_SHIFT)); + snd_soc_update_bits(codec, AIC3X_PLL_PROGA_REG, PLLP_MASK, pll_p); snd_soc_write(codec, AIC3X_OVRF_STATUS_AND_PLLR_REG, pll_r << PLLR_SHIFT); snd_soc_write(codec, AIC3X_PLL_PROGB_REG, pll_j << PLLJ_SHIFT); diff --git a/sound/soc/codecs/tlv320aic3x.h b/sound/soc/codecs/tlv320aic3x.h index 6f097fb60683..08c7f6685ff0 100644 --- a/sound/soc/codecs/tlv320aic3x.h +++ b/sound/soc/codecs/tlv320aic3x.h @@ -166,6 +166,7 @@ /* PLL registers bitfields */ #define PLLP_SHIFT 0 +#define PLLP_MASK 7 #define PLLQ_SHIFT 3 #define PLLR_SHIFT 0 #define PLLJ_SHIFT 2 diff --git a/sound/soc/codecs/wm2200.c b/sound/soc/codecs/wm2200.c index acbdc5fde923..32682c1b7cde 100644 --- a/sound/soc/codecs/wm2200.c +++ b/sound/soc/codecs/wm2200.c @@ -1491,6 +1491,7 @@ static int wm2200_bclk_rates_dat[WM2200_NUM_BCLK_RATES] = { static int wm2200_bclk_rates_cd[WM2200_NUM_BCLK_RATES] = { 5644800, + 3763200, 2882400, 1881600, 1411200, |