diff options
330 files changed, 7678 insertions, 2503 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt index 1a0b704e1a38..da57601153a0 100644 --- a/Documentation/bpf/bpf_devel_QA.txt +++ b/Documentation/bpf/bpf_devel_QA.txt @@ -557,6 +557,14 @@ A: Although LLVM IR generation and optimization try to stay architecture pulls in some header files containing file scope host assembly codes. - You can add "-fno-jump-tables" to work around the switch table issue. - Otherwise, you can use bpf target. + Otherwise, you can use bpf target. Additionally, you _must_ use bpf target + when: + + - Your program uses data structures with pointer or long / unsigned long + types that interface with BPF helpers or context data structures. Access + into these structures is verified by the BPF verifier and may result + in verification failures if the native architecture is not aligned with + the BPF architecture, e.g. 64-bit. An example of this is + BPF_PROG_TYPE_SK_MSG require '-target bpf' Happy BPF hacking! diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index 5efae00f6c7f..d2963123eb1c 100644 --- a/Documentation/filesystems/nfs/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt @@ -5,6 +5,7 @@ Written 1996 by Gero Kuhlmann <gero@gkminix.han.de> Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz> Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org> Updated 2006 by Horms <horms@verge.net.au> +Updated 2018 by Chris Novakovic <chris@chrisn.me.uk> @@ -79,7 +80,7 @@ nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>] ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>: - <dns0-ip>:<dns1-ip> + <dns0-ip>:<dns1-ip>:<ntp0-ip> This parameter tells the kernel how to configure IP addresses of devices and also how to set up the IP routing table. It was originally called @@ -110,6 +111,9 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>: will not be triggered if it is missing and NFS root is not in operation. + Value is exported to /proc/net/pnp with the prefix "bootserver " + (see below). + Default: Determined using autoconfiguration. The address of the autoconfiguration server is used. @@ -123,10 +127,13 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>: Default: Determined using autoconfiguration. - <hostname> Name of the client. May be supplied by autoconfiguration, - but its absence will not trigger autoconfiguration. - If specified and DHCP is used, the user provided hostname will - be carried in the DHCP request to hopefully update DNS record. + <hostname> Name of the client. If a '.' character is present, anything + before the first '.' is used as the client's hostname, and anything + after it is used as its NIS domain name. May be supplied by + autoconfiguration, but its absence will not trigger autoconfiguration. + If specified and DHCP is used, the user-provided hostname (and NIS + domain name, if present) will be carried in the DHCP request; this + may cause a DNS record to be created or updated for the client. Default: Client IP address is used in ASCII notation. @@ -162,12 +169,55 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>: Default: any - <dns0-ip> IP address of first nameserver. - Value gets exported by /proc/net/pnp which is often linked - on embedded systems by /etc/resolv.conf. + <dns0-ip> IP address of primary nameserver. + Value is exported to /proc/net/pnp with the prefix "nameserver " + (see below). + + Default: None if not using autoconfiguration; determined + automatically if using autoconfiguration. + + <dns1-ip> IP address of secondary nameserver. + See <dns0-ip>. + + <ntp0-ip> IP address of a Network Time Protocol (NTP) server. + Value is exported to /proc/net/ipconfig/ntp_servers, but is + otherwise unused (see below). + + Default: None if not using autoconfiguration; determined + automatically if using autoconfiguration. + + After configuration (whether manual or automatic) is complete, two files + are created in the following format; lines are omitted if their respective + value is empty following configuration: + + - /proc/net/pnp: + + #PROTO: <DHCP|BOOTP|RARP|MANUAL> (depending on configuration method) + domain <dns-domain> (if autoconfigured, the DNS domain) + nameserver <dns0-ip> (primary name server IP) + nameserver <dns1-ip> (secondary name server IP) + nameserver <dns2-ip> (tertiary name server IP) + bootserver <server-ip> (NFS server IP) + + - /proc/net/ipconfig/ntp_servers: + + <ntp0-ip> (NTP server IP) + <ntp1-ip> (NTP server IP) + <ntp2-ip> (NTP server IP) + + <dns-domain> and <dns2-ip> (in /proc/net/pnp) and <ntp1-ip> and <ntp2-ip> + (in /proc/net/ipconfig/ntp_servers) are requested during autoconfiguration; + they cannot be specified as part of the "ip=" kernel command line parameter. + + Because the "domain" and "nameserver" options are recognised by DNS + resolvers, /etc/resolv.conf is often linked to /proc/net/pnp on systems + that use an NFS root filesystem. - <dns1-ip> IP address of second nameserver. - Same as above. + Note that the kernel will not synchronise the system time with any NTP + servers it discovers; this is the responsibility of a user space process + (e.g. an initrd/initramfs script that passes the IP addresses listed in + /proc/net/ipconfig/ntp_servers to an NTP client before mounting the real + root filesystem if it is on NFS). nfsrootdebug diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt index c77f9d57eb91..c4a54c162547 100644 --- a/Documentation/networking/netdev-features.txt +++ b/Documentation/networking/netdev-features.txt @@ -113,6 +113,13 @@ whatever headers there might be. NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6). + * Transmit UDP segmentation offload + +NETIF_F_GSO_UDP_GSO_L4 accepts a single UDP header with a payload that exceeds +gso_size. On segmentation, it segments the payload on gso_size boundaries and +replicates the network and UDP headers (fixing up the last one if less than +gso_size). + * Transmit DMA from high memory On platforms where this is relevant, NETIF_F_HIGHDMA signals that diff --git a/MAINTAINERS b/MAINTAINERS index fc812fb5857a..a52800867850 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1373,7 +1373,8 @@ F: arch/arm/mach-ebsa110/ F: drivers/net/ethernet/amd/am79c961a.* ARM/ENERGY MICRO (SILICON LABS) EFM32 SUPPORT -M: Uwe Kleine-König <kernel@pengutronix.de> +M: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> +R: Pengutronix Kernel Team <kernel@pengutronix.de> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained N: efm32 @@ -1401,7 +1402,8 @@ F: arch/arm/mach-footbridge/ ARM/FREESCALE IMX / MXC ARM ARCHITECTURE M: Shawn Guo <shawnguo@kernel.org> -M: Sascha Hauer <kernel@pengutronix.de> +M: Sascha Hauer <s.hauer@pengutronix.de> +R: Pengutronix Kernel Team <kernel@pengutronix.de> R: Fabio Estevam <fabio.estevam@nxp.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -1416,7 +1418,8 @@ F: include/soc/imx/ ARM/FREESCALE VYBRID ARM ARCHITECTURE M: Shawn Guo <shawnguo@kernel.org> -M: Sascha Hauer <kernel@pengutronix.de> +M: Sascha Hauer <s.hauer@pengutronix.de> +R: Pengutronix Kernel Team <kernel@pengutronix.de> R: Stefan Agner <stefan@agner.ch> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -4245,6 +4248,9 @@ F: include/trace/events/fs_dax.h DEVICE DIRECT ACCESS (DAX) M: Dan Williams <dan.j.williams@intel.com> +M: Dave Jiang <dave.jiang@intel.com> +M: Ross Zwisler <ross.zwisler@linux.intel.com> +M: Vishal Verma <vishal.l.verma@intel.com> L: linux-nvdimm@lists.01.org S: Supported F: drivers/dax/ @@ -5651,7 +5657,8 @@ F: drivers/net/ethernet/freescale/fec.h F: Documentation/devicetree/bindings/net/fsl-fec.txt FREESCALE IMX / MXC FRAMEBUFFER DRIVER -M: Sascha Hauer <kernel@pengutronix.de> +M: Sascha Hauer <s.hauer@pengutronix.de> +R: Pengutronix Kernel Team <kernel@pengutronix.de> L: linux-fbdev@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -6263,7 +6270,7 @@ S: Odd Fixes F: drivers/media/usb/hdpvr/ HEWLETT PACKARD ENTERPRISE ILO NMI WATCHDOG DRIVER -M: Jimmy Vance <jimmy.vance@hpe.com> +M: Jerry Hoemann <jerry.hoemann@hpe.com> S: Supported F: Documentation/watchdog/hpwdt.txt F: drivers/watchdog/hpwdt.c @@ -8055,6 +8062,9 @@ F: tools/lib/lockdep/ LIBNVDIMM BLK: MMIO-APERTURE DRIVER M: Ross Zwisler <ross.zwisler@linux.intel.com> +M: Dan Williams <dan.j.williams@intel.com> +M: Vishal Verma <vishal.l.verma@intel.com> +M: Dave Jiang <dave.jiang@intel.com> L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported @@ -8063,6 +8073,9 @@ F: drivers/nvdimm/region_devs.c LIBNVDIMM BTT: BLOCK TRANSLATION TABLE M: Vishal Verma <vishal.l.verma@intel.com> +M: Dan Williams <dan.j.williams@intel.com> +M: Ross Zwisler <ross.zwisler@linux.intel.com> +M: Dave Jiang <dave.jiang@intel.com> L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported @@ -8070,6 +8083,9 @@ F: drivers/nvdimm/btt* LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER M: Ross Zwisler <ross.zwisler@linux.intel.com> +M: Dan Williams <dan.j.williams@intel.com> +M: Vishal Verma <vishal.l.verma@intel.com> +M: Dave Jiang <dave.jiang@intel.com> L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported @@ -8085,6 +8101,9 @@ F: Documentation/devicetree/bindings/pmem/pmem-region.txt LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM M: Dan Williams <dan.j.williams@intel.com> +M: Ross Zwisler <ross.zwisler@linux.intel.com> +M: Vishal Verma <vishal.l.verma@intel.com> +M: Dave Jiang <dave.jiang@intel.com> L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git @@ -12824,7 +12843,8 @@ F: include/linux/siphash.h SIOX M: Gavin Schenk <g.schenk@eckelmann.de> -M: Uwe Kleine-König <kernel@pengutronix.de> +M: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> +R: Pengutronix Kernel Team <kernel@pengutronix.de> S: Supported F: drivers/siox/* F: include/trace/events/siox.h @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Fearless Coyote # *DOCUMENTATION* diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index ba964da31a25..1cb2749a72bf 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -366,7 +366,7 @@ void force_signal_inject(int signal, int code, unsigned long address) } /* Force signals we don't understand to SIGKILL */ - if (WARN_ON(signal != SIGKILL || + if (WARN_ON(signal != SIGKILL && siginfo_layout(signal, code) != SIL_FAULT)) { signal = SIGKILL; } diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index dabfc1ecda3d..12145874c02b 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -204,7 +204,7 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); kasan_map_populate(kimg_shadow_start, kimg_shadow_end, - pfn_to_nid(virt_to_pfn(lm_alias(_text)))); + early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, (void *)mod_shadow_start); @@ -224,7 +224,7 @@ void __init kasan_init(void) kasan_map_populate((unsigned long)kasan_mem_to_shadow(start), (unsigned long)kasan_mem_to_shadow(end), - pfn_to_nid(virt_to_pfn(start))); + early_pfn_to_nid(virt_to_pfn(start))); } /* diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 9033c8194eda..ccc421503363 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1093,7 +1093,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private) LOAD_INT(c), LOAD_FRAC(c), count_active_contexts(), atomic_read(&nr_spu_contexts), - idr_get_cursor(&task_active_pid_ns(current)->idr)); + idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index c98b943e58b4..77076a102e34 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3028,10 +3028,27 @@ static struct intel_uncore_type bdx_uncore_cbox = { .format_group = &hswep_uncore_cbox_format_group, }; +static struct intel_uncore_type bdx_uncore_sbox = { + .name = "sbox", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .event_ctl = HSWEP_S0_MSR_PMON_CTL0, + .perf_ctr = HSWEP_S0_MSR_PMON_CTR0, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_SBOX_MSR_OFFSET, + .ops = &hswep_uncore_sbox_msr_ops, + .format_group = &hswep_uncore_sbox_format_group, +}; + +#define BDX_MSR_UNCORE_SBOX 3 + static struct intel_uncore_type *bdx_msr_uncores[] = { &bdx_uncore_ubox, &bdx_uncore_cbox, &hswep_uncore_pcu, + &bdx_uncore_sbox, NULL, }; @@ -3043,10 +3060,25 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { + int pkg = topology_phys_to_logical_pkg(0); + if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; + /* BDX-DE doesn't have SBOX */ + if (boot_cpu_data.x86_model == 86) { + uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; + /* Detect systems with no SBOXes */ + } else if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { + struct pci_dev *pdev; + u32 capid4; + + pdev = uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]; + pci_read_config_dword(pdev, 0x94, &capid4); + if (((capid4 >> 6) & 0x3) == 0) + bdx_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; + } hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints; } @@ -3264,6 +3296,11 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46), .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2), }, + { /* PCU.3 (for Capability registers) */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + HSWEP_PCI_PCU_3), + }, { /* end: all zeroes */ } }; diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 386a6900e206..219faaec51df 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -136,7 +136,6 @@ #endif #ifndef __ASSEMBLY__ -#ifndef __BPF__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -146,6 +145,5 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif -#endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4fa4206029e3..21a114914ba4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -749,13 +749,11 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, extern void enable_sep_cpu(void); extern int sysenter_setup(void); -extern void early_trap_init(void); void early_trap_pf_init(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; -extern void cpu_set_gdt(int); extern void switch_to_new_gdt(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index dde444f932c1..3b20607d581b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -215,6 +215,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) apic_id = processor->local_apic_id; enabled = processor->lapic_flags & ACPI_MADT_ENABLED; + /* Ignore invalid ID */ + if (apic_id == 0xffffffff) + return 0; + /* * We need to register disabled CPU as well to permit * counting disabled CPUs. This allows us to size diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 3182908b7e6c..7326078eaa7a 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -398,11 +398,10 @@ static void *bzImage64_load(struct kimage *image, char *kernel, * little bit simple */ efi_map_sz = efi_get_runtime_map_size(); - efi_map_sz = ALIGN(efi_map_sz, 16); params_cmdline_sz = sizeof(struct boot_params) + cmdline_len + MAX_ELFCOREHDR_STR_LEN; params_cmdline_sz = ALIGN(params_cmdline_sz, 16); - kbuf.bufsz = params_cmdline_sz + efi_map_sz + + kbuf.bufsz = params_cmdline_sz + ALIGN(efi_map_sz, 16) + sizeof(struct setup_data) + sizeof(struct efi_setup_data); @@ -410,7 +409,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, if (!params) return ERR_PTR(-ENOMEM); efi_map_offset = params_cmdline_sz; - efi_setup_data_offset = efi_map_offset + efi_map_sz; + efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16); /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c deleted file mode 100644 index ac7ea3a8242f..000000000000 --- a/arch/x86/kernel/pci-nommu.c +++ /dev/null @@ -1,90 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Fallback functions when the main IOMMU code is not compiled in. This - code is roughly equivalent to i386. */ -#include <linux/dma-direct.h> -#include <linux/scatterlist.h> -#include <linux/string.h> -#include <linux/gfp.h> -#include <linux/pci.h> -#include <linux/mm.h> - -#include <asm/processor.h> -#include <asm/iommu.h> -#include <asm/dma.h> - -#define NOMMU_MAPPING_ERROR 0 - -static int -check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) -{ - if (hwdev && !dma_capable(hwdev, bus, size)) { - if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) - printk(KERN_ERR - "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", - name, (long long)bus, size, - (long long)*hwdev->dma_mask); - return 0; - } - return 1; -} - -static dma_addr_t nommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t bus = phys_to_dma(dev, page_to_phys(page)) + offset; - WARN_ON(size == 0); - if (!check_addr("map_single", dev, bus, size)) - return NOMMU_MAPPING_ERROR; - return bus; -} - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scatter-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s; - int i; - - WARN_ON(nents == 0 || sg[0].length == 0); - - for_each_sg(sg, s, nents, i) { - BUG_ON(!sg_page(s)); - s->dma_address = sg_phys(s); - if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) - return 0; - s->dma_length = s->length; - } - return nents; -} - -static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == NOMMU_MAPPING_ERROR; -} - -const struct dma_map_ops nommu_dma_ops = { - .alloc = dma_generic_alloc_coherent, - .free = dma_generic_free_coherent, - .map_sg = nommu_map_sg, - .map_page = nommu_map_page, - .is_phys = 1, - .mapping_error = nommu_mapping_error, - .dma_supported = x86_dma_supported, -}; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ff99e2b6fc54..45175b81dd5b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -77,6 +77,8 @@ #include <asm/i8259.h> #include <asm/misc.h> #include <asm/qspinlock.h> +#include <asm/intel-family.h> +#include <asm/cpu_device_id.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -390,15 +392,47 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) return false; } +/* + * Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs. + * + * These are Intel CPUs that enumerate an LLC that is shared by + * multiple NUMA nodes. The LLC on these systems is shared for + * off-package data access but private to the NUMA node (half + * of the package) for on-package access. + * + * CPUID (the source of the information about the LLC) can only + * enumerate the cache as being shared *or* unshared, but not + * this particular configuration. The CPU in this case enumerates + * the cache to be shared across the entire package (spanning both + * NUMA nodes). + */ + +static const struct x86_cpu_id snc_cpu[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X }, + {} +}; + static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; - if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID && - per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) - return topology_sane(c, o, "llc"); + /* Do not match if we do not have a valid APICID for cpu: */ + if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID) + return false; - return false; + /* Do not match if LLC id does not match: */ + if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2)) + return false; + + /* + * Allow the SNC topology without warning. Return of false + * means 'c' does not share the LLC of 'o'. This will be + * reflected to userspace. + */ + if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu)) + return false; + + return topology_sane(c, o, "llc"); } /* @@ -456,7 +490,8 @@ static struct sched_domain_topology_level x86_topology[] = { /* * Set if a package/die has multiple NUMA nodes inside. - * AMD Magny-Cours and Intel Cluster-on-Die have this. + * AMD Magny-Cours, Intel Cluster-on-Die, and Intel + * Sub-NUMA Clustering have this. */ static bool x86_has_numa_in_package; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ef32297ff17e..91e6da48cbb6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -317,7 +317,7 @@ static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) hpet2 -= hpet1; tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); do_div(tmp, 1000000); - do_div(deltatsc, tmp); + deltatsc = div64_u64(deltatsc, tmp); return (unsigned long) deltatsc; } diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 62a7e9f65dec..cc7ff5957194 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/seq_file.h> +#include <linux/highmem.h> #include <asm/pgtable.h> @@ -334,16 +335,16 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, pgprotval_t eff_in, unsigned long P) { int i; - pte_t *start; + pte_t *pte; pgprotval_t prot, eff; - start = (pte_t *)pmd_page_vaddr(addr); for (i = 0; i < PTRS_PER_PTE; i++) { - prot = pte_flags(*start); - eff = effective_prot(eff_in, prot); st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); + pte = pte_offset_map(&addr, st->current_address); + prot = pte_flags(*pte); + eff = effective_prot(eff_in, prot); note_page(m, st, __pgprot(prot), eff, 5); - start++; + pte_unmap(pte); } } #ifdef CONFIG_KASAN diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b725154182cc..abce27ceb411 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1027,7 +1027,17 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ break; case BPF_JMP | BPF_JA: - jmp_offset = addrs[i + insn->off] - addrs[i]; + if (insn->off == -1) + /* -1 jmp instructions will always jump + * backwards two bytes. Explicitly handling + * this case avoids wasting too many passes + * when there are long sequences of replaced + * dead code. + */ + jmp_offset = -2; + else + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (!jmp_offset) /* optimize out nop jumps */ break; diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 48b14b534897..ccf4a49bb065 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -98,7 +98,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) set_pgd(pgd + pgd_index(restore_jump_address), new_pgd); } else { /* No p4d for 4-level paging: point the pgd to the pud page table */ - pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot)); + pgd_t new_pgd = __pgd(__pa(pud) | pgprot_val(pgtable_prot)); set_pgd(pgd + pgd_index(restore_jump_address), new_pgd); } diff --git a/drivers/char/random.c b/drivers/char/random.c index e027e7fa1472..3cd3aae24d6d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -427,8 +427,9 @@ struct crng_state primary_crng = { * its value (from 0->1->2). */ static int crng_init = 0; -#define crng_ready() (likely(crng_init > 0)) +#define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; +static unsigned long crng_global_init_time = 0; #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE) static void _extract_crng(struct crng_state *crng, __u32 out[CHACHA20_BLOCK_WORDS]); @@ -787,6 +788,36 @@ static void crng_initialize(struct crng_state *crng) crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } +#ifdef CONFIG_NUMA +static void numa_crng_init(void) +{ + int i; + struct crng_state *crng; + struct crng_state **pool; + + pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL); + for_each_online_node(i) { + crng = kmalloc_node(sizeof(struct crng_state), + GFP_KERNEL | __GFP_NOFAIL, i); + spin_lock_init(&crng->lock); + crng_initialize(crng); + pool[i] = crng; + } + mb(); + if (cmpxchg(&crng_node_pool, NULL, pool)) { + for_each_node(i) + kfree(pool[i]); + kfree(pool); + } +} +#else +static void numa_crng_init(void) {} +#endif + +/* + * crng_fast_load() can be called by code in the interrupt service + * path. So we can't afford to dilly-dally. + */ static int crng_fast_load(const char *cp, size_t len) { unsigned long flags; @@ -794,7 +825,7 @@ static int crng_fast_load(const char *cp, size_t len) if (!spin_trylock_irqsave(&primary_crng.lock, flags)) return 0; - if (crng_ready()) { + if (crng_init != 0) { spin_unlock_irqrestore(&primary_crng.lock, flags); return 0; } @@ -813,6 +844,51 @@ static int crng_fast_load(const char *cp, size_t len) return 1; } +/* + * crng_slow_load() is called by add_device_randomness, which has two + * attributes. (1) We can't trust the buffer passed to it is + * guaranteed to be unpredictable (so it might not have any entropy at + * all), and (2) it doesn't have the performance constraints of + * crng_fast_load(). + * + * So we do something more comprehensive which is guaranteed to touch + * all of the primary_crng's state, and which uses a LFSR with a + * period of 255 as part of the mixing algorithm. Finally, we do + * *not* advance crng_init_cnt since buffer we may get may be something + * like a fixed DMI table (for example), which might very well be + * unique to the machine, but is otherwise unvarying. + */ +static int crng_slow_load(const char *cp, size_t len) +{ + unsigned long flags; + static unsigned char lfsr = 1; + unsigned char tmp; + unsigned i, max = CHACHA20_KEY_SIZE; + const char * src_buf = cp; + char * dest_buf = (char *) &primary_crng.state[4]; + + if (!spin_trylock_irqsave(&primary_crng.lock, flags)) + return 0; + if (crng_init != 0) { + spin_unlock_irqrestore(&primary_crng.lock, flags); + return 0; + } + if (len > max) + max = len; + + for (i = 0; i < max ; i++) { + tmp = lfsr; + lfsr >>= 1; + if (tmp & 1) + lfsr ^= 0xE1; + tmp = dest_buf[i % CHACHA20_KEY_SIZE]; + dest_buf[i % CHACHA20_KEY_SIZE] ^= src_buf[i % len] ^ lfsr; + lfsr += (tmp << 3) | (tmp >> 5); + } + spin_unlock_irqrestore(&primary_crng.lock, flags); + return 1; +} + static void crng_reseed(struct crng_state *crng, struct entropy_store *r) { unsigned long flags; @@ -831,7 +907,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) _crng_backtrack_protect(&primary_crng, buf.block, CHACHA20_KEY_SIZE); } - spin_lock_irqsave(&primary_crng.lock, flags); + spin_lock_irqsave(&crng->lock, flags); for (i = 0; i < 8; i++) { unsigned long rv; if (!arch_get_random_seed_long(&rv) && @@ -841,9 +917,10 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) } memzero_explicit(&buf, sizeof(buf)); crng->init_time = jiffies; - spin_unlock_irqrestore(&primary_crng.lock, flags); + spin_unlock_irqrestore(&crng->lock, flags); if (crng == &primary_crng && crng_init < 2) { invalidate_batched_entropy(); + numa_crng_init(); crng_init = 2; process_random_ready_list(); wake_up_interruptible(&crng_init_wait); @@ -856,8 +933,9 @@ static void _extract_crng(struct crng_state *crng, { unsigned long v, flags; - if (crng_init > 1 && - time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL)) + if (crng_ready() && + (time_after(crng_global_init_time, crng->init_time) || + time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))) crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL); spin_lock_irqsave(&crng->lock, flags); if (arch_get_random_long(&v)) @@ -981,10 +1059,8 @@ void add_device_randomness(const void *buf, unsigned int size) unsigned long time = random_get_entropy() ^ jiffies; unsigned long flags; - if (!crng_ready()) { - crng_fast_load(buf, size); - return; - } + if (!crng_ready() && size) + crng_slow_load(buf, size); trace_add_device_randomness(size, _RET_IP_); spin_lock_irqsave(&input_pool.lock, flags); @@ -1139,7 +1215,7 @@ void add_interrupt_randomness(int irq, int irq_flags) fast_mix(fast_pool); add_interrupt_bench(cycles); - if (!crng_ready()) { + if (unlikely(crng_init == 0)) { if ((fast_pool->count >= 64) && crng_fast_load((char *) fast_pool->pool, sizeof(fast_pool->pool))) { @@ -1680,28 +1756,10 @@ static void init_std_data(struct entropy_store *r) */ static int rand_initialize(void) { -#ifdef CONFIG_NUMA - int i; - struct crng_state *crng; - struct crng_state **pool; -#endif - init_std_data(&input_pool); init_std_data(&blocking_pool); crng_initialize(&primary_crng); - -#ifdef CONFIG_NUMA - pool = kcalloc(nr_node_ids, sizeof(*pool), GFP_KERNEL|__GFP_NOFAIL); - for_each_online_node(i) { - crng = kmalloc_node(sizeof(struct crng_state), - GFP_KERNEL | __GFP_NOFAIL, i); - spin_lock_init(&crng->lock); - crng_initialize(crng); - pool[i] = crng; - } - mb(); - crng_node_pool = pool; -#endif + crng_global_init_time = jiffies; return 0; } early_initcall(rand_initialize); @@ -1875,6 +1933,14 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) input_pool.entropy_count = 0; blocking_pool.entropy_count = 0; return 0; + case RNDRESEEDCRNG: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (crng_init < 2) + return -ENODATA; + crng_reseed(&primary_crng, NULL); + crng_global_init_time = jiffies - 1; + return 0; default: return -EINVAL; } @@ -2212,7 +2278,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, { struct entropy_store *poolp = &input_pool; - if (!crng_ready()) { + if (unlikely(crng_init == 0)) { crng_fast_load(buffer, count); return; } diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index 05d97a6871d8..6c8318470b48 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -114,7 +114,7 @@ static int tpm_set_next_event(unsigned long delta, * of writing CNT registers which may cause the min_delta event got * missed, so we need add a ETIME check here in case it happened. */ - return (int)((next - now) <= 0) ? -ETIME : 0; + return (int)(next - now) <= 0 ? -ETIME : 0; } static int tpm_set_state_oneshot(struct clock_event_device *evt) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index be8606457f27..aff2c1594220 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -19,6 +19,7 @@ #include <linux/dax.h> #include <linux/fs.h> #include <linux/mm.h> +#include <linux/mman.h> #include "dax-private.h" #include "dax.h" @@ -540,6 +541,7 @@ static const struct file_operations dax_fops = { .release = dax_release, .get_unmapped_area = dax_get_unmapped_area, .mmap = dax_mmap, + .mmap_supported_flags = MAP_SYNC, }; static void dev_dax_release(struct device *dev) diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c index 7afbb28d6a0f..1bc5ffb338c8 100644 --- a/drivers/dca/dca-core.c +++ b/drivers/dca/dca-core.c @@ -270,7 +270,7 @@ EXPORT_SYMBOL_GPL(dca_remove_requester); * @dev - the device that wants dca service * @cpu - the cpuid as returned by get_cpu() */ -u8 dca_common_get_tag(struct device *dev, int cpu) +static u8 dca_common_get_tag(struct device *dev, int cpu) { struct dca_provider *dca; u8 tag; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index f6cb502c303f..25f064c01038 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -138,13 +138,6 @@ int amdgpu_dm_set_regamma_lut(struct dm_crtc_state *crtc) lut = (struct drm_color_lut *)blob->data; lut_size = blob->length / sizeof(struct drm_color_lut); - if (__is_lut_linear(lut, lut_size)) { - /* Set to bypass if lut is set to linear */ - stream->out_transfer_func->type = TF_TYPE_BYPASS; - stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; - return 0; - } - gamma = dc_create_gamma(); if (!gamma) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index add90675fd2a..26fbeafc3c96 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -4743,23 +4743,27 @@ static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr) for (i=0; i < dep_table->count; i++) { if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; - break; + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK; + return; } } - if (i == dep_table->count) + if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + } dep_table = table_info->vdd_dep_on_sclk; odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk); for (i=0; i < dep_table->count; i++) { if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; - break; + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK; + return; } } - if (i == dep_table->count) + if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + } } static int smu7_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h index fb696e3d06cf..2f8a3b983cce 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h @@ -412,8 +412,10 @@ typedef struct { QuadraticInt_t ReservedEquation2; QuadraticInt_t ReservedEquation3; + uint16_t MinVoltageUlvGfx; + uint16_t MinVoltageUlvSoc; - uint32_t Reserved[15]; + uint32_t Reserved[14]; diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c index 02a50929af67..e7f4fe2848a5 100644 --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c @@ -350,19 +350,44 @@ int drm_dp_dual_mode_set_tmds_output(enum drm_dp_dual_mode_type type, { uint8_t tmds_oen = enable ? 0 : DP_DUAL_MODE_TMDS_DISABLE; ssize_t ret; + int retry; if (type < DRM_DP_DUAL_MODE_TYPE2_DVI) return 0; - ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN, - &tmds_oen, sizeof(tmds_oen)); - if (ret) { - DRM_DEBUG_KMS("Failed to %s TMDS output buffers\n", - enable ? "enable" : "disable"); - return ret; + /* + * LSPCON adapters in low-power state may ignore the first write, so + * read back and verify the written value a few times. + */ + for (retry = 0; retry < 3; retry++) { + uint8_t tmp; + + ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN, + &tmds_oen, sizeof(tmds_oen)); + if (ret) { + DRM_DEBUG_KMS("Failed to %s TMDS output buffers (%d attempts)\n", + enable ? "enable" : "disable", + retry + 1); + return ret; + } + + ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_TMDS_OEN, + &tmp, sizeof(tmp)); + if (ret) { + DRM_DEBUG_KMS("I2C read failed during TMDS output buffer %s (%d attempts)\n", + enable ? "enabling" : "disabling", + retry + 1); + return ret; + } + + if (tmp == tmds_oen) + return 0; } - return 0; + DRM_DEBUG_KMS("I2C write value mismatch during TMDS output buffer %s\n", + enable ? "enabling" : "disabling"); + + return -EIO; } EXPORT_SYMBOL(drm_dp_dual_mode_set_tmds_output); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 0faaf829f5bf..f0e79178bde6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -18,6 +18,7 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <uapi/drm/exynos_drm.h> #include "exynos_drm_drv.h" @@ -26,20 +27,6 @@ #include "exynos_drm_iommu.h" #include "exynos_drm_crtc.h" -#define to_exynos_fb(x) container_of(x, struct exynos_drm_fb, fb) - -/* - * exynos specific framebuffer structure. - * - * @fb: drm framebuffer obejct. - * @exynos_gem: array of exynos specific gem object containing a gem object. - */ -struct exynos_drm_fb { - struct drm_framebuffer fb; - struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; - dma_addr_t dma_addr[MAX_FB_BUFFER]; -}; - static int check_fb_gem_memory_type(struct drm_device *drm_dev, struct exynos_drm_gem *exynos_gem) { @@ -66,40 +53,9 @@ static int check_fb_gem_memory_type(struct drm_device *drm_dev, return 0; } -static void exynos_drm_fb_destroy(struct drm_framebuffer *fb) -{ - struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); - unsigned int i; - - drm_framebuffer_cleanup(fb); - - for (i = 0; i < ARRAY_SIZE(exynos_fb->exynos_gem); i++) { - struct drm_gem_object *obj; - - if (exynos_fb->exynos_gem[i] == NULL) - continue; - - obj = &exynos_fb->exynos_gem[i]->base; - drm_gem_object_unreference_unlocked(obj); - } - - kfree(exynos_fb); - exynos_fb = NULL; -} - -static int exynos_drm_fb_create_handle(struct drm_framebuffer *fb, - struct drm_file *file_priv, - unsigned int *handle) -{ - struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); - - return drm_gem_handle_create(file_priv, - &exynos_fb->exynos_gem[0]->base, handle); -} - static const struct drm_framebuffer_funcs exynos_drm_fb_funcs = { - .destroy = exynos_drm_fb_destroy, - .create_handle = exynos_drm_fb_create_handle, + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, }; struct drm_framebuffer * @@ -108,12 +64,12 @@ exynos_drm_framebuffer_init(struct drm_device *dev, struct exynos_drm_gem **exynos_gem, int count) { - struct exynos_drm_fb *exynos_fb; + struct drm_framebuffer *fb; int i; int ret; - exynos_fb = kzalloc(sizeof(*exynos_fb), GFP_KERNEL); - if (!exynos_fb) + fb = kzalloc(sizeof(*fb), GFP_KERNEL); + if (!fb) return ERR_PTR(-ENOMEM); for (i = 0; i < count; i++) { @@ -121,23 +77,21 @@ exynos_drm_framebuffer_init(struct drm_device *dev, if (ret < 0) goto err; - exynos_fb->exynos_gem[i] = exynos_gem[i]; - exynos_fb->dma_addr[i] = exynos_gem[i]->dma_addr - + mode_cmd->offsets[i]; + fb->obj[i] = &exynos_gem[i]->base; } - drm_helper_mode_fill_fb_struct(dev, &exynos_fb->fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); - ret = drm_framebuffer_init(dev, &exynos_fb->fb, &exynos_drm_fb_funcs); + ret = drm_framebuffer_init(dev, fb, &exynos_drm_fb_funcs); if (ret < 0) { DRM_ERROR("failed to initialize framebuffer\n"); goto err; } - return &exynos_fb->fb; + return fb; err: - kfree(exynos_fb); + kfree(fb); return ERR_PTR(ret); } @@ -191,12 +145,13 @@ err: dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index) { - struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); + struct exynos_drm_gem *exynos_gem; if (WARN_ON_ONCE(index >= MAX_FB_BUFFER)) return 0; - return exynos_fb->dma_addr[index]; + exynos_gem = to_exynos_gem(fb->obj[index]); + return exynos_gem->dma_addr + fb->offsets[index]; } static struct drm_mode_config_helper_funcs exynos_drm_mode_config_helpers = { diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index db6b94dda5df..d85939bd7b47 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1080,6 +1080,7 @@ static int cmd_handler_mi_user_interrupt(struct parser_exec_state *s) { set_bit(cmd_interrupt_events[s->ring_id].mi_user_interrupt, s->workload->pending_events); + patch_value(s, cmd_ptr(s, 0), MI_NOOP); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index dd96ffc878ac..6d8180e8d1e2 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -169,6 +169,8 @@ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { static void emulate_monitor_status_change(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + int pipe; + vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); @@ -267,6 +269,14 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) if (IS_BROADWELL(dev_priv)) vgpu_vreg_t(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + /* Disable Primary/Sprite/Cursor plane */ + for_each_pipe(dev_priv, pipe) { + vgpu_vreg_t(vgpu, DSPCNTR(pipe)) &= ~DISPLAY_PLANE_ENABLE; + vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE; + vgpu_vreg_t(vgpu, CURCNTR(pipe)) &= ~CURSOR_MODE; + vgpu_vreg_t(vgpu, CURCNTR(pipe)) |= CURSOR_MODE_DISABLE; + } + vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; } diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index b555eb26f9ce..6f4f8e941fc2 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -323,6 +323,7 @@ static void update_fb_info(struct vfio_device_gfx_plane_info *gvt_dmabuf, struct intel_vgpu_fb_info *fb_info) { gvt_dmabuf->drm_format = fb_info->drm_format; + gvt_dmabuf->drm_format_mod = fb_info->drm_format_mod; gvt_dmabuf->width = fb_info->width; gvt_dmabuf->height = fb_info->height; gvt_dmabuf->stride = fb_info->stride; diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 6b50fe78dc1b..1c120683e958 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -245,16 +245,13 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, plane->hw_format = fmt; plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) return -EINVAL; - } plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + gvt_vgpu_err("Translate primary plane gma 0x%x to gpa fail\n", + plane->base); return -EINVAL; } @@ -371,16 +368,13 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, alpha_plane, alpha_force); plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) return -EINVAL; - } plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + gvt_vgpu_err("Translate cursor plane gma 0x%x to gpa fail\n", + plane->base); return -EINVAL; } @@ -476,16 +470,13 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, plane->drm_format = drm_format; plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) return -EINVAL; - } plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + gvt_vgpu_err("Translate sprite plane gma 0x%x to gpa fail\n", + plane->base); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index d29281231507..78e55aafc8bc 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -530,6 +530,16 @@ static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, false, 0, mm->vgpu); } +static void ggtt_get_host_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; + + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); + + pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu); +} + static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, struct intel_gvt_gtt_entry *entry, unsigned long index) { @@ -1818,6 +1828,18 @@ int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, return ret; } +static void ggtt_invalidate_pte(struct intel_vgpu *vgpu, + struct intel_gvt_gtt_entry *entry) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; + unsigned long pfn; + + pfn = pte_ops->get_pfn(entry); + if (pfn != vgpu->gvt->gtt.scratch_mfn) + intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, + pfn << PAGE_SHIFT); +} + static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { @@ -1844,10 +1866,10 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, bytes); - m = e; if (ops->test_present(&e)) { gfn = ops->get_pfn(&e); + m = e; /* one PTE update may be issued in multiple writes and the * first write may not construct a valid gfn @@ -1868,8 +1890,12 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, ops->set_pfn(&m, gvt->gtt.scratch_mfn); } else ops->set_pfn(&m, dma_addr >> PAGE_SHIFT); - } else + } else { + ggtt_get_host_entry(ggtt_mm, &m, g_gtt_index); + ggtt_invalidate_pte(vgpu, &m); ops->set_pfn(&m, gvt->gtt.scratch_mfn); + ops->clear_present(&m); + } out: ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); @@ -2030,7 +2056,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) return PTR_ERR(gtt->ggtt_mm); } - intel_vgpu_reset_ggtt(vgpu); + intel_vgpu_reset_ggtt(vgpu, false); return create_scratch_page_tree(vgpu); } @@ -2315,17 +2341,19 @@ void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) /** * intel_vgpu_reset_ggtt - reset the GGTT entry * @vgpu: a vGPU + * @invalidate_old: invalidate old entries * * This function is called at the vGPU create stage * to reset all the GGTT entries. * */ -void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) +void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old) { struct intel_gvt *gvt = vgpu->gvt; struct drm_i915_private *dev_priv = gvt->dev_priv; struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; + struct intel_gvt_gtt_entry old_entry; u32 index; u32 num_entries; @@ -2334,13 +2362,23 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; - while (num_entries--) + while (num_entries--) { + if (invalidate_old) { + ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); + ggtt_invalidate_pte(vgpu, &old_entry); + } ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); + } index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; - while (num_entries--) + while (num_entries--) { + if (invalidate_old) { + ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); + ggtt_invalidate_pte(vgpu, &old_entry); + } ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); + } ggtt_invalidate(dev_priv); } @@ -2360,5 +2398,5 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) * removing the shadow pages. */ intel_vgpu_destroy_all_ppgtt_mm(vgpu); - intel_vgpu_reset_ggtt(vgpu); + intel_vgpu_reset_ggtt(vgpu, true); } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index a8b369cd352b..3792f2b7f4ff 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -193,7 +193,7 @@ struct intel_vgpu_gtt { extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu); -void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu); +void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old); void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu); extern int intel_gvt_init_gtt(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 8c5d5d005854..a33c1c3e4a21 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1150,6 +1150,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) switch (notification) { case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE: root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; + /* fall through */ case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE: mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps); return PTR_ERR_OR_ZERO(mm); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index c16a492449d7..1466d8769ec9 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1301,7 +1301,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } - return 0; + return -ENOTTY; } static ssize_t diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 84ca369f15a5..3b4daafebdcb 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1105,30 +1105,32 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) ret = i915_ggtt_probe_hw(dev_priv); if (ret) - return ret; + goto err_perf; - /* WARNING: Apparently we must kick fbdev drivers before vgacon, - * otherwise the vga fbdev driver falls over. */ + /* + * WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. + */ ret = i915_kick_out_firmware_fb(dev_priv); if (ret) { DRM_ERROR("failed to remove conflicting framebuffer drivers\n"); - goto out_ggtt; + goto err_ggtt; } ret = i915_kick_out_vgacon(dev_priv); if (ret) { DRM_ERROR("failed to remove conflicting VGA console\n"); - goto out_ggtt; + goto err_ggtt; } ret = i915_ggtt_init_hw(dev_priv); if (ret) - return ret; + goto err_ggtt; ret = i915_ggtt_enable_hw(dev_priv); if (ret) { DRM_ERROR("failed to enable GGTT\n"); - goto out_ggtt; + goto err_ggtt; } pci_set_master(pdev); @@ -1139,7 +1141,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto out_ggtt; + goto err_ggtt; } } @@ -1157,7 +1159,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto out_ggtt; + goto err_ggtt; } } @@ -1190,13 +1192,14 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) ret = intel_gvt_init(dev_priv); if (ret) - goto out_ggtt; + goto err_ggtt; return 0; -out_ggtt: +err_ggtt: i915_ggtt_cleanup_hw(dev_priv); - +err_perf: + i915_perf_fini(dev_priv); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8c170db8495d..0414228cd2b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -728,7 +728,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) err = radix_tree_insert(handles_vma, handle, vma); if (unlikely(err)) { - kfree(lut); + kmem_cache_free(eb->i915->luts, lut); goto err_obj; } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d8feb9053e0c..f0519e31543a 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -473,20 +473,37 @@ static u64 get_rc6(struct drm_i915_private *i915) spin_lock_irqsave(&i915->pmu.lock, flags); spin_lock(&kdev->power.lock); - if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) - i915->pmu.suspended_jiffies_last = - kdev->power.suspended_jiffies; + /* + * After the above branch intel_runtime_pm_get_if_in_use failed + * to get the runtime PM reference we cannot assume we are in + * runtime suspend since we can either: a) race with coming out + * of it before we took the power.lock, or b) there are other + * states than suspended which can bring us here. + * + * We need to double-check that we are indeed currently runtime + * suspended and if not we cannot do better than report the last + * known RC6 value. + */ + if (kdev->power.runtime_status == RPM_SUSPENDED) { + if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) + i915->pmu.suspended_jiffies_last = + kdev->power.suspended_jiffies; - val = kdev->power.suspended_jiffies - - i915->pmu.suspended_jiffies_last; - val += jiffies - kdev->power.accounting_timestamp; + val = kdev->power.suspended_jiffies - + i915->pmu.suspended_jiffies_last; + val += jiffies - kdev->power.accounting_timestamp; - spin_unlock(&kdev->power.lock); + val = jiffies_to_nsecs(val); + val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; - val = jiffies_to_nsecs(val); - val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; - i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; + i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; + } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { + val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; + } else { + val = i915->pmu.sample[__I915_SAMPLE_RC6].cur; + } + spin_unlock(&kdev->power.lock); spin_unlock_irqrestore(&i915->pmu.lock, flags); } diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 709d6ca68074..3ea566f99450 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -729,7 +729,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev, struct drm_i915_private *dev_priv = kdev_to_i915(kdev); u32 tmp; - if (!IS_GEN9_BC(dev_priv)) + if (!IS_GEN9(dev_priv)) return; i915_audio_component_get_power(kdev); diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index c5c7530ba157..447b721c3be9 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1256,7 +1256,6 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, return; aux_channel = child->aux_channel; - ddc_pin = child->ddc_pin; is_dvi = child->device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING; is_dp = child->device_type & DEVICE_TYPE_DISPLAYPORT_OUTPUT; @@ -1303,9 +1302,15 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port)); if (is_dvi) { - info->alternate_ddc_pin = map_ddc_pin(dev_priv, ddc_pin); - - sanitize_ddc_pin(dev_priv, port); + ddc_pin = map_ddc_pin(dev_priv, child->ddc_pin); + if (intel_gmbus_is_valid_pin(dev_priv, ddc_pin)) { + info->alternate_ddc_pin = ddc_pin; + sanitize_ddc_pin(dev_priv, port); + } else { + DRM_DEBUG_KMS("Port %c has invalid DDC pin %d, " + "sticking to defaults\n", + port_name(port), ddc_pin); + } } if (is_dp) { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 697af5add78b..e3a5f673ff67 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -577,6 +577,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * know the next preemption status we see corresponds * to this ELSP update. */ + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(!port_count(&port[0])); if (port_count(&port[0]) > 1) goto unlock; @@ -738,6 +740,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) memset(port, 0, sizeof(*port)); port++; } + + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); } static void execlists_cancel_requests(struct intel_engine_cs *engine) @@ -1001,6 +1005,11 @@ static void execlists_submission_tasklet(unsigned long data) if (fw) intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); + + /* If the engine is now idle, so should be the flag; and vice versa. */ + GEM_BUG_ON(execlists_is_active(&engine->execlists, + EXECLISTS_ACTIVE_USER) == + !port_isset(engine->execlists.port)); } static void queue_request(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 2decc8e2c79f..add9cc97a3b6 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -195,6 +195,7 @@ static void vc4_bo_destroy(struct vc4_bo *bo) vc4_bo_set_label(obj, -1); if (bo->validated_shader) { + kfree(bo->validated_shader->uniform_addr_offsets); kfree(bo->validated_shader->texture_samples); kfree(bo->validated_shader); bo->validated_shader = NULL; @@ -591,6 +592,7 @@ void vc4_free_object(struct drm_gem_object *gem_bo) } if (bo->validated_shader) { + kfree(bo->validated_shader->uniform_addr_offsets); kfree(bo->validated_shader->texture_samples); kfree(bo->validated_shader); bo->validated_shader = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index d3f15bf60900..7cf82b071de2 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -942,6 +942,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) fail: kfree(validation_state.branch_targets); if (validated_shader) { + kfree(validated_shader->uniform_addr_offsets); kfree(validated_shader->texture_samples); kfree(validated_shader); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b7b113018853..718e4914e3a0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1660,8 +1660,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, } /* switch(bond_mode) */ #ifdef CONFIG_NET_POLL_CONTROLLER - slave_dev->npinfo = bond->dev->npinfo; - if (slave_dev->npinfo) { + if (bond->dev->npinfo) { if (slave_enable_netpoll(new_slave)) { netdev_info(bond_dev, "master_dev is using netpoll, but new slave device does not support netpoll\n"); res = -EBUSY; diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index c99e3e845ac0..a90080f12e67 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1074,16 +1074,12 @@ static int amd8111e_calc_coalesce(struct net_device *dev) amd8111e_set_coalesce(dev,TX_INTR_COAL); coal_conf->tx_coal_type = MEDIUM_COALESCE; } - - } - else if(tx_pkt_size >= 1024){ - if (tx_pkt_size >= 1024){ - if(coal_conf->tx_coal_type != HIGH_COALESCE){ - coal_conf->tx_timeout = 4; - coal_conf->tx_event_count = 8; - amd8111e_set_coalesce(dev,TX_INTR_COAL); - coal_conf->tx_coal_type = HIGH_COALESCE; - } + } else if (tx_pkt_size >= 1024) { + if (coal_conf->tx_coal_type != HIGH_COALESCE) { + coal_conf->tx_timeout = 4; + coal_conf->tx_event_count = 8; + amd8111e_set_coalesce(dev, TX_INTR_COAL); + coal_conf->tx_coal_type = HIGH_COALESCE; } } } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 7ea72ef11a55..d272dc6984ac 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1321,6 +1321,10 @@ #define MDIO_VEND2_AN_STAT 0x8002 #endif +#ifndef MDIO_VEND2_PMA_CDR_CONTROL +#define MDIO_VEND2_PMA_CDR_CONTROL 0x8056 +#endif + #ifndef MDIO_CTRL1_SPEED1G #define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100) #endif @@ -1369,6 +1373,10 @@ #define XGBE_AN_CL37_TX_CONFIG_MASK 0x08 #define XGBE_AN_CL37_MII_CTRL_8BIT 0x0100 +#define XGBE_PMA_CDR_TRACK_EN_MASK 0x01 +#define XGBE_PMA_CDR_TRACK_EN_OFF 0x00 +#define XGBE_PMA_CDR_TRACK_EN_ON 0x01 + /* Bit setting and getting macros * The get macro will extract the current bit field value from within * the variable diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c index 7d128be61310..b91143947ed2 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c @@ -519,6 +519,22 @@ void xgbe_debugfs_init(struct xgbe_prv_data *pdata) "debugfs_create_file failed\n"); } + if (pdata->vdata->an_cdr_workaround) { + pfile = debugfs_create_bool("an_cdr_workaround", 0600, + pdata->xgbe_debugfs, + &pdata->debugfs_an_cdr_workaround); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_bool failed\n"); + + pfile = debugfs_create_bool("an_cdr_track_early", 0600, + pdata->xgbe_debugfs, + &pdata->debugfs_an_cdr_track_early); + if (!pfile) + netdev_err(pdata->netdev, + "debugfs_create_bool failed\n"); + } + kfree(buf); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 795e556d4a3f..441d0973957b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -349,6 +349,7 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata) XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1); /* Call MDIO/PHY initialization routine */ + pdata->debugfs_an_cdr_workaround = pdata->vdata->an_cdr_workaround; ret = pdata->phy_if.phy_init(pdata); if (ret) return ret; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 072b9f664597..1b45cd73a258 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -432,11 +432,16 @@ static void xgbe_an73_disable(struct xgbe_prv_data *pdata) xgbe_an73_set(pdata, false, false); xgbe_an73_disable_interrupts(pdata); + pdata->an_start = 0; + netif_dbg(pdata, link, pdata->netdev, "CL73 AN disabled\n"); } static void xgbe_an_restart(struct xgbe_prv_data *pdata) { + if (pdata->phy_if.phy_impl.an_pre) + pdata->phy_if.phy_impl.an_pre(pdata); + switch (pdata->an_mode) { case XGBE_AN_MODE_CL73: case XGBE_AN_MODE_CL73_REDRV: @@ -453,6 +458,9 @@ static void xgbe_an_restart(struct xgbe_prv_data *pdata) static void xgbe_an_disable(struct xgbe_prv_data *pdata) { + if (pdata->phy_if.phy_impl.an_post) + pdata->phy_if.phy_impl.an_post(pdata); + switch (pdata->an_mode) { case XGBE_AN_MODE_CL73: case XGBE_AN_MODE_CL73_REDRV: @@ -505,11 +513,11 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata, XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); - if (pdata->phy_if.phy_impl.kr_training_post) - pdata->phy_if.phy_impl.kr_training_post(pdata); - netif_dbg(pdata, link, pdata->netdev, "KR training initiated\n"); + + if (pdata->phy_if.phy_impl.kr_training_post) + pdata->phy_if.phy_impl.kr_training_post(pdata); } return XGBE_AN_PAGE_RECEIVED; @@ -637,11 +645,11 @@ static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata) return XGBE_AN_NO_LINK; } - xgbe_an73_disable(pdata); + xgbe_an_disable(pdata); xgbe_switch_mode(pdata); - xgbe_an73_restart(pdata); + xgbe_an_restart(pdata); return XGBE_AN_INCOMPAT_LINK; } @@ -820,6 +828,9 @@ static void xgbe_an37_state_machine(struct xgbe_prv_data *pdata) pdata->an_result = pdata->an_state; pdata->an_state = XGBE_AN_READY; + if (pdata->phy_if.phy_impl.an_post) + pdata->phy_if.phy_impl.an_post(pdata); + netif_dbg(pdata, link, pdata->netdev, "CL37 AN result: %s\n", xgbe_state_as_string(pdata->an_result)); } @@ -903,6 +914,9 @@ again: pdata->kx_state = XGBE_RX_BPA; pdata->an_start = 0; + if (pdata->phy_if.phy_impl.an_post) + pdata->phy_if.phy_impl.an_post(pdata); + netif_dbg(pdata, link, pdata->netdev, "CL73 AN result: %s\n", xgbe_state_as_string(pdata->an_result)); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index eb23f9ba1a9a..82d1f416ee2a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -456,6 +456,7 @@ static const struct xgbe_version_data xgbe_v2a = { .irq_reissue_support = 1, .tx_desc_prefetch = 5, .rx_desc_prefetch = 5, + .an_cdr_workaround = 1, }; static const struct xgbe_version_data xgbe_v2b = { @@ -470,6 +471,7 @@ static const struct xgbe_version_data xgbe_v2b = { .irq_reissue_support = 1, .tx_desc_prefetch = 5, .rx_desc_prefetch = 5, + .an_cdr_workaround = 1, }; static const struct pci_device_id xgbe_pci_table[] = { diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 3304a291aa96..aac884314000 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -147,6 +147,14 @@ /* Rate-change complete wait/retry count */ #define XGBE_RATECHANGE_COUNT 500 +/* CDR delay values for KR support (in usec) */ +#define XGBE_CDR_DELAY_INIT 10000 +#define XGBE_CDR_DELAY_INC 10000 +#define XGBE_CDR_DELAY_MAX 100000 + +/* RRC frequency during link status check */ +#define XGBE_RRC_FREQUENCY 10 + enum xgbe_port_mode { XGBE_PORT_MODE_RSVD = 0, XGBE_PORT_MODE_BACKPLANE, @@ -245,6 +253,10 @@ enum xgbe_sfp_speed { #define XGBE_SFP_BASE_VENDOR_SN 4 #define XGBE_SFP_BASE_VENDOR_SN_LEN 16 +#define XGBE_SFP_EXTD_OPT1 1 +#define XGBE_SFP_EXTD_OPT1_RX_LOS BIT(1) +#define XGBE_SFP_EXTD_OPT1_TX_FAULT BIT(3) + #define XGBE_SFP_EXTD_DIAG 28 #define XGBE_SFP_EXTD_DIAG_ADDR_CHANGE BIT(2) @@ -324,6 +336,7 @@ struct xgbe_phy_data { unsigned int sfp_gpio_address; unsigned int sfp_gpio_mask; + unsigned int sfp_gpio_inputs; unsigned int sfp_gpio_rx_los; unsigned int sfp_gpio_tx_fault; unsigned int sfp_gpio_mod_absent; @@ -355,6 +368,10 @@ struct xgbe_phy_data { unsigned int redrv_addr; unsigned int redrv_lane; unsigned int redrv_model; + + /* KR AN support */ + unsigned int phy_cdr_notrack; + unsigned int phy_cdr_delay; }; /* I2C, MDIO and GPIO lines are muxed, so only one device at a time */ @@ -974,6 +991,49 @@ static void xgbe_phy_sfp_external_phy(struct xgbe_prv_data *pdata) phy_data->sfp_phy_avail = 1; } +static bool xgbe_phy_check_sfp_rx_los(struct xgbe_phy_data *phy_data) +{ + u8 *sfp_extd = phy_data->sfp_eeprom.extd; + + if (!(sfp_extd[XGBE_SFP_EXTD_OPT1] & XGBE_SFP_EXTD_OPT1_RX_LOS)) + return false; + + if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) + return false; + + if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_rx_los)) + return true; + + return false; +} + +static bool xgbe_phy_check_sfp_tx_fault(struct xgbe_phy_data *phy_data) +{ + u8 *sfp_extd = phy_data->sfp_eeprom.extd; + + if (!(sfp_extd[XGBE_SFP_EXTD_OPT1] & XGBE_SFP_EXTD_OPT1_TX_FAULT)) + return false; + + if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) + return false; + + if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_tx_fault)) + return true; + + return false; +} + +static bool xgbe_phy_check_sfp_mod_absent(struct xgbe_phy_data *phy_data) +{ + if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) + return false; + + if (phy_data->sfp_gpio_inputs & (1 << phy_data->sfp_gpio_mod_absent)) + return true; + + return false; +} + static bool xgbe_phy_belfuse_parse_quirks(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; @@ -1019,6 +1079,10 @@ static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata) if (sfp_base[XGBE_SFP_BASE_EXT_ID] != XGBE_SFP_EXT_ID_SFP) return; + /* Update transceiver signals (eeprom extd/options) */ + phy_data->sfp_tx_fault = xgbe_phy_check_sfp_tx_fault(phy_data); + phy_data->sfp_rx_los = xgbe_phy_check_sfp_rx_los(phy_data); + if (xgbe_phy_sfp_parse_quirks(pdata)) return; @@ -1184,7 +1248,6 @@ put: static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int gpio_input; u8 gpio_reg, gpio_ports[2]; int ret; @@ -1199,23 +1262,9 @@ static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata) return; } - gpio_input = (gpio_ports[1] << 8) | gpio_ports[0]; - - if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) { - /* No GPIO, just assume the module is present for now */ - phy_data->sfp_mod_absent = 0; - } else { - if (!(gpio_input & (1 << phy_data->sfp_gpio_mod_absent))) - phy_data->sfp_mod_absent = 0; - } - - if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) && - (gpio_input & (1 << phy_data->sfp_gpio_rx_los))) - phy_data->sfp_rx_los = 1; + phy_data->sfp_gpio_inputs = (gpio_ports[1] << 8) | gpio_ports[0]; - if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) && - (gpio_input & (1 << phy_data->sfp_gpio_tx_fault))) - phy_data->sfp_tx_fault = 1; + phy_data->sfp_mod_absent = xgbe_phy_check_sfp_mod_absent(phy_data); } static void xgbe_phy_sfp_mod_absent(struct xgbe_prv_data *pdata) @@ -2361,7 +2410,7 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) return 1; /* No link, attempt a receiver reset cycle */ - if (phy_data->rrc_count++) { + if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) { phy_data->rrc_count = 0; xgbe_phy_rrc(pdata); } @@ -2669,6 +2718,103 @@ static bool xgbe_phy_port_enabled(struct xgbe_prv_data *pdata) return true; } +static void xgbe_phy_cdr_track(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (!pdata->debugfs_an_cdr_workaround) + return; + + if (!phy_data->phy_cdr_notrack) + return; + + usleep_range(phy_data->phy_cdr_delay, + phy_data->phy_cdr_delay + 500); + + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_CDR_CONTROL, + XGBE_PMA_CDR_TRACK_EN_MASK, + XGBE_PMA_CDR_TRACK_EN_ON); + + phy_data->phy_cdr_notrack = 0; +} + +static void xgbe_phy_cdr_notrack(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + if (!pdata->debugfs_an_cdr_workaround) + return; + + if (phy_data->phy_cdr_notrack) + return; + + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_CDR_CONTROL, + XGBE_PMA_CDR_TRACK_EN_MASK, + XGBE_PMA_CDR_TRACK_EN_OFF); + + xgbe_phy_rrc(pdata); + + phy_data->phy_cdr_notrack = 1; +} + +static void xgbe_phy_kr_training_post(struct xgbe_prv_data *pdata) +{ + if (!pdata->debugfs_an_cdr_track_early) + xgbe_phy_cdr_track(pdata); +} + +static void xgbe_phy_kr_training_pre(struct xgbe_prv_data *pdata) +{ + if (pdata->debugfs_an_cdr_track_early) + xgbe_phy_cdr_track(pdata); +} + +static void xgbe_phy_an_post(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + if (phy_data->cur_mode != XGBE_MODE_KR) + break; + + xgbe_phy_cdr_track(pdata); + + switch (pdata->an_result) { + case XGBE_AN_READY: + case XGBE_AN_COMPLETE: + break; + default: + if (phy_data->phy_cdr_delay < XGBE_CDR_DELAY_MAX) + phy_data->phy_cdr_delay += XGBE_CDR_DELAY_INC; + else + phy_data->phy_cdr_delay = XGBE_CDR_DELAY_INIT; + break; + } + break; + default: + break; + } +} + +static void xgbe_phy_an_pre(struct xgbe_prv_data *pdata) +{ + struct xgbe_phy_data *phy_data = pdata->phy_data; + + switch (pdata->an_mode) { + case XGBE_AN_MODE_CL73: + case XGBE_AN_MODE_CL73_REDRV: + if (phy_data->cur_mode != XGBE_MODE_KR) + break; + + xgbe_phy_cdr_notrack(pdata); + break; + default: + break; + } +} + static void xgbe_phy_stop(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; @@ -2680,6 +2826,9 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) xgbe_phy_sfp_reset(phy_data); xgbe_phy_sfp_mod_absent(pdata); + /* Reset CDR support */ + xgbe_phy_cdr_track(pdata); + /* Power off the PHY */ xgbe_phy_power_off(pdata); @@ -2712,6 +2861,9 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) /* Start in highest supported mode */ xgbe_phy_set_mode(pdata, phy_data->start_mode); + /* Reset CDR support */ + xgbe_phy_cdr_track(pdata); + /* After starting the I2C controller, we can check for an SFP */ switch (phy_data->port_mode) { case XGBE_PORT_MODE_SFP: @@ -3019,6 +3171,8 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata) } } + phy_data->phy_cdr_delay = XGBE_CDR_DELAY_INIT; + /* Register for driving external PHYs */ mii = devm_mdiobus_alloc(pdata->dev); if (!mii) { @@ -3071,4 +3225,10 @@ void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *phy_if) phy_impl->an_advertising = xgbe_phy_an_advertising; phy_impl->an_outcome = xgbe_phy_an_outcome; + + phy_impl->an_pre = xgbe_phy_an_pre; + phy_impl->an_post = xgbe_phy_an_post; + + phy_impl->kr_training_pre = xgbe_phy_kr_training_pre; + phy_impl->kr_training_post = xgbe_phy_kr_training_post; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index ad102c8bac7b..95d4b56448c6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -833,6 +833,7 @@ struct xgbe_hw_if { /* This structure represents implementation specific routines for an * implementation of a PHY. All routines are required unless noted below. * Optional routines: + * an_pre, an_post * kr_training_pre, kr_training_post */ struct xgbe_phy_impl_if { @@ -875,6 +876,10 @@ struct xgbe_phy_impl_if { /* Process results of auto-negotiation */ enum xgbe_mode (*an_outcome)(struct xgbe_prv_data *); + /* Pre/Post auto-negotiation support */ + void (*an_pre)(struct xgbe_prv_data *); + void (*an_post)(struct xgbe_prv_data *); + /* Pre/Post KR training enablement support */ void (*kr_training_pre)(struct xgbe_prv_data *); void (*kr_training_post)(struct xgbe_prv_data *); @@ -989,6 +994,7 @@ struct xgbe_version_data { unsigned int irq_reissue_support; unsigned int tx_desc_prefetch; unsigned int rx_desc_prefetch; + unsigned int an_cdr_workaround; }; struct xgbe_vxlan_data { @@ -1257,6 +1263,9 @@ struct xgbe_prv_data { unsigned int debugfs_xprop_reg; unsigned int debugfs_xi2c_reg; + + bool debugfs_an_cdr_workaround; + bool debugfs_an_cdr_track_early; }; /* Function prototypes*/ diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f9a3c1a76d5d..effc651a2a2f 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -654,7 +654,7 @@ static int bcm_sysport_set_coalesce(struct net_device *dev, pkts = priv->rx_max_coalesced_frames; if (ec->use_adaptive_rx_coalesce && !priv->dim.use_dim) { - moder = net_dim_get_def_profile(priv->dim.dim.mode); + moder = net_dim_get_def_rx_moderation(priv->dim.dim.mode); usecs = moder.usec; pkts = moder.pkts; } @@ -1064,7 +1064,7 @@ static void bcm_sysport_dim_work(struct work_struct *work) struct bcm_sysport_priv *priv = container_of(ndim, struct bcm_sysport_priv, dim); struct net_dim_cq_moder cur_profile = - net_dim_get_profile(dim->mode, dim->profile_ix); + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts); dim->state = NET_DIM_START_MEASURE; @@ -1437,7 +1437,7 @@ static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv) /* If DIM was enabled, re-apply default parameters */ if (dim->use_dim) { - moder = net_dim_get_def_profile(dim->dim.mode); + moder = net_dim_get_def_rx_moderation(dim->dim.mode); usecs = moder.usec; pkts = moder.pkts; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c index 408dd190331e..afa97c8bb081 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c @@ -21,11 +21,11 @@ void bnxt_dim_work(struct work_struct *work) struct bnxt_napi *bnapi = container_of(cpr, struct bnxt_napi, cp_ring); - struct net_dim_cq_moder cur_profile = net_dim_get_profile(dim->mode, - dim->profile_ix); + struct net_dim_cq_moder cur_moder = + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); - cpr->rx_ring_coal.coal_ticks = cur_profile.usec; - cpr->rx_ring_coal.coal_bufs = cur_profile.pkts; + cpr->rx_ring_coal.coal_ticks = cur_moder.usec; + cpr->rx_ring_coal.coal_bufs = cur_moder.pkts; bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi); dim->state = NET_DIM_START_MEASURE; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 0445f2c0c629..20c1681bb1af 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -652,7 +652,7 @@ static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring, pkts = ring->rx_max_coalesced_frames; if (ec->use_adaptive_rx_coalesce && !ring->dim.use_dim) { - moder = net_dim_get_def_profile(ring->dim.dim.mode); + moder = net_dim_get_def_rx_moderation(ring->dim.dim.mode); usecs = moder.usec; pkts = moder.pkts; } @@ -1925,7 +1925,7 @@ static void bcmgenet_dim_work(struct work_struct *work) struct bcmgenet_rx_ring *ring = container_of(ndim, struct bcmgenet_rx_ring, dim); struct net_dim_cq_moder cur_profile = - net_dim_get_profile(dim->mode, dim->profile_ix); + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts); dim->state = NET_DIM_START_MEASURE; @@ -2102,7 +2102,7 @@ static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring) /* If DIM was enabled, re-apply default parameters */ if (dim->use_dim) { - moder = net_dim_get_def_profile(dim->dim.mode); + moder = net_dim_get_def_rx_moderation(dim->dim.mode); usecs = moder.usec; pkts = moder.pkts; } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index 2adafa366d3f..ddd7431579f4 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -201,13 +201,14 @@ lio_vf_rep_get_stats64(struct net_device *dev, { struct lio_vf_rep_desc *vf_rep = netdev_priv(dev); - stats64->tx_packets = vf_rep->stats.tx_packets; - stats64->tx_bytes = vf_rep->stats.tx_bytes; - stats64->tx_dropped = vf_rep->stats.tx_dropped; - - stats64->rx_packets = vf_rep->stats.rx_packets; - stats64->rx_bytes = vf_rep->stats.rx_bytes; - stats64->rx_dropped = vf_rep->stats.rx_dropped; + /* Swap tx and rx stats as VF rep is a switch port */ + stats64->tx_packets = vf_rep->stats.rx_packets; + stats64->tx_bytes = vf_rep->stats.rx_bytes; + stats64->tx_dropped = vf_rep->stats.rx_dropped; + + stats64->rx_packets = vf_rep->stats.tx_packets; + stats64->rx_bytes = vf_rep->stats.tx_bytes; + stats64->rx_dropped = vf_rep->stats.tx_dropped; } static int diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 6552d68ea6e1..ce6e24c74978 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1391,12 +1391,10 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params) /* FM_WRONG_RESET_VALUES_ERRATA_FMAN_A005127 Errata * workaround */ - if (port->rev_info.major >= 6) { - u32 reg; + u32 reg; - reg = 0x00001013; - iowrite32be(reg, &port->bmi_regs->tx.fmbm_tfp); - } + reg = 0x00001013; + iowrite32be(reg, &port->bmi_regs->tx.fmbm_tfp); } return 0; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2df01ad98df7..6e8d6a6f6aaf 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1128,7 +1128,7 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter) if (!adapter->rx_pool) return; - rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); + rx_scrqs = adapter->num_active_rx_pools; rx_entries = adapter->req_rx_add_entries_per_subcrq; /* Free any remaining skbs in the rx buffer pools */ @@ -1177,7 +1177,7 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter) if (!adapter->tx_pool || !adapter->tso_pool) return; - tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); + tx_scrqs = adapter->num_active_tx_pools; /* Free any remaining skbs in the tx buffer pools */ for (i = 0; i < tx_scrqs; i++) { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index df8607097e4a..c51d61f5f715 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -445,15 +445,14 @@ static void fm10k_type_trans(struct fm10k_ring *rx_ring, l2_accel = NULL; } - skb->protocol = eth_type_trans(skb, dev); - /* Record Rx queue, or update macvlan statistics */ if (!l2_accel) skb_record_rx_queue(skb, rx_ring->queue_index); else macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true, - (skb->pkt_type == PACKET_BROADCAST) || - (skb->pkt_type == PACKET_MULTICAST)); + false); + + skb->protocol = eth_type_trans(skb, dev); } /** diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 45793491d4ba..26e749766337 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -22,6 +22,7 @@ #include "fm10k.h" #include <linux/vmalloc.h> #include <net/udp_tunnel.h> +#include <linux/if_macvlan.h> /** * fm10k_setup_tx_resources - allocate Tx resources (Descriptors) @@ -1254,7 +1255,7 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) glort = l2_accel->dglort + 1 + i; hw->mac.ops.update_xcast_mode(hw, glort, - FM10K_XCAST_MODE_MULTI); + FM10K_XCAST_MODE_NONE); fm10k_queue_mac_request(interface, glort, sdev->dev_addr, hw->mac.default_vid, true); @@ -1449,6 +1450,13 @@ static void *fm10k_dfwd_add_station(struct net_device *dev, int size = 0, i; u16 glort; + /* The hardware supported by fm10k only filters on the destination MAC + * address. In order to avoid issues we only support offloading modes + * where the hardware can actually provide the functionality. + */ + if (!macvlan_supports_dest_filter(sdev)) + return ERR_PTR(-EMEDIUMTYPE); + /* allocate l2 accel structure if it is not available */ if (!l2_accel) { /* verify there is enough free GLORTs to support l2_accel */ @@ -1515,7 +1523,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev, if (fm10k_host_mbx_ready(interface)) { hw->mac.ops.update_xcast_mode(hw, glort, - FM10K_XCAST_MODE_MULTI); + FM10K_XCAST_MODE_NONE); fm10k_queue_mac_request(interface, glort, sdev->dev_addr, hw->mac.default_vid, true); } diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 5b13ca1bd85f..7dc5f045e969 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -586,7 +586,7 @@ struct ice_sw_rule_lg_act { #define ICE_LG_ACT_MIRROR_VSI_ID_S 3 #define ICE_LG_ACT_MIRROR_VSI_ID_M (0x3FF << ICE_LG_ACT_MIRROR_VSI_ID_S) - /* Action type = 5 - Large Action */ + /* Action type = 5 - Generic Value */ #define ICE_LG_ACT_GENERIC 0x5 #define ICE_LG_ACT_GENERIC_VALUE_S 3 #define ICE_LG_ACT_GENERIC_VALUE_M (0xFFFF << ICE_LG_ACT_GENERIC_VALUE_S) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 21977ec984c4..71d032cc5fa7 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -78,6 +78,7 @@ ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size, struct ice_aq_desc desc; enum ice_status status; u16 flags; + u8 i; cmd = &desc.params.mac_read; @@ -98,8 +99,16 @@ ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size, return ICE_ERR_CFG; } - ether_addr_copy(hw->port_info->mac.lan_addr, resp->mac_addr); - ether_addr_copy(hw->port_info->mac.perm_addr, resp->mac_addr); + /* A single port can report up to two (LAN and WoL) addresses */ + for (i = 0; i < cmd->num_addr; i++) + if (resp[i].addr_type == ICE_AQC_MAN_MAC_ADDR_TYPE_LAN) { + ether_addr_copy(hw->port_info->mac.lan_addr, + resp[i].mac_addr); + ether_addr_copy(hw->port_info->mac.perm_addr, + resp[i].mac_addr); + break; + } + return 0; } @@ -464,9 +473,12 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_sched; - /* Get port MAC information */ - mac_buf_len = sizeof(struct ice_aqc_manage_mac_read_resp); - mac_buf = devm_kzalloc(ice_hw_to_dev(hw), mac_buf_len, GFP_KERNEL); + /* Get MAC information */ + /* A single port can report up to two (LAN and WoL) addresses */ + mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2, + sizeof(struct ice_aqc_manage_mac_read_resp), + GFP_KERNEL); + mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp); if (!mac_buf) { status = ICE_ERR_NO_MEMORY; diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 1b9e2ef48a9d..499904874b3f 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -121,8 +121,6 @@ #define PFINT_FW_CTL_CAUSE_ENA_S 30 #define PFINT_FW_CTL_CAUSE_ENA_M BIT(PFINT_FW_CTL_CAUSE_ENA_S) #define PFINT_OICR 0x0016CA00 -#define PFINT_OICR_INTEVENT_S 0 -#define PFINT_OICR_INTEVENT_M BIT(PFINT_OICR_INTEVENT_S) #define PFINT_OICR_HLP_RDY_S 14 #define PFINT_OICR_HLP_RDY_M BIT(PFINT_OICR_HLP_RDY_S) #define PFINT_OICR_CPM_RDY_S 15 diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 210b7910f1cd..5299caf55a7f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1722,9 +1722,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); - if (!(oicr & PFINT_OICR_INTEVENT_M)) - goto ena_intr; - if (oicr & PFINT_OICR_GRST_M) { u32 reset; /* we have a reset warning */ @@ -1782,7 +1779,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) } ret = IRQ_HANDLED; -ena_intr: /* re-enable interrupt causes that are not handled during this pass */ wr32(hw, PFINT_OICR_ENA, ena_mask); if (!test_bit(__ICE_DOWN, pf->state)) { diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index f16ff3e4a840..2e6c1d92cc88 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -751,14 +751,14 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi, u16 num_added = 0; u32 temp; + *num_nodes_added = 0; + if (!num_nodes) return status; if (!parent || layer < hw->sw_entry_point_layer) return ICE_ERR_PARAM; - *num_nodes_added = 0; - /* max children per node per layer */ max_child_nodes = le16_to_cpu(hw->layer_info[parent->tx_sched_layer].max_children); diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 98534f765e0e..d3d1d868e7ba 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -491,6 +491,8 @@ * manageability enabled, allowing us room for 15 multicast addresses. */ #define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ +#define E1000_RAH_ASEL_SRC_ADDR 0x00010000 +#define E1000_RAH_QSEL_ENABLE 0x10000000 #define E1000_RAL_MAC_ADDR_LEN 4 #define E1000_RAH_MAC_ADDR_LEN 2 #define E1000_RAH_POOL_MASK 0x03FC0000 diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 8dbc399b345e..990a7fb32e4e 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -442,6 +442,8 @@ struct hwmon_buff { enum igb_filter_match_flags { IGB_FILTER_FLAG_ETHER_TYPE = 0x1, IGB_FILTER_FLAG_VLAN_TCI = 0x2, + IGB_FILTER_FLAG_SRC_MAC_ADDR = 0x4, + IGB_FILTER_FLAG_DST_MAC_ADDR = 0x8, }; #define IGB_MAX_RXNFC_FILTERS 16 @@ -456,11 +458,14 @@ struct igb_nfc_input { u8 match_flags; __be16 etype; __be16 vlan_tci; + u8 src_addr[ETH_ALEN]; + u8 dst_addr[ETH_ALEN]; }; struct igb_nfc_filter { struct hlist_node nfc_node; struct igb_nfc_input filter; + unsigned long cookie; u16 etype_reg_index; u16 sw_idx; u16 action; @@ -474,6 +479,8 @@ struct igb_mac_addr { #define IGB_MAC_STATE_DEFAULT 0x1 #define IGB_MAC_STATE_IN_USE 0x2 +#define IGB_MAC_STATE_SRC_ADDR 0x4 +#define IGB_MAC_STATE_QUEUE_STEERING 0x8 /* board specific private data structure */ struct igb_adapter { @@ -598,6 +605,7 @@ struct igb_adapter { /* RX network flow classification support */ struct hlist_head nfc_filter_list; + struct hlist_head cls_flower_list; unsigned int nfc_filter_count; /* lock for RX network flow classification filter */ spinlock_t nfc_lock; @@ -739,4 +747,9 @@ int igb_add_filter(struct igb_adapter *adapter, int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input); +int igb_add_mac_steering_filter(struct igb_adapter *adapter, + const u8 *addr, u8 queue, u8 flags); +int igb_del_mac_steering_filter(struct igb_adapter *adapter, + const u8 *addr, u8 queue, u8 flags); + #endif /* _IGB_H_ */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index e77ba0d5866d..6697c273ab59 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2495,6 +2495,23 @@ static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter, fsp->h_ext.vlan_tci = rule->filter.vlan_tci; fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK); } + if (rule->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) { + ether_addr_copy(fsp->h_u.ether_spec.h_dest, + rule->filter.dst_addr); + /* As we only support matching by the full + * mask, return the mask to userspace + */ + eth_broadcast_addr(fsp->m_u.ether_spec.h_dest); + } + if (rule->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) { + ether_addr_copy(fsp->h_u.ether_spec.h_source, + rule->filter.src_addr); + /* As we only support matching by the full + * mask, return the mask to userspace + */ + eth_broadcast_addr(fsp->m_u.ether_spec.h_source); + } + return 0; } return -EINVAL; @@ -2768,14 +2785,41 @@ static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) { + struct e1000_hw *hw = &adapter->hw; int err = -EINVAL; + if (hw->mac.type == e1000_i210 && + !(input->filter.match_flags & ~IGB_FILTER_FLAG_SRC_MAC_ADDR)) { + dev_err(&adapter->pdev->dev, + "i210 doesn't support flow classification rules specifying only source addresses.\n"); + return -EOPNOTSUPP; + } + if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) { err = igb_rxnfc_write_etype_filter(adapter, input); if (err) return err; } + if (input->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) { + err = igb_add_mac_steering_filter(adapter, + input->filter.dst_addr, + input->action, 0); + err = min_t(int, err, 0); + if (err) + return err; + } + + if (input->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) { + err = igb_add_mac_steering_filter(adapter, + input->filter.src_addr, + input->action, + IGB_MAC_STATE_SRC_ADDR); + err = min_t(int, err, 0); + if (err) + return err; + } + if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) err = igb_rxnfc_write_vlan_prio_filter(adapter, input); @@ -2824,6 +2868,15 @@ int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) igb_clear_vlan_prio_filter(adapter, ntohs(input->filter.vlan_tci)); + if (input->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) + igb_del_mac_steering_filter(adapter, input->filter.src_addr, + input->action, + IGB_MAC_STATE_SRC_ADDR); + + if (input->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) + igb_del_mac_steering_filter(adapter, input->filter.dst_addr, + input->action, 0); + return 0; } @@ -2865,7 +2918,7 @@ static int igb_update_ethtool_nfc_entry(struct igb_adapter *adapter, /* add filter to the list */ if (parent) - hlist_add_behind(&parent->nfc_node, &input->nfc_node); + hlist_add_behind(&input->nfc_node, &parent->nfc_node); else hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list); @@ -2905,10 +2958,6 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW) return -EINVAL; - if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK && - fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) - return -EINVAL; - input = kzalloc(sizeof(*input), GFP_KERNEL); if (!input) return -ENOMEM; @@ -2918,6 +2967,20 @@ static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE; } + /* Only support matching addresses by the full mask */ + if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) { + input->filter.match_flags |= IGB_FILTER_FLAG_SRC_MAC_ADDR; + ether_addr_copy(input->filter.src_addr, + fsp->h_u.ether_spec.h_source); + } + + /* Only support matching addresses by the full mask */ + if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) { + input->filter.match_flags |= IGB_FILTER_FLAG_DST_MAC_ADDR; + ether_addr_copy(input->filter.dst_addr, + fsp->h_u.ether_spec.h_dest); + } + if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) { if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) { err = -EINVAL; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index c1c0bc30a16d..3ce43207a35f 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -36,6 +36,7 @@ #include <net/checksum.h> #include <net/ip6_checksum.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <linux/net_tstamp.h> #include <linux/mii.h> #include <linux/ethtool.h> @@ -1700,7 +1701,22 @@ static void igb_configure_cbs(struct igb_adapter *adapter, int queue, WARN_ON(hw->mac.type != e1000_i210); WARN_ON(queue < 0 || queue > 1); - if (enable) { + if (enable || queue == 0) { + /* i210 does not allow the queue 0 to be in the Strict + * Priority mode while the Qav mode is enabled, so, + * instead of disabling strict priority mode, we give + * queue 0 the maximum of credits possible. + * + * See section 8.12.19 of the i210 datasheet, "Note: + * Queue0 QueueMode must be set to 1b when + * TransmitMode is set to Qav." + */ + if (queue == 0 && !enable) { + /* max "linkspeed" idleslope in kbps */ + idleslope = 1000000; + hicredit = ETH_FRAME_LEN; + } + set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH); set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION); @@ -2498,6 +2514,250 @@ static int igb_offload_cbs(struct igb_adapter *adapter, return 0; } +#define ETHER_TYPE_FULL_MASK ((__force __be16)~0) +#define VLAN_PRIO_FULL_MASK (0x07) + +static int igb_parse_cls_flower(struct igb_adapter *adapter, + struct tc_cls_flower_offload *f, + int traffic_class, + struct igb_nfc_filter *input) +{ + struct netlink_ext_ack *extack = f->common.extack; + + if (f->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN))) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported key used, only BASIC, CONTROL, ETH_ADDRS and VLAN are supported"); + return -EOPNOTSUPP; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key, *mask; + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->mask); + + if (!is_zero_ether_addr(mask->dst)) { + if (!is_broadcast_ether_addr(mask->dst)) { + NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address"); + return -EINVAL; + } + + input->filter.match_flags |= + IGB_FILTER_FLAG_DST_MAC_ADDR; + ether_addr_copy(input->filter.dst_addr, key->dst); + } + + if (!is_zero_ether_addr(mask->src)) { + if (!is_broadcast_ether_addr(mask->src)) { + NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address"); + return -EINVAL; + } + + input->filter.match_flags |= + IGB_FILTER_FLAG_SRC_MAC_ADDR; + ether_addr_copy(input->filter.src_addr, key->src); + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key, *mask; + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->mask); + + if (mask->n_proto) { + if (mask->n_proto != ETHER_TYPE_FULL_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter"); + return -EINVAL; + } + + input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE; + input->filter.etype = key->n_proto; + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key, *mask; + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + + if (mask->vlan_priority) { + if (mask->vlan_priority != VLAN_PRIO_FULL_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority"); + return -EINVAL; + } + + input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI; + input->filter.vlan_tci = key->vlan_priority; + } + } + + input->action = traffic_class; + input->cookie = f->cookie; + + return 0; +} + +static int igb_configure_clsflower(struct igb_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + struct netlink_ext_ack *extack = cls_flower->common.extack; + struct igb_nfc_filter *filter, *f; + int err, tc; + + tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); + if (tc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Invalid traffic class"); + return -EINVAL; + } + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + err = igb_parse_cls_flower(adapter, cls_flower, tc, filter); + if (err < 0) + goto err_parse; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(f, &adapter->nfc_filter_list, nfc_node) { + if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) { + err = -EEXIST; + NL_SET_ERR_MSG_MOD(extack, + "This filter is already set in ethtool"); + goto err_locked; + } + } + + hlist_for_each_entry(f, &adapter->cls_flower_list, nfc_node) { + if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) { + err = -EEXIST; + NL_SET_ERR_MSG_MOD(extack, + "This filter is already set in cls_flower"); + goto err_locked; + } + } + + err = igb_add_filter(adapter, filter); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Could not add filter to the adapter"); + goto err_locked; + } + + hlist_add_head(&filter->nfc_node, &adapter->cls_flower_list); + + spin_unlock(&adapter->nfc_lock); + + return 0; + +err_locked: + spin_unlock(&adapter->nfc_lock); + +err_parse: + kfree(filter); + + return err; +} + +static int igb_delete_clsflower(struct igb_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + struct igb_nfc_filter *filter; + int err; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(filter, &adapter->cls_flower_list, nfc_node) + if (filter->cookie == cls_flower->cookie) + break; + + if (!filter) { + err = -ENOENT; + goto out; + } + + err = igb_erase_filter(adapter, filter); + if (err < 0) + goto out; + + hlist_del(&filter->nfc_node); + kfree(filter); + +out: + spin_unlock(&adapter->nfc_lock); + + return err; +} + +static int igb_setup_tc_cls_flower(struct igb_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return igb_configure_clsflower(adapter, cls_flower); + case TC_CLSFLOWER_DESTROY: + return igb_delete_clsflower(adapter, cls_flower); + case TC_CLSFLOWER_STATS: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int igb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct igb_adapter *adapter = cb_priv; + + if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return igb_setup_tc_cls_flower(adapter, type_data); + + default: + return -EOPNOTSUPP; + } +} + +static int igb_setup_tc_block(struct igb_adapter *adapter, + struct tc_block_offload *f) +{ + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, igb_setup_tc_block_cb, + adapter, adapter); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb, + adapter); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { @@ -2506,6 +2766,8 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_QDISC_CBS: return igb_offload_cbs(adapter, type_data); + case TC_SETUP_BLOCK: + return igb_setup_tc_block(adapter, type_data); default: return -EOPNOTSUPP; @@ -2807,6 +3069,9 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->mac.type >= e1000_82576) netdev->features |= NETIF_F_SCTP_CRC; + if (hw->mac.type >= e1000_i350) + netdev->features |= NETIF_F_HW_TC; + #define IGB_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ NETIF_F_GSO_GRE_CSUM | \ NETIF_F_GSO_IPXIP4 | \ @@ -6841,8 +7106,35 @@ static void igb_set_default_mac_filter(struct igb_adapter *adapter) igb_rar_set_index(adapter, 0); } -static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr, - const u8 queue) +/* If the filter to be added and an already existing filter express + * the same address and address type, it should be possible to only + * override the other configurations, for example the queue to steer + * traffic. + */ +static bool igb_mac_entry_can_be_used(const struct igb_mac_addr *entry, + const u8 *addr, const u8 flags) +{ + if (!(entry->state & IGB_MAC_STATE_IN_USE)) + return true; + + if ((entry->state & IGB_MAC_STATE_SRC_ADDR) != + (flags & IGB_MAC_STATE_SRC_ADDR)) + return false; + + if (!ether_addr_equal(addr, entry->addr)) + return false; + + return true; +} + +/* Add a MAC filter for 'addr' directing matching traffic to 'queue', + * 'flags' is used to indicate what kind of match is made, match is by + * default for the destination address, if matching by source address + * is desired the flag IGB_MAC_STATE_SRC_ADDR can be used. + */ +static int igb_add_mac_filter_flags(struct igb_adapter *adapter, + const u8 *addr, const u8 queue, + const u8 flags) { struct e1000_hw *hw = &adapter->hw; int rar_entries = hw->mac.rar_entry_count - @@ -6857,12 +7149,13 @@ static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr, * addresses. */ for (i = 0; i < rar_entries; i++) { - if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE) + if (!igb_mac_entry_can_be_used(&adapter->mac_table[i], + addr, flags)) continue; ether_addr_copy(adapter->mac_table[i].addr, addr); adapter->mac_table[i].queue = queue; - adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE; + adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE | flags; igb_rar_set_index(adapter, i); return i; @@ -6871,9 +7164,22 @@ static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr, return -ENOSPC; } -static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr, +static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr, const u8 queue) { + return igb_add_mac_filter_flags(adapter, addr, queue, 0); +} + +/* Remove a MAC filter for 'addr' directing matching traffic to + * 'queue', 'flags' is used to indicate what kind of match need to be + * removed, match is by default for the destination address, if + * matching by source address is to be removed the flag + * IGB_MAC_STATE_SRC_ADDR can be used. + */ +static int igb_del_mac_filter_flags(struct igb_adapter *adapter, + const u8 *addr, const u8 queue, + const u8 flags) +{ struct e1000_hw *hw = &adapter->hw; int rar_entries = hw->mac.rar_entry_count - adapter->vfs_allocated_count; @@ -6889,14 +7195,26 @@ static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr, for (i = 0; i < rar_entries; i++) { if (!(adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)) continue; + if ((adapter->mac_table[i].state & flags) != flags) + continue; if (adapter->mac_table[i].queue != queue) continue; if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) continue; - adapter->mac_table[i].state &= ~IGB_MAC_STATE_IN_USE; - memset(adapter->mac_table[i].addr, 0, ETH_ALEN); - adapter->mac_table[i].queue = 0; + /* When a filter for the default address is "deleted", + * we return it to its initial configuration + */ + if (adapter->mac_table[i].state & IGB_MAC_STATE_DEFAULT) { + adapter->mac_table[i].state = + IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE; + adapter->mac_table[i].queue = + adapter->vfs_allocated_count; + } else { + adapter->mac_table[i].state = 0; + adapter->mac_table[i].queue = 0; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + } igb_rar_set_index(adapter, i); return 0; @@ -6905,6 +7223,34 @@ static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr, return -ENOENT; } +static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr, + const u8 queue) +{ + return igb_del_mac_filter_flags(adapter, addr, queue, 0); +} + +int igb_add_mac_steering_filter(struct igb_adapter *adapter, + const u8 *addr, u8 queue, u8 flags) +{ + struct e1000_hw *hw = &adapter->hw; + + /* In theory, this should be supported on 82575 as well, but + * that part wasn't easily accessible during development. + */ + if (hw->mac.type != e1000_i210) + return -EOPNOTSUPP; + + return igb_add_mac_filter_flags(adapter, addr, queue, + IGB_MAC_STATE_QUEUE_STEERING | flags); +} + +int igb_del_mac_steering_filter(struct igb_adapter *adapter, + const u8 *addr, u8 queue, u8 flags) +{ + return igb_del_mac_filter_flags(adapter, addr, queue, + IGB_MAC_STATE_QUEUE_STEERING | flags); +} + static int igb_uc_sync(struct net_device *netdev, const unsigned char *addr) { struct igb_adapter *adapter = netdev_priv(netdev); @@ -8748,12 +9094,24 @@ static void igb_rar_set_index(struct igb_adapter *adapter, u32 index) if (is_valid_ether_addr(addr)) rar_high |= E1000_RAH_AV; - if (hw->mac.type == e1000_82575) + if (adapter->mac_table[index].state & IGB_MAC_STATE_SRC_ADDR) + rar_high |= E1000_RAH_ASEL_SRC_ADDR; + + switch (hw->mac.type) { + case e1000_82575: + case e1000_i210: + if (adapter->mac_table[index].state & + IGB_MAC_STATE_QUEUE_STEERING) + rar_high |= E1000_RAH_QSEL_ENABLE; + rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue; - else + break; + default: rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue; + break; + } } wr32(E1000_RAL(index), rar_low); @@ -9191,6 +9549,9 @@ static void igb_nfc_filter_exit(struct igb_adapter *adapter) hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) igb_erase_filter(adapter, rule); + hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node) + igb_erase_filter(adapter, rule); + spin_unlock(&adapter->nfc_lock); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 7dd5038cfcc4..8fccca57cd6a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -305,7 +305,6 @@ enum ixgbe_ring_state_t { struct ixgbe_fwd_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct net_device *netdev; - struct ixgbe_adapter *real_adapter; unsigned int tx_base_queue; unsigned int rx_base_queue; int pool; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 51e7d82a5860..b6e5cea84949 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1768,15 +1768,14 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) ixgbe_ipsec_rx(rx_ring, rx_desc, skb); - skb->protocol = eth_type_trans(skb, dev); - /* record Rx queue, or update MACVLAN statistics */ if (netif_is_ixgbe(dev)) skb_record_rx_queue(skb, rx_ring->queue_index); else macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true, - (skb->pkt_type == PACKET_BROADCAST) || - (skb->pkt_type == PACKET_MULTICAST)); + false); + + skb->protocol = eth_type_trans(skb, dev); } static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, @@ -4219,7 +4218,8 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter) static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - u32 reg_offset, vf_shift; + u16 pool = adapter->num_rx_pools; + u32 reg_offset, vf_shift, vmolr; u32 gcr_ext, vmdctl; int i; @@ -4233,6 +4233,13 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter) vmdctl |= IXGBE_VT_CTL_REPLEN; IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl); + /* accept untagged packets until a vlan tag is + * specifically set for the VMDQ queue/pool + */ + vmolr = IXGBE_VMOLR_AUPE; + while (pool--) + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(pool)), vmolr); + vf_shift = VMDQ_P(0) % 32; reg_offset = (VMDQ_P(0) >= 32) ? 1 : 0; @@ -4900,36 +4907,6 @@ int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, return -ENOMEM; } -/** - * ixgbe_write_uc_addr_list - write unicast addresses to RAR table - * @netdev: network interface device structure - * @vfn: pool to associate with unicast addresses - * - * Writes unicast address list to the RAR table. - * Returns: -ENOMEM on failure/insufficient address space - * 0 on no addresses written - * X on writing X addresses to the RAR table - **/ -static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) -{ - struct ixgbe_adapter *adapter = netdev_priv(netdev); - int count = 0; - - /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn)) - return -ENOMEM; - - if (!netdev_uc_empty(netdev)) { - struct netdev_hw_addr *ha; - netdev_for_each_uc_addr(ha, netdev) { - ixgbe_del_mac_filter(adapter, ha->addr, vfn); - ixgbe_add_mac_filter(adapter, ha->addr, vfn); - count++; - } - } - return count; -} - static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr) { struct ixgbe_adapter *adapter = netdev_priv(netdev); @@ -5309,29 +5286,6 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) spin_unlock(&adapter->fdir_perfect_lock); } -static void ixgbe_macvlan_set_rx_mode(struct net_device *dev, unsigned int pool, - struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 vmolr; - - /* No unicast promiscuous support for VMDQ devices. */ - vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(pool)); - vmolr |= (IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE); - - /* clear the affected bit */ - vmolr &= ~IXGBE_VMOLR_MPE; - - if (dev->flags & IFF_ALLMULTI) { - vmolr |= IXGBE_VMOLR_MPE; - } else { - vmolr |= IXGBE_VMOLR_ROMPE; - hw->mac.ops.update_mc_addr_list(hw, dev); - } - ixgbe_write_uc_addr_list(adapter->netdev, pool); - IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr); -} - /** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from @@ -5384,21 +5338,17 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) rx_ring->next_to_use = 0; } -static int ixgbe_fwd_ring_up(struct net_device *vdev, +static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter, struct ixgbe_fwd_adapter *accel) { - struct ixgbe_adapter *adapter = accel->real_adapter; + struct net_device *vdev = accel->netdev; int i, baseq, err; - if (!test_bit(accel->pool, adapter->fwd_bitmask)) - return 0; - baseq = accel->pool * adapter->num_rx_queues_per_pool; netdev_dbg(vdev, "pool %i:%i queues %i:%i\n", accel->pool, adapter->num_rx_pools, baseq, baseq + adapter->num_rx_queues_per_pool); - accel->netdev = vdev; accel->rx_base_queue = baseq; accel->tx_base_queue = baseq; @@ -5415,26 +5365,36 @@ static int ixgbe_fwd_ring_up(struct net_device *vdev, */ err = ixgbe_add_mac_filter(adapter, vdev->dev_addr, VMDQ_P(accel->pool)); - if (err >= 0) { - ixgbe_macvlan_set_rx_mode(vdev, accel->pool, adapter); + if (err >= 0) return 0; - } + + /* if we cannot add the MAC rule then disable the offload */ + macvlan_release_l2fw_offload(vdev); for (i = 0; i < adapter->num_rx_queues_per_pool; i++) adapter->rx_ring[baseq + i]->netdev = NULL; + netdev_err(vdev, "L2FW offload disabled due to L2 filter error\n"); + + clear_bit(accel->pool, adapter->fwd_bitmask); + kfree(accel); + return err; } -static int ixgbe_upper_dev_walk(struct net_device *upper, void *data) +static int ixgbe_macvlan_up(struct net_device *vdev, void *data) { - if (netif_is_macvlan(upper)) { - struct macvlan_dev *dfwd = netdev_priv(upper); - struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; + struct ixgbe_adapter *adapter = data; + struct ixgbe_fwd_adapter *accel; - if (dfwd->fwd_priv) - ixgbe_fwd_ring_up(upper, vadapter); - } + if (!netif_is_macvlan(vdev)) + return 0; + + accel = macvlan_accel_priv(vdev); + if (!accel) + return 0; + + ixgbe_fwd_ring_up(adapter, accel); return 0; } @@ -5442,7 +5402,7 @@ static int ixgbe_upper_dev_walk(struct net_device *upper, void *data) static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter) { netdev_walk_all_upper_dev_rcu(adapter->netdev, - ixgbe_upper_dev_walk, NULL); + ixgbe_macvlan_up, adapter); } static void ixgbe_configure(struct ixgbe_adapter *adapter) @@ -8843,6 +8803,49 @@ static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter) } #endif /* CONFIG_IXGBE_DCB */ +static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data) +{ + struct ixgbe_adapter *adapter = data; + struct ixgbe_fwd_adapter *accel; + int pool; + + /* we only care about macvlans... */ + if (!netif_is_macvlan(vdev)) + return 0; + + /* that have hardware offload enabled... */ + accel = macvlan_accel_priv(vdev); + if (!accel) + return 0; + + /* If we can relocate to a different bit do so */ + pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools); + if (pool < adapter->num_rx_pools) { + set_bit(pool, adapter->fwd_bitmask); + accel->pool = pool; + return 0; + } + + /* if we cannot find a free pool then disable the offload */ + netdev_err(vdev, "L2FW offload disabled due to lack of queue resources\n"); + macvlan_release_l2fw_offload(vdev); + kfree(accel); + + return 0; +} + +static void ixgbe_defrag_macvlan_pools(struct net_device *dev) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + + /* flush any stale bits out of the fwd bitmask */ + bitmap_clear(adapter->fwd_bitmask, 1, 63); + + /* walk through upper devices reassigning pools */ + netdev_walk_all_upper_dev_rcu(dev, ixgbe_reassign_macvlan_pool, + adapter); +} + /** * ixgbe_setup_tc - configure net_device for multiple traffic classes * @@ -8910,6 +8913,8 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) #endif /* CONFIG_IXGBE_DCB */ ixgbe_init_interrupt_scheme(adapter); + ixgbe_defrag_macvlan_pools(dev); + if (netif_running(dev)) return ixgbe_open(dev); @@ -9014,13 +9019,12 @@ struct upper_walk_data { static int get_macvlan_queue(struct net_device *upper, void *_data) { if (netif_is_macvlan(upper)) { - struct macvlan_dev *dfwd = netdev_priv(upper); - struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; + struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper); struct upper_walk_data *data = _data; struct ixgbe_adapter *adapter = data->adapter; int ifindex = data->ifindex; - if (vadapter && vadapter->netdev->ifindex == ifindex) { + if (vadapter && upper->ifindex == ifindex) { data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx; data->action = data->queue; return 1; @@ -9460,6 +9464,22 @@ static netdev_features_t ixgbe_fix_features(struct net_device *netdev, return features; } +static void ixgbe_reset_l2fw_offload(struct ixgbe_adapter *adapter) +{ + int rss = min_t(int, ixgbe_max_rss_indices(adapter), + num_online_cpus()); + + /* go back to full RSS if we're not running SR-IOV */ + if (!adapter->ring_feature[RING_F_VMDQ].offset) + adapter->flags &= ~(IXGBE_FLAG_VMDQ_ENABLED | + IXGBE_FLAG_SRIOV_ENABLED); + + adapter->ring_feature[RING_F_RSS].limit = rss; + adapter->ring_feature[RING_F_VMDQ].limit = 1; + + ixgbe_setup_tc(adapter->netdev, adapter->hw_tcs); +} + static int ixgbe_set_features(struct net_device *netdev, netdev_features_t features) { @@ -9540,7 +9560,9 @@ static int ixgbe_set_features(struct net_device *netdev, } } - if (need_reset) + if ((changed & NETIF_F_HW_L2FW_DOFFLOAD) && adapter->num_rx_pools > 1) + ixgbe_reset_l2fw_offload(adapter); + else if (need_reset) ixgbe_do_reset(netdev); else if (changed & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER)) @@ -9803,71 +9825,98 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) { - struct ixgbe_fwd_adapter *fwd_adapter = NULL; struct ixgbe_adapter *adapter = netdev_priv(pdev); - int used_pools = adapter->num_vfs + adapter->num_rx_pools; + struct ixgbe_fwd_adapter *accel; int tcs = adapter->hw_tcs ? : 1; - unsigned int limit; int pool, err; - /* Hardware has a limited number of available pools. Each VF, and the - * PF require a pool. Check to ensure we don't attempt to use more - * then the available number of pools. + /* The hardware supported by ixgbe only filters on the destination MAC + * address. In order to avoid issues we only support offloading modes + * where the hardware can actually provide the functionality. */ - if (used_pools >= IXGBE_MAX_VF_FUNCTIONS) - return ERR_PTR(-EINVAL); + if (!macvlan_supports_dest_filter(vdev)) + return ERR_PTR(-EMEDIUMTYPE); - if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && - adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) || - (adapter->num_rx_pools > IXGBE_MAX_MACVLANS)) - return ERR_PTR(-EBUSY); + pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools); + if (pool == adapter->num_rx_pools) { + u16 used_pools = adapter->num_vfs + adapter->num_rx_pools; + u16 reserved_pools; + + if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && + adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) || + adapter->num_rx_pools > IXGBE_MAX_MACVLANS) + return ERR_PTR(-EBUSY); + + /* Hardware has a limited number of available pools. Each VF, + * and the PF require a pool. Check to ensure we don't + * attempt to use more then the available number of pools. + */ + if (used_pools >= IXGBE_MAX_VF_FUNCTIONS) + return ERR_PTR(-EBUSY); - fwd_adapter = kzalloc(sizeof(*fwd_adapter), GFP_KERNEL); - if (!fwd_adapter) - return ERR_PTR(-ENOMEM); + /* Enable VMDq flag so device will be set in VM mode */ + adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED | + IXGBE_FLAG_SRIOV_ENABLED; - pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools); - set_bit(pool, adapter->fwd_bitmask); - limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools + 1); + /* Try to reserve as many queues per pool as possible, + * we start with the configurations that support 4 queues + * per pools, followed by 2, and then by just 1 per pool. + */ + if (used_pools < 32 && adapter->num_rx_pools < 16) + reserved_pools = min_t(u16, + 32 - used_pools, + 16 - adapter->num_rx_pools); + else if (adapter->num_rx_pools < 32) + reserved_pools = min_t(u16, + 64 - used_pools, + 32 - adapter->num_rx_pools); + else + reserved_pools = 64 - used_pools; - /* Enable VMDq flag so device will be set in VM mode */ - adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED | IXGBE_FLAG_SRIOV_ENABLED; - adapter->ring_feature[RING_F_VMDQ].limit = limit + 1; - fwd_adapter->pool = pool; - fwd_adapter->real_adapter = adapter; + if (!reserved_pools) + return ERR_PTR(-EBUSY); - /* Force reinit of ring allocation with VMDQ enabled */ - err = ixgbe_setup_tc(pdev, adapter->hw_tcs); + adapter->ring_feature[RING_F_VMDQ].limit += reserved_pools; - if (!err && netif_running(pdev)) - err = ixgbe_fwd_ring_up(vdev, fwd_adapter); + /* Force reinit of ring allocation with VMDQ enabled */ + err = ixgbe_setup_tc(pdev, adapter->hw_tcs); + if (err) + return ERR_PTR(err); - if (!err) - return fwd_adapter; + if (pool >= adapter->num_rx_pools) + return ERR_PTR(-ENOMEM); + } + + accel = kzalloc(sizeof(*accel), GFP_KERNEL); + if (!accel) + return ERR_PTR(-ENOMEM); - /* unwind counter and free adapter struct */ - netdev_info(pdev, - "%s: dfwd hardware acceleration failed\n", vdev->name); - clear_bit(pool, adapter->fwd_bitmask); - kfree(fwd_adapter); - return ERR_PTR(err); + set_bit(pool, adapter->fwd_bitmask); + accel->pool = pool; + accel->netdev = vdev; + + if (!netif_running(pdev)) + return accel; + + err = ixgbe_fwd_ring_up(adapter, accel); + if (err) + return ERR_PTR(err); + + return accel; } static void ixgbe_fwd_del(struct net_device *pdev, void *priv) { struct ixgbe_fwd_adapter *accel = priv; - struct ixgbe_adapter *adapter = accel->real_adapter; + struct ixgbe_adapter *adapter = netdev_priv(pdev); unsigned int rxbase = accel->rx_base_queue; - unsigned int limit, i; + unsigned int i; /* delete unicast filter associated with offloaded interface */ ixgbe_del_mac_filter(adapter, accel->netdev->dev_addr, VMDQ_P(accel->pool)); - /* disable ability to receive packets for this pool */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(accel->pool), 0); - /* Allow remaining Rx packets to get flushed out of the * Rx FIFO before we drop the netdev for the ring. */ @@ -9886,25 +9935,6 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv) } clear_bit(accel->pool, adapter->fwd_bitmask); - limit = find_last_bit(adapter->fwd_bitmask, adapter->num_rx_pools); - adapter->ring_feature[RING_F_VMDQ].limit = limit + 1; - - /* go back to full RSS if we're done with our VMQs */ - if (adapter->ring_feature[RING_F_VMDQ].limit == 1) { - int rss = min_t(int, ixgbe_max_rss_indices(adapter), - num_online_cpus()); - - adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED; - adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED; - adapter->ring_feature[RING_F_RSS].limit = rss; - } - - ixgbe_setup_tc(pdev, adapter->hw_tcs); - netdev_dbg(pdev, "pool %i:%i queues %i:%i\n", - accel->pool, adapter->num_rx_pools, - accel->rx_base_queue, - accel->rx_base_queue + - adapter->num_rx_queues_per_pool); kfree(accel); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 008aa073a679..bfc4171cd3f9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -266,7 +266,7 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) #endif /* Disable VMDq flag so device will be set in VM mode */ - if (adapter->ring_feature[RING_F_VMDQ].limit == 1) { + if (bitmap_weight(adapter->fwd_bitmask, adapter->num_rx_pools) == 1) { adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED; adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED; rss = min_t(int, ixgbe_max_rss_indices(adapter), @@ -312,7 +312,8 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) * other values out of range. */ num_tc = adapter->hw_tcs; - num_rx_pools = adapter->num_rx_pools; + num_rx_pools = bitmap_weight(adapter->fwd_bitmask, + adapter->num_rx_pools); limit = (num_tc > 4) ? IXGBE_MAX_VFS_8TC : (num_tc > 1) ? IXGBE_MAX_VFS_4TC : IXGBE_MAX_VFS_1TC; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3d9033f26eff..e3d04f226d57 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3420,7 +3420,7 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) if (!err) continue; hw_dbg(&adapter->hw, "Allocation for XDP Queue %u failed\n", j); - break; + goto err_setup_tx; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 3317a4da87cb..56c748ca2a09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -241,6 +241,7 @@ struct mlx5e_params { bool vlan_strip_disable; bool scatter_fcs_en; bool rx_dim_enabled; + bool tx_dim_enabled; u32 lro_timeout; u32 pflags; struct bpf_prog *xdp_prog; @@ -330,6 +331,7 @@ enum { MLX5E_SQ_STATE_ENABLED, MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, + MLX5E_SQ_STATE_AM, }; struct mlx5e_sq_wqe_info { @@ -342,6 +344,7 @@ struct mlx5e_txqsq { /* dirtied @completion */ u16 cc; u32 dma_fifo_cc; + struct net_dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; @@ -1111,4 +1114,5 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, u16 max_channels, u16 mtu); u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); +void mlx5e_tx_dim_work(struct work_struct *work); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index 602851ab5b14..d67adf70a97b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -33,16 +33,30 @@ #include <linux/net_dim.h> #include "en.h" +static void +mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, + struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) +{ + mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); + dim->state = NET_DIM_START_MEASURE; +} + void mlx5e_rx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, - work); + struct net_dim *dim = container_of(work, struct net_dim, work); struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); - struct net_dim_cq_moder cur_profile = net_dim_get_profile(dim->mode, - dim->profile_ix); + struct net_dim_cq_moder cur_moder = + net_dim_get_rx_moderation(dim->mode, dim->profile_ix); - mlx5_core_modify_cq_moderation(rq->mdev, &rq->cq.mcq, - cur_profile.usec, cur_profile.pkts); + mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); +} - dim->state = NET_DIM_START_MEASURE; +void mlx5e_tx_dim_work(struct work_struct *work) +{ + struct net_dim *dim = container_of(work, struct net_dim, work); + struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); + struct net_dim_cq_moder cur_moder = + net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + + mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 37fd0245b6c1..2b786c4d3dab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -389,14 +389,20 @@ static int mlx5e_set_channels(struct net_device *dev, int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { + struct net_dim_cq_moder *rx_moder, *tx_moder; + if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; - coal->rx_coalesce_usecs = priv->channels.params.rx_cq_moderation.usec; - coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts; - coal->tx_coalesce_usecs = priv->channels.params.tx_cq_moderation.usec; - coal->tx_max_coalesced_frames = priv->channels.params.tx_cq_moderation.pkts; - coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled; + rx_moder = &priv->channels.params.rx_cq_moderation; + coal->rx_coalesce_usecs = rx_moder->usec; + coal->rx_max_coalesced_frames = rx_moder->pkts; + coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled; + + tx_moder = &priv->channels.params.tx_cq_moderation; + coal->tx_coalesce_usecs = tx_moder->usec; + coal->tx_max_coalesced_frames = tx_moder->pkts; + coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled; return 0; } @@ -438,6 +444,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { + struct net_dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; int err = 0; @@ -463,11 +470,15 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); new_channels.params = priv->channels.params; - new_channels.params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; - new_channels.params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; - new_channels.params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; - new_channels.params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; - new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + rx_moder = &new_channels.params.rx_cq_moderation; + rx_moder->usec = coal->rx_coalesce_usecs; + rx_moder->pkts = coal->rx_max_coalesced_frames; + new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + + tx_moder = &new_channels.params.tx_cq_moderation; + tx_moder->usec = coal->tx_coalesce_usecs; + tx_moder->pkts = coal->tx_max_coalesced_frames; + new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -475,7 +486,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, } /* we are opened */ - reset = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; + reset = (!!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled) || + (!!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled); + if (!reset) { mlx5e_set_priv_channels_coalesce(priv, coal); priv->channels.params = new_channels.params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f10037419978..f1fe490ed794 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1025,6 +1025,9 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; + INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work); + sq->dim.mode = params->tx_cq_moderation.cq_period_mode; + sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; return 0; @@ -1188,6 +1191,9 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c, if (tx_rate) mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); + if (params->tx_dim_enabled) + sq->state |= BIT(MLX5E_SQ_STATE_AM); + return 0; err_free_txqsq: @@ -4084,18 +4090,48 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) { - params->tx_cq_moderation.cq_period_mode = cq_period_mode; + struct net_dim_cq_moder moder; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} - params->tx_cq_moderation.pkts = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; - params->tx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; +static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +{ + struct net_dim_cq_moder moder; + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - params->tx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) +{ + return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE : + NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->tx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); + } else { + params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); + } MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, params->tx_cq_moderation.cq_period_mode == @@ -4104,28 +4140,12 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) { - params->rx_cq_moderation.cq_period_mode = cq_period_mode; - - params->rx_cq_moderation.pkts = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; - params->rx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - params->rx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; - if (params->rx_dim_enabled) { - switch (cq_period_mode) { - case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: - params->rx_cq_moderation = - net_dim_get_def_profile(NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE); - break; - case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: - default: - params->rx_cq_moderation = - net_dim_get_def_profile(NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE); - } + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); + } else { + params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); } MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, @@ -4189,6 +4209,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode); mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index f292bb346985..900661d45c8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -44,6 +44,30 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) return cpumask_test_cpu(current_cpu, aff); } +static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) +{ + struct net_dim_sample dim_sample; + + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_AM))) + return; + + net_dim_sample(sq->cq.event_ctr, sq->stats.packets, sq->stats.bytes, + &dim_sample); + net_dim(&sq->dim, dim_sample); +} + +static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) +{ + struct net_dim_sample dim_sample; + + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_AM))) + return; + + net_dim_sample(rq->cq.event_ctr, rq->stats.packets, rq->stats.bytes, + &dim_sample); + net_dim(&rq->dim, dim_sample); +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -75,18 +99,13 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) if (unlikely(!napi_complete_done(napi, work_done))) return work_done; - for (i = 0; i < c->num_tc; i++) + for (i = 0; i < c->num_tc; i++) { + mlx5e_handle_tx_dim(&c->sq[i]); mlx5e_cq_arm(&c->sq[i].cq); - - if (MLX5E_TEST_BIT(c->rq.state, MLX5E_RQ_STATE_AM)) { - struct net_dim_sample dim_sample; - net_dim_sample(c->rq.cq.event_ctr, - c->rq.stats.packets, - c->rq.stats.bytes, - &dim_sample); - net_dim(&c->rq.dim, dim_sample); } + mlx5e_handle_rx_dim(&c->rq); + mlx5e_cq_arm(&c->rq.cq); mlx5e_cq_arm(&c->icosq.cq); diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index c67e1b54c614..733ff53cc601 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -47,6 +47,7 @@ struct net_device; struct nfp_app; +#define NFP_FL_STATS_CTX_DONT_CARE cpu_to_be32(0xffffffff) #define NFP_FL_STATS_ENTRY_RS BIT(20) #define NFP_FL_STATS_ELEM_RS 4 #define NFP_FL_REPEATED_HASH_MAX BIT(17) @@ -189,9 +190,11 @@ struct nfp_fl_payload { spinlock_t lock; /* lock stats */ struct nfp_fl_stats stats; __be32 nfp_tun_ipv4_addr; + struct net_device *ingress_dev; char *unmasked_data; char *mask_data; char *action_data; + bool ingress_offload; }; struct nfp_fl_stats_frame { @@ -216,12 +219,14 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow, struct nfp_fl_payload *nfp_flow); int nfp_compile_flow_metadata(struct nfp_app *app, struct tc_cls_flower_offload *flow, - struct nfp_fl_payload *nfp_flow); + struct nfp_fl_payload *nfp_flow, + struct net_device *netdev); int nfp_modify_flow_metadata(struct nfp_app *app, struct nfp_fl_payload *nfp_flow); struct nfp_fl_payload * -nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); +nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie, + struct net_device *netdev, __be32 host_ctx); struct nfp_fl_payload * nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index db977cf8e933..21668aa435e8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -99,14 +99,18 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id) /* Must be called with either RTNL or rcu_read_lock */ struct nfp_fl_payload * -nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie) +nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie, + struct net_device *netdev, __be32 host_ctx) { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_payload *flower_entry; hash_for_each_possible_rcu(priv->flow_table, flower_entry, link, tc_flower_cookie) - if (flower_entry->tc_flower_cookie == tc_flower_cookie) + if (flower_entry->tc_flower_cookie == tc_flower_cookie && + (!netdev || flower_entry->ingress_dev == netdev) && + (host_ctx == NFP_FL_STATS_CTX_DONT_CARE || + flower_entry->meta.host_ctx_id == host_ctx)) return flower_entry; return NULL; @@ -121,13 +125,11 @@ nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats) flower_cookie = be64_to_cpu(stats->stats_cookie); rcu_read_lock(); - nfp_flow = nfp_flower_search_fl_table(app, flower_cookie); + nfp_flow = nfp_flower_search_fl_table(app, flower_cookie, NULL, + stats->stats_con_id); if (!nfp_flow) goto exit_rcu_unlock; - if (nfp_flow->meta.host_ctx_id != stats->stats_con_id) - goto exit_rcu_unlock; - spin_lock(&nfp_flow->lock); nfp_flow->stats.pkts += be32_to_cpu(stats->pkt_count); nfp_flow->stats.bytes += be64_to_cpu(stats->byte_count); @@ -317,7 +319,8 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len, int nfp_compile_flow_metadata(struct nfp_app *app, struct tc_cls_flower_offload *flow, - struct nfp_fl_payload *nfp_flow) + struct nfp_fl_payload *nfp_flow, + struct net_device *netdev) { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_payload *check_entry; @@ -348,7 +351,8 @@ int nfp_compile_flow_metadata(struct nfp_app *app, nfp_flow->stats.bytes = 0; nfp_flow->stats.used = jiffies; - check_entry = nfp_flower_search_fl_table(app, flow->cookie); + check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev, + NFP_FL_STATS_CTX_DONT_CARE); if (check_entry) { if (nfp_release_stats_entry(app, stats_cxt)) return -EINVAL; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 114d2ab02a38..70ec9d821b91 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -345,7 +345,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, } static struct nfp_fl_payload * -nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) +nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer, bool egress) { struct nfp_fl_payload *flow_pay; @@ -371,6 +371,8 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) flow_pay->meta.flags = 0; spin_lock_init(&flow_pay->lock); + flow_pay->ingress_offload = !egress; + return flow_pay; err_free_mask: @@ -402,8 +404,20 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_priv *priv = app->priv; struct nfp_fl_payload *flow_pay; struct nfp_fl_key_ls *key_layer; + struct net_device *ingr_dev; int err; + ingr_dev = egress ? NULL : netdev; + flow_pay = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev, + NFP_FL_STATS_CTX_DONT_CARE); + if (flow_pay) { + /* Ignore as duplicate if it has been added by different cb. */ + if (flow_pay->ingress_offload && egress) + return 0; + else + return -EOPNOTSUPP; + } + key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL); if (!key_layer) return -ENOMEM; @@ -413,12 +427,14 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, if (err) goto err_free_key_ls; - flow_pay = nfp_flower_allocate_new(key_layer); + flow_pay = nfp_flower_allocate_new(key_layer, egress); if (!flow_pay) { err = -ENOMEM; goto err_free_key_ls; } + flow_pay->ingress_dev = egress ? NULL : netdev; + err = nfp_flower_compile_flow_match(flow, key_layer, netdev, flow_pay, tun_type); if (err) @@ -428,7 +444,8 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, if (err) goto err_destroy_flow; - err = nfp_compile_flow_metadata(app, flow, flow_pay); + err = nfp_compile_flow_metadata(app, flow, flow_pay, + flow_pay->ingress_dev); if (err) goto err_destroy_flow; @@ -462,6 +479,7 @@ err_free_key_ls: * @app: Pointer to the APP handle * @netdev: netdev structure. * @flow: TC flower classifier offload structure + * @egress: Netdev is the egress dev. * * Removes a flow from the repeated hash structure and clears the * action payload. @@ -470,15 +488,18 @@ err_free_key_ls: */ static int nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, - struct tc_cls_flower_offload *flow) + struct tc_cls_flower_offload *flow, bool egress) { struct nfp_port *port = nfp_port_from_netdev(netdev); struct nfp_fl_payload *nfp_flow; + struct net_device *ingr_dev; int err; - nfp_flow = nfp_flower_search_fl_table(app, flow->cookie); + ingr_dev = egress ? NULL : netdev; + nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev, + NFP_FL_STATS_CTX_DONT_CARE); if (!nfp_flow) - return -ENOENT; + return egress ? 0 : -ENOENT; err = nfp_modify_flow_metadata(app, nfp_flow); if (err) @@ -505,7 +526,9 @@ err_free_flow: /** * nfp_flower_get_stats() - Populates flow stats obtained from hardware. * @app: Pointer to the APP handle + * @netdev: Netdev structure. * @flow: TC flower classifier offload structure + * @egress: Netdev is the egress dev. * * Populates a flow statistics structure which which corresponds to a * specific flow. @@ -513,14 +536,21 @@ err_free_flow: * Return: negative value on error, 0 if stats populated successfully. */ static int -nfp_flower_get_stats(struct nfp_app *app, struct tc_cls_flower_offload *flow) +nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, + struct tc_cls_flower_offload *flow, bool egress) { struct nfp_fl_payload *nfp_flow; + struct net_device *ingr_dev; - nfp_flow = nfp_flower_search_fl_table(app, flow->cookie); + ingr_dev = egress ? NULL : netdev; + nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, ingr_dev, + NFP_FL_STATS_CTX_DONT_CARE); if (!nfp_flow) return -EINVAL; + if (nfp_flow->ingress_offload && egress) + return 0; + spin_lock_bh(&nfp_flow->lock); tcf_exts_stats_update(flow->exts, nfp_flow->stats.bytes, nfp_flow->stats.pkts, nfp_flow->stats.used); @@ -543,9 +573,9 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, case TC_CLSFLOWER_REPLACE: return nfp_flower_add_offload(app, netdev, flower, egress); case TC_CLSFLOWER_DESTROY: - return nfp_flower_del_offload(app, netdev, flower); + return nfp_flower_del_offload(app, netdev, flower, egress); case TC_CLSFLOWER_STATS: - return nfp_flower_get_stats(app, flower); + return nfp_flower_get_stats(app, netdev, flower, egress); } return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index c4b1f344b4da..0ade122805ad 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -486,6 +486,10 @@ static int nfp_pci_probe(struct pci_dev *pdev, goto err_disable_msix; } + err = nfp_resource_table_init(pf->cpp); + if (err) + goto err_cpp_free; + pf->hwinfo = nfp_hwinfo_read(pf->cpp); dev_info(&pdev->dev, "Assembly: %s%s%s-%s CPLD: %s\n", @@ -548,6 +552,7 @@ err_fw_unload: vfree(pf->dumpspec); err_hwinfo_free: kfree(pf->hwinfo); +err_cpp_free: nfp_cpp_free(pf->cpp); err_disable_msix: destroy_workqueue(pf->wq); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h index ced62d112aa2..f44d0a857314 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h @@ -94,6 +94,8 @@ int nfp_nsp_read_sensors(struct nfp_nsp *state, unsigned int sensor_mask, /* MAC Statistics Accumulator */ #define NFP_RESOURCE_MAC_STATISTICS "mac.stat" +int nfp_resource_table_init(struct nfp_cpp *cpp); + struct nfp_resource * nfp_resource_acquire(struct nfp_cpp *cpp, const char *name); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index cd678323bacb..a0e336bd1d85 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -1330,6 +1330,7 @@ struct nfp_cpp *nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev) /* Finished with card initialization. */ dev_info(&pdev->dev, "Netronome Flow Processor NFP4000/NFP6000 PCIe Card Probe\n"); + pcie_print_link_status(pdev); nfp = kzalloc(sizeof(*nfp), GFP_KERNEL); if (!nfp) { diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h index c8f2c064cce3..4e19add1c539 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h @@ -295,6 +295,8 @@ void nfp_cpp_mutex_free(struct nfp_cpp_mutex *mutex); int nfp_cpp_mutex_lock(struct nfp_cpp_mutex *mutex); int nfp_cpp_mutex_unlock(struct nfp_cpp_mutex *mutex); int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex); +int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target, + unsigned long long address); /** * nfp_cppcore_pcie_unit() - Get PCI Unit of a CPP handle diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c index cb28ac03e4ca..c88bf673cb76 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_mutex.c @@ -59,6 +59,11 @@ static u32 nfp_mutex_unlocked(u16 interface) return (u32)interface << 16 | 0x0000; } +static u32 nfp_mutex_owner(u32 val) +{ + return val >> 16; +} + static bool nfp_mutex_is_locked(u32 val) { return (val & 0xffff) == 0x000f; @@ -351,3 +356,43 @@ int nfp_cpp_mutex_trylock(struct nfp_cpp_mutex *mutex) return nfp_mutex_is_locked(tmp) ? -EBUSY : -EINVAL; } + +/** + * nfp_cpp_mutex_reclaim() - Unlock mutex if held by local endpoint + * @cpp: NFP CPP handle + * @target: NFP CPP target ID (ie NFP_CPP_TARGET_CLS or NFP_CPP_TARGET_MU) + * @address: Offset into the address space of the NFP CPP target ID + * + * Release lock if held by local system. Extreme care is advised, call only + * when no local lock users can exist. + * + * Return: 0 if the lock was OK, 1 if locked by us, -errno on invalid mutex + */ +int nfp_cpp_mutex_reclaim(struct nfp_cpp *cpp, int target, + unsigned long long address) +{ + const u32 mur = NFP_CPP_ID(target, 3, 0); /* atomic_read */ + const u32 muw = NFP_CPP_ID(target, 4, 0); /* atomic_write */ + u16 interface = nfp_cpp_interface(cpp); + int err; + u32 tmp; + + err = nfp_cpp_mutex_validate(interface, &target, address); + if (err) + return err; + + /* Check lock */ + err = nfp_cpp_readl(cpp, mur, address, &tmp); + if (err < 0) + return err; + + if (nfp_mutex_is_unlocked(tmp) || nfp_mutex_owner(tmp) != interface) + return 0; + + /* Bust the lock */ + err = nfp_cpp_writel(cpp, muw, address, nfp_mutex_unlocked(interface)); + if (err < 0) + return err; + + return 1; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c index 7e14725055c7..2dd89dba9311 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c @@ -338,3 +338,62 @@ u64 nfp_resource_size(struct nfp_resource *res) { return res->size; } + +/** + * nfp_resource_table_init() - Run initial checks on the resource table + * @cpp: NFP CPP handle + * + * Start-of-day init procedure for resource table. Must be called before + * any local resource table users may exist. + * + * Return: 0 on success, -errno on failure + */ +int nfp_resource_table_init(struct nfp_cpp *cpp) +{ + struct nfp_cpp_mutex *dev_mutex; + int i, err; + + err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET, + NFP_RESOURCE_TBL_BASE); + if (err < 0) { + nfp_err(cpp, "Error: failed to reclaim resource table mutex\n"); + return err; + } + if (err) + nfp_warn(cpp, "Warning: busted main resource table mutex\n"); + + dev_mutex = nfp_cpp_mutex_alloc(cpp, NFP_RESOURCE_TBL_TARGET, + NFP_RESOURCE_TBL_BASE, + NFP_RESOURCE_TBL_KEY); + if (!dev_mutex) + return -ENOMEM; + + if (nfp_cpp_mutex_lock(dev_mutex)) { + nfp_err(cpp, "Error: failed to claim resource table mutex\n"); + nfp_cpp_mutex_free(dev_mutex); + return -EINVAL; + } + + /* Resource 0 is the dev_mutex, start from 1 */ + for (i = 1; i < NFP_RESOURCE_TBL_ENTRIES; i++) { + u64 addr = NFP_RESOURCE_TBL_BASE + + sizeof(struct nfp_resource_entry) * i; + + err = nfp_cpp_mutex_reclaim(cpp, NFP_RESOURCE_TBL_TARGET, addr); + if (err < 0) { + nfp_err(cpp, + "Error: failed to reclaim resource %d mutex\n", + i); + goto err_unlock; + } + if (err) + nfp_warn(cpp, "Warning: busted resource %d mutex\n", i); + } + + err = 0; +err_unlock: + nfp_cpp_mutex_unlock(dev_mutex); + nfp_cpp_mutex_free(dev_mutex); + + return err; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 4926c5532fba..39124b594a36 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -419,6 +419,7 @@ struct phy_defs { #define NUM_RSS_MEM_TYPES 5 #define NUM_BIG_RAM_TYPES 3 +#define BIG_RAM_NAME_LEN 3 #define NUM_PHY_TBUS_ADDRESSES 2048 #define PHY_DUMP_SIZE_DWORDS (NUM_PHY_TBUS_ADDRESSES / 2) @@ -3650,8 +3651,8 @@ static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn, BIT(big_ram->is_256b_bit_offset[dev_data->chip_id]) ? 256 : 128; - strscpy(type_name, big_ram->instance_name, sizeof(type_name)); - strscpy(mem_name, big_ram->instance_name, sizeof(mem_name)); + strncpy(type_name, big_ram->instance_name, BIG_RAM_NAME_LEN); + strncpy(mem_name, big_ram->instance_name, BIG_RAM_NAME_LEN); /* Dump memory header */ offset += qed_grc_dump_mem_hdr(p_hwfn, @@ -7778,6 +7779,57 @@ int qed_dbg_igu_fifo_size(struct qed_dev *cdev) return qed_dbg_feature_size(cdev, DBG_FEATURE_IGU_FIFO); } +int qed_dbg_nvm_image_length(struct qed_hwfn *p_hwfn, + enum qed_nvm_images image_id, u32 *length) +{ + struct qed_nvm_image_att image_att; + int rc; + + *length = 0; + rc = qed_mcp_get_nvm_image_att(p_hwfn, image_id, &image_att); + if (rc) + return rc; + + *length = image_att.length; + + return rc; +} + +int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes, enum qed_nvm_images image_id) +{ + struct qed_hwfn *p_hwfn = + &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + u32 len_rounded, i; + __be32 val; + int rc; + + *num_dumped_bytes = 0; + rc = qed_dbg_nvm_image_length(p_hwfn, image_id, &len_rounded); + if (rc) + return rc; + + DP_NOTICE(p_hwfn->cdev, + "Collecting a debug feature [\"nvram image %d\"]\n", + image_id); + + len_rounded = roundup(len_rounded, sizeof(u32)); + rc = qed_mcp_get_nvm_image(p_hwfn, image_id, buffer, len_rounded); + if (rc) + return rc; + + /* QED_NVM_IMAGE_NVM_META image is not swapped like other images */ + if (image_id != QED_NVM_IMAGE_NVM_META) + for (i = 0; i < len_rounded; i += 4) { + val = cpu_to_be32(*(u32 *)(buffer + i)); + *(u32 *)(buffer + i) = val; + } + + *num_dumped_bytes = len_rounded; + + return rc; +} + int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) { @@ -7831,6 +7883,9 @@ enum debug_print_features { IGU_FIFO = 6, PHY = 7, FW_ASSERTS = 8, + NVM_CFG1 = 9, + DEFAULT_CFG = 10, + NVM_META = 11, }; static u32 qed_calc_regdump_header(enum debug_print_features feature, @@ -7965,13 +8020,61 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) DP_ERR(cdev, "qed_dbg_mcp_trace failed. rc = %d\n", rc); } + /* nvm cfg1 */ + rc = qed_dbg_nvm_image(cdev, + (u8 *)buffer + offset + REGDUMP_HEADER_SIZE, + &feature_size, QED_NVM_IMAGE_NVM_CFG1); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(NVM_CFG1, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else if (rc != -ENOENT) { + DP_ERR(cdev, + "qed_dbg_nvm_image failed for image %d (%s), rc = %d\n", + QED_NVM_IMAGE_NVM_CFG1, "QED_NVM_IMAGE_NVM_CFG1", rc); + } + + /* nvm default */ + rc = qed_dbg_nvm_image(cdev, + (u8 *)buffer + offset + REGDUMP_HEADER_SIZE, + &feature_size, QED_NVM_IMAGE_DEFAULT_CFG); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(DEFAULT_CFG, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else if (rc != -ENOENT) { + DP_ERR(cdev, + "qed_dbg_nvm_image failed for image %d (%s), rc = %d\n", + QED_NVM_IMAGE_DEFAULT_CFG, "QED_NVM_IMAGE_DEFAULT_CFG", + rc); + } + + /* nvm meta */ + rc = qed_dbg_nvm_image(cdev, + (u8 *)buffer + offset + REGDUMP_HEADER_SIZE, + &feature_size, QED_NVM_IMAGE_NVM_META); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(NVM_META, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else if (rc != -ENOENT) { + DP_ERR(cdev, + "qed_dbg_nvm_image failed for image %d (%s), rc = %d\n", + QED_NVM_IMAGE_NVM_META, "QED_NVM_IMAGE_NVM_META", rc); + } + return 0; } int qed_dbg_all_data_size(struct qed_dev *cdev) { + struct qed_hwfn *p_hwfn = + &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + u32 regs_len = 0, image_len = 0; u8 cur_engine, org_engine; - u32 regs_len = 0; org_engine = qed_get_debug_engine(cdev); for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { @@ -7993,6 +8096,15 @@ int qed_dbg_all_data_size(struct qed_dev *cdev) /* Engine common */ regs_len += REGDUMP_HEADER_SIZE + qed_dbg_mcp_trace_size(cdev); + qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_NVM_CFG1, &image_len); + if (image_len) + regs_len += REGDUMP_HEADER_SIZE + image_len; + qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_DEFAULT_CFG, &image_len); + if (image_len) + regs_len += REGDUMP_HEADER_SIZE + image_len; + qed_dbg_nvm_image_length(p_hwfn, QED_NVM_IMAGE_NVM_META, &image_len); + if (image_len) + regs_len += REGDUMP_HEADER_SIZE + image_len; return regs_len; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 9854aa9139af..d1d3787affe8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1894,15 +1894,8 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type, u8 *buf, u16 len) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); - struct qed_ptt *ptt = qed_ptt_acquire(hwfn); - int rc; - - if (!ptt) - return -EAGAIN; - rc = qed_mcp_get_nvm_image(hwfn, ptt, type, buf, len); - qed_ptt_release(hwfn, ptt); - return rc; + return qed_mcp_get_nvm_image(hwfn, type, buf, len); } static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index ec0d425766a7..0550f0ee11b3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -2529,9 +2529,8 @@ err0: return rc; } -static int +int qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, enum qed_nvm_images image_id, struct qed_nvm_image_att *p_image_att) { @@ -2546,6 +2545,15 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, case QED_NVM_IMAGE_FCOE_CFG: type = NVM_TYPE_FCOE_CFG; break; + case QED_NVM_IMAGE_NVM_CFG1: + type = NVM_TYPE_NVM_CFG1; + break; + case QED_NVM_IMAGE_DEFAULT_CFG: + type = NVM_TYPE_DEFAULT_CFG; + break; + case QED_NVM_IMAGE_NVM_META: + type = NVM_TYPE_META; + break; default: DP_NOTICE(p_hwfn, "Unknown request of image_id %08x\n", image_id); @@ -2569,7 +2577,6 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, } int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, enum qed_nvm_images image_id, u8 *p_buffer, u32 buffer_len) { @@ -2578,7 +2585,7 @@ int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn, memset(p_buffer, 0, buffer_len); - rc = qed_mcp_get_nvm_image_att(p_hwfn, p_ptt, image_id, &image_att); + rc = qed_mcp_get_nvm_image_att(p_hwfn, image_id, &image_att); if (rc) return rc; @@ -2590,9 +2597,6 @@ int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn, return -EINVAL; } - /* Each NVM image is suffixed by CRC; Upper-layer has no need for it */ - image_att.length -= 4; - if (image_att.length > buffer_len) { DP_VERBOSE(p_hwfn, QED_MSG_STORAGE, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 8a5c988d0c3c..3af3896420b9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -486,7 +486,20 @@ struct qed_nvm_image_att { * @brief Allows reading a whole nvram image * * @param p_hwfn - * @param p_ptt + * @param image_id - image to get attributes for + * @param p_image_att - image attributes structure into which to fill data + * + * @return int - 0 - operation was successful. + */ +int +qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, + enum qed_nvm_images image_id, + struct qed_nvm_image_att *p_image_att); + +/** + * @brief Allows reading a whole nvram image + * + * @param p_hwfn * @param image_id - image requested for reading * @param p_buffer - allocated buffer into which to fill data * @param buffer_len - length of the allocated buffer. @@ -494,7 +507,6 @@ struct qed_nvm_image_att { * @return 0 iff p_buffer now contains the nvram image. */ int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, enum qed_nvm_images image_id, u8 *p_buffer, u32 buffer_len); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index fcd42d0387b9..a5d00ee94245 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -2483,7 +2483,7 @@ static const struct ethtool_ops rtl8169_ethtool_ops = { }; static void rtl8169_get_mac_version(struct rtl8169_private *tp, - struct net_device *dev, u8 default_version) + u8 default_version) { /* * The driver currently handles the 8168Bf and the 8168Be identically @@ -2588,8 +2588,8 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, tp->mac_version = p->mac_version; if (tp->mac_version == RTL_GIGA_MAC_NONE) { - netif_notice(tp, probe, dev, - "unknown MAC, using family default\n"); + dev_notice(tp_to_dev(tp), + "unknown MAC, using family default\n"); tp->mac_version = default_version; } else if (tp->mac_version == RTL_GIGA_MAC_VER_42) { tp->mac_version = tp->mii.supports_gmii ? @@ -8107,40 +8107,39 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); if (rc < 0) { - netif_err(tp, probe, dev, "enable failure\n"); + dev_err(&pdev->dev, "enable failure\n"); return rc; } if (pcim_set_mwi(pdev) < 0) - netif_info(tp, probe, dev, "Mem-Wr-Inval unavailable\n"); + dev_info(&pdev->dev, "Mem-Wr-Inval unavailable\n"); /* use first MMIO region */ region = ffs(pci_select_bars(pdev, IORESOURCE_MEM)) - 1; if (region < 0) { - netif_err(tp, probe, dev, "no MMIO resource found\n"); + dev_err(&pdev->dev, "no MMIO resource found\n"); return -ENODEV; } /* check for weird/broken PCI region reporting */ if (pci_resource_len(pdev, region) < R8169_REGS_SIZE) { - netif_err(tp, probe, dev, - "Invalid PCI region size(s), aborting\n"); + dev_err(&pdev->dev, "Invalid PCI region size(s), aborting\n"); return -ENODEV; } rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME); if (rc < 0) { - netif_err(tp, probe, dev, "cannot remap MMIO, aborting\n"); + dev_err(&pdev->dev, "cannot remap MMIO, aborting\n"); return rc; } tp->mmio_addr = pcim_iomap_table(pdev)[region]; if (!pci_is_pcie(pdev)) - netif_info(tp, probe, dev, "not PCI Express\n"); + dev_info(&pdev->dev, "not PCI Express\n"); /* Identify chip attached to board */ - rtl8169_get_mac_version(tp, dev, cfg->default_ver); + rtl8169_get_mac_version(tp, cfg->default_ver); tp->cp_cmd = 0; @@ -8157,7 +8156,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } else { rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (rc < 0) { - netif_err(tp, probe, dev, "DMA configuration failed\n"); + dev_err(&pdev->dev, "DMA configuration failed\n"); return rc; } } @@ -8185,7 +8184,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc = rtl_alloc_irq(tp); if (rc < 0) { - netif_err(tp, probe, dev, "Can't allocate interrupt\n"); + dev_err(&pdev->dev, "Can't allocate interrupt\n"); return rc; } diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 83ce229f4eb7..63036d9bf3e6 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -3999,29 +3999,6 @@ static void efx_ef10_prepare_flr(struct efx_nic *efx) atomic_set(&efx->active_queues, 0); } -static bool efx_ef10_filter_equal(const struct efx_filter_spec *left, - const struct efx_filter_spec *right) -{ - if ((left->match_flags ^ right->match_flags) | - ((left->flags ^ right->flags) & - (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) - return false; - - return memcmp(&left->outer_vid, &right->outer_vid, - sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) == 0; -} - -static unsigned int efx_ef10_filter_hash(const struct efx_filter_spec *spec) -{ - BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); - return jhash2((const u32 *)&spec->outer_vid, - (sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) / 4, - 0); - /* XXX should we randomise the initval? */ -} - /* Decide whether a filter should be exclusive or else should allow * delivery to additional recipients. Currently we decide that * filters for specific local unicast MAC and IP addresses are @@ -4346,7 +4323,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx, goto out_unlock; match_pri = rc; - hash = efx_ef10_filter_hash(spec); + hash = efx_filter_spec_hash(spec); is_mc_recip = efx_filter_is_mc_recipient(spec); if (is_mc_recip) bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); @@ -4378,7 +4355,7 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx, if (!saved_spec) { if (ins_index < 0) ins_index = i; - } else if (efx_ef10_filter_equal(spec, saved_spec)) { + } else if (efx_filter_spec_equal(spec, saved_spec)) { if (spec->priority < saved_spec->priority && spec->priority != EFX_FILTER_PRI_AUTO) { rc = -EPERM; @@ -4762,27 +4739,62 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx, static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, unsigned int filter_idx) { + struct efx_filter_spec *spec, saved_spec; struct efx_ef10_filter_table *table; - struct efx_filter_spec *spec; - bool ret; + struct efx_arfs_rule *rule = NULL; + bool ret = true, force = false; + u16 arfs_id; down_read(&efx->filter_sem); table = efx->filter_state; down_write(&table->lock); spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (!spec || spec->priority != EFX_FILTER_PRI_HINT) { - ret = true; + if (!spec || spec->priority != EFX_FILTER_PRI_HINT) goto out_unlock; - } - if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, 0)) { - ret = false; - goto out_unlock; + spin_lock_bh(&efx->rps_hash_lock); + if (!efx->rps_hash_table) { + /* In the absence of the table, we always return 0 to ARFS. */ + arfs_id = 0; + } else { + rule = efx_rps_hash_find(efx, spec); + if (!rule) + /* ARFS table doesn't know of this filter, so remove it */ + goto expire; + arfs_id = rule->arfs_id; + ret = efx_rps_check_rule(rule, filter_idx, &force); + if (force) + goto expire; + if (!ret) { + spin_unlock_bh(&efx->rps_hash_lock); + goto out_unlock; + } } - + if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, arfs_id)) + ret = false; + else if (rule) + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; +expire: + saved_spec = *spec; /* remove operation will kfree spec */ + spin_unlock_bh(&efx->rps_hash_lock); + /* At this point (since we dropped the lock), another thread might queue + * up a fresh insertion request (but the actual insertion will be held + * up by our possession of the filter table lock). In that case, it + * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that + * the rule is not removed by efx_rps_hash_del() below. + */ ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority, filter_idx, true) == 0; + /* While we can't safely dereference rule (we dropped the lock), we can + * still test it for NULL. + */ + if (ret && rule) { + /* Expiring, so remove entry from ARFS table */ + spin_lock_bh(&efx->rps_hash_lock); + efx_rps_hash_del(efx, &saved_spec); + spin_unlock_bh(&efx->rps_hash_lock); + } out_unlock: up_write(&table->lock); up_read(&efx->filter_sem); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index c30b9e26f131..88c1eee677c2 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3059,6 +3059,10 @@ static int efx_init_struct(struct efx_nic *efx, mutex_init(&efx->mac_lock); #ifdef CONFIG_RFS_ACCEL mutex_init(&efx->rps_mutex); + spin_lock_init(&efx->rps_hash_lock); + /* Failure to allocate is not fatal, but may degrade ARFS performance */ + efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, + sizeof(*efx->rps_hash_table), GFP_KERNEL); #endif efx->phy_op = &efx_dummy_phy_operations; efx->mdio.dev = net_dev; @@ -3102,6 +3106,10 @@ static void efx_fini_struct(struct efx_nic *efx) { int i; +#ifdef CONFIG_RFS_ACCEL + kfree(efx->rps_hash_table); +#endif + for (i = 0; i < EFX_MAX_CHANNELS; i++) kfree(efx->channel[i]); @@ -3124,6 +3132,141 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops); } +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right) +{ + if ((left->match_flags ^ right->match_flags) | + ((left->flags ^ right->flags) & + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) + return false; + + return memcmp(&left->outer_vid, &right->outer_vid, + sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) == 0; +} + +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) +{ + BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); + return jhash2((const u32 *)&spec->outer_vid, + (sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) / 4, + 0); +} + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force) +{ + if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { + /* ARFS is currently updating this entry, leave it */ + return false; + } + if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { + /* ARFS tried and failed to update this, so it's probably out + * of date. Remove the filter and the ARFS rule entry. + */ + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; + *force = true; + return true; + } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ + /* ARFS has moved on, so old filter is not needed. Since we did + * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will + * not be removed by efx_rps_hash_del() subsequently. + */ + *force = true; + return true; + } + /* Remove it iff ARFS wants to. */ + return true; +} + +struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + u32 hash = efx_filter_spec_hash(spec); + + WARN_ON(!spin_is_locked(&efx->rps_hash_lock)); + if (!efx->rps_hash_table) + return NULL; + return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; +} + +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) + return rule; + } + return NULL; +} + +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + *new = false; + return rule; + } + } + rule = kmalloc(sizeof(*rule), GFP_ATOMIC); + *new = true; + if (rule) { + memcpy(&rule->spec, spec, sizeof(rule->spec)); + hlist_add_head(&rule->node, head); + } + return rule; +} + +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (WARN_ON(!head)) + return; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + /* Someone already reused the entry. We know that if + * this check doesn't fire (i.e. filter_id == REMOVING) + * then the REMOVING mark was put there by our caller, + * because caller is holding a lock on filter table and + * only holders of that lock set REMOVING. + */ + if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) + return; + hlist_del(node); + kfree(rule); + return; + } + } + /* We didn't find it. */ + WARN_ON(1); +} +#endif + /* RSS contexts. We're using linked lists and crappy O(n) algorithms, because * (a) this is an infrequent control-plane operation and (b) n is small (max 64) */ diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index a3140e16fcef..3f759ebdcf10 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -186,6 +186,27 @@ static inline void efx_filter_rfs_expire(struct work_struct *data) {} #endif bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right); +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec); + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force); + +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec); + +/* @new is written to indicate if entry was newly added (true) or if an old + * entry was found and returned (false). + */ +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new); + +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec); +#endif + /* RSS contexts */ struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 7174ef5e5c5e..c72adf8b52ea 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -2905,18 +2905,45 @@ bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, { struct efx_farch_filter_state *state = efx->filter_state; struct efx_farch_filter_table *table; - bool ret = false; + bool ret = false, force = false; + u16 arfs_id; down_write(&state->lock); + spin_lock_bh(&efx->rps_hash_lock); table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP]; if (test_bit(index, table->used_bitmap) && - table->spec[index].priority == EFX_FILTER_PRI_HINT && - rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id, - flow_id, 0)) { - efx_farch_filter_table_clear_entry(efx, table, index); - ret = true; + table->spec[index].priority == EFX_FILTER_PRI_HINT) { + struct efx_arfs_rule *rule = NULL; + struct efx_filter_spec spec; + + efx_farch_filter_to_gen_spec(&spec, &table->spec[index]); + if (!efx->rps_hash_table) { + /* In the absence of the table, we always returned 0 to + * ARFS, so use the same to query it. + */ + arfs_id = 0; + } else { + rule = efx_rps_hash_find(efx, &spec); + if (!rule) { + /* ARFS table doesn't know of this filter, remove it */ + force = true; + } else { + arfs_id = rule->arfs_id; + if (!efx_rps_check_rule(rule, index, &force)) + goto out_unlock; + } + } + if (force || rps_may_expire_flow(efx->net_dev, spec.dmaq_id, + flow_id, arfs_id)) { + if (rule) + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; + efx_rps_hash_del(efx, &spec); + efx_farch_filter_table_clear_entry(efx, table, index); + ret = true; + } } - +out_unlock: + spin_unlock_bh(&efx->rps_hash_lock); up_write(&state->lock); return ret; } diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index eea3808b3f25..65568925c3ef 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -734,6 +734,35 @@ struct efx_rss_context { }; #ifdef CONFIG_RFS_ACCEL +/* Order of these is important, since filter_id >= %EFX_ARFS_FILTER_ID_PENDING + * is used to test if filter does or will exist. + */ +#define EFX_ARFS_FILTER_ID_PENDING -1 +#define EFX_ARFS_FILTER_ID_ERROR -2 +#define EFX_ARFS_FILTER_ID_REMOVING -3 +/** + * struct efx_arfs_rule - record of an ARFS filter and its IDs + * @node: linkage into hash table + * @spec: details of the filter (used as key for hash table). Use efx->type to + * determine which member to use. + * @rxq_index: channel to which the filter will steer traffic. + * @arfs_id: filter ID which was returned to ARFS + * @filter_id: index in software filter table. May be + * %EFX_ARFS_FILTER_ID_PENDING if filter was not inserted yet, + * %EFX_ARFS_FILTER_ID_ERROR if filter insertion failed, or + * %EFX_ARFS_FILTER_ID_REMOVING if expiry is currently removing the filter. + */ +struct efx_arfs_rule { + struct hlist_node node; + struct efx_filter_spec spec; + u16 rxq_index; + u16 arfs_id; + s32 filter_id; +}; + +/* Size chosen so that the table is one page (4kB) */ +#define EFX_ARFS_HASH_TABLE_SIZE 512 + /** * struct efx_async_filter_insertion - Request to asynchronously insert a filter * @net_dev: Reference to the netdevice @@ -873,6 +902,10 @@ struct efx_async_filter_insertion { * @rps_expire_channel's @rps_flow_id * @rps_slot_map: bitmap of in-flight entries in @rps_slot * @rps_slot: array of ARFS insertion requests for efx_filter_rfs_work() + * @rps_hash_lock: Protects ARFS filter mapping state (@rps_hash_table and + * @rps_next_id). + * @rps_hash_table: Mapping between ARFS filters and their various IDs + * @rps_next_id: next arfs_id for an ARFS filter * @active_queues: Count of RX and TX queues that haven't been flushed and drained. * @rxq_flush_pending: Count of number of receive queues that need to be flushed. * Decremented when the efx_flush_rx_queue() is called. @@ -1029,6 +1062,9 @@ struct efx_nic { unsigned int rps_expire_index; unsigned long rps_slot_map; struct efx_async_filter_insertion rps_slot[EFX_RPS_MAX_IN_FLIGHT]; + spinlock_t rps_hash_lock; + struct hlist_head *rps_hash_table; + u32 rps_next_id; #endif atomic_t active_queues; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 9c593c661cbf..64a94f242027 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -834,9 +834,29 @@ static void efx_filter_rfs_work(struct work_struct *data) struct efx_nic *efx = netdev_priv(req->net_dev); struct efx_channel *channel = efx_get_channel(efx, req->rxq_index); int slot_idx = req - efx->rps_slot; + struct efx_arfs_rule *rule; + u16 arfs_id = 0; int rc; rc = efx->type->filter_insert(efx, &req->spec, true); + if (efx->rps_hash_table) { + spin_lock_bh(&efx->rps_hash_lock); + rule = efx_rps_hash_find(efx, &req->spec); + /* The rule might have already gone, if someone else's request + * for the same spec was already worked and then expired before + * we got around to our work. In that case we have nothing + * tying us to an arfs_id, meaning that as soon as the filter + * is considered for expiry it will be removed. + */ + if (rule) { + if (rc < 0) + rule->filter_id = EFX_ARFS_FILTER_ID_ERROR; + else + rule->filter_id = rc; + arfs_id = rule->arfs_id; + } + spin_unlock_bh(&efx->rps_hash_lock); + } if (rc >= 0) { /* Remember this so we can check whether to expire the filter * later. @@ -848,18 +868,18 @@ static void efx_filter_rfs_work(struct work_struct *data) if (req->spec.ether_type == htons(ETH_P_IP)) netif_info(efx, rx_status, efx->net_dev, - "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n", + "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n", (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", req->spec.rem_host, ntohs(req->spec.rem_port), req->spec.loc_host, ntohs(req->spec.loc_port), - req->rxq_index, req->flow_id, rc); + req->rxq_index, req->flow_id, rc, arfs_id); else netif_info(efx, rx_status, efx->net_dev, - "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n", + "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n", (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", req->spec.rem_host, ntohs(req->spec.rem_port), req->spec.loc_host, ntohs(req->spec.loc_port), - req->rxq_index, req->flow_id, rc); + req->rxq_index, req->flow_id, rc, arfs_id); } /* Release references */ @@ -872,8 +892,10 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, { struct efx_nic *efx = netdev_priv(net_dev); struct efx_async_filter_insertion *req; + struct efx_arfs_rule *rule; struct flow_keys fk; int slot_idx; + bool new; int rc; /* find a free slot */ @@ -926,12 +948,42 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, req->spec.rem_port = fk.ports.src; req->spec.loc_port = fk.ports.dst; + if (efx->rps_hash_table) { + /* Add it to ARFS hash table */ + spin_lock(&efx->rps_hash_lock); + rule = efx_rps_hash_add(efx, &req->spec, &new); + if (!rule) { + rc = -ENOMEM; + goto out_unlock; + } + if (new) + rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER; + rc = rule->arfs_id; + /* Skip if existing or pending filter already does the right thing */ + if (!new && rule->rxq_index == rxq_index && + rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING) + goto out_unlock; + rule->rxq_index = rxq_index; + rule->filter_id = EFX_ARFS_FILTER_ID_PENDING; + spin_unlock(&efx->rps_hash_lock); + } else { + /* Without an ARFS hash table, we just use arfs_id 0 for all + * filters. This means if multiple flows hash to the same + * flow_id, all but the most recently touched will be eligible + * for expiry. + */ + rc = 0; + } + + /* Queue the request */ dev_hold(req->net_dev = net_dev); INIT_WORK(&req->work, efx_filter_rfs_work); req->rxq_index = rxq_index; req->flow_id = flow_id; schedule_work(&req->work); - return 0; + return rc; +out_unlock: + spin_unlock(&efx->rps_hash_lock); out_clear: clear_bit(slot_idx, &efx->rps_slot_map); return rc; diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 972e4ef6d414..e3b578b4f7cb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -4,7 +4,8 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ chain_mode.o dwmac_lib.o dwmac1000_core.o dwmac1000_dma.o \ dwmac100_core.o dwmac100_dma.o enh_desc.o norm_desc.o \ mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o \ - dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o $(stmmac-y) + dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \ + $(stmmac-y) # Ordering matters. Generic driver must be last. obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 59673c6d2e52..627e905b6d76 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -39,6 +39,7 @@ #define DWMAC_CORE_3_40 0x34 #define DWMAC_CORE_3_50 0x35 #define DWMAC_CORE_4_00 0x40 +#define DWMAC_CORE_4_10 0x41 #define DWMAC_CORE_5_00 0x50 #define DWMAC_CORE_5_10 0x51 #define STMMAC_CHAN0 0 /* Always supported and default for all chips */ @@ -428,12 +429,9 @@ struct stmmac_rx_routing { u32 reg_shift; }; -struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins, - int perfect_uc_entries, - int *synopsys_id); -struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id); -struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins, - int perfect_uc_entries, int *synopsys_id); +int dwmac100_setup(struct stmmac_priv *priv); +int dwmac1000_setup(struct stmmac_priv *priv); +int dwmac4_setup(struct stmmac_priv *priv); void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6], unsigned int high, unsigned int low); @@ -453,24 +451,4 @@ extern const struct stmmac_mode_ops ring_mode_ops; extern const struct stmmac_mode_ops chain_mode_ops; extern const struct stmmac_desc_ops dwmac4_desc_ops; -/** - * stmmac_get_synopsys_id - return the SYINID. - * @priv: driver private structure - * Description: this simple function is to decode and return the SYINID - * starting from the HW core register. - */ -static inline u32 stmmac_get_synopsys_id(u32 hwid) -{ - /* Check Synopsys Id (not available on old chips) */ - if (likely(hwid)) { - u32 uid = ((hwid & 0x0000ff00) >> 8); - u32 synid = (hwid & 0x000000ff); - - pr_info("stmmac - user ID: 0x%x, Synopsys ID: 0x%x\n", - uid, synid); - - return synid; - } - return 0; -} #endif /* __COMMON_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index c02d36629c52..184ca13c8f79 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -29,7 +29,6 @@ #define GMAC_MII_DATA 0x00000014 /* MII Data */ #define GMAC_FLOW_CTRL 0x00000018 /* Flow Control */ #define GMAC_VLAN_TAG 0x0000001c /* VLAN Tag */ -#define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ #define GMAC_DEBUG 0x00000024 /* GMAC debug register */ #define GMAC_WAKEUP_FILTER 0x00000028 /* Wake-up Frame Filter */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index ef10baf14186..0877bde6e860 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -27,6 +27,7 @@ #include <linux/ethtool.h> #include <net/dsa.h> #include <asm/io.h> +#include "stmmac.h" #include "stmmac_pcs.h" #include "dwmac1000.h" @@ -498,7 +499,7 @@ static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x, x->mac_gmii_rx_proto_engine++; } -static const struct stmmac_ops dwmac1000_ops = { +const struct stmmac_ops dwmac1000_ops = { .core_init = dwmac1000_core_init, .set_mac = stmmac_set_mac, .rx_ipc = dwmac1000_rx_ipc_enable, @@ -519,28 +520,21 @@ static const struct stmmac_ops dwmac1000_ops = { .pcs_get_adv_lp = dwmac1000_get_adv_lp, }; -struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins, - int perfect_uc_entries, - int *synopsys_id) +int dwmac1000_setup(struct stmmac_priv *priv) { - struct mac_device_info *mac; - u32 hwid = readl(ioaddr + GMAC_VERSION); + struct mac_device_info *mac = priv->hw; - mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL); - if (!mac) - return NULL; + dev_info(priv->device, "\tDWMAC1000\n"); - mac->pcsr = ioaddr; - mac->multicast_filter_bins = mcbins; - mac->unicast_filter_entries = perfect_uc_entries; + priv->dev->priv_flags |= IFF_UNICAST_FLT; + mac->pcsr = priv->ioaddr; + mac->multicast_filter_bins = priv->plat->multicast_filter_bins; + mac->unicast_filter_entries = priv->plat->unicast_filter_entries; mac->mcast_bits_log2 = 0; if (mac->multicast_filter_bins) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); - mac->mac = &dwmac1000_ops; - mac->dma = &dwmac1000_dma_ops; - mac->link.duplex = GMAC_CONTROL_DM; mac->link.speed10 = GMAC_CONTROL_PS; mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES; @@ -555,8 +549,5 @@ struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins, mac->mii.clk_csr_shift = 2; mac->mii.clk_csr_mask = GENMASK(5, 2); - /* Get and dump the chip ID */ - *synopsys_id = stmmac_get_synopsys_id(hwid); - - return mac; + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index 91b23f9db31a..b735143987e1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -27,6 +27,7 @@ #include <linux/crc32.h> #include <net/dsa.h> #include <asm/io.h> +#include "stmmac.h" #include "dwmac100.h" static void dwmac100_core_init(struct mac_device_info *hw, @@ -159,7 +160,7 @@ static void dwmac100_pmt(struct mac_device_info *hw, unsigned long mode) return; } -static const struct stmmac_ops dwmac100_ops = { +const struct stmmac_ops dwmac100_ops = { .core_init = dwmac100_core_init, .set_mac = stmmac_set_mac, .rx_ipc = dwmac100_rx_ipc_enable, @@ -172,20 +173,13 @@ static const struct stmmac_ops dwmac100_ops = { .get_umac_addr = dwmac100_get_umac_addr, }; -struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id) +int dwmac100_setup(struct stmmac_priv *priv) { - struct mac_device_info *mac; + struct mac_device_info *mac = priv->hw; - mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL); - if (!mac) - return NULL; - - pr_info("\tDWMAC100\n"); - - mac->pcsr = ioaddr; - mac->mac = &dwmac100_ops; - mac->dma = &dwmac100_dma_ops; + dev_info(priv->device, "\tDWMAC100\n"); + mac->pcsr = priv->ioaddr; mac->link.duplex = MAC_CONTROL_F; mac->link.speed10 = 0; mac->link.speed100 = 0; @@ -200,8 +194,5 @@ struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id) mac->mii.clk_csr_shift = 2; mac->mii.clk_csr_mask = GENMASK(5, 2); - /* Synopsys Id is not available on old chips */ - *synopsys_id = 0; - - return mac; + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index dedd40613090..03eab9077c1c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -34,7 +34,6 @@ #define GMAC_PCS_BASE 0x000000e0 #define GMAC_PHYIF_CONTROL_STATUS 0x000000f8 #define GMAC_PMT 0x000000c0 -#define GMAC_VERSION 0x00000110 #define GMAC_DEBUG 0x00000114 #define GMAC_HW_FEATURE0 0x0000011c #define GMAC_HW_FEATURE1 0x00000120 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 517b1f6736a8..7289b3b47d8e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -18,6 +18,7 @@ #include <linux/ethtool.h> #include <linux/io.h> #include <net/dsa.h> +#include "stmmac.h" #include "stmmac_pcs.h" #include "dwmac4.h" #include "dwmac5.h" @@ -700,7 +701,7 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x, x->mac_gmii_rx_proto_engine++; } -static const struct stmmac_ops dwmac4_ops = { +const struct stmmac_ops dwmac4_ops = { .core_init = dwmac4_core_init, .set_mac = stmmac_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, @@ -731,7 +732,7 @@ static const struct stmmac_ops dwmac4_ops = { .set_filter = dwmac4_set_filter, }; -static const struct stmmac_ops dwmac410_ops = { +const struct stmmac_ops dwmac410_ops = { .core_init = dwmac4_core_init, .set_mac = stmmac_dwmac4_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, @@ -762,7 +763,7 @@ static const struct stmmac_ops dwmac410_ops = { .set_filter = dwmac4_set_filter, }; -static const struct stmmac_ops dwmac510_ops = { +const struct stmmac_ops dwmac510_ops = { .core_init = dwmac4_core_init, .set_mac = stmmac_dwmac4_set_mac, .rx_ipc = dwmac4_rx_ipc_enable, @@ -796,19 +797,16 @@ static const struct stmmac_ops dwmac510_ops = { .safety_feat_dump = dwmac5_safety_feat_dump, }; -struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins, - int perfect_uc_entries, int *synopsys_id) +int dwmac4_setup(struct stmmac_priv *priv) { - struct mac_device_info *mac; - u32 hwid = readl(ioaddr + GMAC_VERSION); + struct mac_device_info *mac = priv->hw; - mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL); - if (!mac) - return NULL; + dev_info(priv->device, "\tDWMAC4/5\n"); - mac->pcsr = ioaddr; - mac->multicast_filter_bins = mcbins; - mac->unicast_filter_entries = perfect_uc_entries; + priv->dev->priv_flags |= IFF_UNICAST_FLT; + mac->pcsr = priv->ioaddr; + mac->multicast_filter_bins = priv->plat->multicast_filter_bins; + mac->unicast_filter_entries = priv->plat->unicast_filter_entries; mac->mcast_bits_log2 = 0; if (mac->multicast_filter_bins) @@ -828,20 +826,5 @@ struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins, mac->mii.clk_csr_shift = 8; mac->mii.clk_csr_mask = GENMASK(11, 8); - /* Get and dump the chip ID */ - *synopsys_id = stmmac_get_synopsys_id(hwid); - - if (*synopsys_id > DWMAC_CORE_4_00) - mac->dma = &dwmac410_dma_ops; - else - mac->dma = &dwmac4_dma_ops; - - if (*synopsys_id >= DWMAC_CORE_5_10) - mac->mac = &dwmac510_ops; - else if (*synopsys_id >= DWMAC_CORE_4_00) - mac->mac = &dwmac410_ops; - else - mac->mac = &dwmac4_ops; - - return mac; + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c new file mode 100644 index 000000000000..2b0a7e79de00 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* + * Copyright (c) 2018 Synopsys, Inc. and/or its affiliates. + * stmmac HW Interface Handling + */ + +#include "common.h" +#include "stmmac.h" + +static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg) +{ + u32 reg = readl(priv->ioaddr + id_reg); + + if (!reg) { + dev_info(priv->device, "Version ID not available\n"); + return 0x0; + } + + dev_info(priv->device, "User ID: 0x%x, Synopsys ID: 0x%x\n", + (unsigned int)(reg & GENMASK(15, 8)) >> 8, + (unsigned int)(reg & GENMASK(7, 0))); + return reg & GENMASK(7, 0); +} + +static void stmmac_dwmac_mode_quirk(struct stmmac_priv *priv) +{ + struct mac_device_info *mac = priv->hw; + + if (priv->chain_mode) { + dev_info(priv->device, "Chain mode enabled\n"); + priv->mode = STMMAC_CHAIN_MODE; + mac->mode = &chain_mode_ops; + } else { + dev_info(priv->device, "Ring mode enabled\n"); + priv->mode = STMMAC_RING_MODE; + mac->mode = &ring_mode_ops; + } +} + +static int stmmac_dwmac1_quirks(struct stmmac_priv *priv) +{ + struct mac_device_info *mac = priv->hw; + + if (priv->plat->enh_desc) { + dev_info(priv->device, "Enhanced/Alternate descriptors\n"); + + /* GMAC older than 3.50 has no extended descriptors */ + if (priv->synopsys_id >= DWMAC_CORE_3_50) { + dev_info(priv->device, "Enabled extended descriptors\n"); + priv->extend_desc = 1; + } else { + dev_warn(priv->device, "Extended descriptors not supported\n"); + } + + mac->desc = &enh_desc_ops; + } else { + dev_info(priv->device, "Normal descriptors\n"); + mac->desc = &ndesc_ops; + } + + stmmac_dwmac_mode_quirk(priv); + return 0; +} + +static int stmmac_dwmac4_quirks(struct stmmac_priv *priv) +{ + stmmac_dwmac_mode_quirk(priv); + return 0; +} + +static const struct stmmac_hwif_entry { + bool gmac; + bool gmac4; + u32 min_id; + const void *desc; + const void *dma; + const void *mac; + const void *hwtimestamp; + const void *mode; + int (*setup)(struct stmmac_priv *priv); + int (*quirks)(struct stmmac_priv *priv); +} stmmac_hw[] = { + /* NOTE: New HW versions shall go to the end of this table */ + { + .gmac = false, + .gmac4 = false, + .min_id = 0, + .desc = NULL, + .dma = &dwmac100_dma_ops, + .mac = &dwmac100_ops, + .hwtimestamp = &stmmac_ptp, + .mode = NULL, + .setup = dwmac100_setup, + .quirks = stmmac_dwmac1_quirks, + }, { + .gmac = true, + .gmac4 = false, + .min_id = 0, + .desc = NULL, + .dma = &dwmac1000_dma_ops, + .mac = &dwmac1000_ops, + .hwtimestamp = &stmmac_ptp, + .mode = NULL, + .setup = dwmac1000_setup, + .quirks = stmmac_dwmac1_quirks, + }, { + .gmac = false, + .gmac4 = true, + .min_id = 0, + .desc = &dwmac4_desc_ops, + .dma = &dwmac4_dma_ops, + .mac = &dwmac4_ops, + .hwtimestamp = &stmmac_ptp, + .mode = NULL, + .setup = dwmac4_setup, + .quirks = stmmac_dwmac4_quirks, + }, { + .gmac = false, + .gmac4 = true, + .min_id = DWMAC_CORE_4_00, + .desc = &dwmac4_desc_ops, + .dma = &dwmac4_dma_ops, + .mac = &dwmac410_ops, + .hwtimestamp = &stmmac_ptp, + .mode = &dwmac4_ring_mode_ops, + .setup = dwmac4_setup, + .quirks = NULL, + }, { + .gmac = false, + .gmac4 = true, + .min_id = DWMAC_CORE_4_10, + .desc = &dwmac4_desc_ops, + .dma = &dwmac410_dma_ops, + .mac = &dwmac410_ops, + .hwtimestamp = &stmmac_ptp, + .mode = &dwmac4_ring_mode_ops, + .setup = dwmac4_setup, + .quirks = NULL, + }, { + .gmac = false, + .gmac4 = true, + .min_id = DWMAC_CORE_5_10, + .desc = &dwmac4_desc_ops, + .dma = &dwmac410_dma_ops, + .mac = &dwmac510_ops, + .hwtimestamp = &stmmac_ptp, + .mode = &dwmac4_ring_mode_ops, + .setup = dwmac4_setup, + .quirks = NULL, + } +}; + +int stmmac_hwif_init(struct stmmac_priv *priv) +{ + bool needs_gmac4 = priv->plat->has_gmac4; + bool needs_gmac = priv->plat->has_gmac; + const struct stmmac_hwif_entry *entry; + struct mac_device_info *mac; + int i, ret; + u32 id; + + if (needs_gmac) { + id = stmmac_get_id(priv, GMAC_VERSION); + } else { + id = stmmac_get_id(priv, GMAC4_VERSION); + } + + /* Save ID for later use */ + priv->synopsys_id = id; + + /* Check for HW specific setup first */ + if (priv->plat->setup) { + priv->hw = priv->plat->setup(priv); + if (!priv->hw) + return -ENOMEM; + return 0; + } + + mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL); + if (!mac) + return -ENOMEM; + + /* Fallback to generic HW */ + for (i = ARRAY_SIZE(stmmac_hw) - 1; i >= 0; i--) { + entry = &stmmac_hw[i]; + + if (needs_gmac ^ entry->gmac) + continue; + if (needs_gmac4 ^ entry->gmac4) + continue; + if (id < entry->min_id) + continue; + + mac->desc = entry->desc; + mac->dma = entry->dma; + mac->mac = entry->mac; + mac->ptp = entry->hwtimestamp; + mac->mode = entry->mode; + + priv->hw = mac; + + /* Entry found */ + ret = entry->setup(priv); + if (ret) + return ret; + + /* Run quirks, if needed */ + if (entry->quirks) { + ret = entry->quirks(priv); + if (ret) + return ret; + } + + return 0; + } + + dev_err(priv->device, "Failed to find HW IF (id=0x%x, gmac=%d/%d)\n", + id, needs_gmac, needs_gmac4); + return -EINVAL; +} diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index f81ded4a9946..bfad61607f07 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -418,4 +418,21 @@ struct stmmac_mode_ops { #define stmmac_clean_desc3(__priv, __args...) \ stmmac_do_void_callback(__priv, mode, clean_desc3, __args) +struct stmmac_priv; + +extern const struct stmmac_ops dwmac100_ops; +extern const struct stmmac_dma_ops dwmac100_dma_ops; +extern const struct stmmac_ops dwmac1000_ops; +extern const struct stmmac_dma_ops dwmac1000_dma_ops; +extern const struct stmmac_ops dwmac4_ops; +extern const struct stmmac_dma_ops dwmac4_dma_ops; +extern const struct stmmac_ops dwmac410_ops; +extern const struct stmmac_dma_ops dwmac410_dma_ops; +extern const struct stmmac_ops dwmac510_ops; + +#define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ +#define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */ + +int stmmac_hwif_init(struct stmmac_priv *priv); + #endif /* __STMMAC_HWIF_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index da50451f8999..2443f20e07bf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -130,6 +130,7 @@ struct stmmac_priv { int eee_active; int tx_lpi_timer; unsigned int mode; + unsigned int chain_mode; int extend_desc; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 48b55407a953..0135fd3aa6ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -769,7 +769,6 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) netdev_info(priv->dev, "IEEE 1588-2008 Advanced Timestamp supported\n"); - priv->hw->ptp = &stmmac_ptp; priv->hwts_tx_en = 0; priv->hwts_rx_en = 0; @@ -2122,32 +2121,6 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv) } /** - * stmmac_selec_desc_mode - to select among: normal/alternate/extend descriptors - * @priv: driver private structure - * Description: select the Enhanced/Alternate or Normal descriptors. - * In case of Enhanced/Alternate, it checks if the extended descriptors are - * supported by the HW capability register. - */ -static void stmmac_selec_desc_mode(struct stmmac_priv *priv) -{ - if (priv->plat->enh_desc) { - dev_info(priv->device, "Enhanced/Alternate descriptors\n"); - - /* GMAC older than 3.50 has no extended descriptors */ - if (priv->synopsys_id >= DWMAC_CORE_3_50) { - dev_info(priv->device, "Enabled extended descriptors\n"); - priv->extend_desc = 1; - } else - dev_warn(priv->device, "Extended descriptors not supported\n"); - - priv->hw->desc = &enh_desc_ops; - } else { - dev_info(priv->device, "Normal descriptors\n"); - priv->hw->desc = &ndesc_ops; - } -} - -/** * stmmac_get_hw_features - get MAC capabilities from the HW cap. register. * @priv: driver private structure * Description: @@ -4098,49 +4071,17 @@ static void stmmac_service_task(struct work_struct *work) */ static int stmmac_hw_init(struct stmmac_priv *priv) { - struct mac_device_info *mac; - - /* Identify the MAC HW device */ - if (priv->plat->setup) { - mac = priv->plat->setup(priv); - } else if (priv->plat->has_gmac) { - priv->dev->priv_flags |= IFF_UNICAST_FLT; - mac = dwmac1000_setup(priv->ioaddr, - priv->plat->multicast_filter_bins, - priv->plat->unicast_filter_entries, - &priv->synopsys_id); - } else if (priv->plat->has_gmac4) { - priv->dev->priv_flags |= IFF_UNICAST_FLT; - mac = dwmac4_setup(priv->ioaddr, - priv->plat->multicast_filter_bins, - priv->plat->unicast_filter_entries, - &priv->synopsys_id); - } else { - mac = dwmac100_setup(priv->ioaddr, &priv->synopsys_id); - } - if (!mac) - return -ENOMEM; - - priv->hw = mac; + int ret; /* dwmac-sun8i only work in chain mode */ if (priv->plat->has_sun8i) chain_mode = 1; + priv->chain_mode = chain_mode; - /* To use the chained or ring mode */ - if (priv->synopsys_id >= DWMAC_CORE_4_00) { - priv->hw->mode = &dwmac4_ring_mode_ops; - } else { - if (chain_mode) { - priv->hw->mode = &chain_mode_ops; - dev_info(priv->device, "Chain mode enabled\n"); - priv->mode = STMMAC_CHAIN_MODE; - } else { - priv->hw->mode = &ring_mode_ops; - dev_info(priv->device, "Ring mode enabled\n"); - priv->mode = STMMAC_RING_MODE; - } - } + /* Initialize HW Interface */ + ret = stmmac_hwif_init(priv); + if (ret) + return ret; /* Get the HW capability (new GMAC newer than 3.50a) */ priv->hw_cap_support = stmmac_get_hw_features(priv); @@ -4174,12 +4115,6 @@ static int stmmac_hw_init(struct stmmac_priv *priv) dev_info(priv->device, "No HW DMA feature register supported\n"); } - /* To use alternate (extended), normal or GMAC4 descriptor structures */ - if (priv->synopsys_id >= DWMAC_CORE_4_00) - priv->hw->desc = &dwmac4_desc_ops; - else - stmmac_selec_desc_mode(priv); - if (priv->plat->rx_coe) { priv->hw->rx_csum = priv->plat->rx_coe; dev_info(priv->device, "RX Checksum Offload Engine supported\n"); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 30371274409d..74f828412055 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -129,7 +129,7 @@ do { \ #define RX_PRIORITY_MAPPING 0x76543210 #define TX_PRIORITY_MAPPING 0x33221100 -#define CPDMA_TX_PRIORITY_MAP 0x01234567 +#define CPDMA_TX_PRIORITY_MAP 0x76543210 #define CPSW_VLAN_AWARE BIT(1) #define CPSW_RX_VLAN_ENCAP BIT(2) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index c180b480f8ef..13e4c1eff353 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -217,7 +217,7 @@ static int kiss_esc_crc(unsigned char *s, unsigned char *d, unsigned short crc, c = *s++; else if (len > 1) c = crc >> 8; - else if (len > 0) + else c = crc & 0xff; len--; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 725f4b4afc6d..adde8fc45588 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -514,6 +514,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) const struct macvlan_dev *vlan = netdev_priv(dev); const struct macvlan_port *port = vlan->port; const struct macvlan_dev *dest; + void *accel_priv = NULL; if (vlan->mode == MACVLAN_MODE_BRIDGE) { const struct ethhdr *eth = (void *)skb->data; @@ -533,9 +534,14 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) } } + /* For packets that are non-multicast and not bridged we will pass + * the necessary information so that the lowerdev can distinguish + * the source of the packets via the accel_priv value. + */ + accel_priv = vlan->accel_priv; xmit_world: skb->dev = vlan->lowerdev; - return dev_queue_xmit(skb); + return dev_queue_xmit_accel(skb, accel_priv); } static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb) @@ -552,19 +558,14 @@ static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, str static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { + struct macvlan_dev *vlan = netdev_priv(dev); unsigned int len = skb->len; int ret; - struct macvlan_dev *vlan = netdev_priv(dev); if (unlikely(netpoll_tx_running(dev))) return macvlan_netpoll_send_skb(vlan, skb); - if (vlan->fwd_priv) { - skb->dev = vlan->lowerdev; - ret = dev_queue_xmit_accel(skb, vlan->fwd_priv); - } else { - ret = macvlan_queue_xmit(skb, dev); - } + ret = macvlan_queue_xmit(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct vlan_pcpu_stats *pcpu_stats; @@ -613,26 +614,27 @@ static int macvlan_open(struct net_device *dev) goto hash_add; } - if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) { - vlan->fwd_priv = - lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev); - - /* If we get a NULL pointer back, or if we get an error - * then we should just fall through to the non accelerated path - */ - if (IS_ERR_OR_NULL(vlan->fwd_priv)) { - vlan->fwd_priv = NULL; - } else - return 0; - } - err = -EBUSY; if (macvlan_addr_busy(vlan->port, dev->dev_addr)) goto out; - err = dev_uc_add(lowerdev, dev->dev_addr); - if (err < 0) - goto out; + /* Attempt to populate accel_priv which is used to offload the L2 + * forwarding requests for unicast packets. + */ + if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) + vlan->accel_priv = + lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev); + + /* If earlier attempt to offload failed, or accel_priv is not + * populated we must add the unicast address to the lower device. + */ + if (IS_ERR_OR_NULL(vlan->accel_priv)) { + vlan->accel_priv = NULL; + err = dev_uc_add(lowerdev, dev->dev_addr); + if (err < 0) + goto out; + } + if (dev->flags & IFF_ALLMULTI) { err = dev_set_allmulti(lowerdev, 1); if (err < 0) @@ -653,13 +655,14 @@ clear_multi: if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(lowerdev, -1); del_unicast: - dev_uc_del(lowerdev, dev->dev_addr); -out: - if (vlan->fwd_priv) { + if (vlan->accel_priv) { lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev, - vlan->fwd_priv); - vlan->fwd_priv = NULL; + vlan->accel_priv); + vlan->accel_priv = NULL; + } else { + dev_uc_del(lowerdev, dev->dev_addr); } +out: return err; } @@ -668,11 +671,10 @@ static int macvlan_stop(struct net_device *dev) struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - if (vlan->fwd_priv) { + if (vlan->accel_priv) { lowerdev->netdev_ops->ndo_dfwd_del_station(lowerdev, - vlan->fwd_priv); - vlan->fwd_priv = NULL; - return 0; + vlan->accel_priv); + vlan->accel_priv = NULL; } dev_uc_unsync(lowerdev, dev); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index c22e8e383247..25e2a099b71c 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1393,6 +1393,15 @@ static int m88e1318_set_wol(struct phy_device *phydev, if (err < 0) goto error; + /* If WOL event happened once, the LED[2] interrupt pin + * will not be cleared unless we reading the interrupt status + * register. If interrupts are in use, the normal interrupt + * handling will clear the WOL event. Clear the WOL event + * before enabling it if !phy_interrupt_is_valid() + */ + if (!phy_interrupt_is_valid(phydev)) + phy_read(phydev, MII_M1011_IEVENT); + /* Enable the WOL interrupt */ err = __phy_modify(phydev, MII_88E1318S_PHY_CSIER, 0, MII_88E1318S_PHY_CSIER_WOL_EIE); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ac23322a32e1..9e4ba8e80a18 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -535,8 +535,17 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id, /* Grab the bits from PHYIR1, and put them in the upper half */ phy_reg = mdiobus_read(bus, addr, MII_PHYSID1); - if (phy_reg < 0) + if (phy_reg < 0) { + /* if there is no device, return without an error so scanning + * the bus works properly + */ + if (phy_reg == -EIO || phy_reg == -ENODEV) { + *phy_id = 0xffffffff; + return 0; + } + return -EIO; + } *phy_id = (phy_reg & 0xffff) << 16; diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 1483bc7b01e1..7df07337d69c 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -620,6 +620,10 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); error = -EINVAL; + + if (sockaddr_len != sizeof(struct sockaddr_pppox)) + goto end; + if (sp->sa_protocol != PX_PROTO_OE) goto end; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 5c77fa445d2d..9dbd390ace34 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1072,14 +1072,11 @@ static void team_port_leave(struct team *team, struct team_port *port) } #ifdef CONFIG_NET_POLL_CONTROLLER -static int team_port_enable_netpoll(struct team *team, struct team_port *port) +static int __team_port_enable_netpoll(struct team_port *port) { struct netpoll *np; int err; - if (!team->dev->npinfo) - return 0; - np = kzalloc(sizeof(*np), GFP_KERNEL); if (!np) return -ENOMEM; @@ -1093,6 +1090,14 @@ static int team_port_enable_netpoll(struct team *team, struct team_port *port) return err; } +static int team_port_enable_netpoll(struct team_port *port) +{ + if (!port->team->dev->npinfo) + return 0; + + return __team_port_enable_netpoll(port); +} + static void team_port_disable_netpoll(struct team_port *port) { struct netpoll *np = port->np; @@ -1107,7 +1112,7 @@ static void team_port_disable_netpoll(struct team_port *port) kfree(np); } #else -static int team_port_enable_netpoll(struct team *team, struct team_port *port) +static int team_port_enable_netpoll(struct team_port *port) { return 0; } @@ -1221,7 +1226,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_vids_add; } - err = team_port_enable_netpoll(team, port); + err = team_port_enable_netpoll(port); if (err) { netdev_err(dev, "Failed to enable netpoll on device %s\n", portname); @@ -1918,7 +1923,7 @@ static int team_netpoll_setup(struct net_device *dev, mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { - err = team_port_enable_netpoll(team, port); + err = __team_port_enable_netpoll(port); if (err) { __team_netpoll_cleanup(team); break; diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 85997184e047..9d36473dc2a2 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -103,8 +103,7 @@ config NVDIMM_DAX Select Y if unsure config OF_PMEM - # FIXME: make tristate once OF_NUMA dependency removed - bool "Device-tree support for persistent memory regions" + tristate "Device-tree support for persistent memory regions" depends on OF default LIBNVDIMM help diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index e00d45522b80..8d348b22ba45 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -88,9 +88,9 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) { struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + int rc = validate_dimm(ndd), cmd_rc = 0; struct nd_cmd_get_config_data_hdr *cmd; struct nvdimm_bus_descriptor *nd_desc; - int rc = validate_dimm(ndd); u32 max_cmd_size, config_size; size_t offset; @@ -124,9 +124,11 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) cmd->in_offset = offset; rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), ND_CMD_GET_CONFIG_DATA, cmd, - cmd->in_length + sizeof(*cmd), NULL); - if (rc || cmd->status) { - rc = -ENXIO; + cmd->in_length + sizeof(*cmd), &cmd_rc); + if (rc < 0) + break; + if (cmd_rc < 0) { + rc = cmd_rc; break; } memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); @@ -140,9 +142,9 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len) { - int rc = validate_dimm(ndd); size_t max_cmd_size, buf_offset; struct nd_cmd_set_config_hdr *cmd; + int rc = validate_dimm(ndd), cmd_rc = 0; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; @@ -164,7 +166,6 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, for (buf_offset = 0; len; len -= cmd->in_length, buf_offset += cmd->in_length) { size_t cmd_size; - u32 *status; cmd->in_offset = offset + buf_offset; cmd->in_length = min(max_cmd_size, len); @@ -172,12 +173,13 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, /* status is output in the last 4-bytes of the command buffer */ cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32); - status = ((void *) cmd) + cmd_size - sizeof(u32); rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL); - if (rc || *status) { - rc = rc ? rc : -ENXIO; + ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, &cmd_rc); + if (rc < 0) + break; + if (cmd_rc < 0) { + rc = cmd_rc; break; } } diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 85013bad35de..0a701837dfc0 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -67,7 +67,7 @@ static int of_pmem_region_probe(struct platform_device *pdev) */ memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.attr_groups = region_attr_groups; - ndr_desc.numa_node = of_node_to_nid(np); + ndr_desc.numa_node = dev_to_node(&pdev->dev); ndr_desc.res = &pdev->resource[i]; ndr_desc.of_node = np; set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 9d27016c899e..0434ab7b6497 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -740,10 +740,7 @@ static int do_dma_request(struct mport_dma_req *req, tx->callback = dma_xfer_callback; tx->callback_param = req; - req->dmach = chan; - req->sync = sync; req->status = DMA_IN_PROGRESS; - init_completion(&req->req_comp); kref_get(&req->refcount); cookie = dmaengine_submit(tx); @@ -831,13 +828,20 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, if (!req) return -ENOMEM; - kref_init(&req->refcount); - ret = get_dma_channel(priv); if (ret) { kfree(req); return ret; } + chan = priv->dmach; + + kref_init(&req->refcount); + init_completion(&req->req_comp); + req->dir = dir; + req->filp = filp; + req->priv = priv; + req->dmach = chan; + req->sync = sync; /* * If parameter loc_addr != NULL, we are transferring data from/to @@ -925,11 +929,6 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, xfer->offset, xfer->length); } - req->dir = dir; - req->filp = filp; - req->priv = priv; - chan = priv->dmach; - nents = dma_map_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir); if (nents == 0) { diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 4326715dc13e..78b98b3e7efa 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -557,7 +557,6 @@ enum qeth_prot_versions { enum qeth_cmd_buffer_state { BUF_STATE_FREE, BUF_STATE_LOCKED, - BUF_STATE_PROCESSED, }; enum qeth_cq { @@ -601,7 +600,6 @@ struct qeth_channel { struct qeth_cmd_buffer iob[QETH_CMD_BUFFER_NO]; atomic_t irq_pending; int io_buf_no; - int buf_no; }; /** diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 04fefa5bb08d..dffd820731f2 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -706,7 +706,6 @@ void qeth_clear_ipacmd_list(struct qeth_card *card) qeth_put_reply(reply); } spin_unlock_irqrestore(&card->lock, flags); - atomic_set(&card->write.irq_pending, 0); } EXPORT_SYMBOL_GPL(qeth_clear_ipacmd_list); @@ -818,7 +817,6 @@ void qeth_clear_cmd_buffers(struct qeth_channel *channel) for (cnt = 0; cnt < QETH_CMD_BUFFER_NO; cnt++) qeth_release_buffer(channel, &channel->iob[cnt]); - channel->buf_no = 0; channel->io_buf_no = 0; } EXPORT_SYMBOL_GPL(qeth_clear_cmd_buffers); @@ -924,7 +922,6 @@ static int qeth_setup_channel(struct qeth_channel *channel) kfree(channel->iob[cnt].data); return -ENOMEM; } - channel->buf_no = 0; channel->io_buf_no = 0; atomic_set(&channel->irq_pending, 0); spin_lock_init(&channel->iob_lock); @@ -1100,16 +1097,9 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, { int rc; int cstat, dstat; - struct qeth_cmd_buffer *buffer; + struct qeth_cmd_buffer *iob = NULL; struct qeth_channel *channel; struct qeth_card *card; - struct qeth_cmd_buffer *iob; - __u8 index; - - if (__qeth_check_irb_error(cdev, intparm, irb)) - return; - cstat = irb->scsw.cmd.cstat; - dstat = irb->scsw.cmd.dstat; card = CARD_FROM_CDEV(cdev); if (!card) @@ -1127,6 +1117,19 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, channel = &card->data; QETH_CARD_TEXT(card, 5, "data"); } + + if (qeth_intparm_is_iob(intparm)) + iob = (struct qeth_cmd_buffer *) __va((addr_t)intparm); + + if (__qeth_check_irb_error(cdev, intparm, irb)) { + /* IO was terminated, free its resources. */ + if (iob) + qeth_release_buffer(iob->channel, iob); + atomic_set(&channel->irq_pending, 0); + wake_up(&card->wait_q); + return; + } + atomic_set(&channel->irq_pending, 0); if (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC)) @@ -1150,6 +1153,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, /* we don't have to handle this further */ intparm = 0; } + + cstat = irb->scsw.cmd.cstat; + dstat = irb->scsw.cmd.dstat; + if ((dstat & DEV_STAT_UNIT_EXCEP) || (dstat & DEV_STAT_UNIT_CHECK) || (cstat)) { @@ -1182,25 +1189,15 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm, channel->state = CH_STATE_RCD_DONE; goto out; } - if (intparm) { - buffer = (struct qeth_cmd_buffer *) __va((addr_t)intparm); - buffer->state = BUF_STATE_PROCESSED; - } if (channel == &card->data) return; if (channel == &card->read && channel->state == CH_STATE_UP) __qeth_issue_next_read(card); - iob = channel->iob; - index = channel->buf_no; - while (iob[index].state == BUF_STATE_PROCESSED) { - if (iob[index].callback != NULL) - iob[index].callback(channel, iob + index); + if (iob && iob->callback) + iob->callback(iob->channel, iob); - index = (index + 1) % QETH_CMD_BUFFER_NO; - } - channel->buf_no = index; out: wake_up(&card->wait_q); return; @@ -1870,8 +1867,8 @@ static int qeth_idx_activate_get_answer(struct qeth_channel *channel, atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0); QETH_DBF_TEXT(SETUP, 6, "noirqpnd"); spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_start(channel->ccwdev, - &channel->ccw, (addr_t) iob, 0, 0); + rc = ccw_device_start_timeout(channel->ccwdev, &channel->ccw, + (addr_t) iob, 0, 0, QETH_TIMEOUT); spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); if (rc) { @@ -1888,7 +1885,6 @@ static int qeth_idx_activate_get_answer(struct qeth_channel *channel, if (channel->state != CH_STATE_UP) { rc = -ETIME; QETH_DBF_TEXT_(SETUP, 2, "3err%d", rc); - qeth_clear_cmd_buffers(channel); } else rc = 0; return rc; @@ -1942,8 +1938,8 @@ static int qeth_idx_activate_channel(struct qeth_channel *channel, atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0); QETH_DBF_TEXT(SETUP, 6, "noirqpnd"); spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_start(channel->ccwdev, - &channel->ccw, (addr_t) iob, 0, 0); + rc = ccw_device_start_timeout(channel->ccwdev, &channel->ccw, + (addr_t) iob, 0, 0, QETH_TIMEOUT); spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); if (rc) { @@ -1964,7 +1960,6 @@ static int qeth_idx_activate_channel(struct qeth_channel *channel, QETH_DBF_MESSAGE(2, "%s IDX activate timed out\n", dev_name(&channel->ccwdev->dev)); QETH_DBF_TEXT_(SETUP, 2, "2err%d", -ETIME); - qeth_clear_cmd_buffers(channel); return -ETIME; } return qeth_idx_activate_get_answer(channel, idx_reply_cb); @@ -2166,8 +2161,8 @@ int qeth_send_control_data(struct qeth_card *card, int len, QETH_CARD_TEXT(card, 6, "noirqpnd"); spin_lock_irqsave(get_ccwdev_lock(card->write.ccwdev), flags); - rc = ccw_device_start(card->write.ccwdev, &card->write.ccw, - (addr_t) iob, 0, 0); + rc = ccw_device_start_timeout(CARD_WDEV(card), &card->write.ccw, + (addr_t) iob, 0, 0, event_timeout); spin_unlock_irqrestore(get_ccwdev_lock(card->write.ccwdev), flags); if (rc) { QETH_DBF_MESSAGE(2, "%s qeth_send_control_data: " @@ -2199,8 +2194,6 @@ int qeth_send_control_data(struct qeth_card *card, int len, } } - if (reply->rc == -EIO) - goto error; rc = reply->rc; qeth_put_reply(reply); return rc; @@ -2211,10 +2204,6 @@ time_err: list_del_init(&reply->list); spin_unlock_irqrestore(&reply->card->lock, flags); atomic_inc(&reply->received); -error: - atomic_set(&card->write.irq_pending, 0); - qeth_release_buffer(iob->channel, iob); - card->write.buf_no = (card->write.buf_no + 1) % QETH_CMD_BUFFER_NO; rc = reply->rc; qeth_put_reply(reply); return rc; @@ -3033,28 +3022,23 @@ static int qeth_send_startlan(struct qeth_card *card) return rc; } -static int qeth_default_setadapterparms_cb(struct qeth_card *card, - struct qeth_reply *reply, unsigned long data) +static int qeth_setadpparms_inspect_rc(struct qeth_ipa_cmd *cmd) { - struct qeth_ipa_cmd *cmd; - - QETH_CARD_TEXT(card, 4, "defadpcb"); - - cmd = (struct qeth_ipa_cmd *) data; - if (cmd->hdr.return_code == 0) + if (!cmd->hdr.return_code) cmd->hdr.return_code = cmd->data.setadapterparms.hdr.return_code; - return 0; + return cmd->hdr.return_code; } static int qeth_query_setadapterparms_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 3, "quyadpcb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *) data; if (cmd->data.setadapterparms.data.query_cmds_supp.lan_type & 0x7f) { card->info.link_type = cmd->data.setadapterparms.data.query_cmds_supp.lan_type; @@ -3062,7 +3046,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card, } card->options.adp.supported_funcs = cmd->data.setadapterparms.data.query_cmds_supp.supported_cmds; - return qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd); + return 0; } static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card, @@ -3154,22 +3138,20 @@ EXPORT_SYMBOL_GPL(qeth_query_ipassists); static int qeth_query_switch_attributes_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; - struct qeth_switch_info *sw_info; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_query_switch_attributes *attrs; + struct qeth_switch_info *sw_info; QETH_CARD_TEXT(card, 2, "qswiatcb"); - cmd = (struct qeth_ipa_cmd *) data; - sw_info = (struct qeth_switch_info *)reply->param; - if (cmd->data.setadapterparms.hdr.return_code == 0) { - attrs = &cmd->data.setadapterparms.data.query_switch_attributes; - sw_info->capabilities = attrs->capabilities; - sw_info->settings = attrs->settings; - QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities, - sw_info->settings); - } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; + sw_info = (struct qeth_switch_info *)reply->param; + attrs = &cmd->data.setadapterparms.data.query_switch_attributes; + sw_info->capabilities = attrs->capabilities; + sw_info->settings = attrs->settings; + QETH_CARD_TEXT_(card, 2, "%04x%04x", sw_info->capabilities, + sw_info->settings); return 0; } @@ -4207,16 +4189,13 @@ EXPORT_SYMBOL_GPL(qeth_do_send_packet); static int qeth_setadp_promisc_mode_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_ipacmd_setadpparms *setparms; QETH_CARD_TEXT(card, 4, "prmadpcb"); - cmd = (struct qeth_ipa_cmd *) data; setparms = &(cmd->data.setadapterparms); - - qeth_default_setadapterparms_cb(card, reply, (unsigned long)cmd); - if (cmd->hdr.return_code) { + if (qeth_setadpparms_inspect_rc(cmd)) { QETH_CARD_TEXT_(card, 4, "prmrc%x", cmd->hdr.return_code); setparms->data.mode = SET_PROMISC_MODE_OFF; } @@ -4286,18 +4265,18 @@ EXPORT_SYMBOL_GPL(qeth_get_stats); static int qeth_setadpparms_change_macaddr_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 4, "chgmaccb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *) data; if (!card->options.layer2 || !(card->info.mac_bits & QETH_LAYER2_MAC_READ)) { ether_addr_copy(card->dev->dev_addr, cmd->data.setadapterparms.data.change_addr.addr); card->info.mac_bits |= QETH_LAYER2_MAC_READ; } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); return 0; } @@ -4328,13 +4307,15 @@ EXPORT_SYMBOL_GPL(qeth_setadpparms_change_macaddr); static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_set_access_ctrl *access_ctrl_req; int fallback = *(int *)reply->param; QETH_CARD_TEXT(card, 4, "setaccb"); + if (cmd->hdr.return_code) + return 0; + qeth_setadpparms_inspect_rc(cmd); - cmd = (struct qeth_ipa_cmd *) data; access_ctrl_req = &cmd->data.setadapterparms.data.set_access_ctrl; QETH_DBF_TEXT_(SETUP, 2, "setaccb"); QETH_DBF_TEXT_(SETUP, 2, "%s", card->gdev->dev.kobj.name); @@ -4407,7 +4388,6 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card, card->options.isolation = card->options.prev_isolation; break; } - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); return 0; } @@ -4695,14 +4675,15 @@ out: static int qeth_setadpparms_query_oat_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; struct qeth_qoat_priv *priv; char *resdata; int resdatalen; QETH_CARD_TEXT(card, 3, "qoatcb"); + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - cmd = (struct qeth_ipa_cmd *)data; priv = (struct qeth_qoat_priv *)reply->param; resdatalen = cmd->data.setadapterparms.hdr.cmdlength; resdata = (char *)data + 28; @@ -4796,21 +4777,18 @@ out: static int qeth_query_card_info_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct carrier_info *carrier_info = (struct carrier_info *)reply->param; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *)data; struct qeth_query_card_info *card_info; - struct carrier_info *carrier_info; QETH_CARD_TEXT(card, 2, "qcrdincb"); - carrier_info = (struct carrier_info *)reply->param; - cmd = (struct qeth_ipa_cmd *)data; - card_info = &cmd->data.setadapterparms.data.card_info; - if (cmd->data.setadapterparms.hdr.return_code == 0) { - carrier_info->card_type = card_info->card_type; - carrier_info->port_mode = card_info->port_mode; - carrier_info->port_speed = card_info->port_speed; - } + if (qeth_setadpparms_inspect_rc(cmd)) + return 0; - qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd); + card_info = &cmd->data.setadapterparms.data.card_info; + carrier_info->card_type = card_info->card_type; + carrier_info->port_mode = card_info->port_mode; + carrier_info->port_speed = card_info->port_speed; return 0; } @@ -4857,7 +4835,7 @@ int qeth_vm_request_mac(struct qeth_card *card) goto out; } - ccw_device_get_id(CARD_DDEV(card), &id); + ccw_device_get_id(CARD_RDEV(card), &id); request->resp_buf_len = sizeof(*response); request->resp_version = DIAG26C_VERSION2; request->op_code = DIAG26C_GET_MAC; @@ -6563,10 +6541,14 @@ static int __init qeth_core_init(void) mutex_init(&qeth_mod_mutex); qeth_wq = create_singlethread_workqueue("qeth_wq"); + if (!qeth_wq) { + rc = -ENOMEM; + goto out_err; + } rc = qeth_register_dbf_views(); if (rc) - goto out_err; + goto dbf_err; qeth_core_root_dev = root_device_register("qeth"); rc = PTR_ERR_OR_ZERO(qeth_core_root_dev); if (rc) @@ -6603,6 +6585,8 @@ slab_err: root_device_unregister(qeth_core_root_dev); register_err: qeth_unregister_dbf_views(); +dbf_err: + destroy_workqueue(qeth_wq); out_err: pr_err("Initializing the qeth device driver failed\n"); return rc; diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 619f897b4bb0..f4d1ec0b8f5a 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -35,6 +35,18 @@ extern unsigned char IPA_PDU_HEADER[]; #define QETH_HALT_CHANNEL_PARM -11 #define QETH_RCD_PARM -12 +static inline bool qeth_intparm_is_iob(unsigned long intparm) +{ + switch (intparm) { + case QETH_CLEAR_CHANNEL_PARM: + case QETH_HALT_CHANNEL_PARM: + case QETH_RCD_PARM: + case 0: + return false; + } + return true; +} + /*****************************************************************************/ /* IP Assist related definitions */ /*****************************************************************************/ diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2ad6f12f3d49..b8079f2a65b3 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -121,13 +121,10 @@ static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) QETH_CARD_TEXT(card, 2, "L2Setmac"); rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_SETVMAC); if (rc == 0) { - card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; - ether_addr_copy(card->dev->dev_addr, mac); dev_info(&card->gdev->dev, - "MAC address %pM successfully registered on device %s\n", - card->dev->dev_addr, card->dev->name); + "MAC address %pM successfully registered on device %s\n", + mac, card->dev->name); } else { - card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED; switch (rc) { case -EEXIST: dev_warn(&card->gdev->dev, @@ -142,19 +139,6 @@ static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) return rc; } -static int qeth_l2_send_delmac(struct qeth_card *card, __u8 *mac) -{ - int rc; - - QETH_CARD_TEXT(card, 2, "L2Delmac"); - if (!(card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) - return 0; - rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_DELVMAC); - if (rc == 0) - card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED; - return rc; -} - static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac) { enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ? @@ -519,6 +503,7 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) { struct sockaddr *addr = p; struct qeth_card *card = dev->ml_priv; + u8 old_addr[ETH_ALEN]; int rc = 0; QETH_CARD_TEXT(card, 3, "setmac"); @@ -530,14 +515,35 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) return -EOPNOTSUPP; } QETH_CARD_HEX(card, 3, addr->sa_data, ETH_ALEN); + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { QETH_CARD_TEXT(card, 3, "setmcREC"); return -ERESTARTSYS; } - rc = qeth_l2_send_delmac(card, &card->dev->dev_addr[0]); - if (!rc || (rc == -ENOENT)) - rc = qeth_l2_send_setmac(card, addr->sa_data); - return rc ? -EINVAL : 0; + + if (!qeth_card_hw_is_reachable(card)) { + ether_addr_copy(dev->dev_addr, addr->sa_data); + return 0; + } + + /* don't register the same address twice */ + if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) && + (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) + return 0; + + /* add the new address, switch over, drop the old */ + rc = qeth_l2_send_setmac(card, addr->sa_data); + if (rc) + return rc; + ether_addr_copy(old_addr, dev->dev_addr); + ether_addr_copy(dev->dev_addr, addr->sa_data); + + if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED) + qeth_l2_remove_mac(card, old_addr); + card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; + return 0; } static void qeth_promisc_to_bridge(struct qeth_card *card) @@ -1067,8 +1073,9 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) goto out_remove; } - if (card->info.type != QETH_CARD_TYPE_OSN) - qeth_l2_send_setmac(card, &card->dev->dev_addr[0]); + if (card->info.type != QETH_CARD_TYPE_OSN && + !qeth_l2_send_setmac(card, card->dev->dev_addr)) + card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) { if (card->info.hwtrap && @@ -1338,8 +1345,8 @@ static int qeth_osn_send_control_data(struct qeth_card *card, int len, qeth_prepare_control_data(card, len, iob); QETH_CARD_TEXT(card, 6, "osnoirqp"); spin_lock_irqsave(get_ccwdev_lock(card->write.ccwdev), flags); - rc = ccw_device_start(card->write.ccwdev, &card->write.ccw, - (addr_t) iob, 0, 0); + rc = ccw_device_start_timeout(CARD_WDEV(card), &card->write.ccw, + (addr_t) iob, 0, 0, QETH_IPA_TIMEOUT); spin_unlock_irqrestore(get_ccwdev_lock(card->write.ccwdev), flags); if (rc) { QETH_DBF_MESSAGE(2, "qeth_osn_send_control_data: " diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index bbf38befefb2..c4b49fca4871 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -46,8 +46,10 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" #define VHOST_NET_WEIGHT 0x80000 /* Max number of packets transferred before requeueing the job. - * Using this limit prevents one virtqueue from starving rx. */ -#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2) + * Using this limit prevents one virtqueue from starving others with small + * pkts. + */ +#define VHOST_NET_PKT_WEIGHT 256 /* MAX number of TX used buffers for outstanding zerocopy */ #define VHOST_MAX_PEND 128 @@ -587,7 +589,7 @@ static void handle_tx(struct vhost_net *net) vhost_zerocopy_signal_used(net, vq); vhost_net_tx_packet(net); if (unlikely(total_len >= VHOST_NET_WEIGHT) || - unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) { + unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT)) { vhost_poll_queue(&vq->poll); break; } @@ -769,6 +771,7 @@ static void handle_rx(struct vhost_net *net) struct socket *sock; struct iov_iter fixup; __virtio16 num_buffers; + int recv_pkts = 0; mutex_lock_nested(&vq->mutex, 0); sock = vq->private_data; @@ -872,7 +875,8 @@ static void handle_rx(struct vhost_net *net) if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, vhost_len); total_len += vhost_len; - if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + if (unlikely(total_len >= VHOST_NET_WEIGHT) || + unlikely(++recv_pkts >= VHOST_NET_PKT_WEIGHT)) { vhost_poll_queue(&vq->poll); goto out; } diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index a5b8eb21201f..1abe4d021fd2 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -55,6 +55,8 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table); #define WDT_CTRL_WDT_INTR BIT(2) #define WDT_CTRL_RESET_SYSTEM BIT(1) #define WDT_CTRL_ENABLE BIT(0) +#define WDT_TIMEOUT_STATUS 0x10 +#define WDT_TIMEOUT_STATUS_BOOT_SECONDARY BIT(1) /* * WDT_RESET_WIDTH controls the characteristics of the external pulse (if @@ -192,6 +194,7 @@ static int aspeed_wdt_probe(struct platform_device *pdev) struct device_node *np; const char *reset_type; u32 duration; + u32 status; int ret; wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL); @@ -307,6 +310,10 @@ static int aspeed_wdt_probe(struct platform_device *pdev) writel(duration - 1, wdt->base + WDT_RESET_WIDTH); } + status = readl(wdt->base + WDT_TIMEOUT_STATUS); + if (status & WDT_TIMEOUT_STATUS_BOOT_SECONDARY) + wdt->wdd.bootstatus = WDIOF_CARDRESET; + ret = devm_watchdog_register_device(&pdev->dev, &wdt->wdd); if (ret) { dev_err(&pdev->dev, "failed to register\n"); diff --git a/drivers/watchdog/renesas_wdt.c b/drivers/watchdog/renesas_wdt.c index 6b8c6ddfe30b..514db5cc1595 100644 --- a/drivers/watchdog/renesas_wdt.c +++ b/drivers/watchdog/renesas_wdt.c @@ -121,7 +121,8 @@ static int rwdt_restart(struct watchdog_device *wdev, unsigned long action, } static const struct watchdog_info rwdt_ident = { - .options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT, + .options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | + WDIOF_CARDRESET, .identity = "Renesas WDT Watchdog", }; @@ -197,9 +198,10 @@ static int rwdt_probe(struct platform_device *pdev) return PTR_ERR(clk); pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); priv->clk_rate = clk_get_rate(clk); + priv->wdev.bootstatus = (readb_relaxed(priv->base + RWTCSRA) & + RWTCSRA_WOVF) ? WDIOF_CARDRESET : 0; pm_runtime_put(&pdev->dev); if (!priv->clk_rate) { diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c index 43d0cbb7ba0b..814cdf539b0f 100644 --- a/drivers/watchdog/sch311x_wdt.c +++ b/drivers/watchdog/sch311x_wdt.c @@ -299,7 +299,7 @@ static long sch311x_wdt_ioctl(struct file *file, unsigned int cmd, if (sch311x_wdt_set_heartbeat(new_timeout)) return -EINVAL; sch311x_wdt_keepalive(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(timeout, p); default: diff --git a/drivers/watchdog/w83977f_wdt.c b/drivers/watchdog/w83977f_wdt.c index 20e2bba10400..672b61a7f9a3 100644 --- a/drivers/watchdog/w83977f_wdt.c +++ b/drivers/watchdog/w83977f_wdt.c @@ -427,7 +427,7 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; wdt_keepalive(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(timeout, uarg.i); diff --git a/drivers/watchdog/wafer5823wdt.c b/drivers/watchdog/wafer5823wdt.c index db0da7ea4fd8..93c5b610e264 100644 --- a/drivers/watchdog/wafer5823wdt.c +++ b/drivers/watchdog/wafer5823wdt.c @@ -178,7 +178,7 @@ static long wafwdt_ioctl(struct file *file, unsigned int cmd, timeout = new_timeout; wafwdt_stop(); wafwdt_start(); - /* Fall */ + /* Fall through */ case WDIOC_GETTIMEOUT: return put_user(timeout, p); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 82e8f6edfb48..b12e37f27530 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -749,7 +749,7 @@ static int autofs4_dir_mkdir(struct inode *dir, autofs4_del_active(dentry); - inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555); + inode = autofs4_get_inode(dir->i_sb, S_IFDIR | mode); if (!inode) return -ENOMEM; d_add(dentry, inode); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 41e04183e4ce..4ad6f669fe34 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -377,10 +377,10 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, } else map_addr = vm_mmap(filep, addr, size, prot, type, off); - if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr)) - pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n", - task_pid_nr(current), current->comm, - (void *)addr); + if ((type & MAP_FIXED_NOREPLACE) && + PTR_ERR((void *)map_addr) == -EEXIST) + pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n", + task_pid_nr(current), current->comm, (void *)addr); return(map_addr); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5474ef14d6e6..2771cc56a622 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,6 +459,25 @@ struct btrfs_block_rsv { unsigned short full; unsigned short type; unsigned short failfast; + + /* + * Qgroup equivalent for @size @reserved + * + * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care + * about things like csum size nor how many tree blocks it will need to + * reserve. + * + * Qgroup cares more about net change of the extent usage. + * + * So for one newly inserted file extent, in worst case it will cause + * leaf split and level increase, nodesize for each file extent is + * already too much. + * + * In short, qgroup_size/reserved is the upper limit of possible needed + * qgroup metadata reservation. + */ + u64 qgroup_rsv_size; + u64 qgroup_rsv_reserved; }; /* @@ -714,6 +733,12 @@ struct btrfs_delayed_root; */ #define BTRFS_FS_EXCL_OP 16 +/* + * To info transaction_kthread we need an immediate commit so it doesn't + * need to wait for commit_interval + */ +#define BTRFS_FS_NEED_ASYNC_COMMIT 17 + struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 06ec8ab6d9ba..a8d492dbd3e7 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -556,6 +556,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, dst_rsv = &fs_info->delayed_block_rsv; num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); + + /* + * Here we migrate space rsv from transaction rsv, since have already + * reserved space when starting a transaction. So no need to reserve + * qgroup space here. + */ ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); if (!ret) { trace_btrfs_space_reservation(fs_info, "delayed_item", @@ -577,7 +583,10 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, return; rsv = &fs_info->delayed_block_rsv; - btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved); + /* + * Check btrfs_delayed_item_reserve_metadata() to see why we don't need + * to release/reserve qgroup space. + */ trace_btrfs_space_reservation(fs_info, "delayed_item", item->key.objectid, item->bytes_reserved, 0); @@ -602,9 +611,6 @@ static int btrfs_delayed_inode_reserve_metadata( num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); - if (ret < 0) - return ret; /* * btrfs_dirty_inode will update the inode under btrfs_join_transaction * which doesn't reserve space for speed. This is a problem since we @@ -616,6 +622,10 @@ static int btrfs_delayed_inode_reserve_metadata( */ if (!src_rsv || (!trans->bytes_reserved && src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { + ret = btrfs_qgroup_reserve_meta_prealloc(root, + fs_info->nodesize, true); + if (ret < 0) + return ret; ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, BTRFS_RESERVE_NO_FLUSH); /* @@ -634,6 +644,8 @@ static int btrfs_delayed_inode_reserve_metadata( "delayed_inode", btrfs_ino(inode), num_bytes, 1); + } else { + btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize); } return ret; } diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 9e98295de7ce..e1b0651686f7 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -540,8 +540,10 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref, struct btrfs_qgroup_extent_record *qrecord, u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, - int action, int is_data, int *qrecord_inserted_ret, + int action, int is_data, int is_system, + int *qrecord_inserted_ret, int *old_ref_mod, int *new_ref_mod) + { struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_root *delayed_refs; @@ -585,6 +587,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, head_ref->ref_mod = count_mod; head_ref->must_insert_reserved = must_insert_reserved; head_ref->is_data = is_data; + head_ref->is_system = is_system; head_ref->ref_tree = RB_ROOT; INIT_LIST_HEAD(&head_ref->ref_add_list); RB_CLEAR_NODE(&head_ref->href_node); @@ -772,6 +775,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; int qrecord_inserted; + int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID); BUG_ON(extent_op && extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); @@ -800,8 +804,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, */ head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, bytenr, num_bytes, 0, 0, action, 0, - &qrecord_inserted, old_ref_mod, - new_ref_mod); + is_system, &qrecord_inserted, + old_ref_mod, new_ref_mod); add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, level, action); @@ -868,7 +872,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, */ head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, bytenr, num_bytes, ref_root, reserved, - action, 1, &qrecord_inserted, + action, 1, 0, &qrecord_inserted, old_ref_mod, new_ref_mod); add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, @@ -898,9 +902,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); + /* + * extent_ops just modify the flags of an extent and they don't result + * in ref count changes, hence it's safe to pass false/0 for is_system + * argument + */ add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr, num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, - extent_op->is_data, NULL, NULL, NULL); + extent_op->is_data, 0, NULL, NULL, NULL); spin_unlock(&delayed_refs->lock); return 0; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 741869dbc316..7f00db50bd24 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -127,6 +127,7 @@ struct btrfs_delayed_ref_head { */ unsigned int must_insert_reserved:1; unsigned int is_data:1; + unsigned int is_system:1; unsigned int processing:1; }; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4ac8b1d21baf..60caa68c3618 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1824,6 +1824,7 @@ static int transaction_kthread(void *arg) now = get_seconds(); if (cur->state < TRANS_STATE_BLOCKED && + !test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) && (now < cur->start_time || now - cur->start_time < fs_info->commit_interval)) { spin_unlock(&fs_info->trans_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 75cfb80d2551..e2f16b68fcbf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, trace_run_delayed_ref_head(fs_info, head, 0); if (head->total_ref_mod < 0) { - struct btrfs_block_group_cache *cache; + struct btrfs_space_info *space_info; + u64 flags; - cache = btrfs_lookup_block_group(fs_info, head->bytenr); - ASSERT(cache); - percpu_counter_add(&cache->space_info->total_bytes_pinned, + if (head->is_data) + flags = BTRFS_BLOCK_GROUP_DATA; + else if (head->is_system) + flags = BTRFS_BLOCK_GROUP_SYSTEM; + else + flags = BTRFS_BLOCK_GROUP_METADATA; + space_info = __find_space_info(fs_info, flags); + ASSERT(space_info); + percpu_counter_add(&space_info->total_bytes_pinned, -head->num_bytes); - btrfs_put_block_group(cache); if (head->is_data) { spin_lock(&delayed_refs->lock); @@ -5559,14 +5565,18 @@ again: static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, - struct btrfs_block_rsv *dest, u64 num_bytes) + struct btrfs_block_rsv *dest, u64 num_bytes, + u64 *qgroup_to_release_ret) { struct btrfs_space_info *space_info = block_rsv->space_info; + u64 qgroup_to_release = 0; u64 ret; spin_lock(&block_rsv->lock); - if (num_bytes == (u64)-1) + if (num_bytes == (u64)-1) { num_bytes = block_rsv->size; + qgroup_to_release = block_rsv->qgroup_rsv_size; + } block_rsv->size -= num_bytes; if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; @@ -5575,6 +5585,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, } else { num_bytes = 0; } + if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { + qgroup_to_release = block_rsv->qgroup_rsv_reserved - + block_rsv->qgroup_rsv_size; + block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; + } else { + qgroup_to_release = 0; + } spin_unlock(&block_rsv->lock); ret = num_bytes; @@ -5597,6 +5614,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, space_info_add_old_bytes(fs_info, space_info, num_bytes); } + if (qgroup_to_release_ret) + *qgroup_to_release_ret = qgroup_to_release; return ret; } @@ -5738,17 +5757,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, struct btrfs_root *root = inode->root; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 num_bytes = 0; + u64 qgroup_num_bytes = 0; int ret = -ENOSPC; spin_lock(&block_rsv->lock); if (block_rsv->reserved < block_rsv->size) num_bytes = block_rsv->size - block_rsv->reserved; + if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size) + qgroup_num_bytes = block_rsv->qgroup_rsv_size - + block_rsv->qgroup_rsv_reserved; spin_unlock(&block_rsv->lock); if (num_bytes == 0) return 0; - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); + ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true); if (ret) return ret; ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); @@ -5756,7 +5779,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, block_rsv_add_bytes(block_rsv, num_bytes, 0); trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), num_bytes, 1); - } + + /* Don't forget to increase qgroup_rsv_reserved */ + spin_lock(&block_rsv->lock); + block_rsv->qgroup_rsv_reserved += qgroup_num_bytes; + spin_unlock(&block_rsv->lock); + } else + btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); return ret; } @@ -5777,20 +5806,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 released = 0; + u64 qgroup_to_release = 0; /* * Since we statically set the block_rsv->size we just want to say we * are releasing 0 bytes, and then we'll just get the reservation over * the size free'd. */ - released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0); + released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0, + &qgroup_to_release); if (released > 0) trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), released, 0); if (qgroup_free) - btrfs_qgroup_free_meta_prealloc(inode->root, released); + btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); else - btrfs_qgroup_convert_reserved_meta(inode->root, released); + btrfs_qgroup_convert_reserved_meta(inode->root, + qgroup_to_release); } void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, @@ -5802,7 +5834,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, if (global_rsv == block_rsv || block_rsv->space_info != global_rsv->space_info) global_rsv = NULL; - block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes); + block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL); } static void update_global_block_rsv(struct btrfs_fs_info *fs_info) @@ -5882,7 +5914,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) static void release_global_block_rsv(struct btrfs_fs_info *fs_info) { block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, - (u64)-1); + (u64)-1, NULL); WARN_ON(fs_info->trans_block_rsv.size > 0); WARN_ON(fs_info->trans_block_rsv.reserved > 0); WARN_ON(fs_info->chunk_block_rsv.size > 0); @@ -5906,7 +5938,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) WARN_ON_ONCE(!list_empty(&trans->new_bgs)); block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL, - trans->chunk_bytes_reserved); + trans->chunk_bytes_reserved, NULL); trans->chunk_bytes_reserved = 0; } @@ -6011,6 +6043,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, { struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 reserve_size = 0; + u64 qgroup_rsv_size = 0; u64 csum_leaves; unsigned outstanding_extents; @@ -6023,9 +6056,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, inode->csum_bytes); reserve_size += btrfs_calc_trans_metadata_size(fs_info, csum_leaves); + /* + * For qgroup rsv, the calculation is very simple: + * account one nodesize for each outstanding extent + * + * This is overestimating in most cases. + */ + qgroup_rsv_size = outstanding_extents * fs_info->nodesize; spin_lock(&block_rsv->lock); block_rsv->size = reserve_size; + block_rsv->qgroup_rsv_size = qgroup_rsv_size; spin_unlock(&block_rsv->lock); } @@ -8403,7 +8444,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u32 blocksize) { block_rsv_add_bytes(block_rsv, blocksize, 0); - block_rsv_release_bytes(fs_info, block_rsv, NULL, 0); + block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL); } /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0167a9c97c9c..f660ba1e5e58 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1748,7 +1748,7 @@ again: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes, - (ret != 0)); + true); if (ret) { btrfs_drop_pages(pages, num_pages); break; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e064c49c9a9a..d241285a0d2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -31,6 +31,7 @@ #include <linux/uio.h> #include <linux/magic.h> #include <linux/iversion.h> +#include <asm/unaligned.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -5905,11 +5906,13 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx) struct dir_entry *entry = addr; char *name = (char *)(entry + 1); - ctx->pos = entry->offset; - if (!dir_emit(ctx, name, entry->name_len, entry->ino, - entry->type)) + ctx->pos = get_unaligned(&entry->offset); + if (!dir_emit(ctx, name, get_unaligned(&entry->name_len), + get_unaligned(&entry->ino), + get_unaligned(&entry->type))) return 1; - addr += sizeof(struct dir_entry) + entry->name_len; + addr += sizeof(struct dir_entry) + + get_unaligned(&entry->name_len); ctx->pos++; } return 0; @@ -5999,14 +6002,15 @@ again: } entry = addr; - entry->name_len = name_len; + put_unaligned(name_len, &entry->name_len); name_ptr = (char *)(entry + 1); read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1), name_len); - entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; + put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)], + &entry->type); btrfs_dir_item_key_to_cpu(leaf, di, &location); - entry->ino = location.objectid; - entry->offset = found_key.offset; + put_unaligned(location.objectid, &entry->ino); + put_unaligned(found_key.offset, &entry->offset); entries++; addr += sizeof(struct dir_entry) + name_len; total_len += sizeof(struct dir_entry) + name_len; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 124276bba8cf..21a831d3d087 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -189,9 +189,10 @@ void btrfs_print_leaf(struct extent_buffer *l) fs_info = l->fs_info; nr = btrfs_header_nritems(l); - btrfs_info(fs_info, "leaf %llu total ptrs %d free space %d", - btrfs_header_bytenr(l), nr, - btrfs_leaf_free_space(fs_info, l)); + btrfs_info(fs_info, + "leaf %llu gen %llu total ptrs %d free space %d owner %llu", + btrfs_header_bytenr(l), btrfs_header_generation(l), nr, + btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(i); btrfs_item_key_to_cpu(l, &key, i); @@ -325,7 +326,7 @@ void btrfs_print_leaf(struct extent_buffer *l) } } -void btrfs_print_tree(struct extent_buffer *c) +void btrfs_print_tree(struct extent_buffer *c, bool follow) { struct btrfs_fs_info *fs_info; int i; u32 nr; @@ -342,15 +343,19 @@ void btrfs_print_tree(struct extent_buffer *c) return; } btrfs_info(fs_info, - "node %llu level %d total ptrs %d free spc %u", - btrfs_header_bytenr(c), level, nr, - (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr); + "node %llu level %d gen %llu total ptrs %d free spc %u owner %llu", + btrfs_header_bytenr(c), level, btrfs_header_generation(c), + nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr, + btrfs_header_owner(c)); for (i = 0; i < nr; i++) { btrfs_node_key_to_cpu(c, &key, i); - pr_info("\tkey %d (%llu %u %llu) block %llu\n", + pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n", i, key.objectid, key.type, key.offset, - btrfs_node_blockptr(c, i)); + btrfs_node_blockptr(c, i), + btrfs_node_ptr_generation(c, i)); } + if (!follow) + return; for (i = 0; i < nr; i++) { struct btrfs_key first_key; struct extent_buffer *next; @@ -372,7 +377,7 @@ void btrfs_print_tree(struct extent_buffer *c) if (btrfs_header_level(next) != level - 1) BUG(); - btrfs_print_tree(next); + btrfs_print_tree(next, follow); free_extent_buffer(next); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 4a98481688f4..e6bb38fd75ad 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -7,6 +7,6 @@ #define BTRFS_PRINT_TREE_H void btrfs_print_leaf(struct extent_buffer *l); -void btrfs_print_tree(struct extent_buffer *c); +void btrfs_print_tree(struct extent_buffer *c, bool follow); #endif diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 09c7e4fd550f..9fb758d5077a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/btrfs.h> +#include <linux/sizes.h> #include "ctree.h" #include "transaction.h" @@ -2375,8 +2376,21 @@ out: return ret; } -static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) +/* + * Two limits to commit transaction in advance. + * + * For RATIO, it will be 1/RATIO of the remaining limit + * (excluding data and prealloc meta) as threshold. + * For SIZE, it will be in byte unit as threshold. + */ +#define QGROUP_PERTRANS_RATIO 32 +#define QGROUP_PERTRANS_SIZE SZ_32M +static bool qgroup_check_limits(struct btrfs_fs_info *fs_info, + const struct btrfs_qgroup *qg, u64 num_bytes) { + u64 limit; + u64 threshold; + if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer) return false; @@ -2385,6 +2399,31 @@ static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl) return false; + /* + * Even if we passed the check, it's better to check if reservation + * for meta_pertrans is pushing us near limit. + * If there is too much pertrans reservation or it's near the limit, + * let's try commit transaction to free some, using transaction_kthread + */ + if ((qg->lim_flags & (BTRFS_QGROUP_LIMIT_MAX_RFER | + BTRFS_QGROUP_LIMIT_MAX_EXCL))) { + if (qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) + limit = qg->max_excl; + else + limit = qg->max_rfer; + threshold = (limit - qg->rsv.values[BTRFS_QGROUP_RSV_DATA] - + qg->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC]) / + QGROUP_PERTRANS_RATIO; + threshold = min_t(u64, threshold, QGROUP_PERTRANS_SIZE); + + /* + * Use transaction_kthread to commit transaction, so we no + * longer need to bother nested transaction nor lock context. + */ + if (qg->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > threshold) + btrfs_commit_transaction_locksafe(fs_info); + } + return true; } @@ -2434,7 +2473,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, qg = unode_aux_to_qgroup(unode); - if (enforce && !qgroup_check_limits(qg, num_bytes)) { + if (enforce && !qgroup_check_limits(fs_info, qg, num_bytes)) { ret = -EDQUOT; goto out; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 63fdcab64b01..c944b4769e3c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2267,6 +2267,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ cur_trans->state = TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); + clear_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); spin_lock(&fs_info->trans_lock); list_del_init(&cur_trans->list); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c88fccd80bc5..d8c0826bc2c7 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -199,6 +199,20 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans); int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, int wait_for_unblock); + +/* + * Try to commit transaction asynchronously, so this is safe to call + * even holding a spinlock. + * + * It's done by informing transaction_kthread to commit transaction without + * waiting for commit interval. + */ +static inline void btrfs_commit_transaction_locksafe( + struct btrfs_fs_info *fs_info) +{ + set_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags); + wake_up_process(fs_info->transaction_kthread); +} int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans); int btrfs_should_end_transaction(struct btrfs_trans_handle *trans); void btrfs_throttle(struct btrfs_fs_info *fs_info); diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index fe5567655662..0e74690d11bc 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -54,7 +54,7 @@ do { \ pr_debug_ ## ratefunc("%s: " \ fmt, __FILE__, ##__VA_ARGS__); \ } else if ((type) & VFS) { \ - pr_err_ ## ratefunc("CuIFS VFS: " \ + pr_err_ ## ratefunc("CIFS VFS: " \ fmt, ##__VA_ARGS__); \ } else if ((type) & NOISY && (NOISY != 0)) { \ pr_debug_ ## ratefunc(fmt, ##__VA_ARGS__); \ diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 81ba6e0d88d8..925844343038 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -684,6 +684,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, goto mknod_out; } + if (!S_ISCHR(mode) && !S_ISBLK(mode)) + goto mknod_out; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) goto mknod_out; @@ -692,10 +695,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { - kfree(full_path); rc = -ENOMEM; - free_xid(xid); - return rc; + goto mknod_out; } if (backup_cred(cifs_sb)) @@ -742,7 +743,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, pdev->minor = cpu_to_le64(MINOR(device_number)); rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, &bytes_written, iov, 1); - } /* else if (S_ISFIFO) */ + } tcon->ses->server->ops->close(xid, tcon, &fid); d_drop(direntry); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4bcd4e838b47..23fd430fe74a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3462,7 +3462,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) * If the page is mmap'ed into a process' page tables, then we need to make * sure that it doesn't change while being written back. */ -static int +static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b4ae932ea134..38ebf3f357d2 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1452,7 +1452,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct cifs_fid fid; struct kvec err_iov = {NULL, 0}; - struct smb2_err_rsp *err_buf = NULL; + struct smb2_err_rsp *err_buf; struct smb2_symlink_err_rsp *symlink; unsigned int sub_len; unsigned int sub_offset; @@ -1476,7 +1476,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov); - if (!rc || !err_buf) { + if (!rc || !err_iov.iov_base) { kfree(utf16_path); return -ENOENT; } diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 5008af546dd1..87817ddcc096 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1028,7 +1028,7 @@ static int smbd_post_send(struct smbd_connection *info, for (i = 0; i < request->num_sge; i++) { log_rdma_send(INFO, "rdma_request sge[%d] addr=%llu length=%u\n", - i, request->sge[0].addr, request->sge[0].length); + i, request->sge[i].addr, request->sge[i].length); ib_dma_sync_single_for_device( info->id->device, request->sge[i].addr, @@ -2139,6 +2139,10 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) goto done; } + cifs_dbg(FYI, "Sending smb (RDMA): smb_len=%u\n", buflen); + for (i = 0; i < rqst->rq_nvec-1; i++) + dump_smb(iov[i].iov_base, iov[i].iov_len); + remaining_data_length = buflen; log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d " @@ -2194,6 +2198,8 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) goto done; } i++; + if (i == rqst->rq_nvec) + break; } start = i; buflen = 0; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4b12ba70a895..47d7c151fcba 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -745,11 +745,12 @@ int inode_congested(struct inode *inode, int cong_bits) */ if (inode && inode_to_wb_is_valid(inode)) { struct bdi_writeback *wb; - bool locked, congested; + struct wb_lock_cookie lock_cookie = {}; + bool congested; - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &lock_cookie); congested = wb_congested(wb, cong_bits); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &lock_cookie); return congested; } diff --git a/fs/proc/base.c b/fs/proc/base.c index eafa39a3a88c..1b2ede6abcdf 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1693,6 +1693,12 @@ void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t uid; kgid_t gid; + if (unlikely(task->flags & PF_KTHREAD)) { + *ruid = GLOBAL_ROOT_UID; + *rgid = GLOBAL_ROOT_GID; + return; + } + /* Default to the tasks effective ownership */ rcu_read_lock(); cred = __task_cred(task); diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index a000d7547479..b572cc865b92 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -24,7 +24,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v) LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), nr_running(), nr_threads, - idr_get_cursor(&task_active_pid_ns(current)->idr)); + idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 65ae54659833..c486ad4b43f0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1310,9 +1310,11 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION else if (is_swap_pmd(pmd)) { swp_entry_t entry = pmd_to_swp_entry(pmd); + unsigned long offset = swp_offset(entry); + offset += (addr & ~PMD_MASK) >> PAGE_SHIFT; frame = swp_type(entry) | - (swp_offset(entry) << MAX_SWAPFILES_SHIFT); + (offset << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; @@ -1332,6 +1334,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, break; if (pm->show_pfn && (flags & PM_PRESENT)) frame++; + else if (flags & PM_SWAP) + frame += (1 << MAX_SWAPFILES_SHIFT); } spin_unlock(ptl); return err; diff --git a/include/drm/drm_hdcp.h b/include/drm/drm_hdcp.h index 562fa7df2637..98e63d870139 100644 --- a/include/drm/drm_hdcp.h +++ b/include/drm/drm_hdcp.h @@ -19,7 +19,7 @@ #define DRM_HDCP_RI_LEN 2 #define DRM_HDCP_V_PRIME_PART_LEN 4 #define DRM_HDCP_V_PRIME_NUM_PARTS 5 -#define DRM_HDCP_NUM_DOWNSTREAM(x) (x & 0x3f) +#define DRM_HDCP_NUM_DOWNSTREAM(x) (x & 0x7f) #define DRM_HDCP_MAX_CASCADE_EXCEEDED(x) (x & BIT(3)) #define DRM_HDCP_MAX_DEVICE_EXCEEDED(x) (x & BIT(7)) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index bfe86b54f6c1..0bd432a4d7bd 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -223,6 +223,11 @@ static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync) set_wb_congested(bdi->wb.congested, sync); } +struct wb_lock_cookie { + bool locked; + unsigned long flags; +}; + #ifdef CONFIG_CGROUP_WRITEBACK /** diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f6be4b0b6c18..72ca0f3d39f3 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -347,7 +347,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) /** * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction * @inode: target inode - * @lockedp: temp bool output param, to be passed to the end function + * @cookie: output param, to be passed to the end function * * The caller wants to access the wb associated with @inode but isn't * holding inode->i_lock, the i_pages lock or wb->list_lock. This @@ -355,12 +355,12 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) * association doesn't change until the transaction is finished with * unlocked_inode_to_wb_end(). * - * The caller must call unlocked_inode_to_wb_end() with *@lockdep - * afterwards and can't sleep during transaction. IRQ may or may not be - * disabled on return. + * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and + * can't sleep during the transaction. IRQs may or may not be disabled on + * return. */ static inline struct bdi_writeback * -unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { rcu_read_lock(); @@ -368,10 +368,10 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) * Paired with store_release in inode_switch_wb_work_fn() and * ensures that we see the new wb if we see cleared I_WB_SWITCH. */ - *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; + cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; - if (unlikely(*lockedp)) - xa_lock_irq(&inode->i_mapping->i_pages); + if (unlikely(cookie->locked)) + xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags); /* * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages @@ -383,12 +383,13 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) /** * unlocked_inode_to_wb_end - end inode wb access transaction * @inode: target inode - * @locked: *@lockedp from unlocked_inode_to_wb_begin() + * @cookie: @cookie from unlocked_inode_to_wb_begin() */ -static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +static inline void unlocked_inode_to_wb_end(struct inode *inode, + struct wb_lock_cookie *cookie) { - if (unlikely(locked)) - xa_unlock_irq(&inode->i_mapping->i_pages); + if (unlikely(cookie->locked)) + xa_unlock_irqrestore(&inode->i_mapping->i_pages, cookie->flags); rcu_read_unlock(); } @@ -435,12 +436,13 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) } static inline struct bdi_writeback * -unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { return inode_to_wb(inode); } -static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +static inline void unlocked_inode_to_wb_end(struct inode *inode, + struct wb_lock_cookie *cookie) { } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ee5275e7d4df..38ebbc61ed99 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -33,6 +33,7 @@ struct bpf_map_ops { void (*map_release)(struct bpf_map *map, struct file *map_file); void (*map_free)(struct bpf_map *map); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); + void (*map_release_uref)(struct bpf_map *map); /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); @@ -353,8 +354,8 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, struct bpf_prog *old_prog); int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, - __u32 __user *prog_ids, u32 request_cnt, - __u32 __user *prog_cnt); + u32 *prog_ids, u32 request_cnt, + u32 *prog_cnt); int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, struct bpf_prog *exclude_prog, struct bpf_prog *include_prog, @@ -365,6 +366,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, struct bpf_prog **_prog, *__prog; \ struct bpf_prog_array *_array; \ u32 _ret = 1; \ + preempt_disable(); \ rcu_read_lock(); \ _array = rcu_dereference(array); \ if (unlikely(check_non_null && !_array))\ @@ -376,6 +378,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, } \ _out: \ rcu_read_unlock(); \ + preempt_enable_no_resched(); \ _ret; \ }) @@ -448,7 +451,6 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); -void bpf_fd_array_map_clear(struct bpf_map *map); int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index ceb96ecab96e..7d98e263e048 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -25,6 +25,9 @@ #define __SANITIZE_ADDRESS__ #endif +#undef __no_sanitize_address +#define __no_sanitize_address __attribute__((no_sanitize("address"))) + /* Clang doesn't have a way to turn it off per-function, yet. */ #ifdef __noretpoline #undef __noretpoline diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index edfeaba95429..a1a959ba24ff 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _LINUX_CORESIGHT_PMU_H diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index ebe41811ed34..b32cd2062f18 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -310,6 +310,8 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, * fields should be ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS * instead of the latter), any change to them will be overwritten * by kernel. Returns a negative error code or zero. + * @get_fecparam: Get the network device Forward Error Correction parameters. + * @set_fecparam: Set the network device Forward Error Correction parameters. * * All operations are optional (i.e. the function pointer may be set * to %NULL) and callers must take this into account. Callers must diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 4cb7aeeafce0..2e55e4cdbd8a 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -21,7 +21,7 @@ struct macvlan_dev { struct hlist_node hlist; struct macvlan_port *port; struct net_device *lowerdev; - void *fwd_priv; + void *accel_priv; struct vlan_pcpu_stats __percpu *pcpu_stats; DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); @@ -61,10 +61,6 @@ extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); -extern void macvlan_count_rx(const struct macvlan_dev *vlan, - unsigned int len, bool success, - bool multicast); - extern void macvlan_dellink(struct net_device *dev, struct list_head *head); extern int macvlan_link_register(struct rtnl_link_ops *ops); @@ -86,4 +82,27 @@ macvlan_dev_real_dev(const struct net_device *dev) } #endif +static inline void *macvlan_accel_priv(struct net_device *dev) +{ + struct macvlan_dev *macvlan = netdev_priv(dev); + + return macvlan->accel_priv; +} + +static inline bool macvlan_supports_dest_filter(struct net_device *dev) +{ + struct macvlan_dev *macvlan = netdev_priv(dev); + + return macvlan->mode == MACVLAN_MODE_PRIVATE || + macvlan->mode == MACVLAN_MODE_VEPA || + macvlan->mode == MACVLAN_MODE_BRIDGE; +} + +static inline int macvlan_release_l2fw_offload(struct net_device *dev) +{ + struct macvlan_dev *macvlan = netdev_priv(dev); + + macvlan->accel_priv = NULL; + return dev_uc_add(macvlan->lowerdev, dev->dev_addr); +} #endif /* _LINUX_IF_MACVLAN_H */ diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h index 29ed8fd6379a..db99240d00bd 100644 --- a/include/linux/net_dim.h +++ b/include/linux/net_dim.h @@ -103,11 +103,12 @@ enum { #define NET_DIM_PARAMS_NUM_PROFILES 5 /* Adaptive moderation profiles */ #define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 #define NET_DIM_DEF_PROFILE_CQE 1 #define NET_DIM_DEF_PROFILE_EQE 1 /* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */ -#define NET_DIM_EQE_PROFILES { \ +#define NET_DIM_RX_EQE_PROFILES { \ {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ @@ -115,7 +116,7 @@ enum { {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ } -#define NET_DIM_CQE_PROFILES { \ +#define NET_DIM_RX_CQE_PROFILES { \ {2, 256}, \ {8, 128}, \ {16, 64}, \ @@ -123,32 +124,68 @@ enum { {64, 64} \ } +#define NET_DIM_TX_EQE_PROFILES { \ + {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ +} + +#define NET_DIM_TX_CQE_PROFILES { \ + {5, 128}, \ + {8, 64}, \ + {16, 32}, \ + {32, 32}, \ + {64, 32} \ +} + static const struct net_dim_cq_moder -profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { - NET_DIM_EQE_PROFILES, - NET_DIM_CQE_PROFILES, +rx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_RX_EQE_PROFILES, + NET_DIM_RX_CQE_PROFILES, }; -static inline struct net_dim_cq_moder net_dim_get_profile(u8 cq_period_mode, - int ix) +static const struct net_dim_cq_moder +tx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = { + NET_DIM_TX_EQE_PROFILES, + NET_DIM_TX_CQE_PROFILES, +}; + +static inline struct net_dim_cq_moder +net_dim_get_rx_moderation(u8 cq_period_mode, int ix) { - struct net_dim_cq_moder cq_moder; + struct net_dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix]; - cq_moder = profile[cq_period_mode][ix]; cq_moder.cq_period_mode = cq_period_mode; return cq_moder; } -static inline struct net_dim_cq_moder net_dim_get_def_profile(u8 rx_cq_period_mode) +static inline struct net_dim_cq_moder +net_dim_get_def_rx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; + + return net_dim_get_rx_moderation(cq_period_mode, profile_ix); +} + +static inline struct net_dim_cq_moder +net_dim_get_tx_moderation(u8 cq_period_mode, int ix) { - int default_profile_ix; + struct net_dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix]; + + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; +} - if (rx_cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE) - default_profile_ix = NET_DIM_DEF_PROFILE_CQE; - else /* NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE */ - default_profile_ix = NET_DIM_DEF_PROFILE_EQE; +static inline struct net_dim_cq_moder +net_dim_get_def_tx_moderation(u8 cq_period_mode) +{ + u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ? + NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE; - return net_dim_get_profile(rx_cq_period_mode, default_profile_ix); + return net_dim_get_tx_moderation(cq_period_mode, profile_ix); } static inline bool net_dim_on_top(struct net_dim *dim) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 35b79f47a13d..fe2f3b30960e 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -55,8 +55,9 @@ enum { NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */ NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */ NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */ + NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_UDP_BIT, + NETIF_F_GSO_UDP_L4_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ @@ -147,6 +148,7 @@ enum { #define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM) #define NETIF_F_RX_UDP_TUNNEL_PORT __NETIF_F(RX_UDP_TUNNEL_PORT) #define NETIF_F_HW_TLS_RECORD __NETIF_F(HW_TLS_RECORD) +#define NETIF_F_GSO_UDP_L4 __NETIF_F(GSO_UDP_L4) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) @@ -216,6 +218,7 @@ enum { NETIF_F_GSO_GRE_CSUM | \ NETIF_F_GSO_IPXIP4 | \ NETIF_F_GSO_IPXIP6 | \ + NETIF_F_GSO_UDP_L4 | \ NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 14e0777ffcfb..366c32891158 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4186,6 +4186,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b5b2bc9eacca..e53f9c7c2809 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -159,6 +159,9 @@ struct qed_dcbx_get { enum qed_nvm_images { QED_NVM_IMAGE_ISCSI_CFG, QED_NVM_IMAGE_FCOE_CFG, + QED_NVM_IMAGE_NVM_CFG1, + QED_NVM_IMAGE_DEFAULT_CFG, + QED_NVM_IMAGE_NVM_META, }; struct qed_link_eee_params { diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 1f8ad121eb43..4e1f535c2034 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -836,9 +836,8 @@ out: * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ static inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, @@ -866,9 +865,8 @@ static inline int rhashtable_insert_fast( * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ static inline int rhltable_insert_key( struct rhltable *hlt, const void *key, struct rhlist_head *list, @@ -890,9 +888,8 @@ static inline int rhltable_insert_key( * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ static inline int rhltable_insert( struct rhltable *hlt, struct rhlist_head *list, @@ -922,9 +919,8 @@ static inline int rhltable_insert( * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ static inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, @@ -981,9 +977,8 @@ static inline void *rhashtable_lookup_get_insert_fast( * * Lookups may occur in parallel with hashtable mutations and resizing. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. * * Returns zero on success. */ @@ -1134,8 +1129,8 @@ static inline int __rhashtable_remove_fast( * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. + * Will automatically shrink the table if permitted when residency drops + * below 30%. * * Returns zero on success, -ENOENT if the entry could not be found. */ @@ -1156,8 +1151,8 @@ static inline int rhashtable_remove_fast( * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. + * Will automatically shrink the table if permitted when residency drops + * below 30% * * Returns zero on success, -ENOENT if the entry could not be found. */ @@ -1273,8 +1268,9 @@ static inline int rhashtable_walk_init(struct rhashtable *ht, * For a completely stable walk you should construct your own data * structure outside the hash table. * - * This function may sleep so you must not call it from interrupt - * context or with spin locks held. + * This function may be called from any process context, including + * non-preemptable context, but cannot be called from softirq or + * hardirq context. * * You must call rhashtable_walk_exit after this function returns. */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d274059529eb..a4a5c0c5cba8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -573,6 +573,8 @@ enum { SKB_GSO_ESP = 1 << 15, SKB_GSO_UDP = 1 << 16, + + SKB_GSO_UDP_L4 = 1 << 17, }; #if BITS_PER_LONG > 32 diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 34f053a150a9..cf2862bd134a 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -43,11 +43,7 @@ enum { #define THREAD_ALIGN THREAD_SIZE #endif -#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) -# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) -#else -# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT) -#endif +#define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) /* * flag set/clear/test wrappers diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h index af4114d5dc17..3616b4becb59 100644 --- a/include/linux/timekeeping32.h +++ b/include/linux/timekeeping32.h @@ -9,9 +9,6 @@ extern void do_gettimeofday(struct timeval *tv); unsigned long get_seconds(void); -/* does not take xtime_lock */ -struct timespec __current_kernel_time(void); - static inline struct timespec current_kernel_time(void) { struct timespec64 now = current_kernel_time64(); diff --git a/include/linux/timer.h b/include/linux/timer.h index 2448f9cc48a3..7b066fd38248 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -8,8 +8,6 @@ #include <linux/debugobjects.h> #include <linux/stringify.h> -struct tvec_base; - struct timer_list { /* * All fields that change during normal runtime grouped to the diff --git a/include/linux/udp.h b/include/linux/udp.h index eaea63bc79bb..ca840345571b 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -55,6 +55,7 @@ struct udp_sock { * when the socket is uncorked. */ __u16 len; /* total length of pending frames */ + __u16 gso_size; /* * Fields specific to UDP-Lite. */ @@ -87,6 +88,8 @@ struct udp_sock { int forward_deficit; }; +#define UDP_MAX_SEGMENTS (1 << 6UL) + static inline struct udp_sock *udp_sk(const struct sock *sk) { return (struct udp_sock *)sk; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index e5cfcfc7dd93..b473df5b9512 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -75,7 +75,8 @@ struct fib_rules_ops { int (*configure)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *, - struct nlattr **); + struct nlattr **, + struct netlink_ext_ack *); int (*delete)(struct fib_rule *); int (*compare)(struct fib_rule *, struct fib_rule_hdr *, diff --git a/include/net/ife.h b/include/net/ife.h index 44b9c00f7223..e117617e3c34 100644 --- a/include/net/ife.h +++ b/include/net/ife.h @@ -12,7 +12,8 @@ void *ife_encode(struct sk_buff *skb, u16 metalen); void *ife_decode(struct sk_buff *skb, u16 *metalen); -void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen); +void *ife_tlv_meta_decode(void *skbdata, const void *ifehdr_end, u16 *attrtype, + u16 *dlen, u16 *totlen); int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 0a671c32d6b9..83d5b3c2ac42 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -147,6 +147,7 @@ struct inet_cork { __u8 ttl; __s16 tos; char priority; + __u16 gso_size; }; struct inet_cork_full { diff --git a/include/net/ip.h b/include/net/ip.h index dc4a2d6e58a5..bada1f1f871e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -76,6 +76,7 @@ struct ipcm_cookie { __u8 ttl; __s16 tos; char priority; + __u16 gso_size; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) @@ -171,7 +172,7 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, - unsigned int flags); + struct inet_cork *cork, unsigned int flags); static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 68b167d98879..0a872a7c33c8 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -298,6 +298,7 @@ struct ipcm6_cookie { __s16 tclass; __s8 dontfrag; struct ipv6_txoptions *opt; + __u16 gso_size; }; static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) @@ -950,6 +951,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, void *from, int length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, + struct inet_cork_full *cork, const struct sockcm_cookie *sockc); static inline struct sk_buff *ip6_finish_skb(struct sock *sk) diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index 5c40f118c0fa..df528a623548 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -97,6 +97,7 @@ static __inline__ char llc_backlog_type(struct sk_buff *skb) struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); +void llc_sk_stop_all_timers(struct sock *sk, bool sync); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index e421f86af043..6c1eecd56a4d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -246,6 +246,7 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 +#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 @@ -526,5 +527,21 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, } while (read_seqretry(&n->ha_lock, seq)); } - +static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags, + int *notify) +{ + u8 ndm_flags = 0; + + if (!(flags & NEIGH_UPDATE_F_ADMIN)) + return; + + ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; + if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { + if (ndm_flags & NTF_EXT_LEARNED) + neigh->flags |= NTF_EXT_LEARNED; + else + neigh->flags &= ~NTF_EXT_LEARNED; + *notify = 1; + } +} #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 97b3a54579c8..c978a31b0f84 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -43,6 +43,7 @@ struct netns_sysctl_ipv6 { int max_hbh_opts_cnt; int max_dst_opts_len; int max_hbh_opts_len; + int seg6_flowlabel; }; struct netns_ipv6 { diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2d0e782c9055..f4b657478a30 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -207,7 +207,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, int len, __u8 flags, gfp_t gfp); struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, const __u32 lowest_tsn); -struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc); +struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc); struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a0ec462bc1a9..05594b248e52 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -2091,10 +2091,6 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, enum sctp_transport_cmd command, sctp_sn_error_t error); struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32); -struct sctp_transport *sctp_assoc_is_match(struct sctp_association *, - struct net *, - const union sctp_addr *, - const union sctp_addr *); void sctp_assoc_migrate(struct sctp_association *, struct sock *); int sctp_assoc_update(struct sctp_association *old, struct sctp_association *new); diff --git a/include/net/udp.h b/include/net/udp.h index 0676b272f6ac..05990746810e 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -174,6 +174,10 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, struct udphdr *uh, udp_lookup_t lookup); int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); +struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, + netdev_features_t features, + unsigned int mss, __sum16 check); + static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { struct udphdr *uh; @@ -269,6 +273,7 @@ int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); +int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 3dd68029d77a..c1a5284713b8 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -10,6 +10,7 @@ #include <linux/tracepoint.h> #include <net/ipv6.h> #include <net/tcp.h> +#include <linux/sock_diag.h> #define TP_STORE_V4MAPPED(__entry, saddr, daddr) \ do { \ @@ -113,7 +114,7 @@ DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset, */ DECLARE_EVENT_CLASS(tcp_event_sk, - TP_PROTO(const struct sock *sk), + TP_PROTO(struct sock *sk), TP_ARGS(sk), @@ -125,6 +126,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk, __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) __array(__u8, daddr_v6, 16) + __field(__u64, sock_cookie) ), TP_fast_assign( @@ -144,24 +146,34 @@ DECLARE_EVENT_CLASS(tcp_event_sk, TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); + + __entry->sock_cookie = sock_gen_cookie(sk); ), - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx", __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, - __entry->saddr_v6, __entry->daddr_v6) + __entry->saddr_v6, __entry->daddr_v6, + __entry->sock_cookie) ); DEFINE_EVENT(tcp_event_sk, tcp_receive_reset, - TP_PROTO(const struct sock *sk), + TP_PROTO(struct sock *sk), TP_ARGS(sk) ); DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock, - TP_PROTO(const struct sock *sk), + TP_PROTO(struct sock *sk), + + TP_ARGS(sk) +); + +DEFINE_EVENT(tcp_event_sk, tcp_rcv_space_adjust, + + TP_PROTO(struct sock *sk), TP_ARGS(sk) ); @@ -232,6 +244,7 @@ TRACE_EVENT(tcp_probe, __field(__u32, snd_wnd) __field(__u32, srtt) __field(__u32, rcv_wnd) + __field(__u64, sock_cookie) ), TP_fast_assign( @@ -256,15 +269,14 @@ TRACE_EVENT(tcp_probe, __entry->rcv_wnd = tp->rcv_wnd; __entry->ssthresh = tcp_current_ssthresh(sk); __entry->srtt = tp->srtt_us >> 3; + __entry->sock_cookie = sock_gen_cookie(sk); ), - TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x " - "snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u " - "rcv_wnd=%u", + TP_printk("src=%pISpc dest=%pISpc mark=%#x length=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx", __entry->saddr, __entry->daddr, __entry->mark, __entry->length, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, - __entry->srtt, __entry->rcv_wnd) + __entry->srtt, __entry->rcv_wnd, __entry->sock_cookie) ); #endif /* _TRACE_TCP_H */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 912b85b52344..b8e288a1f740 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -650,11 +650,23 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* - * Indicates that the content of PERF_SAMPLE_IP points to - * the actual instruction that triggered the event. See also - * perf_event_attr::precise_ip. + * These PERF_RECORD_MISC_* flags below are safely reused + * for the following events: + * + * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events + * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events + * + * + * PERF_RECORD_MISC_EXACT_IP: + * Indicates that the content of PERF_SAMPLE_IP points to + * the actual instruction that triggered the event. See also + * perf_event_attr::precise_ip. + * + * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: + * Indicates that thread was preempted in TASK_RUNNING state. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) +#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) /* * Reserve the last bit to indicate some extended misc field */ diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index c34f4490d025..26ee91300e3e 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -35,6 +35,9 @@ /* Clear the entropy pool and associated counters. (Superuser only.) */ #define RNDCLEARPOOL _IO( 'R', 0x06 ) +/* Reseed CRNG. (Superuser only.) */ +#define RNDRESEEDCRNG _IO( 'R', 0x07 ) + struct rand_pool_info { int entropy_count; int buf_size; diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index efb7b5991c2f..09d00f8c442b 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -32,6 +32,7 @@ struct udphdr { #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ #define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ +#define UDP_SEGMENT 103 /* Set GSO segmentation size */ /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 02a189339381..0fd8d8f1a398 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -526,7 +526,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr) } /* decrement refcnt of all bpf_progs that are stored in this map */ -void bpf_fd_array_map_clear(struct bpf_map *map) +static void bpf_fd_array_map_clear(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; @@ -545,6 +545,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_get_ptr = prog_fd_array_get_ptr, .map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, + .map_release_uref = bpf_fd_array_map_clear, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d315b393abdd..ba03ec39efb3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1572,13 +1572,32 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) return cnt; } +static bool bpf_prog_array_copy_core(struct bpf_prog **prog, + u32 *prog_ids, + u32 request_cnt) +{ + int i = 0; + + for (; *prog; prog++) { + if (*prog == &dummy_bpf_prog.prog) + continue; + prog_ids[i] = (*prog)->aux->id; + if (++i == request_cnt) { + prog++; + break; + } + } + + return !!(*prog); +} + int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, __u32 __user *prog_ids, u32 cnt) { struct bpf_prog **prog; unsigned long err = 0; - u32 i = 0, *ids; bool nospc; + u32 *ids; /* users of this function are doing: * cnt = bpf_prog_array_length(); @@ -1595,16 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, return -ENOMEM; rcu_read_lock(); prog = rcu_dereference(progs)->progs; - for (; *prog; prog++) { - if (*prog == &dummy_bpf_prog.prog) - continue; - ids[i] = (*prog)->aux->id; - if (++i == cnt) { - prog++; - break; - } - } - nospc = !!(*prog); + nospc = bpf_prog_array_copy_core(prog, ids, cnt); rcu_read_unlock(); err = copy_to_user(prog_ids, ids, cnt * sizeof(u32)); kfree(ids); @@ -1683,22 +1693,25 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, } int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, - __u32 __user *prog_ids, u32 request_cnt, - __u32 __user *prog_cnt) + u32 *prog_ids, u32 request_cnt, + u32 *prog_cnt) { + struct bpf_prog **prog; u32 cnt = 0; if (array) cnt = bpf_prog_array_length(array); - if (copy_to_user(prog_cnt, &cnt, sizeof(cnt))) - return -EFAULT; + *prog_cnt = cnt; /* return early if user requested only program count or nothing to copy */ if (!request_cnt || !cnt) return 0; - return bpf_prog_array_copy_to_user(array, prog_ids, request_cnt); + /* this function is called under trace/bpf_trace.c: bpf_event_mutex */ + prog = rcu_dereference_check(array, 1)->progs; + return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC + : 0; } static void bpf_prog_free_deferred(struct work_struct *work) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 8dd9210d7db7..634415c7fbcd 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -43,6 +43,7 @@ #include <net/tcp.h> #include <linux/ptr_ring.h> #include <net/inet_common.h> +#include <linux/sched/signal.h> #define SOCK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) @@ -523,8 +524,6 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes, i = md->sg_start; do { - r->sg_data[i] = md->sg_data[i]; - size = (apply && apply_bytes < md->sg_data[i].length) ? apply_bytes : md->sg_data[i].length; @@ -535,6 +534,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes, } sk_mem_charge(sk, size); + r->sg_data[i] = md->sg_data[i]; r->sg_data[i].length = size; md->sg_data[i].length -= size; md->sg_data[i].offset += size; @@ -732,6 +732,26 @@ out_err: return err; } +static int bpf_wait_data(struct sock *sk, + struct smap_psock *psk, int flags, + long timeo, int *err) +{ + int rc; + + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + rc = sk_wait_event(sk, &timeo, + !list_empty(&psk->ingress) || + !skb_queue_empty(&sk->sk_receive_queue), + &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + + return rc; +} + static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { @@ -755,6 +775,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); lock_sock(sk); +bytes_ready: while (copied != len) { struct scatterlist *sg; struct sk_msg_buff *md; @@ -809,6 +830,28 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } } + if (!copied) { + long timeo; + int data; + int err = 0; + + timeo = sock_rcvtimeo(sk, nonblock); + data = bpf_wait_data(sk, psock, flags, timeo, &err); + + if (data) { + if (!skb_queue_empty(&sk->sk_receive_queue)) { + release_sock(sk); + smap_release_sock(psock, sk); + copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return copied; + } + goto bytes_ready; + } + + if (err) + copied = err; + } + release_sock(sk); smap_release_sock(psock, sk); return copied; @@ -1442,9 +1485,6 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); - if (attr->value_size > KMALLOC_MAX_SIZE) - return ERR_PTR(-E2BIG); - err = bpf_tcp_ulp_register(); if (err && err != -EEXIST) return ERR_PTR(err); @@ -1834,7 +1874,7 @@ static int sock_map_update_elem(struct bpf_map *map, return err; } -static void sock_map_release(struct bpf_map *map, struct file *map_file) +static void sock_map_release(struct bpf_map *map) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); struct bpf_prog *orig; @@ -1858,7 +1898,7 @@ const struct bpf_map_ops sock_map_ops = { .map_get_next_key = sock_map_get_next_key, .map_update_elem = sock_map_update_elem, .map_delete_elem = sock_map_delete_elem, - .map_release = sock_map_release, + .map_release_uref = sock_map_release, }; BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7bb4ff1c770a..0bd2944eafb9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -260,8 +260,8 @@ static void bpf_map_free_deferred(struct work_struct *work) static void bpf_map_put_uref(struct bpf_map *map) { if (atomic_dec_and_test(&map->usercnt)) { - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) - bpf_fd_array_map_clear(map); + if (map->ops->map_release_uref) + map->ops->map_release_uref(map); } } diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 772a43fea825..c187aa3df3c8 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -119,23 +119,20 @@ int get_callchain_buffers(int event_max_stack) goto exit; } - if (count > 1) { - /* If the allocation failed, give up */ - if (!callchain_cpus_entries) - err = -ENOMEM; - /* - * If requesting per event more than the global cap, - * return a different error to help userspace figure - * this out. - * - * And also do it here so that we have &callchain_mutex held. - */ - if (event_max_stack > sysctl_perf_event_max_stack) - err = -EOVERFLOW; + /* + * If requesting per event more than the global cap, + * return a different error to help userspace figure + * this out. + * + * And also do it here so that we have &callchain_mutex held. + */ + if (event_max_stack > sysctl_perf_event_max_stack) { + err = -EOVERFLOW; goto exit; } - err = alloc_callchain_buffers(); + if (count == 1) + err = alloc_callchain_buffers(); exit: if (err) atomic_dec(&nr_callchain_events); diff --git a/kernel/events/core.c b/kernel/events/core.c index 2d5fe26551f8..67612ce359ad 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7587,6 +7587,10 @@ static void perf_event_switch(struct task_struct *task, }, }; + if (!sched_in && task->state == TASK_RUNNING) + switch_event.event_id.header.misc |= + PERF_RECORD_MISC_SWITCH_OUT_PREEMPT; + perf_iterate_sb(perf_event_switch_output, &switch_event, NULL); @@ -10205,9 +10209,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, * __u16 sample size limit. */ if (attr->sample_stack_user >= USHRT_MAX) - ret = -EINVAL; + return -EINVAL; else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) - ret = -EINVAL; + return -EINVAL; } if (!attr->sample_max_stack) diff --git a/kernel/fork.c b/kernel/fork.c index 242c8c93d285..a5d21c42acfc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -216,10 +216,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) if (!s) continue; -#ifdef CONFIG_DEBUG_KMEMLEAK /* Clear stale pointers from reused stack. */ memset(s->addr, 0, THREAD_SIZE); -#endif + tsk->stack_vm_area = s; return s->addr; } diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 2541bd89f20e..5a6251ac6f7a 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1205,10 +1205,12 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, u64 *newval, u64 *oldval) { u64 now; + int ret; WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED); + ret = cpu_timer_sample_group(clock_idx, tsk, &now); - if (oldval && cpu_timer_sample_group(clock_idx, tsk, &now) != -EINVAL) { + if (oldval && ret != -EINVAL) { /* * We are setting itimer. The *oldval is absolute and we update * it to be relative, *newval argument is relative and we update diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index c1f518e7aa80..6fe615d57ebb 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -82,16 +82,15 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || !tick_device_is_functional(dev)) { - printk(KERN_INFO "Clockevents: " - "could not switch to one-shot mode:"); + pr_info("Clockevents: could not switch to one-shot mode:"); if (!dev) { - printk(" no tick device\n"); + pr_cont(" no tick device\n"); } else { if (!tick_device_is_functional(dev)) - printk(" %s is not functional.\n", dev->name); + pr_cont(" %s is not functional.\n", dev->name); else - printk(" %s does not support one-shot mode.\n", - dev->name); + pr_cont(" %s does not support one-shot mode.\n", + dev->name); } return -EINVAL; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ca90219a1e73..dcf7f20fcd12 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2139,13 +2139,6 @@ unsigned long get_seconds(void) } EXPORT_SYMBOL(get_seconds); -struct timespec __current_kernel_time(void) -{ - struct timekeeper *tk = &tk_core.timekeeper; - - return timespec64_to_timespec(tk_xtime(tk)); -} - struct timespec64 current_kernel_time64(void) { struct timekeeper *tk = &tk_core.timekeeper; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d88e96d4e12c..56ba0f2a01db 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -977,6 +977,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) { struct perf_event_query_bpf __user *uquery = info; struct perf_event_query_bpf query = {}; + u32 *ids, prog_cnt, ids_len; int ret; if (!capable(CAP_SYS_ADMIN)) @@ -985,16 +986,32 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) return -EINVAL; if (copy_from_user(&query, uquery, sizeof(query))) return -EFAULT; - if (query.ids_len > BPF_TRACE_MAX_PROGS) + + ids_len = query.ids_len; + if (ids_len > BPF_TRACE_MAX_PROGS) return -E2BIG; + ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN); + if (!ids) + return -ENOMEM; + /* + * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which + * is required when user only wants to check for uquery->prog_cnt. + * There is no need to check for it since the case is handled + * gracefully in bpf_prog_array_copy_info. + */ mutex_lock(&bpf_event_mutex); ret = bpf_prog_array_copy_info(event->tp_event->prog_array, - uquery->ids, - query.ids_len, - &uquery->prog_cnt); + ids, + ids_len, + &prog_cnt); mutex_unlock(&bpf_event_mutex); + if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || + copy_to_user(uquery->ids, ids, ids_len * sizeof(u32))) + ret = -EFAULT; + + kfree(ids); return ret; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1cd3fb4d70f8..02aed76e0978 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -512,8 +512,6 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) if (ret == 0) tk->tp.flags |= TP_FLAG_REGISTERED; else { - pr_warn("Could not insert probe at %s+%lu: %d\n", - trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret); if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) { pr_warn("This probe might be able to register after target module is loaded. Continue.\n"); ret = 0; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 2b2b79974b61..9427b5766134 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -668,8 +668,9 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow); * For a completely stable walk you should construct your own data * structure outside the hash table. * - * This function may sleep so you must not call it from interrupt - * context or with spin locks held. + * This function may be called from any process context, including + * non-preemptable context, but cannot be called from softirq or + * hardirq context. * * You must call rhashtable_walk_exit after this function returns. */ @@ -726,6 +727,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU) { struct rhashtable *ht = iter->ht; + bool rhlist = ht->rhlist; rcu_read_lock(); @@ -734,11 +736,52 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) list_del(&iter->walker.list); spin_unlock(&ht->lock); - if (!iter->walker.tbl && !iter->end_of_table) { + if (iter->end_of_table) + return 0; + if (!iter->walker.tbl) { iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); + iter->slot = 0; + iter->skip = 0; return -EAGAIN; } + if (iter->p && !rhlist) { + /* + * We need to validate that 'p' is still in the table, and + * if so, update 'skip' + */ + struct rhash_head *p; + int skip = 0; + rht_for_each_rcu(p, iter->walker.tbl, iter->slot) { + skip++; + if (p == iter->p) { + iter->skip = skip; + goto found; + } + } + iter->p = NULL; + } else if (iter->p && rhlist) { + /* Need to validate that 'list' is still in the table, and + * if so, update 'skip' and 'p'. + */ + struct rhash_head *p; + struct rhlist_head *list; + int skip = 0; + rht_for_each_rcu(p, iter->walker.tbl, iter->slot) { + for (list = container_of(p, struct rhlist_head, rhead); + list; + list = rcu_dereference(list->next)) { + skip++; + if (list == iter->list) { + iter->p = p; + skip = skip; + goto found; + } + } + } + iter->p = NULL; + } +found: return 0; } EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); @@ -914,8 +957,6 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) iter->walker.tbl = NULL; spin_unlock(&ht->lock); - iter->p = NULL; - out: rcu_read_unlock(); } diff --git a/mm/filemap.c b/mm/filemap.c index 9276bdb2343c..0604cb02e6f3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -786,7 +786,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) VM_BUG_ON_PAGE(!PageLocked(new), new); VM_BUG_ON_PAGE(new->mapping, new); - error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); + error = radix_tree_preload(gfp_mask & GFP_RECLAIM_MASK); if (!error) { struct address_space *mapping = old->mapping; void (*freepage)(struct page *); @@ -842,7 +842,7 @@ static int __add_to_page_cache_locked(struct page *page, return error; } - error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); + error = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK); if (error) { if (!huge) mem_cgroup_cancel_charge(page, memcg, false); @@ -1585,8 +1585,7 @@ no_page: if (fgp_flags & FGP_ACCESSED) __SetPageReferenced(page); - err = add_to_page_cache_lru(page, mapping, offset, - gfp_mask & GFP_RECLAIM_MASK); + err = add_to_page_cache_lru(page, mapping, offset, gfp_mask); if (unlikely(err)) { put_page(page); page = NULL; @@ -2387,7 +2386,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask) if (!page) return -ENOMEM; - ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask & GFP_KERNEL); + ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask); if (ret == 0) ret = mapping->a_ops->readpage(file, page); else if (ret == -EEXIST) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 14ed6ee5e02f..a3a1815f8e11 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2925,7 +2925,10 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) pmde = maybe_pmd_mkwrite(pmde, vma); flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE); - page_add_anon_rmap(new, vma, mmun_start, true); + if (PageAnon(new)) + page_add_anon_rmap(new, vma, mmun_start, true); + else + page_add_file_rmap(new, true); set_pmd_at(mm, mmun_start, pvmw->pmd, pmde); if (vma->vm_flags & VM_LOCKED) mlock_vma_page(new); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e074f7c637aa..2bd3df3d101a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2192,7 +2192,7 @@ static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, { struct memcg_kmem_cache_create_work *cw; - cw = kmalloc(sizeof(*cw), GFP_NOWAIT); + cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN); if (!cw) return; diff --git a/mm/migrate.c b/mm/migrate.c index f65dd69e1fd1..568433023831 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -472,7 +472,7 @@ int migrate_page_move_mapping(struct address_space *mapping, pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page)); - expected_count += 1 + page_has_private(page); + expected_count += hpage_nr_pages(page) + page_has_private(page); if (page_count(page) != expected_count || radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) { @@ -505,7 +505,7 @@ int migrate_page_move_mapping(struct address_space *mapping, */ newpage->index = page->index; newpage->mapping = page->mapping; - get_page(newpage); /* add cache reference */ + page_ref_add(newpage, hpage_nr_pages(page)); /* add cache reference */ if (PageSwapBacked(page)) { __SetPageSwapBacked(newpage); if (PageSwapCache(page)) { @@ -524,13 +524,26 @@ int migrate_page_move_mapping(struct address_space *mapping, } radix_tree_replace_slot(&mapping->i_pages, pslot, newpage); + if (PageTransHuge(page)) { + int i; + int index = page_index(page); + + for (i = 0; i < HPAGE_PMD_NR; i++) { + pslot = radix_tree_lookup_slot(&mapping->i_pages, + index + i); + radix_tree_replace_slot(&mapping->i_pages, pslot, + newpage + i); + } + } else { + radix_tree_replace_slot(&mapping->i_pages, pslot, newpage); + } /* * Drop cache reference from old page by unfreezing * to one less reference. * We know this isn't the last reference. */ - page_ref_unfreeze(page, expected_count - 1); + page_ref_unfreeze(page, expected_count - hpage_nr_pages(page)); xa_unlock(&mapping->i_pages); /* Leave irq disabled to prevent preemption while updating stats */ @@ -1622,6 +1635,9 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, current_node = NUMA_NO_NODE; } out_flush: + if (list_empty(&pagelist)) + return err; + /* Make sure we do not overwrite the existing error */ err1 = do_move_pages_to_node(mm, &pagelist, current_node); if (!err1) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5c1a3279e63f..337c6afb3345 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2502,13 +2502,13 @@ void account_page_redirty(struct page *page) if (mapping && mapping_cap_account_dirty(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; - bool locked; + struct wb_lock_cookie cookie = {}; - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); current->nr_dirtied--; dec_node_page_state(page, NR_DIRTIED); dec_wb_stat(wb, WB_DIRTIED); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); } } EXPORT_SYMBOL(account_page_redirty); @@ -2614,15 +2614,15 @@ void __cancel_dirty_page(struct page *page) if (mapping_cap_account_dirty(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; - bool locked; + struct wb_lock_cookie cookie = {}; lock_page_memcg(page); - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); if (TestClearPageDirty(page)) account_page_cleaned(page, mapping, wb); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); unlock_page_memcg(page); } else { ClearPageDirty(page); @@ -2654,7 +2654,7 @@ int clear_page_dirty_for_io(struct page *page) if (mapping && mapping_cap_account_dirty(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; - bool locked; + struct wb_lock_cookie cookie = {}; /* * Yes, Virginia, this is indeed insane. @@ -2691,14 +2691,14 @@ int clear_page_dirty_for_io(struct page *page) * always locked coming in here, so we get the desired * exclusion. */ - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); if (TestClearPageDirty(page)) { dec_lruvec_page_state(page, NR_FILE_DIRTY); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_wb_stat(wb, WB_RECLAIMABLE); ret = 1; } - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); return ret; } return TestClearPageDirty(page); diff --git a/mm/rmap.c b/mm/rmap.c index f0dd4e4565bc..8d5337fed37b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1374,9 +1374,6 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (!pvmw.pte && (flags & TTU_MIGRATION)) { VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page); - if (!PageAnon(page)) - continue; - set_pmd_migration_entry(&pvmw, page); continue; } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 032e0fe45940..28a4c3490359 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1825,13 +1825,14 @@ static int compat_table_info(const struct ebt_table_info *info, { unsigned int size = info->entries_size; const void *entries = info->entries; - int ret; newinfo->entries_size = size; - - ret = xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries); - if (ret) - return ret; + if (info->nentries) { + int ret = xt_compat_init_offsets(NFPROTO_BRIDGE, + info->nentries); + if (ret) + return ret; + } return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 03416e6dd5d7..4650fd6d678c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -92,6 +92,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", + [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 33958f84c173..126ffc5bc630 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -387,247 +387,304 @@ unsigned int fib_rules_seq_read(struct net *net, int family) } EXPORT_SYMBOL_GPL(fib_rules_seq_read); -static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, - struct fib_rules_ops *ops) -{ - int err = -EINVAL; - - if (frh->src_len) - if (tb[FRA_SRC] == NULL || - frh->src_len > (ops->addr_size * 8) || - nla_len(tb[FRA_SRC]) != ops->addr_size) - goto errout; - - if (frh->dst_len) - if (tb[FRA_DST] == NULL || - frh->dst_len > (ops->addr_size * 8) || - nla_len(tb[FRA_DST]) != ops->addr_size) - goto errout; - - err = 0; -errout: - return err; -} - -static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, - struct nlattr **tb, struct fib_rule *rule) +static struct fib_rule *rule_find(struct fib_rules_ops *ops, + struct fib_rule_hdr *frh, + struct nlattr **tb, + struct fib_rule *rule, + bool user_priority) { struct fib_rule *r; list_for_each_entry(r, &ops->rules_list, list) { - if (r->action != rule->action) + if (rule->action && r->action != rule->action) continue; - if (r->table != rule->table) + if (rule->table && r->table != rule->table) continue; - if (r->pref != rule->pref) + if (user_priority && r->pref != rule->pref) continue; - if (memcmp(r->iifname, rule->iifname, IFNAMSIZ)) + if (rule->iifname[0] && + memcmp(r->iifname, rule->iifname, IFNAMSIZ)) continue; - if (memcmp(r->oifname, rule->oifname, IFNAMSIZ)) + if (rule->oifname[0] && + memcmp(r->oifname, rule->oifname, IFNAMSIZ)) continue; - if (r->mark != rule->mark) + if (rule->mark && r->mark != rule->mark) continue; - if (r->mark_mask != rule->mark_mask) + if (rule->mark_mask && r->mark_mask != rule->mark_mask) continue; - if (r->tun_id != rule->tun_id) + if (rule->tun_id && r->tun_id != rule->tun_id) continue; if (r->fr_net != rule->fr_net) continue; - if (r->l3mdev != rule->l3mdev) + if (rule->l3mdev && r->l3mdev != rule->l3mdev) continue; - if (!uid_eq(r->uid_range.start, rule->uid_range.start) || - !uid_eq(r->uid_range.end, rule->uid_range.end)) + if (uid_range_set(&rule->uid_range) && + (!uid_eq(r->uid_range.start, rule->uid_range.start) || + !uid_eq(r->uid_range.end, rule->uid_range.end))) continue; - if (r->ip_proto != rule->ip_proto) + if (rule->ip_proto && r->ip_proto != rule->ip_proto) continue; - if (!fib_rule_port_range_compare(&r->sport_range, + if (fib_rule_port_range_set(&rule->sport_range) && + !fib_rule_port_range_compare(&r->sport_range, &rule->sport_range)) continue; - if (!fib_rule_port_range_compare(&r->dport_range, + if (fib_rule_port_range_set(&rule->dport_range) && + !fib_rule_port_range_compare(&r->dport_range, &rule->dport_range)) continue; if (!ops->compare(r, frh, tb)) continue; - return 1; + return r; + } + + return NULL; +} + +#ifdef CONFIG_NET_L3_MASTER_DEV +static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, + struct netlink_ext_ack *extack) +{ + nlrule->l3mdev = nla_get_u8(nla); + if (nlrule->l3mdev != 1) { + NL_SET_ERR_MSG(extack, "Invalid l3mdev attribute"); + return -1; } + return 0; } +#else +static int fib_nl2rule_l3mdev(struct nlattr *nla, struct fib_rule *nlrule, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG(extack, "l3mdev support is not enabled in kernel"); + return -1; +} +#endif -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, + struct fib_rules_ops *ops, + struct nlattr *tb[], + struct fib_rule **rule, + bool *user_priority) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); - struct fib_rules_ops *ops = NULL; - struct fib_rule *rule, *r, *last = NULL; - struct nlattr *tb[FRA_MAX+1]; - int err = -EINVAL, unresolved = 0; - - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) - goto errout; + struct fib_rule *nlrule = NULL; + int err = -EINVAL; - ops = lookup_rules_ops(net, frh->family); - if (ops == NULL) { - err = -EAFNOSUPPORT; - goto errout; + if (frh->src_len) + if (!tb[FRA_SRC] || + frh->src_len > (ops->addr_size * 8) || + nla_len(tb[FRA_SRC]) != ops->addr_size) { + NL_SET_ERR_MSG(extack, "Invalid source address"); + goto errout; } - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); - if (err < 0) - goto errout; - - err = validate_rulemsg(frh, tb, ops); - if (err < 0) - goto errout; + if (frh->dst_len) + if (!tb[FRA_DST] || + frh->dst_len > (ops->addr_size * 8) || + nla_len(tb[FRA_DST]) != ops->addr_size) { + NL_SET_ERR_MSG(extack, "Invalid dst address"); + goto errout; + } - rule = kzalloc(ops->rule_size, GFP_KERNEL); - if (rule == NULL) { + nlrule = kzalloc(ops->rule_size, GFP_KERNEL); + if (!nlrule) { err = -ENOMEM; goto errout; } - refcount_set(&rule->refcnt, 1); - rule->fr_net = net; + refcount_set(&nlrule->refcnt, 1); + nlrule->fr_net = net; - rule->pref = tb[FRA_PRIORITY] ? nla_get_u32(tb[FRA_PRIORITY]) - : fib_default_rule_pref(ops); + if (tb[FRA_PRIORITY]) { + nlrule->pref = nla_get_u32(tb[FRA_PRIORITY]); + *user_priority = true; + } else { + nlrule->pref = fib_default_rule_pref(ops); + } - rule->proto = tb[FRA_PROTOCOL] ? + nlrule->proto = tb[FRA_PROTOCOL] ? nla_get_u8(tb[FRA_PROTOCOL]) : RTPROT_UNSPEC; if (tb[FRA_IIFNAME]) { struct net_device *dev; - rule->iifindex = -1; - nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, rule->iifname); + nlrule->iifindex = -1; + nla_strlcpy(nlrule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); + dev = __dev_get_by_name(net, nlrule->iifname); if (dev) - rule->iifindex = dev->ifindex; + nlrule->iifindex = dev->ifindex; } if (tb[FRA_OIFNAME]) { struct net_device *dev; - rule->oifindex = -1; - nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); - dev = __dev_get_by_name(net, rule->oifname); + nlrule->oifindex = -1; + nla_strlcpy(nlrule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); + dev = __dev_get_by_name(net, nlrule->oifname); if (dev) - rule->oifindex = dev->ifindex; + nlrule->oifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { - rule->mark = nla_get_u32(tb[FRA_FWMARK]); - if (rule->mark) + nlrule->mark = nla_get_u32(tb[FRA_FWMARK]); + if (nlrule->mark) /* compatibility: if the mark value is non-zero all bits * are compared unless a mask is explicitly specified. */ - rule->mark_mask = 0xFFFFFFFF; + nlrule->mark_mask = 0xFFFFFFFF; } if (tb[FRA_FWMASK]) - rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]); + nlrule->mark_mask = nla_get_u32(tb[FRA_FWMASK]); if (tb[FRA_TUN_ID]) - rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); + nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); err = -EINVAL; - if (tb[FRA_L3MDEV]) { -#ifdef CONFIG_NET_L3_MASTER_DEV - rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]); - if (rule->l3mdev != 1) -#endif - goto errout_free; - } + if (tb[FRA_L3MDEV] && + fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0) + goto errout_free; - rule->action = frh->action; - rule->flags = frh->flags; - rule->table = frh_get_table(frh, tb); + nlrule->action = frh->action; + nlrule->flags = frh->flags; + nlrule->table = frh_get_table(frh, tb); if (tb[FRA_SUPPRESS_PREFIXLEN]) - rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]); + nlrule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]); else - rule->suppress_prefixlen = -1; + nlrule->suppress_prefixlen = -1; if (tb[FRA_SUPPRESS_IFGROUP]) - rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]); + nlrule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]); else - rule->suppress_ifgroup = -1; + nlrule->suppress_ifgroup = -1; if (tb[FRA_GOTO]) { - if (rule->action != FR_ACT_GOTO) + if (nlrule->action != FR_ACT_GOTO) { + NL_SET_ERR_MSG(extack, "Unexpected goto"); goto errout_free; + } - rule->target = nla_get_u32(tb[FRA_GOTO]); + nlrule->target = nla_get_u32(tb[FRA_GOTO]); /* Backward jumps are prohibited to avoid endless loops */ - if (rule->target <= rule->pref) + if (nlrule->target <= nlrule->pref) { + NL_SET_ERR_MSG(extack, "Backward goto not supported"); goto errout_free; - - list_for_each_entry(r, &ops->rules_list, list) { - if (r->pref == rule->target) { - RCU_INIT_POINTER(rule->ctarget, r); - break; - } } - - if (rcu_dereference_protected(rule->ctarget, 1) == NULL) - unresolved = 1; - } else if (rule->action == FR_ACT_GOTO) + } else if (nlrule->action == FR_ACT_GOTO) { + NL_SET_ERR_MSG(extack, "Missing goto target for action goto"); goto errout_free; + } - if (rule->l3mdev && rule->table) + if (nlrule->l3mdev && nlrule->table) { + NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive"); goto errout_free; + } if (tb[FRA_UID_RANGE]) { if (current_user_ns() != net->user_ns) { err = -EPERM; + NL_SET_ERR_MSG(extack, "No permission to set uid"); goto errout_free; } - rule->uid_range = nla_get_kuid_range(tb); + nlrule->uid_range = nla_get_kuid_range(tb); - if (!uid_range_set(&rule->uid_range) || - !uid_lte(rule->uid_range.start, rule->uid_range.end)) + if (!uid_range_set(&nlrule->uid_range) || + !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) { + NL_SET_ERR_MSG(extack, "Invalid uid range"); goto errout_free; + } } else { - rule->uid_range = fib_kuid_range_unset; + nlrule->uid_range = fib_kuid_range_unset; } if (tb[FRA_IP_PROTO]) - rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]); + nlrule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]); if (tb[FRA_SPORT_RANGE]) { err = nla_get_port_range(tb[FRA_SPORT_RANGE], - &rule->sport_range); - if (err) + &nlrule->sport_range); + if (err) { + NL_SET_ERR_MSG(extack, "Invalid sport range"); goto errout_free; + } } if (tb[FRA_DPORT_RANGE]) { err = nla_get_port_range(tb[FRA_DPORT_RANGE], - &rule->dport_range); - if (err) + &nlrule->dport_range); + if (err) { + NL_SET_ERR_MSG(extack, "Invalid dport range"); goto errout_free; + } } + *rule = nlrule; + + return 0; + +errout_free: + kfree(nlrule); +errout: + return err; +} + +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule = NULL, *r, *last = NULL; + struct nlattr *tb[FRA_MAX + 1]; + int err = -EINVAL, unresolved = 0; + bool user_priority = false; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { + NL_SET_ERR_MSG(extack, "Invalid msg length"); + goto errout; + } + + ops = lookup_rules_ops(net, frh->family); + if (!ops) { + err = -EAFNOSUPPORT; + NL_SET_ERR_MSG(extack, "Rule family not supported"); + goto errout; + } + + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); + if (err < 0) { + NL_SET_ERR_MSG(extack, "Error parsing msg"); + goto errout; + } + + err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority); + if (err) + goto errout; + if ((nlh->nlmsg_flags & NLM_F_EXCL) && - rule_exists(ops, frh, tb, rule)) { + rule_find(ops, frh, tb, rule, user_priority)) { err = -EEXIST; goto errout_free; } - err = ops->configure(rule, skb, frh, tb); + err = ops->configure(rule, skb, frh, tb, extack); if (err < 0) goto errout_free; @@ -637,6 +694,16 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout_free; list_for_each_entry(r, &ops->rules_list, list) { + if (r->pref == rule->target) { + RCU_INIT_POINTER(rule->ctarget, r); + break; + } + } + + if (rcu_dereference_protected(rule->ctarget, 1) == NULL) + unresolved = 1; + + list_for_each_entry(r, &ops->rules_list, list) { if (r->pref > rule->pref) break; last = r; @@ -690,171 +757,97 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); - struct fib_rule_port_range sprange = {0, 0}; - struct fib_rule_port_range dprange = {0, 0}; struct fib_rules_ops *ops = NULL; - struct fib_rule *rule, *r; + struct fib_rule *rule = NULL, *r, *nlrule = NULL; struct nlattr *tb[FRA_MAX+1]; - struct fib_kuid_range range; int err = -EINVAL; + bool user_priority = false; - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { + NL_SET_ERR_MSG(extack, "Invalid msg length"); goto errout; + } ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { err = -EAFNOSUPPORT; + NL_SET_ERR_MSG(extack, "Rule family not supported"); goto errout; } err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack); - if (err < 0) + if (err < 0) { + NL_SET_ERR_MSG(extack, "Error parsing msg"); goto errout; + } - err = validate_rulemsg(frh, tb, ops); - if (err < 0) + err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority); + if (err) goto errout; - if (tb[FRA_UID_RANGE]) { - range = nla_get_kuid_range(tb); - if (!uid_range_set(&range)) { - err = -EINVAL; - goto errout; - } - } else { - range = fib_kuid_range_unset; + rule = rule_find(ops, frh, tb, nlrule, user_priority); + if (!rule) { + err = -ENOENT; + goto errout; } - if (tb[FRA_SPORT_RANGE]) { - err = nla_get_port_range(tb[FRA_SPORT_RANGE], - &sprange); - if (err) - goto errout; + if (rule->flags & FIB_RULE_PERMANENT) { + err = -EPERM; + goto errout; } - if (tb[FRA_DPORT_RANGE]) { - err = nla_get_port_range(tb[FRA_DPORT_RANGE], - &dprange); + if (ops->delete) { + err = ops->delete(rule); if (err) goto errout; } - list_for_each_entry(rule, &ops->rules_list, list) { - if (tb[FRA_PROTOCOL] && - (rule->proto != nla_get_u8(tb[FRA_PROTOCOL]))) - continue; - - if (frh->action && (frh->action != rule->action)) - continue; - - if (frh_get_table(frh, tb) && - (frh_get_table(frh, tb) != rule->table)) - continue; - - if (tb[FRA_PRIORITY] && - (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) - continue; - - if (tb[FRA_IIFNAME] && - nla_strcmp(tb[FRA_IIFNAME], rule->iifname)) - continue; - - if (tb[FRA_OIFNAME] && - nla_strcmp(tb[FRA_OIFNAME], rule->oifname)) - continue; - - if (tb[FRA_FWMARK] && - (rule->mark != nla_get_u32(tb[FRA_FWMARK]))) - continue; - - if (tb[FRA_FWMASK] && - (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) - continue; - - if (tb[FRA_TUN_ID] && - (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID]))) - continue; - - if (tb[FRA_L3MDEV] && - (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV]))) - continue; - - if (uid_range_set(&range) && - (!uid_eq(rule->uid_range.start, range.start) || - !uid_eq(rule->uid_range.end, range.end))) - continue; - - if (tb[FRA_IP_PROTO] && - (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO]))) - continue; - - if (fib_rule_port_range_set(&sprange) && - !fib_rule_port_range_compare(&rule->sport_range, &sprange)) - continue; - - if (fib_rule_port_range_set(&dprange) && - !fib_rule_port_range_compare(&rule->dport_range, &dprange)) - continue; - - if (!ops->compare(rule, frh, tb)) - continue; - - if (rule->flags & FIB_RULE_PERMANENT) { - err = -EPERM; - goto errout; - } - - if (ops->delete) { - err = ops->delete(rule); - if (err) - goto errout; - } + if (rule->tun_id) + ip_tunnel_unneed_metadata(); - if (rule->tun_id) - ip_tunnel_unneed_metadata(); + list_del_rcu(&rule->list); - list_del_rcu(&rule->list); - - if (rule->action == FR_ACT_GOTO) { - ops->nr_goto_rules--; - if (rtnl_dereference(rule->ctarget) == NULL) - ops->unresolved_rules--; - } + if (rule->action == FR_ACT_GOTO) { + ops->nr_goto_rules--; + if (rtnl_dereference(rule->ctarget) == NULL) + ops->unresolved_rules--; + } - /* - * Check if this rule is a target to any of them. If so, - * adjust to the next one with the same preference or - * disable them. As this operation is eventually very - * expensive, it is only performed if goto rules, except - * current if it is goto rule, have actually been added. - */ - if (ops->nr_goto_rules > 0) { - struct fib_rule *n; - - n = list_next_entry(rule, list); - if (&n->list == &ops->rules_list || n->pref != rule->pref) - n = NULL; - list_for_each_entry(r, &ops->rules_list, list) { - if (rtnl_dereference(r->ctarget) != rule) - continue; - rcu_assign_pointer(r->ctarget, n); - if (!n) - ops->unresolved_rules++; - } + /* + * Check if this rule is a target to any of them. If so, + * adjust to the next one with the same preference or + * disable them. As this operation is eventually very + * expensive, it is only performed if goto rules, except + * current if it is goto rule, have actually been added. + */ + if (ops->nr_goto_rules > 0) { + struct fib_rule *n; + + n = list_next_entry(rule, list); + if (&n->list == &ops->rules_list || n->pref != rule->pref) + n = NULL; + list_for_each_entry(r, &ops->rules_list, list) { + if (rtnl_dereference(r->ctarget) != rule) + continue; + rcu_assign_pointer(r->ctarget, n); + if (!n) + ops->unresolved_rules++; } - - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, - NULL); - notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).portid); - fib_rule_put(rule); - flush_route_cache(ops); - rules_ops_put(ops); - return 0; } - err = -ENOENT; + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, + NULL); + notify_rule_change(RTM_DELRULE, rule, ops, nlh, + NETLINK_CB(skb).portid); + fib_rule_put(rule); + flush_route_cache(ops); + rules_ops_put(ops); + kfree(nlrule); + return 0; + errout: + if (nlrule) + kfree(nlrule); rules_ops_put(ops); return err; } diff --git a/net/core/filter.c b/net/core/filter.c index 8e45c6c7ab08..d3781daa26ab 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3281,6 +3281,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, skb_dst_set(skb, (struct dst_entry *) md); info = &md->u.tun_info; + memset(info, 0, sizeof(*info)); info->mode = IP_TUNNEL_INFO_TX; info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ce519861be59..5afae29367c1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -820,7 +820,8 @@ static void neigh_periodic_work(struct work_struct *work) write_lock(&n->lock); state = n->nud_state; - if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { + if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) || + (n->flags & NTF_EXT_LEARNED)) { write_unlock(&n->lock); goto next_elt; } @@ -1136,6 +1137,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (neigh->dead) goto out; + neigh_update_ext_learned(neigh, flags, ¬ify); + if (!(new & NUD_VALID)) { neigh_del_timer(neigh); if (old & NUD_CONNECTED) @@ -1781,6 +1784,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, flags &= ~NEIGH_UPDATE_F_OVERRIDE; } + if (ndm->ndm_flags & NTF_EXT_LEARNED) + flags |= NEIGH_UPDATE_F_EXT_LEARNED; + if (ndm->ndm_flags & NTF_USE) { neigh_event_send(neigh, NULL); err = 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ff49e352deea..c647cfe114e0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4940,6 +4940,8 @@ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) thlen = tcp_hdrlen(skb); } else if (unlikely(skb_is_gso_sctp(skb))) { thlen = sizeof(struct sctphdr); + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { + thlen = sizeof(struct udphdr); } /* UFO sets gso_size to the size of the fragmentation * payload, i.e. the size of the L4 (UDP) header is already diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index c795c3f509c9..72236695db3d 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -121,13 +121,16 @@ static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, - struct nlattr **tb) + struct nlattr **tb, + struct netlink_ext_ack *extack) { int err = -EINVAL; struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - if (frh->tos) + if (frh->tos) { + NL_SET_ERR_MSG(extack, "Invalid tos value"); goto errout; + } if (rule->table == RT_TABLE_UNSPEC) { if (rule->action == FR_ACT_TO_TBL) { diff --git a/net/ife/ife.c b/net/ife/ife.c index 7d1ec76e7f43..13bbf8cb6a39 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -69,6 +69,9 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen) int total_pull; u16 ifehdrln; + if (!pskb_may_pull(skb, skb->dev->hard_header_len + IFE_METAHDRLEN)) + return NULL; + ifehdr = (struct ifeheadr *) (skb->data + skb->dev->hard_header_len); ifehdrln = ntohs(ifehdr->metalen); total_pull = skb->dev->hard_header_len + ifehdrln; @@ -92,12 +95,43 @@ struct meta_tlvhdr { __be16 len; }; +static bool __ife_tlv_meta_valid(const unsigned char *skbdata, + const unsigned char *ifehdr_end) +{ + const struct meta_tlvhdr *tlv; + u16 tlvlen; + + if (unlikely(skbdata + sizeof(*tlv) > ifehdr_end)) + return false; + + tlv = (const struct meta_tlvhdr *)skbdata; + tlvlen = ntohs(tlv->len); + + /* tlv length field is inc header, check on minimum */ + if (tlvlen < NLA_HDRLEN) + return false; + + /* overflow by NLA_ALIGN check */ + if (NLA_ALIGN(tlvlen) < tlvlen) + return false; + + if (unlikely(skbdata + NLA_ALIGN(tlvlen) > ifehdr_end)) + return false; + + return true; +} + /* Caller takes care of presenting data in network order */ -void *ife_tlv_meta_decode(void *skbdata, u16 *attrtype, u16 *dlen, u16 *totlen) +void *ife_tlv_meta_decode(void *skbdata, const void *ifehdr_end, u16 *attrtype, + u16 *dlen, u16 *totlen) { - struct meta_tlvhdr *tlv = (struct meta_tlvhdr *) skbdata; + struct meta_tlvhdr *tlv; + + if (!__ife_tlv_meta_valid(skbdata, ifehdr_end)) + return NULL; + tlv = (struct meta_tlvhdr *)skbdata; *dlen = ntohs(tlv->len) - NLA_HDRLEN; *attrtype = ntohs(tlv->type); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 737d11bc8838..f8eb78d042a4 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -213,14 +213,17 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, - struct nlattr **tb) + struct nlattr **tb, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; - if (frh->tos & ~IPTOS_TOS_MASK) + if (frh->tos & ~IPTOS_TOS_MASK) { + NL_SET_ERR_MSG(extack, "Invalid tos"); goto errout; + } /* split local/main if they are not already split */ err = fib_unmerge(net); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 83c73bab2c3d..f2338e40c37d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -878,11 +878,14 @@ static int __ip_append_data(struct sock *sk, struct rtable *rt = (struct rtable *)cork->dst; unsigned int wmem_alloc_delta = 0; u32 tskey = 0; + bool paged; skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; - mtu = cork->fragsize; + mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; + paged = !!cork->gso_size; + if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = sk->sk_tskey++; @@ -906,7 +909,7 @@ static int __ip_append_data(struct sock *sk, if (transhdrlen && length + fragheaderlen <= mtu && rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && - !(flags & MSG_MORE) && + (!(flags & MSG_MORE) || cork->gso_size) && !exthdrlen) csummode = CHECKSUM_PARTIAL; @@ -933,6 +936,7 @@ static int __ip_append_data(struct sock *sk, unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; + unsigned int pagedlen = 0; struct sk_buff *skb_prev; alloc_new_skb: skb_prev = skb; @@ -953,8 +957,12 @@ alloc_new_skb: if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else + else if (!paged) alloclen = fraglen; + else { + alloclen = min_t(int, fraglen, MAX_HEADER); + pagedlen = fraglen - alloclen; + } alloclen += exthdrlen; @@ -998,7 +1006,7 @@ alloc_new_skb: /* * Find where to start putting bytes. */ - data = skb_put(skb, fraglen + exthdrlen); + data = skb_put(skb, fraglen + exthdrlen - pagedlen); skb_set_network_header(skb, exthdrlen); skb->transport_header = (skb->network_header + fragheaderlen); @@ -1014,7 +1022,7 @@ alloc_new_skb: pskb_trim_unique(skb_prev, maxfraglen); } - copy = datalen - transhdrlen - fraggap; + copy = datalen - transhdrlen - fraggap - pagedlen; if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); @@ -1022,7 +1030,7 @@ alloc_new_skb: } offset += copy; - length -= datalen - fraggap; + length -= copy + transhdrlen; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; @@ -1135,6 +1143,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, *rtp = NULL; cork->fragsize = ip_sk_use_pmtu(sk) ? dst_mtu(&rt->dst) : rt->dst.dev->mtu; + + cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0; cork->dst = &rt->dst; cork->length = 0; cork->ttl = ipc->ttl; @@ -1214,7 +1224,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EOPNOTSUPP; hh_len = LL_RESERVED_SPACE(rt->dst.dev); - mtu = cork->fragsize; + mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; @@ -1470,9 +1480,8 @@ struct sk_buff *ip_make_skb(struct sock *sk, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, - unsigned int flags) + struct inet_cork *cork, unsigned int flags) { - struct inet_cork cork; struct sk_buff_head queue; int err; @@ -1481,22 +1490,22 @@ struct sk_buff *ip_make_skb(struct sock *sk, __skb_queue_head_init(&queue); - cork.flags = 0; - cork.addr = 0; - cork.opt = NULL; - err = ip_setup_cork(sk, &cork, ipc, rtp); + cork->flags = 0; + cork->addr = 0; + cork->opt = NULL; + err = ip_setup_cork(sk, cork, ipc, rtp); if (err) return ERR_PTR(err); - err = __ip_append_data(sk, fl4, &queue, &cork, + err = __ip_append_data(sk, fl4, &queue, cork, ¤t->task_frag, getfrag, from, length, transhdrlen, flags); if (err) { - __ip_flush_pending_frames(sk, &queue, &cork); + __ip_flush_pending_frames(sk, &queue, cork); return ERR_PTR(err); } - return __ip_make_skb(sk, fl4, &queue, &cork); + return __ip_make_skb(sk, fl4, &queue, cork); } /* diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 43f620feb1c4..d839d74853fc 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -28,6 +28,9 @@ * * Multiple Nameservers in /proc/net/pnp * -- Josef Siemes <jsiemes@web.de>, Aug 2002 + * + * NTP servers in /proc/net/ipconfig/ntp_servers + * -- Chris Novakovic <chris@chrisn.me.uk>, April 2018 */ #include <linux/types.h> @@ -93,6 +96,7 @@ #define CONF_TIMEOUT_MAX (HZ*30) /* Maximum allowed timeout */ #define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers - '3' from resolv.h */ +#define CONF_NTP_SERVERS_MAX 3 /* Maximum number of NTP servers */ #define NONE cpu_to_be32(INADDR_NONE) #define ANY cpu_to_be32(INADDR_ANY) @@ -152,12 +156,16 @@ static int ic_proto_used; /* Protocol used, if any */ #define ic_proto_used 0 #endif static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */ +static __be32 ic_ntp_servers[CONF_NTP_SERVERS_MAX]; /* NTP server IP addresses */ static u8 ic_domain[64]; /* DNS (not NIS) domain name */ /* * Private state. */ +/* proc_dir_entry for /proc/net/ipconfig */ +static struct proc_dir_entry *ipconfig_dir; + /* Name of user-selected boot device */ static char user_dev_name[IFNAMSIZ] __initdata = { 0, }; @@ -576,6 +584,15 @@ static inline void __init ic_nameservers_predef(void) ic_nameservers[i] = NONE; } +/* Predefine NTP servers */ +static inline void __init ic_ntp_servers_predef(void) +{ + int i; + + for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) + ic_ntp_servers[i] = NONE; +} + /* * DHCP/BOOTP support. */ @@ -671,6 +688,7 @@ ic_dhcp_init_options(u8 *options, struct ic_device *d) 17, /* Boot path */ 26, /* MTU */ 40, /* NIS domain name */ + 42, /* NTP servers */ }; *e++ = 55; /* Parameter request list */ @@ -721,9 +739,11 @@ static void __init ic_bootp_init_ext(u8 *e) *e++ = 3; /* Default gateway request */ *e++ = 4; e += 4; - *e++ = 5; /* Name server request */ - *e++ = 8; - e += 8; +#if CONF_NAMESERVERS_MAX > 0 + *e++ = 6; /* (DNS) name server request */ + *e++ = 4 * CONF_NAMESERVERS_MAX; + e += 4 * CONF_NAMESERVERS_MAX; +#endif *e++ = 12; /* Host name request */ *e++ = 32; e += 32; @@ -748,7 +768,13 @@ static void __init ic_bootp_init_ext(u8 *e) */ static inline void __init ic_bootp_init(void) { + /* Re-initialise all name servers and NTP servers to NONE, in case any + * were set via the "ip=" or "nfsaddrs=" kernel command line parameters: + * any IP addresses specified there will already have been decoded but + * are no longer needed + */ ic_nameservers_predef(); + ic_ntp_servers_predef(); dev_add_pack(&bootp_packet_type); } @@ -912,6 +938,15 @@ static void __init ic_do_bootp_ext(u8 *ext) ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN); break; + case 42: /* NTP servers */ + servers = *ext / 4; + if (servers > CONF_NTP_SERVERS_MAX) + servers = CONF_NTP_SERVERS_MAX; + for (i = 0; i < servers; i++) { + if (ic_ntp_servers[i] == NONE) + memcpy(&ic_ntp_servers[i], ext+1+4*i, 4); + } + break; } } @@ -1258,6 +1293,7 @@ static int __init ic_dynamic(void) #ifdef CONFIG_PROC_FS +/* Name servers: */ static int pnp_seq_show(struct seq_file *seq, void *v) { int i; @@ -1294,6 +1330,62 @@ static const struct file_operations pnp_seq_fops = { .llseek = seq_lseek, .release = single_release, }; + +/* Create the /proc/net/ipconfig directory */ +static int __init ipconfig_proc_net_init(void) +{ + ipconfig_dir = proc_net_mkdir(&init_net, "ipconfig", init_net.proc_net); + if (!ipconfig_dir) + return -ENOMEM; + + return 0; +} + +/* Create a new file under /proc/net/ipconfig */ +static int ipconfig_proc_net_create(const char *name, + const struct file_operations *fops) +{ + char *pname; + struct proc_dir_entry *p; + + if (!ipconfig_dir) + return -ENOMEM; + + pname = kasprintf(GFP_KERNEL, "%s%s", "ipconfig/", name); + if (!pname) + return -ENOMEM; + + p = proc_create(pname, 0444, init_net.proc_net, fops); + kfree(pname); + if (!p) + return -ENOMEM; + + return 0; +} + +/* Write NTP server IP addresses to /proc/net/ipconfig/ntp_servers */ +static int ntp_servers_seq_show(struct seq_file *seq, void *v) +{ + int i; + + for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) { + if (ic_ntp_servers[i] != NONE) + seq_printf(seq, "%pI4\n", &ic_ntp_servers[i]); + } + return 0; +} + +static int ntp_servers_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, ntp_servers_seq_show, NULL); +} + +static const struct file_operations ntp_servers_seq_fops = { + .open = ntp_servers_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_PROC_FS */ /* @@ -1368,8 +1460,20 @@ static int __init ip_auto_config(void) int err; unsigned int i; + /* Initialise all name servers and NTP servers to NONE (but only if the + * "ip=" or "nfsaddrs=" kernel command line parameters weren't decoded, + * otherwise we'll overwrite the IP addresses specified there) + */ + if (ic_set_manually == 0) { + ic_nameservers_predef(); + ic_ntp_servers_predef(); + } + #ifdef CONFIG_PROC_FS proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops); + + if (ipconfig_proc_net_init() == 0) + ipconfig_proc_net_create("ntp_servers", &ntp_servers_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) @@ -1481,16 +1585,32 @@ static int __init ip_auto_config(void) &ic_servaddr, &root_server_addr, root_server_path); if (ic_dev_mtu) pr_cont(", mtu=%d", ic_dev_mtu); - for (i = 0; i < CONF_NAMESERVERS_MAX; i++) + /* Name servers (if any): */ + for (i = 0; i < CONF_NAMESERVERS_MAX; i++) { if (ic_nameservers[i] != NONE) { - pr_cont(" nameserver%u=%pI4", - i, &ic_nameservers[i]); - break; + if (i == 0) + pr_info(" nameserver%u=%pI4", + i, &ic_nameservers[i]); + else + pr_cont(", nameserver%u=%pI4", + i, &ic_nameservers[i]); } - for (i++; i < CONF_NAMESERVERS_MAX; i++) - if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); - pr_cont("\n"); + if (i + 1 == CONF_NAMESERVERS_MAX) + pr_cont("\n"); + } + /* NTP servers (if any): */ + for (i = 0; i < CONF_NTP_SERVERS_MAX; i++) { + if (ic_ntp_servers[i] != NONE) { + if (i == 0) + pr_info(" ntpserver%u=%pI4", + i, &ic_ntp_servers[i]); + else + pr_cont(", ntpserver%u=%pI4", + i, &ic_ntp_servers[i]); + } + if (i + 1 == CONF_NTP_SERVERS_MAX) + pr_cont("\n"); + } #endif /* !SILENT */ /* @@ -1588,7 +1708,9 @@ static int __init ip_auto_config_setup(char *addrs) return 1; } + /* Initialise all name servers and NTP servers to NONE */ ic_nameservers_predef(); + ic_ntp_servers_predef(); /* Parse string for static IP assignment. */ ip = addrs; @@ -1647,6 +1769,13 @@ static int __init ip_auto_config_setup(char *addrs) ic_nameservers[1] = NONE; } break; + case 9: + if (CONF_NTP_SERVERS_MAX >= 1) { + ic_ntp_servers[0] = in_aton(ip); + if (ic_ntp_servers[0] == ANY) + ic_ntp_servers[0] = NONE; + } + break; } } ip = cp; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2fb4de3f7f66..38e092eafc97 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -201,7 +201,8 @@ static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = { }; static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct fib_rule_hdr *frh, struct nlattr **tb) + struct fib_rule_hdr *frh, struct nlattr **tb, + struct netlink_ext_ack *extack) { return 0; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0396fb919b5d..8acbe5fd2098 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -582,6 +582,8 @@ void tcp_rcv_space_adjust(struct sock *sk) u32 copied; int time; + trace_tcp_rcv_space_adjust(sk); + tcp_mstamp_refresh(tp); time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) @@ -3887,11 +3889,8 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) int length = (th->doff << 2) - sizeof(*th); const u8 *ptr = (const u8 *)(th + 1); - /* If the TCP option is too short, we can short cut */ - if (length < TCPOLEN_MD5SIG) - return NULL; - - while (length > 0) { + /* If not enough data remaining, we can short cut */ + while (length >= TCPOLEN_MD5SIG) { int opcode = *ptr++; int opsize; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 383cac0ff0ec..95feffb6d53f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -585,14 +585,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, unsigned int remaining = MAX_TCP_OPTION_SPACE; struct tcp_fastopen_request *fastopen = tp->fastopen_req; + *md5 = NULL; #ifdef CONFIG_TCP_MD5SIG - *md5 = tp->af_specific->md5_lookup(sk, sk); - if (*md5) { - opts->options |= OPTION_MD5; - remaining -= TCPOLEN_MD5SIG_ALIGNED; + if (unlikely(rcu_access_pointer(tp->md5sig_info))) { + *md5 = tp->af_specific->md5_lookup(sk, sk); + if (*md5) { + opts->options |= OPTION_MD5; + remaining -= TCPOLEN_MD5SIG_ALIGNED; + } } -#else - *md5 = NULL; #endif /* We always get an MSS option. The option bytes which will be seen in @@ -720,14 +721,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb opts->options = 0; + *md5 = NULL; #ifdef CONFIG_TCP_MD5SIG - *md5 = tp->af_specific->md5_lookup(sk, sk); - if (unlikely(*md5)) { - opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; + if (unlikely(rcu_access_pointer(tp->md5sig_info))) { + *md5 = tp->af_specific->md5_lookup(sk, sk); + if (*md5) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; + } } -#else - *md5 = NULL; #endif if (likely(tp->rx_opt.tstamp_ok)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 24b5c59b1c53..794aeafeb782 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -757,7 +757,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb, } EXPORT_SYMBOL(udp_set_csum); -static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) +static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, + struct inet_cork *cork) { struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); @@ -777,6 +778,21 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) uh->len = htons(len); uh->check = 0; + if (cork->gso_size) { + const int hlen = skb_network_header_len(skb) + + sizeof(struct udphdr); + + if (hlen + cork->gso_size > cork->fragsize) + return -EINVAL; + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite) + return -EIO; + + skb_shinfo(skb)->gso_size = cork->gso_size; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; + } + if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); @@ -828,7 +844,7 @@ int udp_push_pending_frames(struct sock *sk) if (!skb) goto out; - err = udp_send_skb(skb, fl4); + err = udp_send_skb(skb, fl4, &inet->cork.base); out: up->len = 0; @@ -837,6 +853,43 @@ out: } EXPORT_SYMBOL(udp_push_pending_frames); +static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size) +{ + switch (cmsg->cmsg_type) { + case UDP_SEGMENT: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u16))) + return -EINVAL; + *gso_size = *(__u16 *)CMSG_DATA(cmsg); + return 0; + default: + return -EINVAL; + } +} + +int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size) +{ + struct cmsghdr *cmsg; + bool need_ip = false; + int err; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + if (cmsg->cmsg_level != SOL_UDP) { + need_ip = true; + continue; + } + + err = __udp_cmsg_send(cmsg, gso_size); + if (err) + return err; + } + + return need_ip; +} +EXPORT_SYMBOL_GPL(udp_cmsg_send); + int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); @@ -922,10 +975,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.sockc.tsflags = sk->sk_tsflags; ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; + ipc.gso_size = up->gso_size; if (msg->msg_controllen) { - err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); - if (unlikely(err)) { + err = udp_cmsg_send(sk, msg, &ipc.gso_size); + if (err > 0) + err = ip_cmsg_send(sk, msg, &ipc, + sk->sk_family == AF_INET6); + if (unlikely(err < 0)) { kfree(ipc.opt); return err; } @@ -1030,12 +1087,14 @@ back_from_confirm: /* Lockless fast path for the non-corking case. */ if (!corkreq) { + struct inet_cork cork; + skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, sizeof(struct udphdr), &ipc, &rt, - msg->msg_flags); + &cork, msg->msg_flags); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) - err = udp_send_skb(skb, fl4); + err = udp_send_skb(skb, fl4, &cork); goto out; } @@ -2365,6 +2424,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, up->no_check6_rx = valbool; break; + case UDP_SEGMENT: + if (val < 0 || val > USHRT_MAX) + return -EINVAL; + up->gso_size = val; + break; + /* * UDP-Lite's partial checksum coverage (RFC 3828). */ @@ -2455,6 +2520,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = up->no_check6_rx; break; + case UDP_SEGMENT: + val = up->gso_size; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index ea6e6e7df0ee..dc5158cba66e 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -187,6 +187,68 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); +struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, + netdev_features_t features, + unsigned int mss, __sum16 check) +{ + struct sock *sk = gso_skb->sk; + unsigned int sum_truesize = 0; + struct sk_buff *segs, *seg; + unsigned int hdrlen; + struct udphdr *uh; + + if (gso_skb->len <= sizeof(*uh) + mss) + return ERR_PTR(-EINVAL); + + hdrlen = gso_skb->data - skb_mac_header(gso_skb); + skb_pull(gso_skb, sizeof(*uh)); + + /* clear destructor to avoid skb_segment assigning it to tail */ + WARN_ON_ONCE(gso_skb->destructor != sock_wfree); + gso_skb->destructor = NULL; + + segs = skb_segment(gso_skb, features); + if (unlikely(IS_ERR_OR_NULL(segs))) { + gso_skb->destructor = sock_wfree; + return segs; + } + + for (seg = segs; seg; seg = seg->next) { + uh = udp_hdr(seg); + uh->len = htons(seg->len - hdrlen); + uh->check = check; + + /* last packet can be partial gso_size */ + if (!seg->next) + csum_replace2(&uh->check, htons(mss), + htons(seg->len - hdrlen - sizeof(*uh))); + + seg->destructor = sock_wfree; + seg->sk = sk; + sum_truesize += seg->truesize; + } + + refcount_add(sum_truesize - gso_skb->truesize, &sk->sk_wmem_alloc); + + return segs; +} +EXPORT_SYMBOL_GPL(__udp_gso_segment); + +static struct sk_buff *__udp4_gso_segment(struct sk_buff *gso_skb, + netdev_features_t features) +{ + const struct iphdr *iph = ip_hdr(gso_skb); + unsigned int mss = skb_shinfo(gso_skb)->gso_size; + + if (!can_checksum_protocol(features, htons(ETH_P_IP))) + return ERR_PTR(-EIO); + + return __udp_gso_segment(gso_skb, features, mss, + udp_v4_check(sizeof(struct udphdr) + mss, + iph->saddr, iph->daddr, 0)); +} +EXPORT_SYMBOL_GPL(__udp4_gso_segment); + static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { @@ -203,12 +265,15 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4))) goto out; if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + return __udp4_gso_segment(skb, features); + mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) goto out; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7b4d7bbf2c17..fbfd71a2d9c8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3622,8 +3622,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct net *net = dev_net(dev); struct inet6_dev *idev; struct inet6_ifaddr *ifa, *tmp; - int _keep_addr; - bool keep_addr; + bool keep_addr = false; int state, i; ASSERT_RTNL(); @@ -3649,15 +3648,18 @@ static int addrconf_ifdown(struct net_device *dev, int how) } - /* aggregate the system setting and interface setting */ - _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; - if (!_keep_addr) - _keep_addr = idev->cnf.keep_addr_on_down; - /* combine the user config with event to determine if permanent * addresses are to be removed from address hash table */ - keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6); + if (!how && !idev->cnf.disable_ipv6) { + /* aggregate the system setting and interface setting */ + int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down; + + if (!_keep_addr) + _keep_addr = idev->cnf.keep_addr_on_down; + + keep_addr = (_keep_addr > 0); + } /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { @@ -3707,11 +3709,6 @@ restart: write_lock_bh(&idev->lock); } - /* re-combine the user config with event to determine if permanent - * addresses are to be removed from the interface list - */ - keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); - list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { struct fib6_info *rt = NULL; bool keep; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index df113c7b5fc8..6547fc6491a6 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -245,15 +245,18 @@ static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = { static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, - struct nlattr **tb) + struct nlattr **tb, + struct netlink_ext_ack *extack) { int err = -EINVAL; struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { - if (rule->table == RT6_TABLE_UNSPEC) + if (rule->table == RT6_TABLE_UNSPEC) { + NL_SET_ERR_MSG(extack, "Invalid table"); goto errout; + } if (fib6_new_table(net, rule->table) == NULL) { err = -ENOBUFS; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 4a87f9428ca5..5b3f2f89ef41 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -88,9 +88,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6)) - udpfrag = proto == IPPROTO_UDP && encap; + udpfrag = proto == IPPROTO_UDP && encap && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP); else - udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + udpfrag = proto == IPPROTO_UDP && !skb->encapsulation && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP); ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6a477d54f8c7..dfd8af41824e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1240,6 +1240,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (mtu < IPV6_MIN_MTU) return -EINVAL; cork->base.fragsize = mtu; + cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0; + if (dst_allfrag(xfrm_dst_path(&rt->dst))) cork->base.flags |= IPCORK_ALLFRAG; cork->base.length = 0; @@ -1274,6 +1276,7 @@ static int __ip6_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; unsigned int wmem_alloc_delta = 0; + bool paged; skb = skb_peek_tail(queue); if (!skb) { @@ -1281,7 +1284,8 @@ static int __ip6_append_data(struct sock *sk, dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; } - mtu = cork->fragsize; + paged = !!cork->gso_size; + mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1329,7 +1333,7 @@ emsgsize: if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && headersize == sizeof(struct ipv6hdr) && length <= mtu - headersize && - !(flags & MSG_MORE) && + (!(flags & MSG_MORE) || cork->gso_size) && rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; @@ -1372,6 +1376,7 @@ emsgsize: unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; + unsigned int pagedlen = 0; alloc_new_skb: /* There's no room in the current skb */ if (skb) @@ -1394,11 +1399,17 @@ alloc_new_skb: if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; + fraglen = datalen + fragheaderlen; + if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else - alloclen = datalen + fragheaderlen; + else if (!paged) + alloclen = fraglen; + else { + alloclen = min_t(int, fraglen, MAX_HEADER); + pagedlen = fraglen - alloclen; + } alloclen += dst_exthdrlen; @@ -1420,7 +1431,7 @@ alloc_new_skb: */ alloclen += sizeof(struct frag_hdr); - copy = datalen - transhdrlen - fraggap; + copy = datalen - transhdrlen - fraggap - pagedlen; if (copy < 0) { err = -EINVAL; goto error; @@ -1459,7 +1470,7 @@ alloc_new_skb: /* * Find where to start putting bytes */ - data = skb_put(skb, fraglen); + data = skb_put(skb, fraglen - pagedlen); skb_set_network_header(skb, exthdrlen); data += fragheaderlen; skb->transport_header = (skb->network_header + @@ -1482,7 +1493,7 @@ alloc_new_skb: } offset += copy; - length -= datalen - fraggap; + length -= copy + transhdrlen; transhdrlen = 0; exthdrlen = 0; dst_exthdrlen = 0; @@ -1755,9 +1766,9 @@ struct sk_buff *ip6_make_skb(struct sock *sk, void *from, int length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, + struct inet_cork_full *cork, const struct sockcm_cookie *sockc) { - struct inet_cork_full cork; struct inet6_cork v6_cork; struct sk_buff_head queue; int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0); @@ -1768,27 +1779,27 @@ struct sk_buff *ip6_make_skb(struct sock *sk, __skb_queue_head_init(&queue); - cork.base.flags = 0; - cork.base.addr = 0; - cork.base.opt = NULL; - cork.base.dst = NULL; + cork->base.flags = 0; + cork->base.addr = 0; + cork->base.opt = NULL; + cork->base.dst = NULL; v6_cork.opt = NULL; - err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); + err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6); if (err) { - ip6_cork_release(&cork, &v6_cork); + ip6_cork_release(cork, &v6_cork); return ERR_PTR(err); } if (ipc6->dontfrag < 0) ipc6->dontfrag = inet6_sk(sk)->dontfrag; - err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, + err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, flags, ipc6, sockc); if (err) { - __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); + __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); return ERR_PTR(err); } - return __ip6_make_skb(sk, &queue, &cork, &v6_cork); + return __ip6_make_skb(sk, &queue, cork, &v6_cork); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 298fd8b6ed17..20a419ee8000 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -180,7 +180,8 @@ static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = { }; static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct fib_rule_hdr *frh, struct nlattr **tb) + struct fib_rule_hdr *frh, struct nlattr **tb, + struct netlink_ext_ack *extack) { return 0; } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index ccbfa83e4bb0..ce77bcc2490c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -48,6 +48,34 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. +if NF_NAT_IPV6 + +config NFT_CHAIN_NAT_IPV6 + tristate "IPv6 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv6 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. + +config NFT_MASQ_IPV6 + tristate "IPv6 masquerade support for nf_tables" + depends on NFT_MASQ + select NF_NAT_MASQUERADE_IPV6 + help + This is the expression that provides IPv4 masquerading support for + nf_tables. + +config NFT_REDIR_IPV6 + tristate "IPv6 redirect support for nf_tables" + depends on NFT_REDIR + select NF_NAT_REDIRECT + help + This is the expression that provides IPv4 redirect support for + nf_tables. + +endif # NF_NAT_IPV6 + config NFT_REJECT_IPV6 select NF_REJECT_IPV6 default NFT_REJECT @@ -107,39 +135,12 @@ config NF_NAT_IPV6 if NF_NAT_IPV6 -config NFT_CHAIN_NAT_IPV6 - depends on NF_TABLES_IPV6 - tristate "IPv6 nf_tables nat chain support" - help - This option enables the "nat" chain for IPv6 in nf_tables. This - chain type is used to perform Network Address Translation (NAT) - packet transformations such as the source, destination address and - source and destination ports. - config NF_NAT_MASQUERADE_IPV6 tristate "IPv6 masquerade support" help This is the kernel functionality to provide NAT in the masquerade flavour (automatic source address selection) for IPv6. -config NFT_MASQ_IPV6 - tristate "IPv6 masquerade support for nf_tables" - depends on NF_TABLES_IPV6 - depends on NFT_MASQ - select NF_NAT_MASQUERADE_IPV6 - help - This is the expression that provides IPv4 masquerading support for - nf_tables. - -config NFT_REDIR_IPV6 - tristate "IPv6 redirect support for nf_tables" - depends on NF_TABLES_IPV6 - depends on NFT_REDIR - select NF_NAT_REDIRECT - help - This is the expression that provides IPv4 redirect support for - nf_tables. - endif # NF_NAT_IPV6 config IP6_NF_IPTABLES diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0407bbc5a028..7ee0a34fba46 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1541,11 +1541,12 @@ static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, static int rt6_remove_exception_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct fib6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; + struct fib6_info *from; int err; + from = rcu_dereference(rt->from); if (!from || !(rt->rt6i_flags & RTF_CACHE)) return -EINVAL; @@ -2201,10 +2202,12 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) if (rt) { if (rt->rt6i_flags & RTF_CACHE) { + rcu_read_lock(); if (rt6_check_expired(rt)) { rt6_remove_exception_rt(rt); dst = NULL; } + rcu_read_unlock(); } else { dst_release(dst); dst = NULL; @@ -2221,6 +2224,7 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { + rcu_read_lock(); if (rt->rt6i_flags & RTF_CACHE) { if (dst_hold_safe(&rt->dst)) rt6_remove_exception_rt(rt); @@ -2228,15 +2232,14 @@ static void ip6_link_failure(struct sk_buff *skb) struct fib6_info *from; struct fib6_node *fn; - rcu_read_lock(); from = rcu_dereference(rt->from); if (from) { fn = rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) fn->fn_sernum = -1; } - rcu_read_unlock(); } + rcu_read_unlock(); } } @@ -3338,8 +3341,10 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu rcu_read_lock(); from = rcu_dereference(rt->from); - nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); + fib6_info_hold(from); rcu_read_unlock(); + + nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); if (!nrt) goto out; @@ -3353,7 +3358,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu * a cached route because rt6_insert_exception() will * takes care of it */ - if (rt6_insert_exception(nrt, rt->from)) { + if (rt6_insert_exception(nrt, from)) { dst_release_immediate(&nrt->dst); goto out; } @@ -3365,6 +3370,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); out: + fib6_info_release(from); neigh_release(neigh); } @@ -4047,6 +4053,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu) static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, + [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) }, [RTA_OIF] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, @@ -4058,6 +4065,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_EXPIRES] = { .type = NLA_U32 }, [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, + [RTA_TABLE] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index f343e6f0fc95..9898926ce30d 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -91,6 +91,24 @@ static void set_tun_src(struct net *net, struct net_device *dev, rcu_read_unlock(); } +/* Compute flowlabel for outer IPv6 header */ +static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, + struct ipv6hdr *inner_hdr) +{ + int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel; + __be32 flowlabel = 0; + u32 hash; + + if (do_flowlabel > 0) { + hash = skb_get_hash(skb); + rol32(hash, 16); + flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; + } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) { + flowlabel = ip6_flowlabel(inner_hdr); + } + return flowlabel; +} + /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) { @@ -99,6 +117,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) struct ipv6hdr *hdr, *inner_hdr; struct ipv6_sr_hdr *isrh; int hdrlen, tot_len, err; + __be32 flowlabel; hdrlen = (osrh->hdrlen + 1) << 3; tot_len = hdrlen + sizeof(*hdr); @@ -119,12 +138,13 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) * decapsulation will overwrite inner hlim with outer hlim */ + flowlabel = seg6_make_flowlabel(net, skb, inner_hdr); if (skb->protocol == htons(ETH_P_IPV6)) { ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), - ip6_flowlabel(inner_hdr)); + flowlabel); hdr->hop_limit = inner_hdr->hop_limit; } else { - ip6_flow_hdr(hdr, 0, 0); + ip6_flow_hdr(hdr, 0, flowlabel); hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); } @@ -136,7 +156,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 6fbdef630152..e15cd37024fd 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -152,6 +152,13 @@ static struct ctl_table ipv6_table_template[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "seg6_flowlabel", + .data = &init_net.ipv6.sysctl.seg6_flowlabel, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -217,6 +224,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy, + ipv6_table[15].data = &net->ipv6.sysctl.seg6_flowlabel; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4ec76a87aeb8..6acfdd3e442b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1023,7 +1023,8 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, * Sending */ -static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6) +static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, + struct inet_cork *cork) { struct sock *sk = skb->sk; struct udphdr *uh; @@ -1042,6 +1043,21 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6) uh->len = htons(len); uh->check = 0; + if (cork->gso_size) { + const int hlen = skb_network_header_len(skb) + + sizeof(struct udphdr); + + if (hlen + cork->gso_size > cork->fragsize) + return -EINVAL; + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite) + return -EIO; + + skb_shinfo(skb)->gso_size = cork->gso_size; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; + } + if (is_udplite) csum = udplite_csum(skb); else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */ @@ -1093,7 +1109,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) if (!skb) goto out; - err = udp_v6_send_skb(skb, &fl6); + err = udp_v6_send_skb(skb, &fl6, &inet_sk(sk)->cork.base); out: up->len = 0; @@ -1127,6 +1143,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = -1; ipc6.tclass = -1; ipc6.dontfrag = -1; + ipc6.gso_size = up->gso_size; sockc.tsflags = sk->sk_tsflags; /* destination address check */ @@ -1259,7 +1276,10 @@ do_udp_sendmsg: opt->tot_len = sizeof(*opt); ipc6.opt = opt; - err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc); + err = udp_cmsg_send(sk, msg, &ipc6.gso_size); + if (err > 0) + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, + &ipc6, &sockc); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1324,15 +1344,16 @@ back_from_confirm: /* Lockless fast path for the non-corking case */ if (!corkreq) { + struct inet_cork_full cork; struct sk_buff *skb; skb = ip6_make_skb(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, &fl6, (struct rt6_info *)dst, - msg->msg_flags, &sockc); + msg->msg_flags, &cork, &sockc); err = PTR_ERR(skb); if (!IS_ERR_OR_NULL(skb)) - err = udp_v6_send_skb(skb, &fl6); + err = udp_v6_send_skb(skb, &fl6, &cork.base); goto out; } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 2a04dc9c781b..f7b85b1e6b3e 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -17,6 +17,20 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" +static struct sk_buff *__udp6_gso_segment(struct sk_buff *gso_skb, + netdev_features_t features) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(gso_skb); + unsigned int mss = skb_shinfo(gso_skb)->gso_size; + + if (!can_checksum_protocol(features, htons(ETH_P_IPV6))) + return ERR_PTR(-EIO); + + return __udp_gso_segment(gso_skb, features, mss, + udp_v6_check(sizeof(struct udphdr) + mss, + &ip6h->saddr, &ip6h->daddr, 0)); +} + static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { @@ -42,12 +56,15 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, const struct ipv6hdr *ipv6h; struct udphdr *uh; - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_UDP | SKB_GSO_UDP_L4))) goto out; if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) + return __udp6_gso_segment(skb, features); + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot * do checksum of UDP packets sent as multiple IP fragments. */ diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index b8f9d45bfeb1..7f1e842ef05a 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -106,8 +106,11 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v) return; /* Drop reference taken by last invocation of l2tp_dfs_next_tunnel() */ - if (pd->tunnel) + if (pd->tunnel) { l2tp_tunnel_dec_refcount(pd->tunnel); + pd->tunnel = NULL; + pd->session = NULL; + } } static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7d0c963680e6..1fd9e145076a 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -619,6 +619,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); error = -EINVAL; + + if (sockaddr_len != sizeof(struct sockaddr_pppol2tp) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpv3) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpin6) && + sockaddr_len != sizeof(struct sockaddr_pppol2tpv3in6)) + goto end; + if (sp->sa_protocol != PX_PROTO_OL2TP) goto end; @@ -1618,8 +1625,11 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v) return; /* Drop reference taken by last invocation of pppol2tp_next_tunnel() */ - if (pd->tunnel) + if (pd->tunnel) { l2tp_tunnel_dec_refcount(pd->tunnel); + pd->tunnel = NULL; + pd->session = NULL; + } } static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 6d29b2b94e84..cb80ebb38311 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -189,7 +189,6 @@ static int llc_ui_release(struct socket *sock) { struct sock *sk = sock->sk; struct llc_sock *llc; - struct llc_sap *sap; if (unlikely(sk == NULL)) goto out; @@ -200,15 +199,19 @@ static int llc_ui_release(struct socket *sock) llc->laddr.lsap, llc->daddr.lsap); if (!llc_send_disc(sk)) llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo); - sap = llc->sap; - /* Hold this for release_sock(), so that llc_backlog_rcv() could still - * use it. - */ - llc_sap_hold(sap); - if (!sock_flag(sk, SOCK_ZAPPED)) + if (!sock_flag(sk, SOCK_ZAPPED)) { + struct llc_sap *sap = llc->sap; + + /* Hold this for release_sock(), so that llc_backlog_rcv() + * could still use it. + */ + llc_sap_hold(sap); llc_sap_remove_socket(llc->sap, sk); - release_sock(sk); - llc_sap_put(sap); + release_sock(sk); + llc_sap_put(sap); + } else { + release_sock(sk); + } if (llc->dev) dev_put(llc->dev); sock_put(sk); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 163121192aca..4d78375f9872 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1099,14 +1099,7 @@ int llc_conn_ac_inc_tx_win_size(struct sock *sk, struct sk_buff *skb) int llc_conn_ac_stop_all_timers(struct sock *sk, struct sk_buff *skb) { - struct llc_sock *llc = llc_sk(sk); - - del_timer(&llc->pf_cycle_timer.timer); - del_timer(&llc->ack_timer.timer); - del_timer(&llc->rej_sent_timer.timer); - del_timer(&llc->busy_state_timer.timer); - llc->ack_must_be_send = 0; - llc->ack_pf = 0; + llc_sk_stop_all_timers(sk, false); return 0; } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 110e32bcb399..c0ac522b48a1 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -961,6 +961,26 @@ out: return sk; } +void llc_sk_stop_all_timers(struct sock *sk, bool sync) +{ + struct llc_sock *llc = llc_sk(sk); + + if (sync) { + del_timer_sync(&llc->pf_cycle_timer.timer); + del_timer_sync(&llc->ack_timer.timer); + del_timer_sync(&llc->rej_sent_timer.timer); + del_timer_sync(&llc->busy_state_timer.timer); + } else { + del_timer(&llc->pf_cycle_timer.timer); + del_timer(&llc->ack_timer.timer); + del_timer(&llc->rej_sent_timer.timer); + del_timer(&llc->busy_state_timer.timer); + } + + llc->ack_must_be_send = 0; + llc->ack_pf = 0; +} + /** * llc_sk_free - Frees a LLC socket * @sk - socket to free @@ -973,7 +993,7 @@ void llc_sk_free(struct sock *sk) llc->state = LLC_CONN_OUT_OF_SVC; /* Stop all (possibly) running timers */ - llc_conn_ac_stop_all_timers(sk, NULL); + llc_sk_stop_all_timers(sk, true); #ifdef DEBUG_LLC_CONN_ALLOC printk(KERN_INFO "%s: unackq=%d, txq=%d\n", __func__, skb_queue_len(&llc->pdu_unack_q), diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 704b3832dbad..44d8a55e9721 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -594,6 +594,7 @@ config NFT_QUOTA config NFT_REJECT default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" + depends on !NF_TABLES_INET || (IPV6!=m || m) help This option adds the "reject" expression that you can use to explicitly deny and notify via TCP reset/ICMP informational errors diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5ebde4b15810..f36098887ad0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2384,11 +2384,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) strlcpy(cfg.mcast_ifn, dm->mcast_ifn, sizeof(cfg.mcast_ifn)); cfg.syncid = dm->syncid; - rtnl_lock(); - mutex_lock(&ipvs->sync_mutex); ret = start_sync_thread(ipvs, &cfg, dm->state); - mutex_unlock(&ipvs->sync_mutex); - rtnl_unlock(); } else { mutex_lock(&ipvs->sync_mutex); ret = stop_sync_thread(ipvs, dm->state); @@ -3481,12 +3477,8 @@ static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) if (ipvs->mixed_address_family_dests > 0) return -EINVAL; - rtnl_lock(); - mutex_lock(&ipvs->sync_mutex); ret = start_sync_thread(ipvs, &c, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); - mutex_unlock(&ipvs->sync_mutex); - rtnl_unlock(); return ret; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index fbaf3bd05b2e..001501e25625 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -49,6 +49,7 @@ #include <linux/kthread.h> #include <linux/wait.h> #include <linux/kernel.h> +#include <linux/sched/signal.h> #include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */ @@ -1360,15 +1361,9 @@ static void set_mcast_pmtudisc(struct sock *sk, int val) /* * Specifiy default interface for outgoing multicasts */ -static int set_mcast_if(struct sock *sk, char *ifname) +static int set_mcast_if(struct sock *sk, struct net_device *dev) { - struct net_device *dev; struct inet_sock *inet = inet_sk(sk); - struct net *net = sock_net(sk); - - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1396,19 +1391,14 @@ static int set_mcast_if(struct sock *sk, char *ifname) * in the in_addr structure passed in as a parameter. */ static int -join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) +join_mcast_group(struct sock *sk, struct in_addr *addr, struct net_device *dev) { - struct net *net = sock_net(sk); struct ip_mreqn mreq; - struct net_device *dev; int ret; memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1423,15 +1413,10 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) #ifdef CONFIG_IP_VS_IPV6 static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, - char *ifname) + struct net_device *dev) { - struct net *net = sock_net(sk); - struct net_device *dev; int ret; - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1443,24 +1428,18 @@ static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, } #endif -static int bind_mcastif_addr(struct socket *sock, char *ifname) +static int bind_mcastif_addr(struct socket *sock, struct net_device *dev) { - struct net *net = sock_net(sock->sk); - struct net_device *dev; __be32 addr; struct sockaddr_in sin; - dev = __dev_get_by_name(net, ifname); - if (!dev) - return -ENODEV; - addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); if (!addr) pr_err("You probably need to specify IP address on " "multicast interface.\n"); IP_VS_DBG(7, "binding socket with (%s) %pI4\n", - ifname, &addr); + dev->name, &addr); /* Now bind the socket with the address of multicast interface */ sin.sin_family = AF_INET; @@ -1493,7 +1472,8 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, /* * Set up sending multicast socket over UDP */ -static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) +static int make_send_sock(struct netns_ipvs *ipvs, int id, + struct net_device *dev, struct socket **sock_ret) { /* multicast addr */ union ipvs_sockaddr mcast_addr; @@ -1505,9 +1485,10 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); - return ERR_PTR(result); + goto error; } - result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn); + *sock_ret = sock; + result = set_mcast_if(sock->sk, dev); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); goto error; @@ -1522,7 +1503,7 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) set_sock_size(sock->sk, 1, result); if (AF_INET == ipvs->mcfg.mcast_af) - result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn); + result = bind_mcastif_addr(sock, dev); else result = 0; if (result < 0) { @@ -1538,19 +1519,18 @@ static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) goto error; } - return sock; + return 0; error: - sock_release(sock); - return ERR_PTR(result); + return result; } /* * Set up receiving multicast socket over UDP */ -static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, - int ifindex) +static int make_receive_sock(struct netns_ipvs *ipvs, int id, + struct net_device *dev, struct socket **sock_ret) { /* multicast addr */ union ipvs_sockaddr mcast_addr; @@ -1562,8 +1542,9 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); - return ERR_PTR(result); + goto error; } + *sock_ret = sock; /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = SK_CAN_REUSE; result = sysctl_sync_sock_size(ipvs); @@ -1571,7 +1552,7 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, set_sock_size(sock->sk, 0, result); get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); - sock->sk->sk_bound_dev_if = ifindex; + sock->sk->sk_bound_dev_if = dev->ifindex; result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); @@ -1582,21 +1563,20 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id, #ifdef CONFIG_IP_VS_IPV6 if (ipvs->bcfg.mcast_af == AF_INET6) result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr, - ipvs->bcfg.mcast_ifn); + dev); else #endif result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr, - ipvs->bcfg.mcast_ifn); + dev); if (result < 0) { pr_err("Error joining to the multicast group\n"); goto error; } - return sock; + return 0; error: - sock_release(sock); - return ERR_PTR(result); + return result; } @@ -1778,13 +1758,12 @@ static int sync_thread_backup(void *data) int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, int state) { - struct ip_vs_sync_thread_data *tinfo; + struct ip_vs_sync_thread_data *tinfo = NULL; struct task_struct **array = NULL, *task; - struct socket *sock; struct net_device *dev; char *name; int (*threadfn)(void *data); - int id, count, hlen; + int id = 0, count, hlen; int result = -ENOMEM; u16 mtu, min_mtu; @@ -1792,6 +1771,18 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + /* Do not hold one mutex and then to block on another */ + for (;;) { + rtnl_lock(); + if (mutex_trylock(&ipvs->sync_mutex)) + break; + rtnl_unlock(); + mutex_lock(&ipvs->sync_mutex); + if (rtnl_trylock()) + break; + mutex_unlock(&ipvs->sync_mutex); + } + if (!ipvs->sync_state) { count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX); ipvs->threads_mask = count - 1; @@ -1810,7 +1801,8 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, dev = __dev_get_by_name(ipvs->net, c->mcast_ifn); if (!dev) { pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); - return -ENODEV; + result = -ENODEV; + goto out_early; } hlen = (AF_INET6 == c->mcast_af) ? sizeof(struct ipv6hdr) + sizeof(struct udphdr) : @@ -1827,26 +1819,30 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, c->sync_maxlen = mtu - hlen; if (state == IP_VS_STATE_MASTER) { + result = -EEXIST; if (ipvs->ms) - return -EEXIST; + goto out_early; ipvs->mcfg = *c; name = "ipvs-m:%d:%d"; threadfn = sync_thread_master; } else if (state == IP_VS_STATE_BACKUP) { + result = -EEXIST; if (ipvs->backup_threads) - return -EEXIST; + goto out_early; ipvs->bcfg = *c; name = "ipvs-b:%d:%d"; threadfn = sync_thread_backup; } else { - return -EINVAL; + result = -EINVAL; + goto out_early; } if (state == IP_VS_STATE_MASTER) { struct ipvs_master_sync_state *ms; + result = -ENOMEM; ipvs->ms = kcalloc(count, sizeof(ipvs->ms[0]), GFP_KERNEL); if (!ipvs->ms) goto out; @@ -1862,39 +1858,38 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, } else { array = kcalloc(count, sizeof(struct task_struct *), GFP_KERNEL); + result = -ENOMEM; if (!array) goto out; } - tinfo = NULL; for (id = 0; id < count; id++) { - if (state == IP_VS_STATE_MASTER) - sock = make_send_sock(ipvs, id); - else - sock = make_receive_sock(ipvs, id, dev->ifindex); - if (IS_ERR(sock)) { - result = PTR_ERR(sock); - goto outtinfo; - } + result = -ENOMEM; tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); if (!tinfo) - goto outsocket; + goto out; tinfo->ipvs = ipvs; - tinfo->sock = sock; + tinfo->sock = NULL; if (state == IP_VS_STATE_BACKUP) { tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, GFP_KERNEL); if (!tinfo->buf) - goto outtinfo; + goto out; } else { tinfo->buf = NULL; } tinfo->id = id; + if (state == IP_VS_STATE_MASTER) + result = make_send_sock(ipvs, id, dev, &tinfo->sock); + else + result = make_receive_sock(ipvs, id, dev, &tinfo->sock); + if (result < 0) + goto out; task = kthread_run(threadfn, tinfo, name, ipvs->gen, id); if (IS_ERR(task)) { result = PTR_ERR(task); - goto outtinfo; + goto out; } tinfo = NULL; if (state == IP_VS_STATE_MASTER) @@ -1911,20 +1906,20 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, ipvs->sync_state |= state; spin_unlock_bh(&ipvs->sync_buff_lock); + mutex_unlock(&ipvs->sync_mutex); + rtnl_unlock(); + /* increase the module use count */ ip_vs_use_count_inc(); return 0; -outsocket: - sock_release(sock); - -outtinfo: - if (tinfo) { - sock_release(tinfo->sock); - kfree(tinfo->buf); - kfree(tinfo); - } +out: + /* We do not need RTNL lock anymore, release it here so that + * sock_release below and in the kthreads can use rtnl_lock + * to leave the mcast group. + */ + rtnl_unlock(); count = id; while (count-- > 0) { if (state == IP_VS_STATE_MASTER) @@ -1932,13 +1927,23 @@ outtinfo: else kthread_stop(array[count]); } - kfree(array); - -out: if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { kfree(ipvs->ms); ipvs->ms = NULL; } + mutex_unlock(&ipvs->sync_mutex); + if (tinfo) { + if (tinfo->sock) + sock_release(tinfo->sock); + kfree(tinfo->buf); + kfree(tinfo); + } + kfree(array); + return result; + +out_early: + mutex_unlock(&ipvs->sync_mutex); + rtnl_unlock(); return result; } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 8ef21d9f9a00..4b2b3d53acfc 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -252,7 +252,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, static inline int expect_matches(const struct nf_conntrack_expect *a, const struct nf_conntrack_expect *b) { - return a->master == b->master && a->class == b->class && + return a->master == b->master && nf_ct_tuple_equal(&a->tuple, &b->tuple) && nf_ct_tuple_mask_equal(&a->mask, &b->mask) && net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) && @@ -421,6 +421,9 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) h = nf_ct_expect_dst_hash(net, &expect->tuple); hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) { if (expect_matches(i, expect)) { + if (i->class != expect->class) + return -EALREADY; + if (nf_ct_remove_expect(i)) break; } else if (expect_clash(i, expect)) { diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 9fe0ddc333fb..277bbfe26478 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> +#include <linux/kmemleak.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/rcupdate.h> @@ -71,6 +72,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) rcu_read_unlock(); alloc = max(newlen, NF_CT_EXT_PREALLOC); + kmemleak_not_leak(old); new = __krealloc(old, alloc, gfp); if (!new) return NULL; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4dbb5bad4363..908e51e2dc2b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -938,11 +938,19 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { - if (nf_ct_expect_related(rtp_exp) == 0) { - if (nf_ct_expect_related(rtcp_exp) != 0) - nf_ct_unexpect_related(rtp_exp); - else + /* -EALREADY handling works around end-points that send + * SDP messages with identical port but different media type, + * we pretend expectation was set up. + */ + int errp = nf_ct_expect_related(rtp_exp); + + if (errp == 0 || errp == -EALREADY) { + int errcp = nf_ct_expect_related(rtcp_exp); + + if (errcp == 0 || errcp == -EALREADY) ret = NF_ACCEPT; + else if (errp == 0) + nf_ct_unexpect_related(rtp_exp); } } nf_ct_expect_put(rtcp_exp); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9134cc429ad4..04d4e3772584 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2361,41 +2361,46 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (nft_is_active_next(net, old_rule)) { - trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, - old_rule); - if (trans == NULL) { - err = -ENOMEM; - goto err2; - } - nft_deactivate_next(net, old_rule); - chain->use--; - list_add_tail_rcu(&rule->list, &old_rule->list); - } else { + if (!nft_is_active_next(net, old_rule)) { err = -ENOENT; goto err2; } - } else if (nlh->nlmsg_flags & NLM_F_APPEND) - if (old_rule) - list_add_rcu(&rule->list, &old_rule->list); - else - list_add_tail_rcu(&rule->list, &chain->rules); - else { - if (old_rule) - list_add_tail_rcu(&rule->list, &old_rule->list); - else - list_add_rcu(&rule->list, &chain->rules); - } + trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, + old_rule); + if (trans == NULL) { + err = -ENOMEM; + goto err2; + } + nft_deactivate_next(net, old_rule); + chain->use--; - if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { - err = -ENOMEM; - goto err3; + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { + err = -ENOMEM; + goto err2; + } + + list_add_tail_rcu(&rule->list, &old_rule->list); + } else { + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { + err = -ENOMEM; + goto err2; + } + + if (nlh->nlmsg_flags & NLM_F_APPEND) { + if (old_rule) + list_add_rcu(&rule->list, &old_rule->list); + else + list_add_tail_rcu(&rule->list, &chain->rules); + } else { + if (old_rule) + list_add_tail_rcu(&rule->list, &old_rule->list); + else + list_add_rcu(&rule->list, &chain->rules); + } } chain->use++; return 0; -err3: - list_del_rcu(&rule->list); err2: nf_tables_rule_destroy(&ctx, rule); err1: @@ -3207,18 +3212,20 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, err = ops->init(set, &desc, nla); if (err < 0) - goto err2; + goto err3; err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err3; + goto err4; list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; -err3: +err4: ops->destroy(set); +err3: + kfree(set->name); err2: kvfree(set); err1: @@ -5738,7 +5745,7 @@ static void nft_chain_commit_update(struct nft_trans *trans) struct nft_base_chain *basechain; if (nft_trans_chain_name(trans)) - strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans)); + swap(trans->ctx.chain->name, nft_trans_chain_name(trans)); if (!nft_is_base_chain(trans->ctx.chain)) return; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 773da82190dc..94df000abb92 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -36,11 +36,10 @@ MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); static unsigned int -connmark_tg_shift(struct sk_buff *skb, - const struct xt_connmark_tginfo1 *info, - u8 shift_bits, u8 shift_dir) +connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) { enum ip_conntrack_info ctinfo; + u_int32_t new_targetmark; struct nf_conn *ct; u_int32_t newmark; @@ -51,34 +50,39 @@ connmark_tg_shift(struct sk_buff *skb, switch (info->mode) { case XT_CONNMARK_SET: newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; - if (shift_dir == D_SHIFT_RIGHT) - newmark >>= shift_bits; + if (info->shift_dir == D_SHIFT_RIGHT) + newmark >>= info->shift_bits; else - newmark <<= shift_bits; + newmark <<= info->shift_bits; + if (ct->mark != newmark) { ct->mark = newmark; nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: - newmark = (ct->mark & ~info->ctmask) ^ - (skb->mark & info->nfmask); - if (shift_dir == D_SHIFT_RIGHT) - newmark >>= shift_bits; + new_targetmark = (skb->mark & info->nfmask); + if (info->shift_dir == D_SHIFT_RIGHT) + new_targetmark >>= info->shift_bits; else - newmark <<= shift_bits; + new_targetmark <<= info->shift_bits; + + newmark = (ct->mark & ~info->ctmask) ^ + new_targetmark; if (ct->mark != newmark) { ct->mark = newmark; nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: - newmark = (skb->mark & ~info->nfmask) ^ - (ct->mark & info->ctmask); - if (shift_dir == D_SHIFT_RIGHT) - newmark >>= shift_bits; + new_targetmark = (ct->mark & info->ctmask); + if (info->shift_dir == D_SHIFT_RIGHT) + new_targetmark >>= info->shift_bits; else - newmark <<= shift_bits; + new_targetmark <<= info->shift_bits; + + newmark = (skb->mark & ~info->nfmask) ^ + new_targetmark; skb->mark = newmark; break; } @@ -89,8 +93,14 @@ static unsigned int connmark_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_tginfo1 *info = par->targinfo; - - return connmark_tg_shift(skb, info, 0, 0); + const struct xt_connmark_tginfo2 info2 = { + .ctmark = info->ctmark, + .ctmask = info->ctmask, + .nfmask = info->nfmask, + .mode = info->mode, + }; + + return connmark_tg_shift(skb, &info2); } static unsigned int @@ -98,8 +108,7 @@ connmark_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_connmark_tginfo2 *info = par->targinfo; - return connmark_tg_shift(skb, (const struct xt_connmark_tginfo1 *)info, - info->shift_bits, info->shift_dir); + return connmark_tg_shift(skb, info); } static int connmark_tg_check(const struct xt_tgchk_param *par) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c31b0687396a..01f3515cada0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -329,11 +329,11 @@ static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb) skb_set_queue_mapping(skb, queue_index); } -/* register_prot_hook must be invoked with the po->bind_lock held, +/* __register_prot_hook must be invoked through register_prot_hook * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). */ -static void register_prot_hook(struct sock *sk) +static void __register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); @@ -348,8 +348,13 @@ static void register_prot_hook(struct sock *sk) } } -/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock - * held. If the sync parameter is true, we will temporarily drop +static void register_prot_hook(struct sock *sk) +{ + lockdep_assert_held_once(&pkt_sk(sk)->bind_lock); + __register_prot_hook(sk); +} + +/* If the sync parameter is true, we will temporarily drop * the po->bind_lock and do a synchronize_net to make sure no * asynchronous packet processing paths still refer to the elements * of po->prot_hook. If the sync parameter is false, it is the @@ -359,6 +364,8 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) { struct packet_sock *po = pkt_sk(sk); + lockdep_assert_held_once(&po->bind_lock); + po->running = 0; if (po->fanout) @@ -3252,7 +3259,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, if (proto) { po->prot_hook.type = proto; - register_prot_hook(sk); + __register_prot_hook(sk); } mutex_lock(&net->packet.sklist_lock); @@ -3732,12 +3739,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->tp_loss = !!val; - return 0; + + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_loss = !!val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_AUXDATA: { @@ -3748,7 +3761,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + lock_sock(sk); po->auxdata = !!val; + release_sock(sk); return 0; } case PACKET_ORIGDEV: @@ -3760,7 +3775,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + lock_sock(sk); po->origdev = !!val; + release_sock(sk); return 0; } case PACKET_VNET_HDR: @@ -3769,15 +3786,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (sock->type != SOCK_RAW) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->has_vnet_hdr = !!val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->has_vnet_hdr = !!val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_TIMESTAMP: { @@ -3815,11 +3837,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; - po->tp_tx_has_off = !!val; + + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_tx_has_off = !!val; + ret = 0; + } + release_sock(sk); return 0; } case PACKET_QDISC_BYPASS: diff --git a/net/packet/internal.h b/net/packet/internal.h index a1d2b2319ae9..3bb7c5fb3bff 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -112,10 +112,12 @@ struct packet_sock { int copy_thresh; spinlock_t bind_lock; struct mutex pg_vec_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, + unsigned int running; /* bind_lock must be held */ + unsigned int auxdata:1, /* writer must hold sock lock */ origdev:1, - has_vnet_hdr:1; + has_vnet_hdr:1, + tp_loss:1, + tp_tx_has_off:1; int pressure; int ifindex; /* bound device */ __be16 num; @@ -125,8 +127,6 @@ struct packet_sock { enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; - unsigned int tp_loss:1; - unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct net_device __rcu *cached_dev; int (*xmit)(struct sk_buff *skb); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index eea1d8611b20..13b38ad0fa4a 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -547,7 +547,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, ic->i_send_cq, ic->i_recv_cq); - return ret; + goto out; sends_out: vfree(ic->i_sends); @@ -572,6 +572,7 @@ send_cq_out: ic->i_send_cq = NULL; rds_ibdev_out: rds_ib_remove_conn(rds_ibdev, conn); +out: rds_ib_dev_put(rds_ibdev); return ret; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index a5994cf0512b..8527cfdc446d 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -652,7 +652,7 @@ static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, } } - return 0; + return -ENOENT; } static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, @@ -682,7 +682,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u16 mtype; u16 dlen; - curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL); + curr_data = ife_tlv_meta_decode(tlv_data, ifehdr_end, &mtype, + &dlen, NULL); + if (!curr_data) { + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); + return TC_ACT_SHOT; + } if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) { /* abuse overlimits to count when we receive metadata diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 837806dd5799..a8f3b088fcb2 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -988,31 +988,6 @@ out: return match; } -/* Is this the association we are looking for? */ -struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, - struct net *net, - const union sctp_addr *laddr, - const union sctp_addr *paddr) -{ - struct sctp_transport *transport; - - if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) && - (htons(asoc->peer.port) == paddr->v4.sin_port) && - net_eq(sock_net(asoc->base.sk), net)) { - transport = sctp_assoc_lookup_paddr(asoc, paddr); - if (!transport) - goto out; - - if (sctp_bind_addr_match(&asoc->base.bind_addr, laddr, - sctp_sk(asoc->base.sk))) - goto out; - } - transport = NULL; - -out: - return transport; -} - /* Do delayed input processing. This is scheduled by sctp_rcv(). */ static void sctp_assoc_bh_rcv(struct work_struct *work) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index f211b3db6a35..dee7cbd54831 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1457,7 +1457,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * the outstanding bytes for this chunk, so only * count bytes associated with a transport. */ - if (transport) { + if (transport && !tchunk->tsn_gap_acked) { /* If this chunk is being used for RTT * measurement, calculate the RTT and update * the RTO using this value. @@ -1469,14 +1469,34 @@ static void sctp_check_transmitted(struct sctp_outq *q, * first instance of the packet or a later * instance). */ - if (!tchunk->tsn_gap_acked && - !sctp_chunk_retransmitted(tchunk) && + if (!sctp_chunk_retransmitted(tchunk) && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; sctp_transport_update_rto(transport, rtt); } + + if (TSN_lte(tsn, sack_ctsn)) { + /* + * SFR-CACC algorithm: + * 2) If the SACK contains gap acks + * and the flag CHANGEOVER_ACTIVE is + * set the receiver of the SACK MUST + * take the following action: + * + * B) For each TSN t being acked that + * has not been acked in any SACK so + * far, set cacc_saw_newack to 1 for + * the destination that the TSN was + * sent to. + */ + if (sack->num_gap_ack_blocks && + q->asoc->peer.primary_path->cacc. + changeover_active) + transport->cacc.cacc_saw_newack + = 1; + } } /* If the chunk hasn't been marked as ACKED, @@ -1508,28 +1528,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, restart_timer = 1; forward_progress = true; - if (!tchunk->tsn_gap_acked) { - /* - * SFR-CACC algorithm: - * 2) If the SACK contains gap acks - * and the flag CHANGEOVER_ACTIVE is - * set the receiver of the SACK MUST - * take the following action: - * - * B) For each TSN t being acked that - * has not been acked in any SACK so - * far, set cacc_saw_newack to 1 for - * the destination that the TSN was - * sent to. - */ - if (transport && - sack->num_gap_ack_blocks && - q->asoc->peer.primary_path->cacc. - changeover_active) - transport->cacc.cacc_saw_newack - = 1; - } - list_add_tail(&tchunk->transmitted_list, &q->sacked); } else { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 5a4fb1dc8400..db93eabd6ef5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -779,10 +779,9 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, * association. This reports on which TSN's we've seen to date, * including duplicates and gaps. */ -struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) +struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc) { struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; - struct sctp_association *aptr = (struct sctp_association *)asoc; struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; __u16 num_gabs, num_dup_tsns; struct sctp_transport *trans; @@ -857,7 +856,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) /* Add the duplicate TSN information. */ if (num_dup_tsns) { - aptr->stats.idupchunks += num_dup_tsns; + asoc->stats.idupchunks += num_dup_tsns; sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns, sctp_tsnmap_get_dups(map)); } @@ -869,11 +868,11 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) * association so no transport will match after a wrap event like this, * Until the next sack */ - if (++aptr->peer.sack_generation == 0) { + if (++asoc->peer.sack_generation == 0) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) trans->sack_generation = 0; - aptr->peer.sack_generation = 1; + asoc->peer.sack_generation = 1; } nodata: return retval; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index f5d4b69dbabc..4470501374bf 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -978,10 +978,6 @@ static void smc_tcp_listen_work(struct work_struct *work) } out: - if (lsmc->clcsock) { - sock_release(lsmc->clcsock); - lsmc->clcsock = NULL; - } release_sock(lsk); sock_put(&lsmc->sk); /* sock_hold in smc_listen */ } diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 805b139756db..092bebc70048 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -67,7 +67,7 @@ static void strp_abort_strp(struct strparser *strp, int err) static void strp_start_timer(struct strparser *strp, long timeo) { - if (timeo) + if (timeo && timeo != LONG_MAX) mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo); } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 71e79597f940..6ed1c02cfc94 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -693,8 +693,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb, if (!sgout) { nsg = skb_cow_data(skb, 0, &unused) + 1; sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation); - if (!sgout) - sgout = sgin; + sgout = sgin; } sg_init_table(sgin, nsg); diff --git a/security/commoncap.c b/security/commoncap.c index 48620c93d697..1ce701fcb3f3 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -449,6 +449,8 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, magic |= VFS_CAP_FLAGS_EFFECTIVE; memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32); cap->magic_etc = cpu_to_le32(magic); + } else { + size = -ENOMEM; } } kfree(tmpbuf); diff --git a/sound/core/rawmidi_compat.c b/sound/core/rawmidi_compat.c index f69764d7cdd7..e30e30ba6e39 100644 --- a/sound/core/rawmidi_compat.c +++ b/sound/core/rawmidi_compat.c @@ -36,8 +36,6 @@ static int snd_rawmidi_ioctl_params_compat(struct snd_rawmidi_file *rfile, struct snd_rawmidi_params params; unsigned int val; - if (rfile->output == NULL) - return -EINVAL; if (get_user(params.stream, &src->stream) || get_user(params.buffer_size, &src->buffer_size) || get_user(params.avail_min, &src->avail_min) || @@ -46,8 +44,12 @@ static int snd_rawmidi_ioctl_params_compat(struct snd_rawmidi_file *rfile, params.no_active_sensing = val; switch (params.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; return snd_rawmidi_output_params(rfile->output, ¶ms); case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; return snd_rawmidi_input_params(rfile->input, ¶ms); } return -EINVAL; @@ -67,16 +69,18 @@ static int snd_rawmidi_ioctl_status_compat(struct snd_rawmidi_file *rfile, int err; struct snd_rawmidi_status status; - if (rfile->output == NULL) - return -EINVAL; if (get_user(status.stream, &src->stream)) return -EFAULT; switch (status.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status); break; case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status); break; default: @@ -112,16 +116,18 @@ static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile, int err; struct snd_rawmidi_status status; - if (rfile->output == NULL) - return -EINVAL; if (get_user(status.stream, &src->stream)) return -EFAULT; switch (status.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status); break; case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status); break; default: diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 7a111a1b5836..b0c8c79848a9 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1647,7 +1647,8 @@ static void azx_check_snoop_available(struct azx *chip) */ u8 val; pci_read_config_byte(chip->pci, 0x42, &val); - if (!(val & 0x80) && chip->pci->revision == 0x30) + if (!(val & 0x80) && (chip->pci->revision == 0x30 || + chip->pci->revision == 0x20)) snoop = false; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index aef1f52db7d9..fc77bf7a1544 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6370,6 +6370,8 @@ static const struct hda_fixup alc269_fixups[] = { { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ { } }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC }, }; @@ -6573,6 +6575,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x3138, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), diff --git a/sound/usb/line6/midi.c b/sound/usb/line6/midi.c index 6d7cde56a355..e2cf55c53ea8 100644 --- a/sound/usb/line6/midi.c +++ b/sound/usb/line6/midi.c @@ -125,7 +125,7 @@ static int send_midi_async(struct usb_line6 *line6, unsigned char *data, } usb_fill_int_urb(urb, line6->usbdev, - usb_sndbulkpipe(line6->usbdev, + usb_sndintpipe(line6->usbdev, line6->properties->ep_ctrl_w), transfer_buffer, length, midi_sent, line6, line6->interval); diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 6edd177bb1c7..2ba95d6fe852 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -135,6 +135,15 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_CRM_SHIFT 7 #define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 #define KVM_REG_ARM_32_CRN_SHIFT 11 +/* + * For KVM currently all guest registers are nonsecure, but we reserve a bit + * in the encoding to distinguish secure from nonsecure for AArch32 system + * registers that are banked by security. This is 1 for the secure banked + * register, and 0 for the nonsecure banked register or if the register is + * not banked by security. + */ +#define KVM_REG_ARM_SECURE_MASK 0x0000000010000000 +#define KVM_REG_ARM_SECURE_SHIFT 28 #define ARM_CP15_REG_SHIFT_MASK(x,n) \ (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK) diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index fb3a6de7440b..6847d85400a8 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -53,12 +53,6 @@ # define NEED_MOVBE 0 #endif -#ifdef CONFIG_X86_5LEVEL -# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#else -# define NEED_LA57 0 -#endif - #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -104,7 +98,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 (NEED_LA57) +#define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index f3a960488eae..c535c2fdea13 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -354,8 +354,25 @@ struct kvm_xcrs { __u64 padding[16]; }; -/* definition of registers in kvm_run */ +#define KVM_SYNC_X86_REGS (1UL << 0) +#define KVM_SYNC_X86_SREGS (1UL << 1) +#define KVM_SYNC_X86_EVENTS (1UL << 2) + +#define KVM_SYNC_X86_VALID_FIELDS \ + (KVM_SYNC_X86_REGS| \ + KVM_SYNC_X86_SREGS| \ + KVM_SYNC_X86_EVENTS) + +/* kvm_sync_regs struct included by kvm_run struct */ struct kvm_sync_regs { + /* Members of this structure are potentially malicious. + * Care must be taken by code reading, esp. interpreting, + * data fields from them inside KVM to prevent TOCTOU and + * double-fetch types of vulnerabilities. + */ + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_vcpu_events events; }; #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 1ea545965ee3..53b60ad452f5 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -76,6 +76,8 @@ $(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.le $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c +$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c +$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c clean: bpftool_clean $(call QUIET_CLEAN, bpf-progs) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 04e32f965ad7..1827c2f973f9 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -151,11 +151,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * required ordering. */ -#define READ_ONCE(x) \ - ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) - -#define WRITE_ONCE(x, val) \ - ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__c = { 0 } }; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) #ifndef __fallthrough diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index edfeaba95429..a1a959ba24ff 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _LINUX_CORESIGHT_PMU_H diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index f8b134f5608f..e7ee32861d51 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -27,6 +27,9 @@ # define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ #endif +/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ + /* * Flags for mlock */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 6b89f87db200..1065006c9bf5 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -396,6 +396,10 @@ struct kvm_run { char padding[256]; }; + /* 2048 is the size of the char array used to bound/pad the size + * of the union that holds sync regs. + */ + #define SYNC_REGS_SIZE_BYTES 2048 /* * shared registers between kvm and userspace. * kvm_valid_regs specifies the register classes set by the host @@ -407,7 +411,7 @@ struct kvm_run { __u64 kvm_dirty_regs; union { struct kvm_sync_regs regs; - char padding[2048]; + char padding[SYNC_REGS_SIZE_BYTES]; } s; }; @@ -936,6 +940,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_GET_CPU_CHAR 151 #define KVM_CAP_S390_BPB 152 #define KVM_CAP_GET_MSR_FEATURES 153 +#define KVM_CAP_HYPERV_EVENTFD 154 #ifdef KVM_CAP_IRQ_ROUTING @@ -1375,6 +1380,10 @@ struct kvm_enc_region { #define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region) #define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region) +/* Available with KVM_CAP_HYPERV_EVENTFD */ +#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd) + + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1515,4 +1524,14 @@ struct kvm_assigned_msix_entry { #define KVM_ARM_DEV_EL1_PTIMER (1 << 1) #define KVM_ARM_DEV_PMU (1 << 2) +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 912b85b52344..b8e288a1f740 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -650,11 +650,23 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* - * Indicates that the content of PERF_SAMPLE_IP points to - * the actual instruction that triggered the event. See also - * perf_event_attr::precise_ip. + * These PERF_RECORD_MISC_* flags below are safely reused + * for the following events: + * + * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events + * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events + * + * + * PERF_RECORD_MISC_EXACT_IP: + * Indicates that the content of PERF_SAMPLE_IP points to + * the actual instruction that triggered the event. See also + * perf_event_attr::precise_ip. + * + * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: + * Indicates that thread was preempted in TASK_RUNNING state. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) +#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) /* * Reserve the last bit to indicate some extended misc field */ diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 07d61583fd02..ed0a120d4f08 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -242,6 +242,7 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_DSD_U16_BE ((__force snd_pcm_format_t) 51) /* DSD, 2-byte samples DSD (x16), big endian */ #define SNDRV_PCM_FORMAT_DSD_U32_BE ((__force snd_pcm_format_t) 52) /* DSD, 4-byte samples DSD (x32), big endian */ #define SNDRV_PCM_FORMAT_LAST SNDRV_PCM_FORMAT_DSD_U32_BE +#define SNDRV_PCM_FORMAT_FIRST SNDRV_PCM_FORMAT_S8 #ifdef SNDRV_LITTLE_ENDIAN #define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_LE diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index f6a1babcbac4..cb7154eccbdc 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -433,7 +433,7 @@ match: if (ambiguous_option) { fprintf(stderr, - " Error: Ambiguous option: %s (could be --%s%s or --%s%s)", + " Error: Ambiguous option: %s (could be --%s%s or --%s%s)\n", arg, (ambiguous_flags & OPT_UNSET) ? "no-" : "", ambiguous_option->long_name, @@ -458,7 +458,7 @@ static void check_typos(const char *arg, const struct option *options) return; if (strstarts(arg, "no-")) { - fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg); + fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg); exit(129); } @@ -466,7 +466,7 @@ static void check_typos(const char *arg, const struct option *options) if (!options->long_name) continue; if (strstarts(options->long_name, arg)) { - fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg); + fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg); exit(129); } } diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 8ae824dbfca3..f76d9914686a 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -31,8 +31,8 @@ INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/objtool/arch/$(ARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -CFLAGS += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) -LDFLAGS += -lelf $(LIBSUBCMD) +CFLAGS += -Werror $(WARNINGS) $(HOSTCFLAGS) -g $(INCLUDES) +LDFLAGS += -lelf $(LIBSUBCMD) $(HOSTLDFLAGS) # Allow old libelf to be used: elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 5b4fff3adc4b..32f4a898e3f2 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -334,6 +334,11 @@ annotate.*:: 99.93 │ mov %eax,%eax + annotate.offset_level:: + Default is '1', meaning just jump targets will have offsets show right beside + the instruction. When set to '2' 'call' instructions will also have its offsets + shown, 3 or higher will show offsets for all instructions. + hist.*:: hist.percentage:: This option control the way to calculate overhead of filtered entries - diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index b0211410969b..8806ed5f3802 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -67,6 +67,9 @@ OPTIONS --phys-data:: Record/Report sample physical addresses +In addition, for report all perf report options are valid, and for record +all perf record options. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index bb33601a823b..63f938b887dd 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -104,8 +104,8 @@ OPTIONS for 'perf sched timehist' kallsyms pathname -g:: ---no-call-graph:: - Do not display call chains if present. +--call-graph:: + Display call chains if present (default on). --max-stack:: Maximum number of functions to display in backtrace, default 5. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 36ec0257f8d3..afdafe2110a1 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -228,14 +228,15 @@ OPTIONS For sample events it's possible to display misc field with -F +misc option, following letters are displayed for each bit: - PERF_RECORD_MISC_KERNEL K - PERF_RECORD_MISC_USER U - PERF_RECORD_MISC_HYPERVISOR H - PERF_RECORD_MISC_GUEST_KERNEL G - PERF_RECORD_MISC_GUEST_USER g - PERF_RECORD_MISC_MMAP_DATA* M - PERF_RECORD_MISC_COMM_EXEC E - PERF_RECORD_MISC_SWITCH_OUT S + PERF_RECORD_MISC_KERNEL K + PERF_RECORD_MISC_USER U + PERF_RECORD_MISC_HYPERVISOR H + PERF_RECORD_MISC_GUEST_KERNEL G + PERF_RECORD_MISC_GUEST_USER g + PERF_RECORD_MISC_MMAP_DATA* M + PERF_RECORD_MISC_COMM_EXEC E + PERF_RECORD_MISC_SWITCH_OUT S + PERF_RECORD_MISC_SWITCH_OUT_PREEMPT Sp $ perf script -F +misc ... sched-messaging 1414 K 28690.636582: 4590 cycles ... diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index f15b306be183..e6c3b4e555c2 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -153,7 +153,7 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m -I msecs:: --interval-print msecs:: -Print count deltas every N milliseconds (minimum: 10ms) +Print count deltas every N milliseconds (minimum: 1ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c7abd83a8e19..ae7dc46e8f8a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -68,7 +68,7 @@ ifeq ($(NO_PERF_REGS),0) endif ifneq ($(NO_SYSCALL_TABLE),1) - CFLAGS += -DHAVE_SYSCALL_TABLE + CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT endif # So far there's only x86 and arm libdw unwind support merged in perf. @@ -847,7 +847,7 @@ ifndef NO_JVMTI ifeq ($(feature-jvmti), 1) $(call detected_var,JDIR) else - $(warning No openjdk development package found, please install JDK package) + $(warning No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel) NO_JVMTI := 1 endif endif diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h new file mode 100644 index 000000000000..90ec4c8cb880 --- /dev/null +++ b/tools/perf/arch/arm/include/arch-tests.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +#endif + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build index b30eff9bcc83..883c57ff0c08 100644 --- a/tools/perf/arch/arm/tests/Build +++ b/tools/perf/arch/arm/tests/Build @@ -1,2 +1,4 @@ libperf-y += regs_load.o libperf-y += dwarf-unwind.o + +libperf-y += arch-tests.o diff --git a/tools/perf/arch/arm/tests/arch-tests.c b/tools/perf/arch/arm/tests/arch-tests.c new file mode 100644 index 000000000000..5b1543c98022 --- /dev/null +++ b/tools/perf/arch/arm/tests/arch-tests.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include "tests/tests.h" +#include "arch-tests.h" + +struct test arch_tests[] = { +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "DWARF unwind", + .func = test__dwarf_unwind, + }, +#endif + { + .func = NULL, + }, +}; diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index fa639e3e52ac..1ce6bdbda561 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #include <stdbool.h> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 5c655ad4621e..2f595cd73da6 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #include <api/fs/fs.h> diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h index 5256741be549..1a12e64f5127 100644 --- a/tools/perf/arch/arm/util/cs-etm.h +++ b/tools/perf/arch/arm/util/cs-etm.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef INCLUDE__PERF_CS_ETM_H__ diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index ac4dffc807b8..e047571e6080 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index d74eaa7aa927..1a38e78117ce 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -21,7 +21,7 @@ _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sys)/syscall_64.tbl $(systbl) @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \ - || echo "Warning: Kernel ABI header at 'tools/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true + || echo "Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@ clean:: diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 5bd1ba8c0282..44f5aba78210 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -1,21 +1,43 @@ // SPDX-License-Identifier: GPL-2.0 static struct ins x86__instructions[] = { + { .name = "adc", .ops = &mov_ops, }, + { .name = "adcb", .ops = &mov_ops, }, + { .name = "adcl", .ops = &mov_ops, }, { .name = "add", .ops = &mov_ops, }, { .name = "addl", .ops = &mov_ops, }, { .name = "addq", .ops = &mov_ops, }, + { .name = "addsd", .ops = &mov_ops, }, { .name = "addw", .ops = &mov_ops, }, { .name = "and", .ops = &mov_ops, }, + { .name = "andb", .ops = &mov_ops, }, + { .name = "andl", .ops = &mov_ops, }, + { .name = "andpd", .ops = &mov_ops, }, + { .name = "andps", .ops = &mov_ops, }, + { .name = "andq", .ops = &mov_ops, }, + { .name = "andw", .ops = &mov_ops, }, + { .name = "bsr", .ops = &mov_ops, }, + { .name = "bt", .ops = &mov_ops, }, + { .name = "btr", .ops = &mov_ops, }, { .name = "bts", .ops = &mov_ops, }, + { .name = "btsq", .ops = &mov_ops, }, { .name = "call", .ops = &call_ops, }, { .name = "callq", .ops = &call_ops, }, + { .name = "cmovbe", .ops = &mov_ops, }, + { .name = "cmove", .ops = &mov_ops, }, + { .name = "cmovae", .ops = &mov_ops, }, { .name = "cmp", .ops = &mov_ops, }, { .name = "cmpb", .ops = &mov_ops, }, { .name = "cmpl", .ops = &mov_ops, }, { .name = "cmpq", .ops = &mov_ops, }, { .name = "cmpw", .ops = &mov_ops, }, { .name = "cmpxch", .ops = &mov_ops, }, + { .name = "cmpxchg", .ops = &mov_ops, }, + { .name = "cs", .ops = &mov_ops, }, { .name = "dec", .ops = &dec_ops, }, { .name = "decl", .ops = &dec_ops, }, + { .name = "divsd", .ops = &mov_ops, }, + { .name = "divss", .ops = &mov_ops, }, + { .name = "gs", .ops = &mov_ops, }, { .name = "imul", .ops = &mov_ops, }, { .name = "inc", .ops = &dec_ops, }, { .name = "incl", .ops = &dec_ops, }, @@ -57,25 +79,68 @@ static struct ins x86__instructions[] = { { .name = "lea", .ops = &mov_ops, }, { .name = "lock", .ops = &lock_ops, }, { .name = "mov", .ops = &mov_ops, }, + { .name = "movapd", .ops = &mov_ops, }, + { .name = "movaps", .ops = &mov_ops, }, { .name = "movb", .ops = &mov_ops, }, { .name = "movdqa", .ops = &mov_ops, }, + { .name = "movdqu", .ops = &mov_ops, }, { .name = "movl", .ops = &mov_ops, }, { .name = "movq", .ops = &mov_ops, }, + { .name = "movsd", .ops = &mov_ops, }, { .name = "movslq", .ops = &mov_ops, }, + { .name = "movss", .ops = &mov_ops, }, + { .name = "movupd", .ops = &mov_ops, }, + { .name = "movups", .ops = &mov_ops, }, + { .name = "movw", .ops = &mov_ops, }, { .name = "movzbl", .ops = &mov_ops, }, { .name = "movzwl", .ops = &mov_ops, }, + { .name = "mulsd", .ops = &mov_ops, }, + { .name = "mulss", .ops = &mov_ops, }, { .name = "nop", .ops = &nop_ops, }, { .name = "nopl", .ops = &nop_ops, }, { .name = "nopw", .ops = &nop_ops, }, { .name = "or", .ops = &mov_ops, }, + { .name = "orb", .ops = &mov_ops, }, { .name = "orl", .ops = &mov_ops, }, + { .name = "orps", .ops = &mov_ops, }, + { .name = "orq", .ops = &mov_ops, }, + { .name = "pand", .ops = &mov_ops, }, + { .name = "paddq", .ops = &mov_ops, }, + { .name = "pcmpeqb", .ops = &mov_ops, }, + { .name = "por", .ops = &mov_ops, }, + { .name = "rclb", .ops = &mov_ops, }, + { .name = "rcll", .ops = &mov_ops, }, + { .name = "retq", .ops = &ret_ops, }, + { .name = "sbb", .ops = &mov_ops, }, + { .name = "sbbl", .ops = &mov_ops, }, + { .name = "sete", .ops = &mov_ops, }, + { .name = "sub", .ops = &mov_ops, }, + { .name = "subl", .ops = &mov_ops, }, + { .name = "subq", .ops = &mov_ops, }, + { .name = "subsd", .ops = &mov_ops, }, + { .name = "subw", .ops = &mov_ops, }, { .name = "test", .ops = &mov_ops, }, { .name = "testb", .ops = &mov_ops, }, { .name = "testl", .ops = &mov_ops, }, + { .name = "ucomisd", .ops = &mov_ops, }, + { .name = "ucomiss", .ops = &mov_ops, }, + { .name = "vaddsd", .ops = &mov_ops, }, + { .name = "vandpd", .ops = &mov_ops, }, + { .name = "vmovdqa", .ops = &mov_ops, }, + { .name = "vmovq", .ops = &mov_ops, }, + { .name = "vmovsd", .ops = &mov_ops, }, + { .name = "vmulsd", .ops = &mov_ops, }, + { .name = "vorpd", .ops = &mov_ops, }, + { .name = "vsubsd", .ops = &mov_ops, }, + { .name = "vucomisd", .ops = &mov_ops, }, { .name = "xadd", .ops = &mov_ops, }, { .name = "xbeginl", .ops = &jump_ops, }, { .name = "xbeginq", .ops = &jump_ops, }, - { .name = "retq", .ops = &ret_ops, }, + { .name = "xchg", .ops = &mov_ops, }, + { .name = "xor", .ops = &mov_ops, }, + { .name = "xorb", .ops = &mov_ops, }, + { .name = "xorpd", .ops = &mov_ops, }, + { .name = "xorps", .ops = &mov_ops, }, }; static bool x86__ins_is_fused(struct arch *arch, const char *ins1, diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 5aef183e2f85..4dfe42666d0c 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -4,379 +4,383 @@ # The format is: # <number> <abi> <name> <entry point> # +# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls +# # The abi is "common", "64" or "x32" for this file. # -0 common read sys_read -1 common write sys_write -2 common open sys_open -3 common close sys_close -4 common stat sys_newstat -5 common fstat sys_newfstat -6 common lstat sys_newlstat -7 common poll sys_poll -8 common lseek sys_lseek -9 common mmap sys_mmap -10 common mprotect sys_mprotect -11 common munmap sys_munmap -12 common brk sys_brk -13 64 rt_sigaction sys_rt_sigaction -14 common rt_sigprocmask sys_rt_sigprocmask -15 64 rt_sigreturn sys_rt_sigreturn/ptregs -16 64 ioctl sys_ioctl -17 common pread64 sys_pread64 -18 common pwrite64 sys_pwrite64 -19 64 readv sys_readv -20 64 writev sys_writev -21 common access sys_access -22 common pipe sys_pipe -23 common select sys_select -24 common sched_yield sys_sched_yield -25 common mremap sys_mremap -26 common msync sys_msync -27 common mincore sys_mincore -28 common madvise sys_madvise -29 common shmget sys_shmget -30 common shmat sys_shmat -31 common shmctl sys_shmctl -32 common dup sys_dup -33 common dup2 sys_dup2 -34 common pause sys_pause -35 common nanosleep sys_nanosleep -36 common getitimer sys_getitimer -37 common alarm sys_alarm -38 common setitimer sys_setitimer -39 common getpid sys_getpid -40 common sendfile sys_sendfile64 -41 common socket sys_socket -42 common connect sys_connect -43 common accept sys_accept -44 common sendto sys_sendto -45 64 recvfrom sys_recvfrom -46 64 sendmsg sys_sendmsg -47 64 recvmsg sys_recvmsg -48 common shutdown sys_shutdown -49 common bind sys_bind -50 common listen sys_listen -51 common getsockname sys_getsockname -52 common getpeername sys_getpeername -53 common socketpair sys_socketpair -54 64 setsockopt sys_setsockopt -55 64 getsockopt sys_getsockopt -56 common clone sys_clone/ptregs -57 common fork sys_fork/ptregs -58 common vfork sys_vfork/ptregs -59 64 execve sys_execve/ptregs -60 common exit sys_exit -61 common wait4 sys_wait4 -62 common kill sys_kill -63 common uname sys_newuname -64 common semget sys_semget -65 common semop sys_semop -66 common semctl sys_semctl -67 common shmdt sys_shmdt -68 common msgget sys_msgget -69 common msgsnd sys_msgsnd -70 common msgrcv sys_msgrcv -71 common msgctl sys_msgctl -72 common fcntl sys_fcntl -73 common flock sys_flock -74 common fsync sys_fsync -75 common fdatasync sys_fdatasync -76 common truncate sys_truncate -77 common ftruncate sys_ftruncate -78 common getdents sys_getdents -79 common getcwd sys_getcwd -80 common chdir sys_chdir -81 common fchdir sys_fchdir -82 common rename sys_rename -83 common mkdir sys_mkdir -84 common rmdir sys_rmdir -85 common creat sys_creat -86 common link sys_link -87 common unlink sys_unlink -88 common symlink sys_symlink -89 common readlink sys_readlink -90 common chmod sys_chmod -91 common fchmod sys_fchmod -92 common chown sys_chown -93 common fchown sys_fchown -94 common lchown sys_lchown -95 common umask sys_umask -96 common gettimeofday sys_gettimeofday -97 common getrlimit sys_getrlimit -98 common getrusage sys_getrusage -99 common sysinfo sys_sysinfo -100 common times sys_times -101 64 ptrace sys_ptrace -102 common getuid sys_getuid -103 common syslog sys_syslog -104 common getgid sys_getgid -105 common setuid sys_setuid -106 common setgid sys_setgid -107 common geteuid sys_geteuid -108 common getegid sys_getegid -109 common setpgid sys_setpgid -110 common getppid sys_getppid -111 common getpgrp sys_getpgrp -112 common setsid sys_setsid -113 common setreuid sys_setreuid -114 common setregid sys_setregid -115 common getgroups sys_getgroups -116 common setgroups sys_setgroups -117 common setresuid sys_setresuid -118 common getresuid sys_getresuid -119 common setresgid sys_setresgid -120 common getresgid sys_getresgid -121 common getpgid sys_getpgid -122 common setfsuid sys_setfsuid -123 common setfsgid sys_setfsgid -124 common getsid sys_getsid -125 common capget sys_capget -126 common capset sys_capset -127 64 rt_sigpending sys_rt_sigpending -128 64 rt_sigtimedwait sys_rt_sigtimedwait -129 64 rt_sigqueueinfo sys_rt_sigqueueinfo -130 common rt_sigsuspend sys_rt_sigsuspend -131 64 sigaltstack sys_sigaltstack -132 common utime sys_utime -133 common mknod sys_mknod +0 common read __x64_sys_read +1 common write __x64_sys_write +2 common open __x64_sys_open +3 common close __x64_sys_close +4 common stat __x64_sys_newstat +5 common fstat __x64_sys_newfstat +6 common lstat __x64_sys_newlstat +7 common poll __x64_sys_poll +8 common lseek __x64_sys_lseek +9 common mmap __x64_sys_mmap +10 common mprotect __x64_sys_mprotect +11 common munmap __x64_sys_munmap +12 common brk __x64_sys_brk +13 64 rt_sigaction __x64_sys_rt_sigaction +14 common rt_sigprocmask __x64_sys_rt_sigprocmask +15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs +16 64 ioctl __x64_sys_ioctl +17 common pread64 __x64_sys_pread64 +18 common pwrite64 __x64_sys_pwrite64 +19 64 readv __x64_sys_readv +20 64 writev __x64_sys_writev +21 common access __x64_sys_access +22 common pipe __x64_sys_pipe +23 common select __x64_sys_select +24 common sched_yield __x64_sys_sched_yield +25 common mremap __x64_sys_mremap +26 common msync __x64_sys_msync +27 common mincore __x64_sys_mincore +28 common madvise __x64_sys_madvise +29 common shmget __x64_sys_shmget +30 common shmat __x64_sys_shmat +31 common shmctl __x64_sys_shmctl +32 common dup __x64_sys_dup +33 common dup2 __x64_sys_dup2 +34 common pause __x64_sys_pause +35 common nanosleep __x64_sys_nanosleep +36 common getitimer __x64_sys_getitimer +37 common alarm __x64_sys_alarm +38 common setitimer __x64_sys_setitimer +39 common getpid __x64_sys_getpid +40 common sendfile __x64_sys_sendfile64 +41 common socket __x64_sys_socket +42 common connect __x64_sys_connect +43 common accept __x64_sys_accept +44 common sendto __x64_sys_sendto +45 64 recvfrom __x64_sys_recvfrom +46 64 sendmsg __x64_sys_sendmsg +47 64 recvmsg __x64_sys_recvmsg +48 common shutdown __x64_sys_shutdown +49 common bind __x64_sys_bind +50 common listen __x64_sys_listen +51 common getsockname __x64_sys_getsockname +52 common getpeername __x64_sys_getpeername +53 common socketpair __x64_sys_socketpair +54 64 setsockopt __x64_sys_setsockopt +55 64 getsockopt __x64_sys_getsockopt +56 common clone __x64_sys_clone/ptregs +57 common fork __x64_sys_fork/ptregs +58 common vfork __x64_sys_vfork/ptregs +59 64 execve __x64_sys_execve/ptregs +60 common exit __x64_sys_exit +61 common wait4 __x64_sys_wait4 +62 common kill __x64_sys_kill +63 common uname __x64_sys_newuname +64 common semget __x64_sys_semget +65 common semop __x64_sys_semop +66 common semctl __x64_sys_semctl +67 common shmdt __x64_sys_shmdt +68 common msgget __x64_sys_msgget +69 common msgsnd __x64_sys_msgsnd +70 common msgrcv __x64_sys_msgrcv +71 common msgctl __x64_sys_msgctl +72 common fcntl __x64_sys_fcntl +73 common flock __x64_sys_flock +74 common fsync __x64_sys_fsync +75 common fdatasync __x64_sys_fdatasync +76 common truncate __x64_sys_truncate +77 common ftruncate __x64_sys_ftruncate +78 common getdents __x64_sys_getdents +79 common getcwd __x64_sys_getcwd +80 common chdir __x64_sys_chdir +81 common fchdir __x64_sys_fchdir +82 common rename __x64_sys_rename +83 common mkdir __x64_sys_mkdir +84 common rmdir __x64_sys_rmdir +85 common creat __x64_sys_creat +86 common link __x64_sys_link +87 common unlink __x64_sys_unlink +88 common symlink __x64_sys_symlink +89 common readlink __x64_sys_readlink +90 common chmod __x64_sys_chmod +91 common fchmod __x64_sys_fchmod +92 common chown __x64_sys_chown +93 common fchown __x64_sys_fchown +94 common lchown __x64_sys_lchown +95 common umask __x64_sys_umask +96 common gettimeofday __x64_sys_gettimeofday +97 common getrlimit __x64_sys_getrlimit +98 common getrusage __x64_sys_getrusage +99 common sysinfo __x64_sys_sysinfo +100 common times __x64_sys_times +101 64 ptrace __x64_sys_ptrace +102 common getuid __x64_sys_getuid +103 common syslog __x64_sys_syslog +104 common getgid __x64_sys_getgid +105 common setuid __x64_sys_setuid +106 common setgid __x64_sys_setgid +107 common geteuid __x64_sys_geteuid +108 common getegid __x64_sys_getegid +109 common setpgid __x64_sys_setpgid +110 common getppid __x64_sys_getppid +111 common getpgrp __x64_sys_getpgrp +112 common setsid __x64_sys_setsid +113 common setreuid __x64_sys_setreuid +114 common setregid __x64_sys_setregid +115 common getgroups __x64_sys_getgroups +116 common setgroups __x64_sys_setgroups +117 common setresuid __x64_sys_setresuid +118 common getresuid __x64_sys_getresuid +119 common setresgid __x64_sys_setresgid +120 common getresgid __x64_sys_getresgid +121 common getpgid __x64_sys_getpgid +122 common setfsuid __x64_sys_setfsuid +123 common setfsgid __x64_sys_setfsgid +124 common getsid __x64_sys_getsid +125 common capget __x64_sys_capget +126 common capset __x64_sys_capset +127 64 rt_sigpending __x64_sys_rt_sigpending +128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait +129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo +130 common rt_sigsuspend __x64_sys_rt_sigsuspend +131 64 sigaltstack __x64_sys_sigaltstack +132 common utime __x64_sys_utime +133 common mknod __x64_sys_mknod 134 64 uselib -135 common personality sys_personality -136 common ustat sys_ustat -137 common statfs sys_statfs -138 common fstatfs sys_fstatfs -139 common sysfs sys_sysfs -140 common getpriority sys_getpriority -141 common setpriority sys_setpriority -142 common sched_setparam sys_sched_setparam -143 common sched_getparam sys_sched_getparam -144 common sched_setscheduler sys_sched_setscheduler -145 common sched_getscheduler sys_sched_getscheduler -146 common sched_get_priority_max sys_sched_get_priority_max -147 common sched_get_priority_min sys_sched_get_priority_min -148 common sched_rr_get_interval sys_sched_rr_get_interval -149 common mlock sys_mlock -150 common munlock sys_munlock -151 common mlockall sys_mlockall -152 common munlockall sys_munlockall -153 common vhangup sys_vhangup -154 common modify_ldt sys_modify_ldt -155 common pivot_root sys_pivot_root -156 64 _sysctl sys_sysctl -157 common prctl sys_prctl -158 common arch_prctl sys_arch_prctl -159 common adjtimex sys_adjtimex -160 common setrlimit sys_setrlimit -161 common chroot sys_chroot -162 common sync sys_sync -163 common acct sys_acct -164 common settimeofday sys_settimeofday -165 common mount sys_mount -166 common umount2 sys_umount -167 common swapon sys_swapon -168 common swapoff sys_swapoff -169 common reboot sys_reboot -170 common sethostname sys_sethostname -171 common setdomainname sys_setdomainname -172 common iopl sys_iopl/ptregs -173 common ioperm sys_ioperm +135 common personality __x64_sys_personality +136 common ustat __x64_sys_ustat +137 common statfs __x64_sys_statfs +138 common fstatfs __x64_sys_fstatfs +139 common sysfs __x64_sys_sysfs +140 common getpriority __x64_sys_getpriority +141 common setpriority __x64_sys_setpriority +142 common sched_setparam __x64_sys_sched_setparam +143 common sched_getparam __x64_sys_sched_getparam +144 common sched_setscheduler __x64_sys_sched_setscheduler +145 common sched_getscheduler __x64_sys_sched_getscheduler +146 common sched_get_priority_max __x64_sys_sched_get_priority_max +147 common sched_get_priority_min __x64_sys_sched_get_priority_min +148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval +149 common mlock __x64_sys_mlock +150 common munlock __x64_sys_munlock +151 common mlockall __x64_sys_mlockall +152 common munlockall __x64_sys_munlockall +153 common vhangup __x64_sys_vhangup +154 common modify_ldt __x64_sys_modify_ldt +155 common pivot_root __x64_sys_pivot_root +156 64 _sysctl __x64_sys_sysctl +157 common prctl __x64_sys_prctl +158 common arch_prctl __x64_sys_arch_prctl +159 common adjtimex __x64_sys_adjtimex +160 common setrlimit __x64_sys_setrlimit +161 common chroot __x64_sys_chroot +162 common sync __x64_sys_sync +163 common acct __x64_sys_acct +164 common settimeofday __x64_sys_settimeofday +165 common mount __x64_sys_mount +166 common umount2 __x64_sys_umount +167 common swapon __x64_sys_swapon +168 common swapoff __x64_sys_swapoff +169 common reboot __x64_sys_reboot +170 common sethostname __x64_sys_sethostname +171 common setdomainname __x64_sys_setdomainname +172 common iopl __x64_sys_iopl/ptregs +173 common ioperm __x64_sys_ioperm 174 64 create_module -175 common init_module sys_init_module -176 common delete_module sys_delete_module +175 common init_module __x64_sys_init_module +176 common delete_module __x64_sys_delete_module 177 64 get_kernel_syms 178 64 query_module -179 common quotactl sys_quotactl +179 common quotactl __x64_sys_quotactl 180 64 nfsservctl 181 common getpmsg 182 common putpmsg 183 common afs_syscall 184 common tuxcall 185 common security -186 common gettid sys_gettid -187 common readahead sys_readahead -188 common setxattr sys_setxattr -189 common lsetxattr sys_lsetxattr -190 common fsetxattr sys_fsetxattr -191 common getxattr sys_getxattr -192 common lgetxattr sys_lgetxattr -193 common fgetxattr sys_fgetxattr -194 common listxattr sys_listxattr -195 common llistxattr sys_llistxattr -196 common flistxattr sys_flistxattr -197 common removexattr sys_removexattr -198 common lremovexattr sys_lremovexattr -199 common fremovexattr sys_fremovexattr -200 common tkill sys_tkill -201 common time sys_time -202 common futex sys_futex -203 common sched_setaffinity sys_sched_setaffinity -204 common sched_getaffinity sys_sched_getaffinity +186 common gettid __x64_sys_gettid +187 common readahead __x64_sys_readahead +188 common setxattr __x64_sys_setxattr +189 common lsetxattr __x64_sys_lsetxattr +190 common fsetxattr __x64_sys_fsetxattr +191 common getxattr __x64_sys_getxattr +192 common lgetxattr __x64_sys_lgetxattr +193 common fgetxattr __x64_sys_fgetxattr +194 common listxattr __x64_sys_listxattr +195 common llistxattr __x64_sys_llistxattr +196 common flistxattr __x64_sys_flistxattr +197 common removexattr __x64_sys_removexattr +198 common lremovexattr __x64_sys_lremovexattr +199 common fremovexattr __x64_sys_fremovexattr +200 common tkill __x64_sys_tkill +201 common time __x64_sys_time +202 common futex __x64_sys_futex +203 common sched_setaffinity __x64_sys_sched_setaffinity +204 common sched_getaffinity __x64_sys_sched_getaffinity 205 64 set_thread_area -206 64 io_setup sys_io_setup -207 common io_destroy sys_io_destroy -208 common io_getevents sys_io_getevents -209 64 io_submit sys_io_submit -210 common io_cancel sys_io_cancel +206 64 io_setup __x64_sys_io_setup +207 common io_destroy __x64_sys_io_destroy +208 common io_getevents __x64_sys_io_getevents +209 64 io_submit __x64_sys_io_submit +210 common io_cancel __x64_sys_io_cancel 211 64 get_thread_area -212 common lookup_dcookie sys_lookup_dcookie -213 common epoll_create sys_epoll_create +212 common lookup_dcookie __x64_sys_lookup_dcookie +213 common epoll_create __x64_sys_epoll_create 214 64 epoll_ctl_old 215 64 epoll_wait_old -216 common remap_file_pages sys_remap_file_pages -217 common getdents64 sys_getdents64 -218 common set_tid_address sys_set_tid_address -219 common restart_syscall sys_restart_syscall -220 common semtimedop sys_semtimedop -221 common fadvise64 sys_fadvise64 -222 64 timer_create sys_timer_create -223 common timer_settime sys_timer_settime -224 common timer_gettime sys_timer_gettime -225 common timer_getoverrun sys_timer_getoverrun -226 common timer_delete sys_timer_delete -227 common clock_settime sys_clock_settime -228 common clock_gettime sys_clock_gettime -229 common clock_getres sys_clock_getres -230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group -232 common epoll_wait sys_epoll_wait -233 common epoll_ctl sys_epoll_ctl -234 common tgkill sys_tgkill -235 common utimes sys_utimes +216 common remap_file_pages __x64_sys_remap_file_pages +217 common getdents64 __x64_sys_getdents64 +218 common set_tid_address __x64_sys_set_tid_address +219 common restart_syscall __x64_sys_restart_syscall +220 common semtimedop __x64_sys_semtimedop +221 common fadvise64 __x64_sys_fadvise64 +222 64 timer_create __x64_sys_timer_create +223 common timer_settime __x64_sys_timer_settime +224 common timer_gettime __x64_sys_timer_gettime +225 common timer_getoverrun __x64_sys_timer_getoverrun +226 common timer_delete __x64_sys_timer_delete +227 common clock_settime __x64_sys_clock_settime +228 common clock_gettime __x64_sys_clock_gettime +229 common clock_getres __x64_sys_clock_getres +230 common clock_nanosleep __x64_sys_clock_nanosleep +231 common exit_group __x64_sys_exit_group +232 common epoll_wait __x64_sys_epoll_wait +233 common epoll_ctl __x64_sys_epoll_ctl +234 common tgkill __x64_sys_tgkill +235 common utimes __x64_sys_utimes 236 64 vserver -237 common mbind sys_mbind -238 common set_mempolicy sys_set_mempolicy -239 common get_mempolicy sys_get_mempolicy -240 common mq_open sys_mq_open -241 common mq_unlink sys_mq_unlink -242 common mq_timedsend sys_mq_timedsend -243 common mq_timedreceive sys_mq_timedreceive -244 64 mq_notify sys_mq_notify -245 common mq_getsetattr sys_mq_getsetattr -246 64 kexec_load sys_kexec_load -247 64 waitid sys_waitid -248 common add_key sys_add_key -249 common request_key sys_request_key -250 common keyctl sys_keyctl -251 common ioprio_set sys_ioprio_set -252 common ioprio_get sys_ioprio_get -253 common inotify_init sys_inotify_init -254 common inotify_add_watch sys_inotify_add_watch -255 common inotify_rm_watch sys_inotify_rm_watch -256 common migrate_pages sys_migrate_pages -257 common openat sys_openat -258 common mkdirat sys_mkdirat -259 common mknodat sys_mknodat -260 common fchownat sys_fchownat -261 common futimesat sys_futimesat -262 common newfstatat sys_newfstatat -263 common unlinkat sys_unlinkat -264 common renameat sys_renameat -265 common linkat sys_linkat -266 common symlinkat sys_symlinkat -267 common readlinkat sys_readlinkat -268 common fchmodat sys_fchmodat -269 common faccessat sys_faccessat -270 common pselect6 sys_pselect6 -271 common ppoll sys_ppoll -272 common unshare sys_unshare -273 64 set_robust_list sys_set_robust_list -274 64 get_robust_list sys_get_robust_list -275 common splice sys_splice -276 common tee sys_tee -277 common sync_file_range sys_sync_file_range -278 64 vmsplice sys_vmsplice -279 64 move_pages sys_move_pages -280 common utimensat sys_utimensat -281 common epoll_pwait sys_epoll_pwait -282 common signalfd sys_signalfd -283 common timerfd_create sys_timerfd_create -284 common eventfd sys_eventfd -285 common fallocate sys_fallocate -286 common timerfd_settime sys_timerfd_settime -287 common timerfd_gettime sys_timerfd_gettime -288 common accept4 sys_accept4 -289 common signalfd4 sys_signalfd4 -290 common eventfd2 sys_eventfd2 -291 common epoll_create1 sys_epoll_create1 -292 common dup3 sys_dup3 -293 common pipe2 sys_pipe2 -294 common inotify_init1 sys_inotify_init1 -295 64 preadv sys_preadv -296 64 pwritev sys_pwritev -297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo -298 common perf_event_open sys_perf_event_open -299 64 recvmmsg sys_recvmmsg -300 common fanotify_init sys_fanotify_init -301 common fanotify_mark sys_fanotify_mark -302 common prlimit64 sys_prlimit64 -303 common name_to_handle_at sys_name_to_handle_at -304 common open_by_handle_at sys_open_by_handle_at -305 common clock_adjtime sys_clock_adjtime -306 common syncfs sys_syncfs -307 64 sendmmsg sys_sendmmsg -308 common setns sys_setns -309 common getcpu sys_getcpu -310 64 process_vm_readv sys_process_vm_readv -311 64 process_vm_writev sys_process_vm_writev -312 common kcmp sys_kcmp -313 common finit_module sys_finit_module -314 common sched_setattr sys_sched_setattr -315 common sched_getattr sys_sched_getattr -316 common renameat2 sys_renameat2 -317 common seccomp sys_seccomp -318 common getrandom sys_getrandom -319 common memfd_create sys_memfd_create -320 common kexec_file_load sys_kexec_file_load -321 common bpf sys_bpf -322 64 execveat sys_execveat/ptregs -323 common userfaultfd sys_userfaultfd -324 common membarrier sys_membarrier -325 common mlock2 sys_mlock2 -326 common copy_file_range sys_copy_file_range -327 64 preadv2 sys_preadv2 -328 64 pwritev2 sys_pwritev2 -329 common pkey_mprotect sys_pkey_mprotect -330 common pkey_alloc sys_pkey_alloc -331 common pkey_free sys_pkey_free -332 common statx sys_statx +237 common mbind __x64_sys_mbind +238 common set_mempolicy __x64_sys_set_mempolicy +239 common get_mempolicy __x64_sys_get_mempolicy +240 common mq_open __x64_sys_mq_open +241 common mq_unlink __x64_sys_mq_unlink +242 common mq_timedsend __x64_sys_mq_timedsend +243 common mq_timedreceive __x64_sys_mq_timedreceive +244 64 mq_notify __x64_sys_mq_notify +245 common mq_getsetattr __x64_sys_mq_getsetattr +246 64 kexec_load __x64_sys_kexec_load +247 64 waitid __x64_sys_waitid +248 common add_key __x64_sys_add_key +249 common request_key __x64_sys_request_key +250 common keyctl __x64_sys_keyctl +251 common ioprio_set __x64_sys_ioprio_set +252 common ioprio_get __x64_sys_ioprio_get +253 common inotify_init __x64_sys_inotify_init +254 common inotify_add_watch __x64_sys_inotify_add_watch +255 common inotify_rm_watch __x64_sys_inotify_rm_watch +256 common migrate_pages __x64_sys_migrate_pages +257 common openat __x64_sys_openat +258 common mkdirat __x64_sys_mkdirat +259 common mknodat __x64_sys_mknodat +260 common fchownat __x64_sys_fchownat +261 common futimesat __x64_sys_futimesat +262 common newfstatat __x64_sys_newfstatat +263 common unlinkat __x64_sys_unlinkat +264 common renameat __x64_sys_renameat +265 common linkat __x64_sys_linkat +266 common symlinkat __x64_sys_symlinkat +267 common readlinkat __x64_sys_readlinkat +268 common fchmodat __x64_sys_fchmodat +269 common faccessat __x64_sys_faccessat +270 common pselect6 __x64_sys_pselect6 +271 common ppoll __x64_sys_ppoll +272 common unshare __x64_sys_unshare +273 64 set_robust_list __x64_sys_set_robust_list +274 64 get_robust_list __x64_sys_get_robust_list +275 common splice __x64_sys_splice +276 common tee __x64_sys_tee +277 common sync_file_range __x64_sys_sync_file_range +278 64 vmsplice __x64_sys_vmsplice +279 64 move_pages __x64_sys_move_pages +280 common utimensat __x64_sys_utimensat +281 common epoll_pwait __x64_sys_epoll_pwait +282 common signalfd __x64_sys_signalfd +283 common timerfd_create __x64_sys_timerfd_create +284 common eventfd __x64_sys_eventfd +285 common fallocate __x64_sys_fallocate +286 common timerfd_settime __x64_sys_timerfd_settime +287 common timerfd_gettime __x64_sys_timerfd_gettime +288 common accept4 __x64_sys_accept4 +289 common signalfd4 __x64_sys_signalfd4 +290 common eventfd2 __x64_sys_eventfd2 +291 common epoll_create1 __x64_sys_epoll_create1 +292 common dup3 __x64_sys_dup3 +293 common pipe2 __x64_sys_pipe2 +294 common inotify_init1 __x64_sys_inotify_init1 +295 64 preadv __x64_sys_preadv +296 64 pwritev __x64_sys_pwritev +297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo +298 common perf_event_open __x64_sys_perf_event_open +299 64 recvmmsg __x64_sys_recvmmsg +300 common fanotify_init __x64_sys_fanotify_init +301 common fanotify_mark __x64_sys_fanotify_mark +302 common prlimit64 __x64_sys_prlimit64 +303 common name_to_handle_at __x64_sys_name_to_handle_at +304 common open_by_handle_at __x64_sys_open_by_handle_at +305 common clock_adjtime __x64_sys_clock_adjtime +306 common syncfs __x64_sys_syncfs +307 64 sendmmsg __x64_sys_sendmmsg +308 common setns __x64_sys_setns +309 common getcpu __x64_sys_getcpu +310 64 process_vm_readv __x64_sys_process_vm_readv +311 64 process_vm_writev __x64_sys_process_vm_writev +312 common kcmp __x64_sys_kcmp +313 common finit_module __x64_sys_finit_module +314 common sched_setattr __x64_sys_sched_setattr +315 common sched_getattr __x64_sys_sched_getattr +316 common renameat2 __x64_sys_renameat2 +317 common seccomp __x64_sys_seccomp +318 common getrandom __x64_sys_getrandom +319 common memfd_create __x64_sys_memfd_create +320 common kexec_file_load __x64_sys_kexec_file_load +321 common bpf __x64_sys_bpf +322 64 execveat __x64_sys_execveat/ptregs +323 common userfaultfd __x64_sys_userfaultfd +324 common membarrier __x64_sys_membarrier +325 common mlock2 __x64_sys_mlock2 +326 common copy_file_range __x64_sys_copy_file_range +327 64 preadv2 __x64_sys_preadv2 +328 64 pwritev2 __x64_sys_pwritev2 +329 common pkey_mprotect __x64_sys_pkey_mprotect +330 common pkey_alloc __x64_sys_pkey_alloc +331 common pkey_free __x64_sys_pkey_free +332 common statx __x64_sys_statx # # x32-specific system call numbers start at 512 to avoid cache impact -# for native 64-bit operation. +# for native 64-bit operation. The __x32_compat_sys stubs are created +# on-the-fly for compat_sys_*() compatibility system calls if X86_X32 +# is defined. # -512 x32 rt_sigaction compat_sys_rt_sigaction +512 x32 rt_sigaction __x32_compat_sys_rt_sigaction 513 x32 rt_sigreturn sys32_x32_rt_sigreturn -514 x32 ioctl compat_sys_ioctl -515 x32 readv compat_sys_readv -516 x32 writev compat_sys_writev -517 x32 recvfrom compat_sys_recvfrom -518 x32 sendmsg compat_sys_sendmsg -519 x32 recvmsg compat_sys_recvmsg -520 x32 execve compat_sys_execve/ptregs -521 x32 ptrace compat_sys_ptrace -522 x32 rt_sigpending compat_sys_rt_sigpending -523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait -524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo -525 x32 sigaltstack compat_sys_sigaltstack -526 x32 timer_create compat_sys_timer_create -527 x32 mq_notify compat_sys_mq_notify -528 x32 kexec_load compat_sys_kexec_load -529 x32 waitid compat_sys_waitid -530 x32 set_robust_list compat_sys_set_robust_list -531 x32 get_robust_list compat_sys_get_robust_list -532 x32 vmsplice compat_sys_vmsplice -533 x32 move_pages compat_sys_move_pages -534 x32 preadv compat_sys_preadv64 -535 x32 pwritev compat_sys_pwritev64 -536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -537 x32 recvmmsg compat_sys_recvmmsg -538 x32 sendmmsg compat_sys_sendmmsg -539 x32 process_vm_readv compat_sys_process_vm_readv -540 x32 process_vm_writev compat_sys_process_vm_writev -541 x32 setsockopt compat_sys_setsockopt -542 x32 getsockopt compat_sys_getsockopt -543 x32 io_setup compat_sys_io_setup -544 x32 io_submit compat_sys_io_submit -545 x32 execveat compat_sys_execveat/ptregs -546 x32 preadv2 compat_sys_preadv64v2 -547 x32 pwritev2 compat_sys_pwritev64v2 +514 x32 ioctl __x32_compat_sys_ioctl +515 x32 readv __x32_compat_sys_readv +516 x32 writev __x32_compat_sys_writev +517 x32 recvfrom __x32_compat_sys_recvfrom +518 x32 sendmsg __x32_compat_sys_sendmsg +519 x32 recvmsg __x32_compat_sys_recvmsg +520 x32 execve __x32_compat_sys_execve/ptregs +521 x32 ptrace __x32_compat_sys_ptrace +522 x32 rt_sigpending __x32_compat_sys_rt_sigpending +523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait +524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo +525 x32 sigaltstack __x32_compat_sys_sigaltstack +526 x32 timer_create __x32_compat_sys_timer_create +527 x32 mq_notify __x32_compat_sys_mq_notify +528 x32 kexec_load __x32_compat_sys_kexec_load +529 x32 waitid __x32_compat_sys_waitid +530 x32 set_robust_list __x32_compat_sys_set_robust_list +531 x32 get_robust_list __x32_compat_sys_get_robust_list +532 x32 vmsplice __x32_compat_sys_vmsplice +533 x32 move_pages __x32_compat_sys_move_pages +534 x32 preadv __x32_compat_sys_preadv64 +535 x32 pwritev __x32_compat_sys_pwritev64 +536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo +537 x32 recvmmsg __x32_compat_sys_recvmmsg +538 x32 sendmmsg __x32_compat_sys_sendmmsg +539 x32 process_vm_readv __x32_compat_sys_process_vm_readv +540 x32 process_vm_writev __x32_compat_sys_process_vm_writev +541 x32 setsockopt __x32_compat_sys_setsockopt +542 x32 getsockopt __x32_compat_sys_getsockopt +543 x32 io_setup __x32_compat_sys_io_setup +544 x32 io_submit __x32_compat_sys_io_submit +545 x32 execveat __x32_compat_sys_execveat/ptregs +546 x32 preadv2 __x32_compat_sys_preadv64v2 +547 x32 pwritev2 __x32_compat_sys_pwritev64v2 diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 4aca13f23b9d..1c41b4eaf73c 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -439,7 +439,7 @@ int cmd_help(int argc, const char **argv) #ifdef HAVE_LIBELF_SUPPORT "probe", #endif -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) "trace", #endif NULL }; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 506564651cda..57393e94d156 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -83,7 +83,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) }; argc = parse_options(argc, argv, options, record_mem_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + PARSE_OPT_KEEP_UNKNOWN); rec_argc = argc + 9; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); @@ -436,7 +436,7 @@ int cmd_mem(int argc, const char **argv) } argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, - mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); + mem_usage, PARSE_OPT_KEEP_UNKNOWN); if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation)) usage_with_options(mem_usage, mem_options); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 313c42423393..e0a9845b6cbc 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -657,8 +657,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, break; case PERF_RECORD_SWITCH: case PERF_RECORD_SWITCH_CPU_WIDE: - if (has(SWITCH_OUT)) + if (has(SWITCH_OUT)) { ret += fprintf(fp, "S"); + if (sample->misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) + ret += fprintf(fp, "p"); + } default: break; } @@ -2801,11 +2804,11 @@ int find_scripts(char **scripts_array, char **scripts_path_array) for_each_lang(scripts_path, scripts_dir, lang_dirent) { scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, lang_dirent->d_name); -#ifdef NO_LIBPERL +#ifndef HAVE_LIBPERL_SUPPORT if (strstr(lang_path, "perl")) continue; #endif -#ifdef NO_LIBPYTHON +#ifndef HAVE_LIBPYTHON_SUPPORT if (strstr(lang_path, "python")) continue; #endif diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f5c454855908..147a27e8c937 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1943,7 +1943,8 @@ static const struct option stat_options[] = { OPT_STRING(0, "post", &post_cmd, "command", "command to run after to the measured command"), OPT_UINTEGER('I', "interval-print", &stat_config.interval, - "print counts at regular interval in ms (>= 10)"), + "print counts at regular interval in ms " + "(overhead is possible for values <= 100ms)"), OPT_INTEGER(0, "interval-count", &stat_config.times, "print counts for fixed number of times"), OPT_UINTEGER(0, "timeout", &stat_config.timeout, @@ -2923,17 +2924,6 @@ int cmd_stat(int argc, const char **argv) } } - if (interval && interval < 100) { - if (interval < 10) { - pr_err("print interval must be >= 10ms\n"); - parse_options_usage(stat_usage, stat_options, "I", 1); - goto out; - } else - pr_warning("print interval < 100ms. " - "The overhead percentage could be high in some cases. " - "Please proceed with caution.\n"); - } - if (stat_config.times && interval) interval_count = true; else if (stat_config.times && !interval) { diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index 2abe3910d6b6..50df168be326 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -60,7 +60,10 @@ static void library_status(void) STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations); STATUS(HAVE_GLIBC_SUPPORT, glibc); STATUS(HAVE_GTK2_SUPPORT, gtk2); +#ifndef HAVE_SYSCALL_TABLE_SUPPORT STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit); +#endif + STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table); STATUS(HAVE_LIBBFD_SUPPORT, libbfd); STATUS(HAVE_LIBELF_SUPPORT, libelf); STATUS(HAVE_LIBNUMA_SUPPORT, libnuma); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 1659029d03fc..20a08cb32332 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -73,7 +73,7 @@ static struct cmd_struct commands[] = { { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) { "trace", cmd_trace, 0 }, #endif { "inject", cmd_inject, 0 }, @@ -491,7 +491,7 @@ int main(int argc, const char **argv) argv[0] = cmd; } if (strstarts(cmd, "trace")) { -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv); diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c index e4123c1b0e88..1ca5106df5f1 100644 --- a/tools/perf/tests/bpf-script-example.c +++ b/tools/perf/tests/bpf-script-example.c @@ -31,7 +31,7 @@ struct bpf_map_def SEC("maps") flip_table = { .max_entries = 1, }; -SEC("func=SyS_epoll_pwait") +SEC("func=do_epoll_wait") int bpf_func__SyS_epoll_pwait(void *ctx) { int ind =0; diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c index 3626924740d8..ff3ec8337f0a 100644 --- a/tools/perf/tests/bpf-script-test-kbuild.c +++ b/tools/perf/tests/bpf-script-test-kbuild.c @@ -9,7 +9,6 @@ #define SEC(NAME) __attribute__((section(NAME), used)) #include <uapi/linux/fs.h> -#include <uapi/asm/ptrace.h> SEC("func=vfs_llseek") int bpf_func__vfs_llseek(void *ctx) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 625f5a6772af..cac8f8889bc3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -118,6 +118,7 @@ static struct test generic_tests[] = { { .desc = "Breakpoint accounting", .func = test__bp_accounting, + .is_supported = test__bp_signal_is_supported, }, { .desc = "Number of exit events of a simple workload", diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index bb8e6bcb0d96..0919b0793e5b 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -75,7 +75,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]); evsels[i] = perf_evsel__newtp("syscalls", name); if (IS_ERR(evsels[i])) { - pr_debug("perf_evsel__new\n"); + pr_debug("perf_evsel__new(%s)\n", name); goto out_delete_evlist; } diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 417e3ecfe9d7..9f68077b241b 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -54,6 +54,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(EXECUTABLE); P_MMAP_FLAG(FILE); P_MMAP_FLAG(FIXED); +#ifdef MAP_FIXED_NOREPLACE + P_MMAP_FLAG(FIXED_NOREPLACE); +#endif P_MMAP_FLAG(GROWSDOWN); P_MMAP_FLAG(HUGETLB); P_MMAP_FLAG(LOCKED); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 12c099a87f8b..3781d74088a7 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -692,6 +692,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "J Toggle showing number of jump sources on targets\n" "n Search next string\n" "o Toggle disassembler output/simplified view\n" + "O Bump offset level (jump targets -> +call -> all -> cycle thru)\n" "s Toggle source code view\n" "t Circulate percent, total period, samples view\n" "/ Search string\n" @@ -719,6 +720,10 @@ static int annotate_browser__run(struct annotate_browser *browser, notes->options->use_offset = !notes->options->use_offset; annotation__update_column_widths(notes); continue; + case 'O': + if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) + notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + continue; case 'j': notes->options->jump_arrows = !notes->options->jump_arrows; continue; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 0eec06c105c6..e5f247247daa 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2714,7 +2714,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "h/?/F1 Show this window\n" \ "UP/DOWN/PGUP\n" \ "PGDN/SPACE Navigate\n" \ - "q/ESC/CTRL+C Exit browser\n\n" \ + "q/ESC/CTRL+C Exit browser or go back to previous screen\n\n" \ "For multiple event sessions:\n\n" \ "TAB/UNTAB Switch events\n\n" \ "For symbolic views (--sort has sym):\n\n" \ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index fbad8dfbb186..536ee148bff8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -46,6 +46,7 @@ struct annotation_options annotation__default_options = { .use_offset = true, .jump_arrows = true, + .offset_level = ANNOTATION__OFFSET_JUMP_TARGETS, }; const char *disassembler_style; @@ -2512,7 +2513,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati if (!notes->options->use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { - if (al->jump_sources) { + if (al->jump_sources && + notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { if (notes->options->show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", @@ -2523,9 +2525,14 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati obj__printf(obj, bf); obj__set_color(obj, prev); } - +print_addr: printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", notes->widths.target, addr); + } else if (ins__is_call(&disasm_line(al)->ins) && + notes->options->offset_level >= ANNOTATION__OFFSET_CALL) { + goto print_addr; + } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { + goto print_addr; } else { printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); @@ -2642,10 +2649,11 @@ int __annotation__scnprintf_samples_period(struct annotation *notes, */ static struct annotation_config { const char *name; - bool *value; + void *value; } annotation__configs[] = { ANNOTATION__CFG(hide_src_code), ANNOTATION__CFG(jump_arrows), + ANNOTATION__CFG(offset_level), ANNOTATION__CFG(show_linenr), ANNOTATION__CFG(show_nr_jumps), ANNOTATION__CFG(show_nr_samples), @@ -2677,8 +2685,16 @@ static int annotation__config(const char *var, const char *value, if (cfg == NULL) pr_debug("%s variable unknown, ignoring...", var); - else - *cfg->value = perf_config_bool(name, value); + else if (strcmp(var, "annotate.offset_level") == 0) { + perf_config_int(cfg->value, name, value); + + if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL) + *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL; + else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL) + *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL; + } else { + *(bool *)cfg->value = perf_config_bool(name, value); + } return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index db8d09bea07e..f28a9e43421d 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -70,8 +70,17 @@ struct annotation_options { show_nr_jumps, show_nr_samples, show_total_period; + u8 offset_level; }; +enum { + ANNOTATION__OFFSET_JUMP_TARGETS = 1, + ANNOTATION__OFFSET_CALL, + ANNOTATION__MAX_OFFSET_LEVEL, +}; + +#define ANNOTATION__MIN_OFFSET_LEVEL ANNOTATION__OFFSET_JUMP_TARGETS + extern struct annotation_options annotation__default_options; struct annotation; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 640af88331b4..c8b98fa22997 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright(C) 2015-2018 Linaro Limited. * * Author: Tor Jeremiassen <tor@ti.com> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 1b0d422373be..40020b1ca54f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright(C) 2015-2018 Linaro Limited. * * Author: Tor Jeremiassen <tor@ti.com> diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 5864d5dca616..37f8d48179ca 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f0a6cbd033cc..98ff3a6a3d50 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1421,7 +1421,9 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) { bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; - const char *in_out = out ? "OUT" : "IN "; + const char *in_out = !out ? "IN " : + !(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) ? + "OUT " : "OUT preempt"; if (event->header.type == PERF_RECORD_SWITCH) return fprintf(fp, " %s\n", in_out); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1ac8d9236efd..3e87486c28fe 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2870,8 +2870,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, #if defined(__i386__) || defined(__x86_64__) if (evsel->attr.type == PERF_TYPE_HARDWARE) return scnprintf(msg, size, "%s", - "No hardware sampling interrupt available.\n" - "No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it."); + "No hardware sampling interrupt available.\n"); #endif break; case EBUSY: @@ -2894,8 +2893,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" - "/bin/dmesg may provide additional information.\n" - "No CONFIG_PERF_EVENTS=y kernel support configured?", + "/bin/dmesg | grep -i perf may provide additional information.\n", err, str_error_r(err, sbuf, sizeof(sbuf)), perf_evsel__name(evsel)); } diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index ff17920a5ebc..c3cef36d4176 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)" +echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 121df1683c36..a8bff2178fbc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1320,7 +1320,8 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) dir = opendir(path); if (!dir) { - pr_warning("failed: can't open node sysfs data\n"); + pr_debug2("%s: could't read %s, does this arch have topology information?\n", + __func__, path); return -1; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 064bdcb7bd78..61a5e5027338 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -562,6 +562,12 @@ static int is_pmu_core(const char *name) if (stat(path, &st) == 0) return 1; + /* Look for cpu sysfs (specific to s390) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s", + sysfs, name); + if (stat(path, &st) == 0 && !strncmp(name, "cpum_", 5)) + return 1; + return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 62b2dd2253eb..1466814ebada 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2091,16 +2091,14 @@ static bool symbol__read_kptr_restrict(void) int symbol__annotation_init(void) { + if (symbol_conf.init_annotation) + return 0; + if (symbol_conf.initialized) { pr_err("Annotation needs to be init before symbol__init()\n"); return -1; } - if (symbol_conf.init_annotation) { - pr_warning("Annotation being initialized multiple times\n"); - return 0; - } - symbol_conf.priv_size += sizeof(struct annotation); symbol_conf.init_annotation = true; return 0; diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 895122d638dd..0ee7f568d60c 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -17,7 +17,7 @@ #include <stdlib.h> #include <linux/compiler.h> -#ifdef HAVE_SYSCALL_TABLE +#ifdef HAVE_SYSCALL_TABLE_SUPPORT #include <string.h> #include "string2.h" #include "util.h" @@ -139,7 +139,7 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); } -#else /* HAVE_SYSCALL_TABLE */ +#else /* HAVE_SYSCALL_TABLE_SUPPORT */ #include <libaudit.h> @@ -176,4 +176,4 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g { return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); } -#endif /* HAVE_SYSCALL_TABLE */ +#endif /* HAVE_SYSCALL_TABLE_SUPPORT */ diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 0ac9077f62a2..b1e5c3a2b8e3 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -98,7 +98,7 @@ static void register_python_scripting(struct scripting_ops *scripting_ops) } } -#ifdef NO_LIBPYTHON +#ifndef HAVE_LIBPYTHON_SUPPORT void setup_python_scripting(void) { register_python_scripting(&python_scripting_unsupported_ops); @@ -161,7 +161,7 @@ static void register_perl_scripting(struct scripting_ops *scripting_ops) } } -#ifdef NO_LIBPERL +#ifndef HAVE_LIBPERL_SUPPORT void setup_perl_scripting(void) { register_perl_scripting(&perl_scripting_unsupported_ops); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index cb166be4918d..4ea385be528f 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -138,6 +138,7 @@ static u32 handle[] = { }; static unsigned long dimm_fail_cmd_flags[NUM_DCR]; +static int dimm_fail_cmd_code[NUM_DCR]; struct nfit_test_fw { enum intel_fw_update_state state; @@ -892,8 +893,11 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) if (i >= ARRAY_SIZE(handle)) return -ENXIO; - if ((1 << func) & dimm_fail_cmd_flags[i]) + if ((1 << func) & dimm_fail_cmd_flags[i]) { + if (dimm_fail_cmd_code[i]) + return dimm_fail_cmd_code[i]; return -EIO; + } return i; } @@ -1162,12 +1166,12 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state) static void put_dimms(void *data) { - struct device **dimm_dev = data; + struct nfit_test *t = data; int i; - for (i = 0; i < NUM_DCR; i++) - if (dimm_dev[i]) - device_unregister(dimm_dev[i]); + for (i = 0; i < t->num_dcr; i++) + if (t->dimm_dev[i]) + device_unregister(t->dimm_dev[i]); } static struct class *nfit_test_dimm; @@ -1176,13 +1180,11 @@ static int dimm_name_to_id(struct device *dev) { int dimm; - if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1 - || dimm >= NUM_DCR || dimm < 0) + if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1) return -ENXIO; return dimm; } - static ssize_t handle_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1191,7 +1193,7 @@ static ssize_t handle_show(struct device *dev, struct device_attribute *attr, if (dimm < 0) return dimm; - return sprintf(buf, "%#x", handle[dimm]); + return sprintf(buf, "%#x\n", handle[dimm]); } DEVICE_ATTR_RO(handle); @@ -1225,8 +1227,39 @@ static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(fail_cmd); +static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int dimm = dimm_name_to_id(dev); + + if (dimm < 0) + return dimm; + + return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]); +} + +static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int dimm = dimm_name_to_id(dev); + unsigned long val; + ssize_t rc; + + if (dimm < 0) + return dimm; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm_fail_cmd_code[dimm] = val; + return size; +} +static DEVICE_ATTR_RW(fail_cmd_code); + static struct attribute *nfit_test_dimm_attributes[] = { &dev_attr_fail_cmd.attr, + &dev_attr_fail_cmd_code.attr, &dev_attr_handle.attr, NULL, }; @@ -1240,6 +1273,23 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { NULL, }; +static int nfit_test_dimm_init(struct nfit_test *t) +{ + int i; + + if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t)) + return -ENOMEM; + for (i = 0; i < t->num_dcr; i++) { + t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, + &t->pdev.dev, 0, NULL, + nfit_test_dimm_attribute_groups, + "test_dimm%d", i + t->dcr_idx); + if (!t->dimm_dev[i]) + return -ENOMEM; + } + return 0; +} + static void smart_init(struct nfit_test *t) { int i; @@ -1335,17 +1385,8 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->_fit) return -ENOMEM; - if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev)) + if (nfit_test_dimm_init(t)) return -ENOMEM; - for (i = 0; i < NUM_DCR; i++) { - t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, - &t->pdev.dev, 0, NULL, - nfit_test_dimm_attribute_groups, - "test_dimm%d", i); - if (!t->dimm_dev[i]) - return -ENOMEM; - } - smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -1377,6 +1418,8 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + if (nfit_test_dimm_init(t)) + return -ENOMEM; smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -2222,6 +2265,9 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index da19f0562bf8..3e3b3ced3f7c 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -12,4 +12,7 @@ test_tcpbpf_user test_verifier_log feature test_libbpf_open +test_sock +test_sock_addr +urandom_read test_btf diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 73bb20cfb9b7..f4d99fabc56d 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -13,6 +13,7 @@ #include <bpf/bpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index d488f20926e8..2950f80ba7fb 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -15,6 +15,7 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #define CG_PATH "/foo" #define CONNECT4_PROG_PATH "./connect4_prog.o" diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh index c6e1dcf992c4..9832a875a828 100755 --- a/tools/testing/selftests/bpf/test_sock_addr.sh +++ b/tools/testing/selftests/bpf/test_sock_addr.sh @@ -4,7 +4,7 @@ set -eu ping_once() { - ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1 + ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 } wait_for_ip() @@ -13,7 +13,7 @@ wait_for_ip() echo -n "Wait for testing IPv4/IPv6 to become available " for _i in $(seq ${MAX_PING_TRIES}); do echo -n "." - if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then + if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then echo " OK" return fi diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 4e6d09fb166f..5c7d7001ad37 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := dnotify_test devpts_pts -all: $(TEST_PROGS) -include ../lib.mk +TEST_GEN_PROGS := devpts_pts +TEST_GEN_PROGS_EXTENDED := dnotify_test -clean: - rm -fr $(TEST_PROGS) +include ../lib.mk diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c612d6e38c62..f0e6c35a93ae 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -7,3 +7,7 @@ reuseport_bpf_cpu reuseport_bpf_numa reuseport_dualstack reuseaddr_conflict +tcp_mmap +udpgso +udpgso_bench_rx +udpgso_bench_tx diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index c3761c35f542..df9102ec7b7a 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,12 +5,14 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh +TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh udpgso.sh +TEST_PROGS += udpgso_bench.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_FILES += tcp_mmap TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict +TEST_GEN_PROGS += udpgso udpgso_bench_tx udpgso_bench_rx include ../lib.mk diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c new file mode 100644 index 000000000000..48a0592db938 --- /dev/null +++ b/tools/testing/selftests/net/udpgso.c @@ -0,0 +1,620 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <stddef.h> +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <net/if.h> +#include <linux/in.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#ifndef ETH_MAX_MTU +#define ETH_MAX_MTU 0xFFFFU +#endif + +#ifndef UDP_SEGMENT +#define UDP_SEGMENT 103 +#endif + +#define CONST_MTU_TEST 1500 + +#define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr)) +#define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr)) + +#define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4) +#define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6) + +#define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4) +#define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6) + +static bool cfg_do_ipv4; +static bool cfg_do_ipv6; +static bool cfg_do_connected; +static bool cfg_do_connectionless; +static bool cfg_do_msgmore; +static bool cfg_do_setsockopt; +static int cfg_specific_test_id = -1; + +static const char cfg_ifname[] = "lo"; +static unsigned short cfg_port = 9000; + +static char buf[ETH_MAX_MTU]; + +struct testcase { + int tlen; /* send() buffer size, may exceed mss */ + bool tfail; /* send() call is expected to fail */ + int gso_len; /* mss after applying gso */ + int r_num_mss; /* recv(): number of calls of full mss */ + int r_len_last; /* recv(): size of last non-mss dgram, if any */ +}; + +const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; +const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; + +struct testcase testcases_v4[] = { + { + /* no GSO: send a single byte */ + .tlen = 1, + .r_len_last = 1, + }, + { + /* no GSO: send a single MSS */ + .tlen = CONST_MSS_V4, + .r_num_mss = 1, + }, + { + /* no GSO: send a single MSS + 1B: fail */ + .tlen = CONST_MSS_V4 + 1, + .tfail = true, + }, + { + /* send a single MSS: will fail with GSO, because the segment + * logic in udp4_ufo_fragment demands a gso skb to be > MTU + */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send a single MSS + 1B */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4, + .r_num_mss = 1, + .r_len_last = 1, + }, + { + /* send exactly 2 MSS */ + .tlen = CONST_MSS_V4 * 2, + .gso_len = CONST_MSS_V4, + .r_num_mss = 2, + }, + { + /* send 2 MSS + 1B */ + .tlen = (CONST_MSS_V4 * 2) + 1, + .gso_len = CONST_MSS_V4, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send MAX segs */ + .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4, + .gso_len = CONST_MSS_V4, + .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4), + }, + + { + /* send MAX bytes */ + .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4, + .gso_len = CONST_MSS_V4, + .r_num_mss = CONST_MAX_SEGS_V4, + .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 - + (CONST_MAX_SEGS_V4 * CONST_MSS_V4), + }, + { + /* send MAX + 1: fail */ + .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1, + .gso_len = CONST_MSS_V4, + .tfail = true, + }, + { + /* EOL */ + } +}; + +#ifndef IP6_MAX_MTU +#define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr)) +#endif + +struct testcase testcases_v6[] = { + { + /* no GSO: send a single byte */ + .tlen = 1, + .r_len_last = 1, + }, + { + /* no GSO: send a single MSS */ + .tlen = CONST_MSS_V6, + .r_num_mss = 1, + }, + { + /* no GSO: send a single MSS + 1B: fail */ + .tlen = CONST_MSS_V6 + 1, + .tfail = true, + }, + { + /* send a single MSS: will fail with GSO, because the segment + * logic in udp4_ufo_fragment demands a gso skb to be > MTU + */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6, + .tfail = true, + .r_num_mss = 1, + }, + { + /* send a single MSS + 1B */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6, + .r_num_mss = 1, + .r_len_last = 1, + }, + { + /* send exactly 2 MSS */ + .tlen = CONST_MSS_V6 * 2, + .gso_len = CONST_MSS_V6, + .r_num_mss = 2, + }, + { + /* send 2 MSS + 1B */ + .tlen = (CONST_MSS_V6 * 2) + 1, + .gso_len = CONST_MSS_V6, + .r_num_mss = 2, + .r_len_last = 1, + }, + { + /* send MAX segs */ + .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6, + .gso_len = CONST_MSS_V6, + .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6), + }, + + { + /* send MAX bytes */ + .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6, + .gso_len = CONST_MSS_V6, + .r_num_mss = CONST_MAX_SEGS_V6, + .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 - + (CONST_MAX_SEGS_V6 * CONST_MSS_V6), + }, + { + /* send MAX + 1: fail */ + .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1, + .gso_len = CONST_MSS_V6, + .tfail = true, + }, + { + /* EOL */ + } +}; + +static unsigned int get_device_mtu(int fd, const char *ifname) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + strcpy(ifr.ifr_name, ifname); + + if (ioctl(fd, SIOCGIFMTU, &ifr)) + error(1, errno, "ioctl get mtu"); + + return ifr.ifr_mtu; +} + +static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_mtu = mtu; + strcpy(ifr.ifr_name, ifname); + + if (ioctl(fd, SIOCSIFMTU, &ifr)) + error(1, errno, "ioctl set mtu"); +} + +static void set_device_mtu(int fd, int mtu) +{ + int val; + + val = get_device_mtu(fd, cfg_ifname); + fprintf(stderr, "device mtu (orig): %u\n", val); + + __set_device_mtu(fd, cfg_ifname, mtu); + val = get_device_mtu(fd, cfg_ifname); + if (val != mtu) + error(1, 0, "unable to set device mtu to %u\n", val); + + fprintf(stderr, "device mtu (test): %u\n", val); +} + +static void set_pmtu_discover(int fd, bool is_ipv4) +{ + int level, name, val; + + if (is_ipv4) { + level = SOL_IP; + name = IP_MTU_DISCOVER; + val = IP_PMTUDISC_DO; + } else { + level = SOL_IPV6; + name = IPV6_MTU_DISCOVER; + val = IPV6_PMTUDISC_DO; + } + + if (setsockopt(fd, level, name, &val, sizeof(val))) + error(1, errno, "setsockopt path mtu"); +} + +static unsigned int get_path_mtu(int fd, bool is_ipv4) +{ + socklen_t vallen; + unsigned int mtu; + int ret; + + vallen = sizeof(mtu); + if (is_ipv4) + ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen); + else + ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen); + + if (ret) + error(1, errno, "getsockopt mtu"); + + + fprintf(stderr, "path mtu (read): %u\n", mtu); + return mtu; +} + +/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */ +static void set_route_mtu(int mtu, bool is_ipv4) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct nlmsghdr *nh; + struct rtattr *rta; + struct rtmsg *rt; + char data[NLMSG_ALIGN(sizeof(*nh)) + + NLMSG_ALIGN(sizeof(*rt)) + + NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) + + NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) + + NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))]; + int fd, ret, alen, off = 0; + + alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd == -1) + error(1, errno, "socket netlink"); + + memset(data, 0, sizeof(data)); + + nh = (void *)data; + nh->nlmsg_type = RTM_NEWROUTE; + nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; + off += NLMSG_ALIGN(sizeof(*nh)); + + rt = (void *)(data + off); + rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6; + rt->rtm_table = RT_TABLE_MAIN; + rt->rtm_dst_len = alen << 3; + rt->rtm_protocol = RTPROT_BOOT; + rt->rtm_scope = RT_SCOPE_UNIVERSE; + rt->rtm_type = RTN_UNICAST; + off += NLMSG_ALIGN(sizeof(*rt)); + + rta = (void *)(data + off); + rta->rta_type = RTA_DST; + rta->rta_len = RTA_LENGTH(alen); + if (is_ipv4) + memcpy(RTA_DATA(rta), &addr4, alen); + else + memcpy(RTA_DATA(rta), &addr6, alen); + off += NLMSG_ALIGN(rta->rta_len); + + rta = (void *)(data + off); + rta->rta_type = RTA_OIF; + rta->rta_len = RTA_LENGTH(sizeof(int)); + *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo"); + off += NLMSG_ALIGN(rta->rta_len); + + /* MTU is a subtype in a metrics type */ + rta = (void *)(data + off); + rta->rta_type = RTA_METRICS; + rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)); + off += NLMSG_ALIGN(rta->rta_len); + + /* now fill MTU subtype. Note that it fits within above rta_len */ + rta = (void *)(((char *) rta) + RTA_LENGTH(0)); + rta->rta_type = RTAX_MTU; + rta->rta_len = RTA_LENGTH(sizeof(int)); + *((int *)(RTA_DATA(rta))) = mtu; + + nh->nlmsg_len = off; + + ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr)); + if (ret != off) + error(1, errno, "send netlink: %uB != %uB\n", ret, off); + + if (close(fd)) + error(1, errno, "close netlink"); + + fprintf(stderr, "route mtu (test): %u\n", mtu); +} + +static bool __send_one(int fd, struct msghdr *msg, int flags) +{ + int ret; + + ret = sendmsg(fd, msg, flags); + if (ret == -1 && (errno == EMSGSIZE || errno == ENOMEM)) + return false; + if (ret == -1) + error(1, errno, "sendmsg"); + if (ret != msg->msg_iov->iov_len) + error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len); + if (msg->msg_flags) + error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags); + + return true; +} + +static bool send_one(int fd, int len, int gso_len, + struct sockaddr *addr, socklen_t alen) +{ + char control[CMSG_SPACE(sizeof(uint16_t))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cm; + + iov.iov_base = buf; + iov.iov_len = len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_name = addr; + msg.msg_namelen = alen; + + if (gso_len && !cfg_do_setsockopt) { + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cm = CMSG_FIRSTHDR(&msg); + cm->cmsg_level = SOL_UDP; + cm->cmsg_type = UDP_SEGMENT; + cm->cmsg_len = CMSG_LEN(sizeof(uint16_t)); + *((uint16_t *) CMSG_DATA(cm)) = gso_len; + } + + /* If MSG_MORE, send 1 byte followed by remainder */ + if (cfg_do_msgmore && len > 1) { + iov.iov_len = 1; + if (!__send_one(fd, &msg, MSG_MORE)) + error(1, 0, "send 1B failed"); + + iov.iov_base++; + iov.iov_len = len - 1; + } + + return __send_one(fd, &msg, 0); +} + +static int recv_one(int fd, int flags) +{ + int ret; + + ret = recv(fd, buf, sizeof(buf), flags); + if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT)) + return 0; + if (ret == -1) + error(1, errno, "recv"); + + return ret; +} + +static void run_one(struct testcase *test, int fdt, int fdr, + struct sockaddr *addr, socklen_t alen) +{ + int i, ret, val, mss; + bool sent; + + fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + addr->sa_family == AF_INET ? 4 : 6, + test->tlen, test->gso_len, + test->tfail ? "(fail)" : ""); + + val = test->gso_len; + if (cfg_do_setsockopt) { + if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) + error(1, errno, "setsockopt udp segment"); + } + + sent = send_one(fdt, test->tlen, test->gso_len, addr, alen); + if (sent && test->tfail) + error(1, 0, "send succeeded while expecting failure"); + if (!sent && !test->tfail) + error(1, 0, "send failed while expecting success"); + if (!sent) + return; + + mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6; + + /* Recv all full MSS datagrams */ + for (i = 0; i < test->r_num_mss; i++) { + ret = recv_one(fdr, 0); + if (ret != mss) + error(1, 0, "recv.%d: %d != %d", i, ret, mss); + } + + /* Recv the non-full last datagram, if tlen was not a multiple of mss */ + if (test->r_len_last) { + ret = recv_one(fdr, 0); + if (ret != test->r_len_last) + error(1, 0, "recv.%d: %d != %d (last)", + i, ret, test->r_len_last); + } + + /* Verify received all data */ + ret = recv_one(fdr, MSG_DONTWAIT); + if (ret) + error(1, 0, "recv: unexpected datagram"); +} + +static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen) +{ + struct testcase *tests, *test; + + tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6; + + for (test = tests; test->tlen; test++) { + /* if a specific test is given, then skip all others */ + if (cfg_specific_test_id == -1 || + cfg_specific_test_id == test - tests) + run_one(test, fdt, fdr, addr, alen); + } +} + +static void run_test(struct sockaddr *addr, socklen_t alen) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + int fdr, fdt, val; + + fdr = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fdr == -1) + error(1, errno, "socket r"); + + if (bind(fdr, addr, alen)) + error(1, errno, "bind"); + + /* Have tests fail quickly instead of hang */ + if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + fdt = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fdt == -1) + error(1, errno, "socket t"); + + /* Do not fragment these datagrams: only succeed if GSO works */ + set_pmtu_discover(fdt, addr->sa_family == AF_INET); + + if (cfg_do_connectionless) { + set_device_mtu(fdt, CONST_MTU_TEST); + run_all(fdt, fdr, addr, alen); + } + + if (cfg_do_connected) { + set_device_mtu(fdt, CONST_MTU_TEST + 100); + set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET); + + if (connect(fdt, addr, alen)) + error(1, errno, "connect"); + + val = get_path_mtu(fdt, addr->sa_family == AF_INET); + if (val != CONST_MTU_TEST) + error(1, 0, "bad path mtu %u\n", val); + + run_all(fdt, fdr, addr, 0 /* use connected addr */); + } + + if (close(fdt)) + error(1, errno, "close t"); + if (close(fdr)) + error(1, errno, "close r"); +} + +static void run_test_v4(void) +{ + struct sockaddr_in addr = {0}; + + addr.sin_family = AF_INET; + addr.sin_port = htons(cfg_port); + addr.sin_addr = addr4; + + run_test((void *)&addr, sizeof(addr)); +} + +static void run_test_v6(void) +{ + struct sockaddr_in6 addr = {0}; + + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(cfg_port); + addr.sin6_addr = addr6; + + run_test((void *)&addr, sizeof(addr)); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "46cCmst:")) != -1) { + switch (c) { + case '4': + cfg_do_ipv4 = true; + break; + case '6': + cfg_do_ipv6 = true; + break; + case 'c': + cfg_do_connected = true; + break; + case 'C': + cfg_do_connectionless = true; + break; + case 'm': + cfg_do_msgmore = true; + break; + case 's': + cfg_do_setsockopt = true; + break; + case 't': + cfg_specific_test_id = strtoul(optarg, NULL, 0); + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_do_ipv4) + run_test_v4(); + if (cfg_do_ipv6) + run_test_v6(); + + fprintf(stderr, "OK\n"); + return 0; +} diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh new file mode 100755 index 000000000000..fec24f584fe9 --- /dev/null +++ b/tools/testing/selftests/net/udpgso.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgso regression tests + +echo "ipv4 cmsg" +./in_netns.sh ./udpgso -4 -C + +echo "ipv4 setsockopt" +./in_netns.sh ./udpgso -4 -C -s + +echo "ipv6 cmsg" +./in_netns.sh ./udpgso -6 -C + +echo "ipv6 setsockopt" +./in_netns.sh ./udpgso -6 -C -s + +echo "ipv4 connected" +./in_netns.sh ./udpgso -4 -c + +# blocked on 2nd loopback address +# echo "ipv6 connected" +# ./in_netns.sh ./udpgso -6 -c + +echo "ipv4 msg_more" +./in_netns.sh ./udpgso -4 -C -m + +echo "ipv6 msg_more" +./in_netns.sh ./udpgso -6 -C -m diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh new file mode 100755 index 000000000000..792fa4d0285e --- /dev/null +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -0,0 +1,74 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgso benchmarks + +wake_children() { + local -r jobs="$(jobs -p)" + + if [[ "${jobs}" != "" ]]; then + kill -1 ${jobs} 2>/dev/null + fi +} +trap wake_children EXIT + +run_one() { + local -r args=$@ + + ./udpgso_bench_rx & + ./udpgso_bench_rx -t & + + ./udpgso_bench_tx ${args} +} + +run_in_netns() { + local -r args=$@ + + ./in_netns.sh $0 __subprocess ${args} +} + +run_udp() { + local -r args=$@ + + echo "udp" + run_in_netns ${args} + + echo "udp gso" + run_in_netns ${args} -S + + echo "udp gso zerocopy" + run_in_netns ${args} -S -z +} + +run_tcp() { + local -r args=$@ + + echo "tcp" + run_in_netns ${args} -t + + echo "tcp zerocopy" + run_in_netns ${args} -t -z +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 127.0.0.1" + local -r ipv6_args="${core_args} -6 -D ::1" + + echo "ipv4" + run_tcp "${ipv4_args}" + run_udp "${ipv4_args}" + + echo "ipv6" + run_tcp "${ipv4_args}" + run_udp "${ipv6_args}" +} + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +else + run_in_netns $@ +fi diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c new file mode 100644 index 000000000000..727cf67a3f75 --- /dev/null +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <limits.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +static int cfg_port = 8000; +static bool cfg_tcp; +static bool cfg_verify; + +static bool interrupted; +static unsigned long packets, bytes; + +static void sigint_handler(int signum) +{ + if (signum == SIGINT) + interrupted = true; +} + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void do_poll(int fd) +{ + struct pollfd pfd; + int ret; + + pfd.events = POLLIN; + pfd.revents = 0; + pfd.fd = fd; + + do { + ret = poll(&pfd, 1, 10); + if (ret == -1) + error(1, errno, "poll"); + if (ret == 0) + continue; + if (pfd.revents != POLLIN) + error(1, errno, "poll: 0x%x expected 0x%x\n", + pfd.revents, POLLIN); + } while (!ret && !interrupted); +} + +static int do_socket(bool do_tcp) +{ + struct sockaddr_in6 addr = {0}; + int fd, val; + + fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket"); + + val = 1 << 21; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val))) + error(1, errno, "setsockopt rcvbuf"); + val = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val))) + error(1, errno, "setsockopt reuseport"); + + addr.sin6_family = PF_INET6; + addr.sin6_port = htons(cfg_port); + addr.sin6_addr = in6addr_any; + if (bind(fd, (void *) &addr, sizeof(addr))) + error(1, errno, "bind"); + + if (do_tcp) { + int accept_fd = fd; + + if (listen(accept_fd, 1)) + error(1, errno, "listen"); + + do_poll(accept_fd); + + fd = accept(accept_fd, NULL, NULL); + if (fd == -1) + error(1, errno, "accept"); + if (close(accept_fd)) + error(1, errno, "close accept fd"); + } + + return fd; +} + +/* Flush all outstanding bytes for the tcp receive queue */ +static void do_flush_tcp(int fd) +{ + int ret; + + while (true) { + /* MSG_TRUNC flushes up to len bytes */ + ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + return; + if (ret == -1) + error(1, errno, "flush"); + if (ret == 0) { + /* client detached */ + exit(0); + } + + packets++; + bytes += ret; + } + +} + +static char sanitized_char(char val) +{ + return (val >= 'a' && val <= 'z') ? val : '.'; +} + +static void do_verify_udp(const char *data, int len) +{ + char cur = data[0]; + int i; + + /* verify contents */ + if (cur < 'a' || cur > 'z') + error(1, 0, "data initial byte out of range"); + + for (i = 1; i < len; i++) { + if (cur == 'z') + cur = 'a'; + else + cur++; + + if (data[i] != cur) + error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n", + i, len, + sanitized_char(data[i]), data[i], + sanitized_char(cur), cur); + } +} + +/* Flush all outstanding datagrams. Verify first few bytes of each. */ +static void do_flush_udp(int fd) +{ + static char rbuf[ETH_DATA_LEN]; + int ret, len, budget = 256; + + len = cfg_verify ? sizeof(rbuf) : 0; + while (budget--) { + /* MSG_TRUNC will make return value full datagram length */ + ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + return; + if (ret == -1) + error(1, errno, "recv"); + if (len) { + if (ret == 0) + error(1, errno, "recv: 0 byte datagram\n"); + + do_verify_udp(rbuf, ret); + } + + packets++; + bytes += ret; + } +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s [-tv] [-p port]", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "ptv")) != -1) { + switch (c) { + case 'p': + cfg_port = htons(strtoul(optarg, NULL, 0)); + break; + case 't': + cfg_tcp = true; + break; + case 'v': + cfg_verify = true; + break; + } + } + + if (optind != argc) + usage(argv[0]); + + if (cfg_tcp && cfg_verify) + error(1, 0, "TODO: implement verify mode for tcp"); +} + +static void do_recv(void) +{ + unsigned long tnow, treport; + int fd; + + fd = do_socket(cfg_tcp); + + treport = gettimeofday_ms() + 1000; + do { + do_poll(fd); + + if (cfg_tcp) + do_flush_tcp(fd); + else + do_flush_udp(fd); + + tnow = gettimeofday_ms(); + if (tnow > treport) { + if (packets) + fprintf(stderr, + "%s rx: %6lu MB/s %8lu calls/s\n", + cfg_tcp ? "tcp" : "udp", + bytes >> 20, packets); + bytes = packets = 0; + treport = tnow + 1000; + } + + } while (!interrupted); + + if (close(fd)) + error(1, errno, "close"); +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + signal(SIGINT, sigint_handler); + + do_recv(); + + return 0; +} diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c new file mode 100644 index 000000000000..e821564053cf --- /dev/null +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#ifndef ETH_MAX_MTU +#define ETH_MAX_MTU 0xFFFFU +#endif + +#ifndef UDP_SEGMENT +#define UDP_SEGMENT 103 +#endif + +#ifndef SO_ZEROCOPY +#define SO_ZEROCOPY 60 +#endif + +#ifndef MSG_ZEROCOPY +#define MSG_ZEROCOPY 0x4000000 +#endif + +#define NUM_PKT 100 + +static bool cfg_cache_trash; +static int cfg_cpu = -1; +static int cfg_connected = true; +static int cfg_family = PF_UNSPEC; +static uint16_t cfg_mss; +static int cfg_payload_len = (1472 * 42); +static int cfg_port = 8000; +static int cfg_runtime_ms = -1; +static bool cfg_segment; +static bool cfg_sendmmsg; +static bool cfg_tcp; +static bool cfg_zerocopy; + +static socklen_t cfg_alen; +static struct sockaddr_storage cfg_dst_addr; + +static bool interrupted; +static char buf[NUM_PKT][ETH_MAX_MTU]; + +static void sigint_handler(int signum) +{ + if (signum == SIGINT) + interrupted = true; +} + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static int set_cpu(int cpu) +{ + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) + error(1, 0, "setaffinity %d", cpu); + + return 0; +} + +static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + +static void flush_zerocopy(int fd) +{ + struct msghdr msg = {0}; /* flush */ + int ret; + + while (1) { + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "errqueue"); + if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC)) + error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags); + msg.msg_flags = 0; + } +} + +static int send_tcp(int fd, char *data) +{ + int ret, done = 0, count = 0; + + while (done < cfg_payload_len) { + ret = send(fd, data + done, cfg_payload_len - done, + cfg_zerocopy ? MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "write"); + + done += ret; + count++; + } + + return count; +} + +static int send_udp(int fd, char *data) +{ + int ret, total_len, len, count = 0; + + total_len = cfg_payload_len; + + while (total_len) { + len = total_len < cfg_mss ? total_len : cfg_mss; + + ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0, + cfg_connected ? NULL : (void *)&cfg_dst_addr, + cfg_connected ? 0 : cfg_alen); + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, errno, "write: %uB != %uB\n", ret, len); + + total_len -= len; + count++; + } + + return count; +} + +static int send_udp_sendmmsg(int fd, char *data) +{ + const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN; + struct mmsghdr mmsgs[max_nr_msg]; + struct iovec iov[max_nr_msg]; + unsigned int off = 0, left; + int i = 0, ret; + + memset(mmsgs, 0, sizeof(mmsgs)); + + left = cfg_payload_len; + while (left) { + if (i == max_nr_msg) + error(1, 0, "sendmmsg: exceeds max_nr_msg"); + + iov[i].iov_base = data + off; + iov[i].iov_len = cfg_mss < left ? cfg_mss : left; + + mmsgs[i].msg_hdr.msg_iov = iov + i; + mmsgs[i].msg_hdr.msg_iovlen = 1; + + off += iov[i].iov_len; + left -= iov[i].iov_len; + i++; + } + + ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "sendmmsg"); + + return ret; +} + +static void send_udp_segment_cmsg(struct cmsghdr *cm) +{ + uint16_t *valp; + + cm->cmsg_level = SOL_UDP; + cm->cmsg_type = UDP_SEGMENT; + cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss)); + valp = (void *)CMSG_DATA(cm); + *valp = cfg_mss; +} + +static int send_udp_segment(int fd, char *data) +{ + char control[CMSG_SPACE(sizeof(cfg_mss))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + int ret; + + iov.iov_base = data; + iov.iov_len = cfg_payload_len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg)); + + msg.msg_name = (void *)&cfg_dst_addr; + msg.msg_namelen = cfg_alen; + + ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0); + if (ret == -1) + error(1, errno, "sendmsg"); + if (ret != iov.iov_len) + error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len); + + return 1; +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]", + filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int max_len, hdrlen; + int c; + + while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) { + switch (c) { + case '4': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'c': + cfg_cache_trash = true; + break; + case 'C': + cfg_cpu = strtol(optarg, NULL, 0); + break; + case 'D': + setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + break; + case 'l': + cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; + break; + case 'm': + cfg_sendmmsg = true; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 's': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'S': + cfg_segment = true; + break; + case 't': + cfg_tcp = true; + break; + case 'u': + cfg_connected = false; + break; + case 'z': + cfg_zerocopy = true; + break; + } + } + + if (optind != argc) + usage(argv[0]); + + if (cfg_family == PF_UNSPEC) + error(1, 0, "must pass one of -4 or -6"); + if (cfg_tcp && !cfg_connected) + error(1, 0, "connectionless tcp makes no sense"); + if (cfg_segment && cfg_sendmmsg) + error(1, 0, "cannot combine segment offload and sendmmsg"); + + if (cfg_family == PF_INET) + hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr); + else + hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr); + + cfg_mss = ETH_DATA_LEN - hdrlen; + max_len = ETH_MAX_MTU - hdrlen; + + if (cfg_payload_len > max_len) + error(1, 0, "payload length %u exceeds max %u", + cfg_payload_len, max_len); +} + +static void set_pmtu_discover(int fd, bool is_ipv4) +{ + int level, name, val; + + if (is_ipv4) { + level = SOL_IP; + name = IP_MTU_DISCOVER; + val = IP_PMTUDISC_DO; + } else { + level = SOL_IPV6; + name = IPV6_MTU_DISCOVER; + val = IPV6_PMTUDISC_DO; + } + + if (setsockopt(fd, level, name, &val, sizeof(val))) + error(1, errno, "setsockopt path mtu"); +} + +int main(int argc, char **argv) +{ + unsigned long num_msgs, num_sends; + unsigned long tnow, treport, tstop; + int fd, i, val; + + parse_opts(argc, argv); + + if (cfg_cpu > 0) + set_cpu(cfg_cpu); + + for (i = 0; i < sizeof(buf[0]); i++) + buf[0][i] = 'a' + (i % 26); + for (i = 1; i < NUM_PKT; i++) + memcpy(buf[i], buf[0], sizeof(buf[0])); + + signal(SIGINT, sigint_handler); + + fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket"); + + if (cfg_zerocopy) { + val = 1; + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) + error(1, errno, "setsockopt zerocopy"); + } + + if (cfg_connected && + connect(fd, (void *)&cfg_dst_addr, cfg_alen)) + error(1, errno, "connect"); + + if (cfg_segment) + set_pmtu_discover(fd, cfg_family == PF_INET); + + num_msgs = num_sends = 0; + tnow = gettimeofday_ms(); + tstop = tnow + cfg_runtime_ms; + treport = tnow + 1000; + + i = 0; + do { + if (cfg_tcp) + num_sends += send_tcp(fd, buf[i]); + else if (cfg_segment) + num_sends += send_udp_segment(fd, buf[i]); + else if (cfg_sendmmsg) + num_sends += send_udp_sendmmsg(fd, buf[i]); + else + num_sends += send_udp(fd, buf[i]); + num_msgs++; + + if (cfg_zerocopy && ((num_msgs & 0xF) == 0)) + flush_zerocopy(fd); + + tnow = gettimeofday_ms(); + if (tnow > treport) { + fprintf(stderr, + "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n", + cfg_tcp ? "tcp" : "udp", + (num_msgs * cfg_payload_len) >> 20, + num_sends, num_msgs); + num_msgs = num_sends = 0; + treport = tnow + 1000; + } + + /* cold cache when writing buffer */ + if (cfg_cache_trash) + i = ++i < NUM_PKT ? i : 0; + + } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); + + if (close(fd)) + error(1, errno, "close"); + + return 0; +} diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json index 9f34f0753969..03777730ef29 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -1,7 +1,7 @@ [ { - "id": "a568", - "name": "Add action with ife type", + "id": "7682", + "name": "Create valid ife encode action with mark and pass control", "category": [ "actions", "ife" @@ -12,21 +12,710 @@ 0, 1, 255 - ], - "$TC actions add action ife encode type 0xDEAD index 1" + ] ], - "cmdUnderTest": "$TC actions get action ife index 1", + "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "ef47", + "name": "Create valid ife encode action with mark and pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "df43", + "name": "Create valid ife encode action with mark and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*allow mark.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "e4cf", + "name": "Create valid ife encode action with mark and drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*use mark 789.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "ccba", + "name": "Create valid ife encode action with mark and reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 656768.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "a1cf", + "name": "Create valid ife encode action with mark and jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 2", + "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0xED3E.*use mark 65.*index 2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "cb3d", + "name": "Create valid ife encode action with mark value at 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 90", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 4294967295.*index 90", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "1efb", + "name": "Create ife encode action with mark value exceeding 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 90", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark 4294967295999.*index 90", + "matchCount": "0", + "teardown": [] + }, + { + "id": "95ed", + "name": "Create valid ife encode action with prio and pass control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow prio.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "aa17", + "name": "Create valid ife encode action with prio and pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 7.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "74c7", + "name": "Create valid ife encode action with prio and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use prio 3.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "7a97", + "name": "Create valid ife encode action with prio and drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow prio.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "f66b", + "name": "Create valid ife encode action with prio and reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 998877.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "3056", + "name": "Create valid ife encode action with prio and jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 9", + "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0xED3E.*use prio 998877.*index 9", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "7dd3", + "name": "Create valid ife encode action with prio value at 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 99", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 4294967295.*index 99", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "2ca1", + "name": "Create ife encode action with prio value exceeding 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 99", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 4294967298.*index 99", + "matchCount": "0", + "teardown": [] + }, + { + "id": "05bb", + "name": "Create valid ife encode action with tcindex and pass control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "ce65", + "name": "Create valid ife encode action with tcindex and pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 111.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "09cd", + "name": "Create valid ife encode action with tcindex and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "8eb5", + "name": "Create valid ife encode action with tcindex and continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "451a", + "name": "Create valid ife encode action with tcindex and drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 77", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow tcindex.*index 77", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "d76c", + "name": "Create valid ife encode action with tcindex and reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 77", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*allow tcindex.*index 77", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "e731", + "name": "Create valid ife encode action with tcindex and jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 77", + "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0xED3E.*allow tcindex.*index 77", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "b7b8", + "name": "Create valid ife encode action with tcindex value at 16-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*use tcindex 65535.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "d0d8", + "name": "Create ife encode action with tcindex value exceeding 16-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 65539.*index 1", + "matchCount": "0", + "teardown": [] + }, + { + "id": "2a9c", + "name": "Create valid ife encode action with mac src parameter", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow mark src 00:11:22:33:44:55.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "cf5c", + "name": "Create valid ife encode action with mac dst parameter", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "2353", + "name": "Create valid ife encode action with mac src and mac dst parameters", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 11", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "552c", + "name": "Create valid ife encode action with mark and type parameters", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "type 0xDEAD", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xFEFE.*use mark 7.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "0421", + "name": "Create valid ife encode action with prio and type parameters", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 21", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xABBA.*use prio 444.*index 21", "matchCount": "1", "teardown": [ "$TC actions flush action ife" ] }, { - "id": "b983", - "name": "Add action without ife type", + "id": "4017", + "name": "Create valid ife encode action with tcindex and type parameters", "category": [ "actions", "ife" @@ -37,16 +726,339 @@ 0, 1, 255 - ], - "$TC actions add action ife encode index 1" + ] ], - "cmdUnderTest": "$TC actions get action ife index 1", + "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 21", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xABCD.*use tcindex 5000.*index 21", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "fac3", + "name": "Create valid ife encode action with index at 32-bit maximnum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 4294967295", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "7c25", + "name": "Create valid ife decode action with pass control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "type 0xED3E", + "matchPattern": "action order [0-9]*: ife decode action pass.*type 0x0.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" ] + }, + { + "id": "dccb", + "name": "Create valid ife decode action with pipe control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "7bb9", + "name": "Create valid ife decode action with continue control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode continue index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action continue.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "d9ad", + "name": "Create valid ife decode action with drop control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode drop index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action drop.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "219f", + "name": "Create valid ife decode action with reclassify control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode reclassify index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "8f44", + "name": "Create valid ife decode action with jump control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "56cf", + "name": "Create ife encode action with index exceeding 32-bit maximum", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4294967295999", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295999", + "matchCount": "0", + "teardown": [] + }, + { + "id": "ee94", + "name": "Create ife encode action with invalid control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0xED3E.*allow mark.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "b330", + "name": "Create ife encode action with cookie", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "bbc0", + "name": "Create ife encode action with invalid argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow foo.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "d54a", + "name": "Create ife encode action with invalid type argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0x11170.*allow prio.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "7ee0", + "name": "Create ife encode action with invalid mac src argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio src 00:11:22:33:44:pp pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4", + "matchCount": "0", + "teardown": [] + }, + { + "id": "0a7d", + "name": "Create ife encode action with invalid mac dst argument", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ife encode allow prio dst 00.111-22:33:44:aa pipe index 4", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4", + "matchCount": "0", + "teardown": [] } ] |